001/* 002 * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. 003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 004 * 005 * This code is free software; you can redistribute it and/or modify it 006 * under the terms of the GNU General Public License version 2 only, as 007 * published by the Free Software Foundation. 008 * 009 * This code is distributed in the hope that it will be useful, but WITHOUT 010 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 011 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 012 * version 2 for more details (a copy is included in the LICENSE file that 013 * accompanied this code). 014 * 015 * You should have received a copy of the GNU General Public License version 016 * 2 along with this work; if not, write to the Free Software Foundation, 017 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 018 * 019 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 020 * or visit www.oracle.com if you need additional information or have any 021 * questions. 022 */ 023package com.oracle.graal.lir.amd64; 024 025import static com.oracle.graal.lir.LIRInstruction.OperandFlag.*; 026import static jdk.internal.jvmci.code.ValueUtil.*; 027import static jdk.internal.jvmci.common.UnsafeAccess.*; 028 029import java.lang.reflect.*; 030 031import jdk.internal.jvmci.amd64.*; 032import jdk.internal.jvmci.amd64.AMD64.*; 033import jdk.internal.jvmci.code.*; 034import jdk.internal.jvmci.meta.*; 035 036import com.oracle.graal.asm.*; 037import com.oracle.graal.asm.amd64.*; 038import com.oracle.graal.asm.amd64.AMD64Address.*; 039import com.oracle.graal.asm.amd64.AMD64Assembler.*; 040import com.oracle.graal.lir.*; 041import com.oracle.graal.lir.asm.*; 042import com.oracle.graal.lir.gen.*; 043 044/** 045 * Emits code which compares two arrays of the same length. If the CPU supports any vector 046 * instructions specialized code is emitted to leverage these instructions. 047 */ 048@Opcode("ARRAY_EQUALS") 049public final class AMD64ArrayEqualsOp extends AMD64LIRInstruction { 050 public static final LIRInstructionClass<AMD64ArrayEqualsOp> TYPE = LIRInstructionClass.create(AMD64ArrayEqualsOp.class); 051 052 private final Kind kind; 053 private final int arrayBaseOffset; 054 private final int arrayIndexScale; 055 056 @Def({REG}) protected Value resultValue; 057 @Alive({REG}) protected Value array1Value; 058 @Alive({REG}) protected Value array2Value; 059 @Alive({REG}) protected Value lengthValue; 060 @Temp({REG}) protected Value temp1; 061 @Temp({REG}) protected Value temp2; 062 @Temp({REG}) protected Value temp3; 063 @Temp({REG}) protected Value temp4; 064 @Temp({REG, ILLEGAL}) protected Value vectorTemp1; 065 @Temp({REG, ILLEGAL}) protected Value vectorTemp2; 066 067 public AMD64ArrayEqualsOp(LIRGeneratorTool tool, Kind kind, Value result, Value array1, Value array2, Value length) { 068 super(TYPE); 069 this.kind = kind; 070 071 Class<?> arrayClass = Array.newInstance(kind.toJavaClass(), 0).getClass(); 072 this.arrayBaseOffset = unsafe.arrayBaseOffset(arrayClass); 073 this.arrayIndexScale = unsafe.arrayIndexScale(arrayClass); 074 075 this.resultValue = result; 076 this.array1Value = array1; 077 this.array2Value = array2; 078 this.lengthValue = length; 079 080 // Allocate some temporaries. 081 this.temp1 = tool.newVariable(LIRKind.unknownReference(tool.target().wordKind)); 082 this.temp2 = tool.newVariable(LIRKind.unknownReference(tool.target().wordKind)); 083 this.temp3 = tool.newVariable(LIRKind.value(tool.target().wordKind)); 084 this.temp4 = tool.newVariable(LIRKind.value(tool.target().wordKind)); 085 086 // We only need the vector temporaries if we generate SSE code. 087 if (supportsSSE41(tool.target())) { 088 this.vectorTemp1 = tool.newVariable(LIRKind.value(Kind.Double)); 089 this.vectorTemp2 = tool.newVariable(LIRKind.value(Kind.Double)); 090 } else { 091 this.vectorTemp1 = Value.ILLEGAL; 092 this.vectorTemp2 = Value.ILLEGAL; 093 } 094 } 095 096 @Override 097 public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { 098 Register result = asRegister(resultValue); 099 Register array1 = asRegister(temp1); 100 Register array2 = asRegister(temp2); 101 Register length = asRegister(temp3); 102 103 Label trueLabel = new Label(); 104 Label falseLabel = new Label(); 105 Label done = new Label(); 106 107 // Load array base addresses. 108 masm.leaq(array1, new AMD64Address(asRegister(array1Value), arrayBaseOffset)); 109 masm.leaq(array2, new AMD64Address(asRegister(array2Value), arrayBaseOffset)); 110 111 // Get array length in bytes. 112 masm.imull(length, asRegister(lengthValue), arrayIndexScale); 113 masm.movl(result, length); // copy 114 115 if (supportsSSE41(crb.target)) { 116 emitSSE41Compare(crb, masm, result, array1, array2, length, trueLabel, falseLabel); 117 } 118 119 emit8ByteCompare(crb, masm, result, array1, array2, length, trueLabel, falseLabel); 120 emitTailCompares(masm, result, array1, array2, length, trueLabel, falseLabel); 121 122 // Return true 123 masm.bind(trueLabel); 124 masm.movl(result, 1); 125 masm.jmpb(done); 126 127 // Return false 128 masm.bind(falseLabel); 129 masm.xorl(result, result); 130 131 // That's it 132 masm.bind(done); 133 } 134 135 /** 136 * Returns if the underlying AMD64 architecture supports SSE 4.1 instructions. 137 * 138 * @param target target description of the underlying architecture 139 * @return true if the underlying architecture supports SSE 4.1 140 */ 141 private static boolean supportsSSE41(TargetDescription target) { 142 AMD64 arch = (AMD64) target.arch; 143 return arch.getFeatures().contains(CPUFeature.SSE4_1); 144 } 145 146 /** 147 * Vector size used in {@link #emitSSE41Compare}. 148 */ 149 private static final int SSE4_1_VECTOR_SIZE = 16; 150 151 /** 152 * Emits code that uses SSE4.1 128-bit (16-byte) vector compares. 153 */ 154 private void emitSSE41Compare(CompilationResultBuilder crb, AMD64MacroAssembler masm, Register result, Register array1, Register array2, Register length, Label trueLabel, Label falseLabel) { 155 assert supportsSSE41(crb.target); 156 157 Register vector1 = asDoubleReg(vectorTemp1); 158 Register vector2 = asDoubleReg(vectorTemp2); 159 160 Label loop = new Label(); 161 Label compareTail = new Label(); 162 163 // Compare 16-byte vectors 164 masm.andl(result, SSE4_1_VECTOR_SIZE - 1); // tail count (in bytes) 165 masm.andl(length, ~(SSE4_1_VECTOR_SIZE - 1)); // vector count (in bytes) 166 masm.jccb(ConditionFlag.Zero, compareTail); 167 168 masm.leaq(array1, new AMD64Address(array1, length, Scale.Times1, 0)); 169 masm.leaq(array2, new AMD64Address(array2, length, Scale.Times1, 0)); 170 masm.negq(length); 171 172 // Align the main loop 173 masm.align(crb.target.wordSize * 2); 174 masm.bind(loop); 175 masm.movdqu(vector1, new AMD64Address(array1, length, Scale.Times1, 0)); 176 masm.movdqu(vector2, new AMD64Address(array2, length, Scale.Times1, 0)); 177 masm.pxor(vector1, vector2); 178 masm.ptest(vector1, vector1); 179 masm.jcc(ConditionFlag.NotZero, falseLabel); 180 masm.addq(length, SSE4_1_VECTOR_SIZE); 181 masm.jcc(ConditionFlag.NotZero, loop); 182 183 masm.testl(result, result); 184 masm.jcc(ConditionFlag.Zero, trueLabel); 185 186 /* 187 * Compare the remaining bytes with an unaligned memory load aligned to the end of the 188 * array. 189 */ 190 masm.movdqu(vector1, new AMD64Address(array1, result, Scale.Times1, -SSE4_1_VECTOR_SIZE)); 191 masm.movdqu(vector2, new AMD64Address(array2, result, Scale.Times1, -SSE4_1_VECTOR_SIZE)); 192 masm.pxor(vector1, vector2); 193 masm.ptest(vector1, vector1); 194 masm.jcc(ConditionFlag.NotZero, falseLabel); 195 masm.jmp(trueLabel); 196 197 masm.bind(compareTail); 198 masm.movl(length, result); 199 } 200 201 /** 202 * Vector size used in {@link #emit8ByteCompare}. 203 */ 204 private static final int VECTOR_SIZE = 8; 205 206 /** 207 * Emits code that uses 8-byte vector compares. 208 */ 209 private void emit8ByteCompare(CompilationResultBuilder crb, AMD64MacroAssembler masm, Register result, Register array1, Register array2, Register length, Label trueLabel, Label falseLabel) { 210 Label loop = new Label(); 211 Label compareTail = new Label(); 212 213 Register temp = asRegister(temp4); 214 215 masm.andl(result, VECTOR_SIZE - 1); // tail count (in bytes) 216 masm.andl(length, ~(VECTOR_SIZE - 1)); // vector count (in bytes) 217 masm.jccb(ConditionFlag.Zero, compareTail); 218 219 masm.leaq(array1, new AMD64Address(array1, length, Scale.Times1, 0)); 220 masm.leaq(array2, new AMD64Address(array2, length, Scale.Times1, 0)); 221 masm.negq(length); 222 223 // Align the main loop 224 masm.align(crb.target.wordSize * 2); 225 masm.bind(loop); 226 masm.movq(temp, new AMD64Address(array1, length, Scale.Times1, 0)); 227 masm.cmpq(temp, new AMD64Address(array2, length, Scale.Times1, 0)); 228 masm.jccb(ConditionFlag.NotEqual, falseLabel); 229 masm.addq(length, VECTOR_SIZE); 230 masm.jccb(ConditionFlag.NotZero, loop); 231 232 masm.testl(result, result); 233 masm.jccb(ConditionFlag.Zero, trueLabel); 234 235 /* 236 * Compare the remaining bytes with an unaligned memory load aligned to the end of the 237 * array. 238 */ 239 masm.movq(temp, new AMD64Address(array1, result, Scale.Times1, -VECTOR_SIZE)); 240 masm.cmpq(temp, new AMD64Address(array2, result, Scale.Times1, -VECTOR_SIZE)); 241 masm.jccb(ConditionFlag.NotEqual, falseLabel); 242 masm.jmpb(trueLabel); 243 244 masm.bind(compareTail); 245 masm.movl(length, result); 246 } 247 248 /** 249 * Emits code to compare the remaining 1 to 4 bytes. 250 */ 251 private void emitTailCompares(AMD64MacroAssembler masm, Register result, Register array1, Register array2, Register length, Label trueLabel, Label falseLabel) { 252 Label compare2Bytes = new Label(); 253 Label compare1Byte = new Label(); 254 255 Register temp = asRegister(temp4); 256 257 if (kind.getByteCount() <= 4) { 258 // Compare trailing 4 bytes, if any. 259 masm.testl(result, 4); 260 masm.jccb(ConditionFlag.Zero, compare2Bytes); 261 masm.movl(temp, new AMD64Address(array1, 0)); 262 masm.cmpl(temp, new AMD64Address(array2, 0)); 263 masm.jccb(ConditionFlag.NotEqual, falseLabel); 264 265 if (kind.getByteCount() <= 2) { 266 // Move array pointers forward. 267 masm.leaq(array1, new AMD64Address(array1, 4)); 268 masm.leaq(array2, new AMD64Address(array2, 4)); 269 270 // Compare trailing 2 bytes, if any. 271 masm.bind(compare2Bytes); 272 masm.testl(result, 2); 273 masm.jccb(ConditionFlag.Zero, compare1Byte); 274 masm.movzwl(temp, new AMD64Address(array1, 0)); 275 masm.movzwl(length, new AMD64Address(array2, 0)); 276 masm.cmpl(temp, length); 277 masm.jccb(ConditionFlag.NotEqual, falseLabel); 278 279 // The one-byte tail compare is only required for boolean and byte arrays. 280 if (kind.getByteCount() <= 1) { 281 // Move array pointers forward before we compare the last trailing byte. 282 masm.leaq(array1, new AMD64Address(array1, 2)); 283 masm.leaq(array2, new AMD64Address(array2, 2)); 284 285 // Compare trailing byte, if any. 286 masm.bind(compare1Byte); 287 masm.testl(result, 1); 288 masm.jccb(ConditionFlag.Zero, trueLabel); 289 masm.movzbl(temp, new AMD64Address(array1, 0)); 290 masm.movzbl(length, new AMD64Address(array2, 0)); 291 masm.cmpl(temp, length); 292 masm.jccb(ConditionFlag.NotEqual, falseLabel); 293 } else { 294 masm.bind(compare1Byte); 295 } 296 } else { 297 masm.bind(compare2Bytes); 298 } 299 } 300 } 301}