001/*
002 * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004 *
005 * This code is free software; you can redistribute it and/or modify it
006 * under the terms of the GNU General Public License version 2 only, as
007 * published by the Free Software Foundation.
008 *
009 * This code is distributed in the hope that it will be useful, but WITHOUT
010 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
011 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
012 * version 2 for more details (a copy is included in the LICENSE file that
013 * accompanied this code).
014 *
015 * You should have received a copy of the GNU General Public License version
016 * 2 along with this work; if not, write to the Free Software Foundation,
017 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
018 *
019 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
020 * or visit www.oracle.com if you need additional information or have any
021 * questions.
022 */
023package com.oracle.graal.lir.amd64;
024
025import static com.oracle.graal.lir.LIRInstruction.OperandFlag.*;
026import static jdk.internal.jvmci.code.ValueUtil.*;
027import static jdk.internal.jvmci.common.UnsafeAccess.*;
028
029import java.lang.reflect.*;
030
031import jdk.internal.jvmci.amd64.*;
032import jdk.internal.jvmci.amd64.AMD64.*;
033import jdk.internal.jvmci.code.*;
034import jdk.internal.jvmci.meta.*;
035
036import com.oracle.graal.asm.*;
037import com.oracle.graal.asm.amd64.*;
038import com.oracle.graal.asm.amd64.AMD64Address.*;
039import com.oracle.graal.asm.amd64.AMD64Assembler.*;
040import com.oracle.graal.lir.*;
041import com.oracle.graal.lir.asm.*;
042import com.oracle.graal.lir.gen.*;
043
044/**
045 * Emits code which compares two arrays of the same length. If the CPU supports any vector
046 * instructions specialized code is emitted to leverage these instructions.
047 */
048@Opcode("ARRAY_EQUALS")
049public final class AMD64ArrayEqualsOp extends AMD64LIRInstruction {
050    public static final LIRInstructionClass<AMD64ArrayEqualsOp> TYPE = LIRInstructionClass.create(AMD64ArrayEqualsOp.class);
051
052    private final Kind kind;
053    private final int arrayBaseOffset;
054    private final int arrayIndexScale;
055
056    @Def({REG}) protected Value resultValue;
057    @Alive({REG}) protected Value array1Value;
058    @Alive({REG}) protected Value array2Value;
059    @Alive({REG}) protected Value lengthValue;
060    @Temp({REG}) protected Value temp1;
061    @Temp({REG}) protected Value temp2;
062    @Temp({REG}) protected Value temp3;
063    @Temp({REG}) protected Value temp4;
064    @Temp({REG, ILLEGAL}) protected Value vectorTemp1;
065    @Temp({REG, ILLEGAL}) protected Value vectorTemp2;
066
067    public AMD64ArrayEqualsOp(LIRGeneratorTool tool, Kind kind, Value result, Value array1, Value array2, Value length) {
068        super(TYPE);
069        this.kind = kind;
070
071        Class<?> arrayClass = Array.newInstance(kind.toJavaClass(), 0).getClass();
072        this.arrayBaseOffset = unsafe.arrayBaseOffset(arrayClass);
073        this.arrayIndexScale = unsafe.arrayIndexScale(arrayClass);
074
075        this.resultValue = result;
076        this.array1Value = array1;
077        this.array2Value = array2;
078        this.lengthValue = length;
079
080        // Allocate some temporaries.
081        this.temp1 = tool.newVariable(LIRKind.unknownReference(tool.target().wordKind));
082        this.temp2 = tool.newVariable(LIRKind.unknownReference(tool.target().wordKind));
083        this.temp3 = tool.newVariable(LIRKind.value(tool.target().wordKind));
084        this.temp4 = tool.newVariable(LIRKind.value(tool.target().wordKind));
085
086        // We only need the vector temporaries if we generate SSE code.
087        if (supportsSSE41(tool.target())) {
088            this.vectorTemp1 = tool.newVariable(LIRKind.value(Kind.Double));
089            this.vectorTemp2 = tool.newVariable(LIRKind.value(Kind.Double));
090        } else {
091            this.vectorTemp1 = Value.ILLEGAL;
092            this.vectorTemp2 = Value.ILLEGAL;
093        }
094    }
095
096    @Override
097    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
098        Register result = asRegister(resultValue);
099        Register array1 = asRegister(temp1);
100        Register array2 = asRegister(temp2);
101        Register length = asRegister(temp3);
102
103        Label trueLabel = new Label();
104        Label falseLabel = new Label();
105        Label done = new Label();
106
107        // Load array base addresses.
108        masm.leaq(array1, new AMD64Address(asRegister(array1Value), arrayBaseOffset));
109        masm.leaq(array2, new AMD64Address(asRegister(array2Value), arrayBaseOffset));
110
111        // Get array length in bytes.
112        masm.imull(length, asRegister(lengthValue), arrayIndexScale);
113        masm.movl(result, length); // copy
114
115        if (supportsSSE41(crb.target)) {
116            emitSSE41Compare(crb, masm, result, array1, array2, length, trueLabel, falseLabel);
117        }
118
119        emit8ByteCompare(crb, masm, result, array1, array2, length, trueLabel, falseLabel);
120        emitTailCompares(masm, result, array1, array2, length, trueLabel, falseLabel);
121
122        // Return true
123        masm.bind(trueLabel);
124        masm.movl(result, 1);
125        masm.jmpb(done);
126
127        // Return false
128        masm.bind(falseLabel);
129        masm.xorl(result, result);
130
131        // That's it
132        masm.bind(done);
133    }
134
135    /**
136     * Returns if the underlying AMD64 architecture supports SSE 4.1 instructions.
137     *
138     * @param target target description of the underlying architecture
139     * @return true if the underlying architecture supports SSE 4.1
140     */
141    private static boolean supportsSSE41(TargetDescription target) {
142        AMD64 arch = (AMD64) target.arch;
143        return arch.getFeatures().contains(CPUFeature.SSE4_1);
144    }
145
146    /**
147     * Vector size used in {@link #emitSSE41Compare}.
148     */
149    private static final int SSE4_1_VECTOR_SIZE = 16;
150
151    /**
152     * Emits code that uses SSE4.1 128-bit (16-byte) vector compares.
153     */
154    private void emitSSE41Compare(CompilationResultBuilder crb, AMD64MacroAssembler masm, Register result, Register array1, Register array2, Register length, Label trueLabel, Label falseLabel) {
155        assert supportsSSE41(crb.target);
156
157        Register vector1 = asDoubleReg(vectorTemp1);
158        Register vector2 = asDoubleReg(vectorTemp2);
159
160        Label loop = new Label();
161        Label compareTail = new Label();
162
163        // Compare 16-byte vectors
164        masm.andl(result, SSE4_1_VECTOR_SIZE - 1); // tail count (in bytes)
165        masm.andl(length, ~(SSE4_1_VECTOR_SIZE - 1)); // vector count (in bytes)
166        masm.jccb(ConditionFlag.Zero, compareTail);
167
168        masm.leaq(array1, new AMD64Address(array1, length, Scale.Times1, 0));
169        masm.leaq(array2, new AMD64Address(array2, length, Scale.Times1, 0));
170        masm.negq(length);
171
172        // Align the main loop
173        masm.align(crb.target.wordSize * 2);
174        masm.bind(loop);
175        masm.movdqu(vector1, new AMD64Address(array1, length, Scale.Times1, 0));
176        masm.movdqu(vector2, new AMD64Address(array2, length, Scale.Times1, 0));
177        masm.pxor(vector1, vector2);
178        masm.ptest(vector1, vector1);
179        masm.jcc(ConditionFlag.NotZero, falseLabel);
180        masm.addq(length, SSE4_1_VECTOR_SIZE);
181        masm.jcc(ConditionFlag.NotZero, loop);
182
183        masm.testl(result, result);
184        masm.jcc(ConditionFlag.Zero, trueLabel);
185
186        /*
187         * Compare the remaining bytes with an unaligned memory load aligned to the end of the
188         * array.
189         */
190        masm.movdqu(vector1, new AMD64Address(array1, result, Scale.Times1, -SSE4_1_VECTOR_SIZE));
191        masm.movdqu(vector2, new AMD64Address(array2, result, Scale.Times1, -SSE4_1_VECTOR_SIZE));
192        masm.pxor(vector1, vector2);
193        masm.ptest(vector1, vector1);
194        masm.jcc(ConditionFlag.NotZero, falseLabel);
195        masm.jmp(trueLabel);
196
197        masm.bind(compareTail);
198        masm.movl(length, result);
199    }
200
201    /**
202     * Vector size used in {@link #emit8ByteCompare}.
203     */
204    private static final int VECTOR_SIZE = 8;
205
206    /**
207     * Emits code that uses 8-byte vector compares.
208     */
209    private void emit8ByteCompare(CompilationResultBuilder crb, AMD64MacroAssembler masm, Register result, Register array1, Register array2, Register length, Label trueLabel, Label falseLabel) {
210        Label loop = new Label();
211        Label compareTail = new Label();
212
213        Register temp = asRegister(temp4);
214
215        masm.andl(result, VECTOR_SIZE - 1); // tail count (in bytes)
216        masm.andl(length, ~(VECTOR_SIZE - 1));  // vector count (in bytes)
217        masm.jccb(ConditionFlag.Zero, compareTail);
218
219        masm.leaq(array1, new AMD64Address(array1, length, Scale.Times1, 0));
220        masm.leaq(array2, new AMD64Address(array2, length, Scale.Times1, 0));
221        masm.negq(length);
222
223        // Align the main loop
224        masm.align(crb.target.wordSize * 2);
225        masm.bind(loop);
226        masm.movq(temp, new AMD64Address(array1, length, Scale.Times1, 0));
227        masm.cmpq(temp, new AMD64Address(array2, length, Scale.Times1, 0));
228        masm.jccb(ConditionFlag.NotEqual, falseLabel);
229        masm.addq(length, VECTOR_SIZE);
230        masm.jccb(ConditionFlag.NotZero, loop);
231
232        masm.testl(result, result);
233        masm.jccb(ConditionFlag.Zero, trueLabel);
234
235        /*
236         * Compare the remaining bytes with an unaligned memory load aligned to the end of the
237         * array.
238         */
239        masm.movq(temp, new AMD64Address(array1, result, Scale.Times1, -VECTOR_SIZE));
240        masm.cmpq(temp, new AMD64Address(array2, result, Scale.Times1, -VECTOR_SIZE));
241        masm.jccb(ConditionFlag.NotEqual, falseLabel);
242        masm.jmpb(trueLabel);
243
244        masm.bind(compareTail);
245        masm.movl(length, result);
246    }
247
248    /**
249     * Emits code to compare the remaining 1 to 4 bytes.
250     */
251    private void emitTailCompares(AMD64MacroAssembler masm, Register result, Register array1, Register array2, Register length, Label trueLabel, Label falseLabel) {
252        Label compare2Bytes = new Label();
253        Label compare1Byte = new Label();
254
255        Register temp = asRegister(temp4);
256
257        if (kind.getByteCount() <= 4) {
258            // Compare trailing 4 bytes, if any.
259            masm.testl(result, 4);
260            masm.jccb(ConditionFlag.Zero, compare2Bytes);
261            masm.movl(temp, new AMD64Address(array1, 0));
262            masm.cmpl(temp, new AMD64Address(array2, 0));
263            masm.jccb(ConditionFlag.NotEqual, falseLabel);
264
265            if (kind.getByteCount() <= 2) {
266                // Move array pointers forward.
267                masm.leaq(array1, new AMD64Address(array1, 4));
268                masm.leaq(array2, new AMD64Address(array2, 4));
269
270                // Compare trailing 2 bytes, if any.
271                masm.bind(compare2Bytes);
272                masm.testl(result, 2);
273                masm.jccb(ConditionFlag.Zero, compare1Byte);
274                masm.movzwl(temp, new AMD64Address(array1, 0));
275                masm.movzwl(length, new AMD64Address(array2, 0));
276                masm.cmpl(temp, length);
277                masm.jccb(ConditionFlag.NotEqual, falseLabel);
278
279                // The one-byte tail compare is only required for boolean and byte arrays.
280                if (kind.getByteCount() <= 1) {
281                    // Move array pointers forward before we compare the last trailing byte.
282                    masm.leaq(array1, new AMD64Address(array1, 2));
283                    masm.leaq(array2, new AMD64Address(array2, 2));
284
285                    // Compare trailing byte, if any.
286                    masm.bind(compare1Byte);
287                    masm.testl(result, 1);
288                    masm.jccb(ConditionFlag.Zero, trueLabel);
289                    masm.movzbl(temp, new AMD64Address(array1, 0));
290                    masm.movzbl(length, new AMD64Address(array2, 0));
291                    masm.cmpl(temp, length);
292                    masm.jccb(ConditionFlag.NotEqual, falseLabel);
293                } else {
294                    masm.bind(compare1Byte);
295                }
296            } else {
297                masm.bind(compare2Bytes);
298            }
299        }
300    }
301}