comparison src/cpu/x86/vm/x86.ad @ 6614:006050192a5a

6340864: Implement vectorization optimizations in hotspot-server
Summary: Added asm encoding and mach nodes for vector arithmetic instructions on x86.
Reviewed-by: roland

author   kvn
date     Mon, 20 Aug 2012 09:07:21 -0700
parents  2c368ea3e844
children da91efe96a93

comparing 6594:d5ec46c7da5c with 6614:006050192a5a
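
The mach rules in this change are matched after C2's SuperWord pass has widened a counted loop into LoadVector/AddV*/StoreVector nodes. As a minimal, purely illustrative sketch (class and method names are not from the changeset), a loop of this shape is what ends up flowing through the new AddVI rules (vadd2I/vadd4I/vadd8I) below:

// Illustrative only: the loop shape C2's SuperWord pass auto-vectorizes.
public class VecAddDemo {
    static void addInts(int[] a, int[] b, int[] c) {
        for (int i = 0; i < c.length; i++) {
            c[i] = a[i] + b[i];  // becomes AddVI over 4 (SSE2) or 8 (AVX2) lanes
        }
    }

    public static void main(String[] args) {
        int[] a = new int[1024], b = new int[1024], c = new int[1024];
        for (int i = 0; i < a.length; i++) { a[i] = i; b[i] = 2 * i; }
        // Warm up so the method is compiled by the C2 (server) compiler.
        for (int k = 0; k < 20_000; k++) addInts(a, b, c);
        System.out.println(c[100]);  // prints 300
    }
}
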
  0, 0/*abio*/,
  Op_RegP /* Return address */, 0, /* the memories */
  Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
  0 /*bottom*/
};

const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        return false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        return false;
      break;
  }

  return true;  // By default match rules are supported.
}
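
The Op_MulVI arm above exists because a packed 32-bit multiply (pmulld) was only introduced with SSE4.1; AVX provides vpmulld. A hedged illustration (method and class names are invented): when match_rule_supported(Op_MulVI) returns false, SuperWord leaves a loop like this scalar, while on SSE4.1/AVX hardware it becomes pmulld/vpmulld via the vmul*I rules later in this file.

// Illustrative fragment: vectorizes to MulVI only when UseSSE >= 4 or UseAVX >= 1.
class VecMulDemo {
    static void mulInts(int[] a, int[] b, int[] c) {
        for (int i = 0; i < c.length; i++) {
            c[i] = a[i] * b[i];
        }
    }
}
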

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;

[...]

  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    bool vector256 = false;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
[...]
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    bool vector256 = false;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg(regF dst) %{
[...]
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    bool vector256 = false;
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signflip()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
[...]
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    bool vector256 = false;
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signflip()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_reg(regF dst, regF src) %{
[...]
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

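The byte rules above set the pattern every operation in this section follows: a two-operand SSE form that updates dst in place, a non-destructive three-operand form and a memory-operand form for AVX (UseAVX > 0), and a 32-byte form that requires UseAVX > 1 because 256-bit integer instructions arrived only with AVX2; the vector256 flag passed to the assembler selects the VEX.L (256-bit) encoding. For orientation, a hedged sketch of the Java loop these AddVB rules serve (names invented):

// Illustrative: the lane count (4/8/16/32) picks vadd4B .. vadd32B above.
class VecByteAddDemo {
    static void addBytes(byte[] a, byte[] b, byte[] c) {
        for (int i = 0; i < c.length; i++) {
            c[i] = (byte) (a[i] + b[i]);  // AddVB; wraps mod 256, same as paddb
        }
    }
}
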
// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

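Note the predicate asymmetry: the 256-bit floating-point forms above (vadd8F_reg, vadd4D_reg and their _mem variants) need only UseAVX > 0, because 256-bit FP arithmetic is part of AVX1, while the 256-bit integer forms earlier in the section require UseAVX > 1 (AVX2). A hedged sketch of a loop served by the float rules (names invented):

// Illustrative: AddVF -> addps (SSE), vaddps xmm (AVX, 128-bit) or vaddps ymm (AVX, 256-bit).
class VecFloatAddDemo {
    static void addFloats(float[] a, float[] b, float[] c) {
        for (int i = 0; i < c.length; i++) {
            c[i] = a[i] + b[i];
        }
    }
}
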
// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

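pmullw/vpmullw keep only the low 16 bits of each lane's product, which is exactly the result a Java short loop needs after its mandatory narrowing cast. A hedged sketch (names invented):

// Illustrative: the (short) cast discards the high product bits, matching pmullw exactly.
class VecShortMulDemo {
    static void mulShorts(short[] a, short[] b, short[] c) {
        for (int i = 0; i < c.length; i++) {
            c[i] = (short) (a[i] * b[i]);  // MulVS
        }
    }
}
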
// Integers vector mul (sse4_1)
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

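Division rules are defined only for floats and doubles: SSE/AVX provide no packed integer divide, so integer division loops always stay scalar. A hedged sketch (names invented):

// Illustrative: DivVF -> divps/vdivps. An int[] version of this loop would not vectorize.
class VecDivDemo {
    static void divFloats(float[] a, float[] b, float[] c) {
        for (int i = 0; i < c.length; i++) {
            c[i] = a[i] / b[i];
        }
    }
}
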
3709 // Doubles vector div
3710 instruct vdiv2D(vecX dst, vecX src) %{
3711 predicate(n->as_Vector()->length() == 2);
3712 match(Set dst (DivVD dst src));
3713 format %{ "divpd $dst,$src\t! div packed2D" %}
3714 ins_encode %{
3715 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
3716 %}
3717 ins_pipe( pipe_slow );
3718 %}
3719
3720 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
3721 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3722 match(Set dst (DivVD src1 src2));
3723 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
3724 ins_encode %{
3725 bool vector256 = false;
3726 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3727 %}
3728 ins_pipe( pipe_slow );
3729 %}
3730
3731 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
3732 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3733 match(Set dst (DivVD src (LoadVector mem)));
3734 format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
3735 ins_encode %{
3736 bool vector256 = false;
3737 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3738 %}
3739 ins_pipe( pipe_slow );
3740 %}
3741
3742 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
3743 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3744 match(Set dst (DivVD src1 src2));
3745 format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
3746 ins_encode %{
3747 bool vector256 = true;
3748 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3749 %}
3750 ins_pipe( pipe_slow );
3751 %}
3752
3753 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
3754 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3755 match(Set dst (DivVD src (LoadVector mem)));
3756 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
3757 ins_encode %{
3758 bool vector256 = true;
3759 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3760 %}
3761 ins_pipe( pipe_slow );
3762 %}
3763
3764 // ------------------------------ LeftShift -----------------------------------
3765
3766 // Shorts/Chars vector left shift
3767 instruct vsll2S(vecS dst, regF shift) %{
3768 predicate(n->as_Vector()->length() == 2);
3769 match(Set dst (LShiftVS dst shift));
3770 format %{ "psllw $dst,$shift\t! left shift packed2S" %}
3771 ins_encode %{
3772 __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
3773 %}
3774 ins_pipe( pipe_slow );
3775 %}
3776
3777 instruct vsll2S_imm(vecS dst, immI8 shift) %{
3778 predicate(n->as_Vector()->length() == 2);
3779 match(Set dst (LShiftVS dst shift));
3780 format %{ "psllw $dst,$shift\t! left shift packed2S" %}
3781 ins_encode %{
3782 __ psllw($dst$$XMMRegister, (int)$shift$$constant);
3783 %}
3784 ins_pipe( pipe_slow );
3785 %}
3786
3787 instruct vsll2S_reg(vecS dst, vecS src, regF shift) %{
3788 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3789 match(Set dst (LShiftVS src shift));
3790 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
3791 ins_encode %{
3792 bool vector256 = false;
3793 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3794 %}
3795 ins_pipe( pipe_slow );
3796 %}
3797
3798 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
3799 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3800 match(Set dst (LShiftVS src shift));
3801 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
3802 ins_encode %{
3803 bool vector256 = false;
3804 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3805 %}
3806 ins_pipe( pipe_slow );
3807 %}
3808
3809 instruct vsll4S(vecD dst, regF shift) %{
3810 predicate(n->as_Vector()->length() == 4);
3811 match(Set dst (LShiftVS dst shift));
3812 format %{ "psllw $dst,$shift\t! left shift packed4S" %}
3813 ins_encode %{
3814 __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
3815 %}
3816 ins_pipe( pipe_slow );
3817 %}
3818
3819 instruct vsll4S_imm(vecD dst, immI8 shift) %{
3820 predicate(n->as_Vector()->length() == 4);
3821 match(Set dst (LShiftVS dst shift));
3822 format %{ "psllw $dst,$shift\t! left shift packed4S" %}
3823 ins_encode %{
3824 __ psllw($dst$$XMMRegister, (int)$shift$$constant);
3825 %}
3826 ins_pipe( pipe_slow );
3827 %}
3828
3829 instruct vsll4S_reg(vecD dst, vecD src, regF shift) %{
3830 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3831 match(Set dst (LShiftVS src shift));
3832 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
3833 ins_encode %{
3834 bool vector256 = false;
3835 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3836 %}
3837 ins_pipe( pipe_slow );
3838 %}
3839
3840 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
3841 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3842 match(Set dst (LShiftVS src shift));
3843 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
3844 ins_encode %{
3845 bool vector256 = false;
3846 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3847 %}
3848 ins_pipe( pipe_slow );
3849 %}
3850
3851 instruct vsll8S(vecX dst, regF shift) %{
3852 predicate(n->as_Vector()->length() == 8);
3853 match(Set dst (LShiftVS dst shift));
3854 format %{ "psllw $dst,$shift\t! left shift packed8S" %}
3855 ins_encode %{
3856 __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
3857 %}
3858 ins_pipe( pipe_slow );
3859 %}
3860
3861 instruct vsll8S_imm(vecX dst, immI8 shift) %{
3862 predicate(n->as_Vector()->length() == 8);
3863 match(Set dst (LShiftVS dst shift));
3864 format %{ "psllw $dst,$shift\t! left shift packed8S" %}
3865 ins_encode %{
3866 __ psllw($dst$$XMMRegister, (int)$shift$$constant);
3867 %}
3868 ins_pipe( pipe_slow );
3869 %}
3870
3871 instruct vsll8S_reg(vecX dst, vecX src, regF shift) %{
3872 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3873 match(Set dst (LShiftVS src shift));
3874 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
3875 ins_encode %{
3876 bool vector256 = false;
3877 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3878 %}
3879 ins_pipe( pipe_slow );
3880 %}
3881
3882 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
3883 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3884 match(Set dst (LShiftVS src shift));
3885 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
3886 ins_encode %{
3887 bool vector256 = false;
3888 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3889 %}
3890 ins_pipe( pipe_slow );
3891 %}
3892
3893 instruct vsll16S_reg(vecY dst, vecY src, regF shift) %{
3894 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3895 match(Set dst (LShiftVS src shift));
3896 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
3897 ins_encode %{
3898 bool vector256 = true;
3899 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3900 %}
3901 ins_pipe( pipe_slow );
3902 %}
3903
3904 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
3905 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3906 match(Set dst (LShiftVS src shift));
3907 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
3908 ins_encode %{
3909 bool vector256 = true;
3910 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3911 %}
3912 ins_pipe( pipe_slow );
3913 %}
3914
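The packed16S rules above set vector256 = true and require UseAVX > 1: AVX1 only widened floating-point operations to 256 bits, so 256-bit integer shifts need AVX2. A hedged C++ sketch of the corresponding instruction (illustrative, not from this file):

    #include <immintrin.h>

    // Requires AVX2 (-mavx2); compiles to "vpsllw ymm, ymm, imm8".
    __m256i shl16S_256(__m256i v) { return _mm256_slli_epi16(v, 3); }
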
3915 // Integers vector left shift
3916 instruct vsll2I(vecD dst, regF shift) %{
3917 predicate(n->as_Vector()->length() == 2);
3918 match(Set dst (LShiftVI dst shift));
3919 format %{ "pslld $dst,$shift\t! left shift packed2I" %}
3920 ins_encode %{
3921 __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
3922 %}
3923 ins_pipe( pipe_slow );
3924 %}
3925
3926 instruct vsll2I_imm(vecD dst, immI8 shift) %{
3927 predicate(n->as_Vector()->length() == 2);
3928 match(Set dst (LShiftVI dst shift));
3929 format %{ "pslld $dst,$shift\t! left shift packed2I" %}
3930 ins_encode %{
3931 __ pslld($dst$$XMMRegister, (int)$shift$$constant);
3932 %}
3933 ins_pipe( pipe_slow );
3934 %}
3935
3936 instruct vsll2I_reg(vecD dst, vecD src, regF shift) %{
3937 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3938 match(Set dst (LShiftVI src shift));
3939 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
3940 ins_encode %{
3941 bool vector256 = false;
3942 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3943 %}
3944 ins_pipe( pipe_slow );
3945 %}
3946
3947 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
3948 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3949 match(Set dst (LShiftVI src shift));
3950 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
3951 ins_encode %{
3952 bool vector256 = false;
3953 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3954 %}
3955 ins_pipe( pipe_slow );
3956 %}
3957
3958 instruct vsll4I(vecX dst, regF shift) %{
3959 predicate(n->as_Vector()->length() == 4);
3960 match(Set dst (LShiftVI dst shift));
3961 format %{ "pslld $dst,$shift\t! left shift packed4I" %}
3962 ins_encode %{
3963 __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
3964 %}
3965 ins_pipe( pipe_slow );
3966 %}
3967
3968 instruct vsll4I_imm(vecX dst, immI8 shift) %{
3969 predicate(n->as_Vector()->length() == 4);
3970 match(Set dst (LShiftVI dst shift));
3971 format %{ "pslld $dst,$shift\t! left shift packed4I" %}
3972 ins_encode %{
3973 __ pslld($dst$$XMMRegister, (int)$shift$$constant);
3974 %}
3975 ins_pipe( pipe_slow );
3976 %}
3977
3978 instruct vsll4I_reg(vecX dst, vecX src, regF shift) %{
3979 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3980 match(Set dst (LShiftVI src shift));
3981 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
3982 ins_encode %{
3983 bool vector256 = false;
3984 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3985 %}
3986 ins_pipe( pipe_slow );
3987 %}
3988
3989 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
3990 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3991 match(Set dst (LShiftVI src shift));
3992 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
3993 ins_encode %{
3994 bool vector256 = false;
3995 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3996 %}
3997 ins_pipe( pipe_slow );
3998 %}
3999
4000 instruct vsll8I_reg(vecY dst, vecY src, regF shift) %{
4001 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4002 match(Set dst (LShiftVI src shift));
4003 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
4004 ins_encode %{
4005 bool vector256 = true;
4006 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4007 %}
4008 ins_pipe( pipe_slow );
4009 %}
4010
4011 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4012 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4013 match(Set dst (LShiftVI src shift));
4014 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
4015 ins_encode %{
4016 bool vector256 = true;
4017 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4018 %}
4019 ins_pipe( pipe_slow );
4020 %}
4021
4022 // Longs vector left shift
4023 instruct vsll2L(vecX dst, regF shift) %{
4024 predicate(n->as_Vector()->length() == 2);
4025 match(Set dst (LShiftVL dst shift));
4026 format %{ "psllq $dst,$shift\t! left shift packed2L" %}
4027 ins_encode %{
4028 __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
4029 %}
4030 ins_pipe( pipe_slow );
4031 %}
4032
4033 instruct vsll2L_imm(vecX dst, immI8 shift) %{
4034 predicate(n->as_Vector()->length() == 2);
4035 match(Set dst (LShiftVL dst shift));
4036 format %{ "psllq $dst,$shift\t! left shift packed2L" %}
4037 ins_encode %{
4038 __ psllq($dst$$XMMRegister, (int)$shift$$constant);
4039 %}
4040 ins_pipe( pipe_slow );
4041 %}
4042
4043 instruct vsll2L_reg(vecX dst, vecX src, regF shift) %{
4044 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4045 match(Set dst (LShiftVL src shift));
4046 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
4047 ins_encode %{
4048 bool vector256 = false;
4049 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4050 %}
4051 ins_pipe( pipe_slow );
4052 %}
4053
4054 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
4055 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4056 match(Set dst (LShiftVL src shift));
4057 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
4058 ins_encode %{
4059 bool vector256 = false;
4060 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4061 %}
4062 ins_pipe( pipe_slow );
4063 %}
4064
4065 instruct vsll4L_reg(vecY dst, vecY src, regF shift) %{
4066 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4067 match(Set dst (LShiftVL src shift));
4068 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
4069 ins_encode %{
4070 bool vector256 = true;
4071 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4072 %}
4073 ins_pipe( pipe_slow );
4074 %}
4075
4076 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
4077 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4078 match(Set dst (LShiftVL src shift));
4079 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
4080 ins_encode %{
4081 bool vector256 = true;
4082 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4083 %}
4084 ins_pipe( pipe_slow );
4085 %}
4086
4087 // ----------------------- LogicalRightShift -----------------------------------
4088
4089 // Shorts/Chars vector logical right shift produces an incorrect Java result
4090 // for negative data because Java converts the short value to an int with
4091 // sign extension before the shift.
4092
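A concrete illustration of the note above, as a standalone C++ sketch (not HotSpot code): Java evaluates (short)s >>> n by widening s to int with sign extension and shifting 32 bits, so for negative s the result differs from a 16-bit lane shift such as psrlw. Arithmetic right shift is unaffected, which is why RShiftVS rules appear below while URShiftVS rules do not.

    #include <cstdint>
    #include <cstdio>

    int main() {
      int16_t s = -1;                                      // 0xFFFF
      int n = 1;
      // Java semantics: widen with sign extension, shift 32 bits,
      // truncate when storing back into a short[].
      int32_t widened = s;                                 // 0xFFFFFFFF
      int16_t java    = (int16_t)((uint32_t)widened >> n); // -1
      // 16-bit lane semantics, as psrlw would compute them.
      int16_t lane    = (int16_t)((uint16_t)s >> n);       // 32767
      printf("java=%d lane=%d\n", java, lane);
      return 0;
    }
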
4093 // Integers vector logical right shift
4094 instruct vsrl2I(vecD dst, regF shift) %{
4095 predicate(n->as_Vector()->length() == 2);
4096 match(Set dst (URShiftVI dst shift));
4097 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
4098 ins_encode %{
4099 __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
4100 %}
4101 ins_pipe( pipe_slow );
4102 %}
4103
4104 instruct vsrl2I_imm(vecD dst, immI8 shift) %{
4105 predicate(n->as_Vector()->length() == 2);
4106 match(Set dst (URShiftVI dst shift));
4107 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
4108 ins_encode %{
4109 __ psrld($dst$$XMMRegister, (int)$shift$$constant);
4110 %}
4111 ins_pipe( pipe_slow );
4112 %}
4113
4114 instruct vsrl2I_reg(vecD dst, vecD src, regF shift) %{
4115 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4116 match(Set dst (URShiftVI src shift));
4117 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
4118 ins_encode %{
4119 bool vector256 = false;
4120 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4121 %}
4122 ins_pipe( pipe_slow );
4123 %}
4124
4125 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
4126 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4127 match(Set dst (URShiftVI src shift));
4128 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
4129 ins_encode %{
4130 bool vector256 = false;
4131 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4132 %}
4133 ins_pipe( pipe_slow );
4134 %}
4135
4136 instruct vsrl4I(vecX dst, regF shift) %{
4137 predicate(n->as_Vector()->length() == 4);
4138 match(Set dst (URShiftVI dst shift));
4139 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
4140 ins_encode %{
4141 __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
4142 %}
4143 ins_pipe( pipe_slow );
4144 %}
4145
4146 instruct vsrl4I_imm(vecX dst, immI8 shift) %{
4147 predicate(n->as_Vector()->length() == 4);
4148 match(Set dst (URShiftVI dst shift));
4149 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
4150 ins_encode %{
4151 __ psrld($dst$$XMMRegister, (int)$shift$$constant);
4152 %}
4153 ins_pipe( pipe_slow );
4154 %}
4155
4156 instruct vsrl4I_reg(vecX dst, vecX src, regF shift) %{
4157 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4158 match(Set dst (URShiftVI src shift));
4159 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
4160 ins_encode %{
4161 bool vector256 = false;
4162 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4163 %}
4164 ins_pipe( pipe_slow );
4165 %}
4166
4167 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
4168 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4169 match(Set dst (URShiftVI src shift));
4170 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
4171 ins_encode %{
4172 bool vector256 = false;
4173 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4174 %}
4175 ins_pipe( pipe_slow );
4176 %}
4177
4178 instruct vsrl8I_reg(vecY dst, vecY src, regF shift) %{
4179 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4180 match(Set dst (URShiftVI src shift));
4181 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
4182 ins_encode %{
4183 bool vector256 = true;
4184 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4185 %}
4186 ins_pipe( pipe_slow );
4187 %}
4188
4189 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4190 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4191 match(Set dst (URShiftVI src shift));
4192 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
4193 ins_encode %{
4194 bool vector256 = true;
4195 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4196 %}
4197 ins_pipe( pipe_slow );
4198 %}
4199
4200 // Longs vector logical right shift
4201 instruct vsrl2L(vecX dst, regF shift) %{
4202 predicate(n->as_Vector()->length() == 2);
4203 match(Set dst (URShiftVL dst shift));
4204 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
4205 ins_encode %{
4206 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
4207 %}
4208 ins_pipe( pipe_slow );
4209 %}
4210
4211 instruct vsrl2L_imm(vecX dst, immI8 shift) %{
4212 predicate(n->as_Vector()->length() == 2);
4213 match(Set dst (URShiftVL dst shift));
4214 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
4215 ins_encode %{
4216 __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
4217 %}
4218 ins_pipe( pipe_slow );
4219 %}
4220
4221 instruct vsrl2L_reg(vecX dst, vecX src, regF shift) %{
4222 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4223 match(Set dst (URShiftVL src shift));
4224 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
4225 ins_encode %{
4226 bool vector256 = false;
4227 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4228 %}
4229 ins_pipe( pipe_slow );
4230 %}
4231
4232 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
4233 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4234 match(Set dst (URShiftVL src shift));
4235 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
4236 ins_encode %{
4237 bool vector256 = false;
4238 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4239 %}
4240 ins_pipe( pipe_slow );
4241 %}
4242
4243 instruct vsrl4L_reg(vecY dst, vecY src, regF shift) %{
4244 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4245 match(Set dst (URShiftVL src shift));
4246 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
4247 ins_encode %{
4248 bool vector256 = true;
4249 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4250 %}
4251 ins_pipe( pipe_slow );
4252 %}
4253
4254 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
4255 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4256 match(Set dst (URShiftVL src shift));
4257 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
4258 ins_encode %{
4259 bool vector256 = true;
4260 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4261 %}
4262 ins_pipe( pipe_slow );
4263 %}
4264
4265 // ------------------- ArithmeticRightShift -----------------------------------
4266
4267 // Shorts/Chars vector arithmetic right shift
4268 instruct vsra2S(vecS dst, regF shift) %{
4269 predicate(n->as_Vector()->length() == 2);
4270 match(Set dst (RShiftVS dst shift));
4271 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
4272 ins_encode %{
4273 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
4274 %}
4275 ins_pipe( pipe_slow );
4276 %}
4277
4278 instruct vsra2S_imm(vecS dst, immI8 shift) %{
4279 predicate(n->as_Vector()->length() == 2);
4280 match(Set dst (RShiftVS dst shift));
4281 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
4282 ins_encode %{
4283 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
4284 %}
4285 ins_pipe( pipe_slow );
4286 %}
4287
4288 instruct vsra2S_reg(vecS dst, vecS src, regF shift) %{
4289 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4290 match(Set dst (RShiftVS src shift));
4291 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
4292 ins_encode %{
4293 bool vector256 = false;
4294 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4295 %}
4296 ins_pipe( pipe_slow );
4297 %}
4298
4299 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
4300 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4301 match(Set dst (RShiftVS src shift));
4302 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
4303 ins_encode %{
4304 bool vector256 = false;
4305 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4306 %}
4307 ins_pipe( pipe_slow );
4308 %}
4309
4310 instruct vsra4S(vecD dst, regF shift) %{
4311 predicate(n->as_Vector()->length() == 4);
4312 match(Set dst (RShiftVS dst shift));
4313 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
4314 ins_encode %{
4315 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
4316 %}
4317 ins_pipe( pipe_slow );
4318 %}
4319
4320 instruct vsra4S_imm(vecD dst, immI8 shift) %{
4321 predicate(n->as_Vector()->length() == 4);
4322 match(Set dst (RShiftVS dst shift));
4323 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
4324 ins_encode %{
4325 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
4326 %}
4327 ins_pipe( pipe_slow );
4328 %}
4329
4330 instruct vsra4S_reg(vecD dst, vecD src, regF shift) %{
4331 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4332 match(Set dst (RShiftVS src shift));
4333 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
4334 ins_encode %{
4335 bool vector256 = false;
4336 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4337 %}
4338 ins_pipe( pipe_slow );
4339 %}
4340
4341 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
4342 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4343 match(Set dst (RShiftVS src shift));
4344 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
4345 ins_encode %{
4346 bool vector256 = false;
4347 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4348 %}
4349 ins_pipe( pipe_slow );
4350 %}
4351
4352 instruct vsra8S(vecX dst, regF shift) %{
4353 predicate(n->as_Vector()->length() == 8);
4354 match(Set dst (RShiftVS dst shift));
4355 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
4356 ins_encode %{
4357 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
4358 %}
4359 ins_pipe( pipe_slow );
4360 %}
4361
4362 instruct vsra8S_imm(vecX dst, immI8 shift) %{
4363 predicate(n->as_Vector()->length() == 8);
4364 match(Set dst (RShiftVS dst shift));
4365 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
4366 ins_encode %{
4367 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
4368 %}
4369 ins_pipe( pipe_slow );
4370 %}
4371
4372 instruct vsra8S_reg(vecX dst, vecX src, regF shift) %{
4373 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4374 match(Set dst (RShiftVS src shift));
4375 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
4376 ins_encode %{
4377 bool vector256 = false;
4378 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4379 %}
4380 ins_pipe( pipe_slow );
4381 %}
4382
4383 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
4384 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4385 match(Set dst (RShiftVS src shift));
4386 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
4387 ins_encode %{
4388 bool vector256 = false;
4389 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4390 %}
4391 ins_pipe( pipe_slow );
4392 %}
4393
4394 instruct vsra16S_reg(vecY dst, vecY src, regF shift) %{
4395 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4396 match(Set dst (RShiftVS src shift));
4397 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
4398 ins_encode %{
4399 bool vector256 = true;
4400 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4401 %}
4402 ins_pipe( pipe_slow );
4403 %}
4404
4405 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
4406 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4407 match(Set dst (RShiftVS src shift));
4408 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
4409 ins_encode %{
4410 bool vector256 = true;
4411 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4412 %}
4413 ins_pipe( pipe_slow );
4414 %}
4415
4416 // Integers vector arithmetic right shift
4417 instruct vsra2I(vecD dst, regF shift) %{
4418 predicate(n->as_Vector()->length() == 2);
4419 match(Set dst (RShiftVI dst shift));
4420 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
4421 ins_encode %{
4422 __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
4423 %}
4424 ins_pipe( pipe_slow );
4425 %}
4426
4427 instruct vsra2I_imm(vecD dst, immI8 shift) %{
4428 predicate(n->as_Vector()->length() == 2);
4429 match(Set dst (RShiftVI dst shift));
4430 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
4431 ins_encode %{
4432 __ psrad($dst$$XMMRegister, (int)$shift$$constant);
4433 %}
4434 ins_pipe( pipe_slow );
4435 %}
4436
4437 instruct vsra2I_reg(vecD dst, vecD src, regF shift) %{
4438 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4439 match(Set dst (RShiftVI src shift));
4440 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
4441 ins_encode %{
4442 bool vector256 = false;
4443 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4444 %}
4445 ins_pipe( pipe_slow );
4446 %}
4447
4448 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
4449 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4450 match(Set dst (RShiftVI src shift));
4451 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
4452 ins_encode %{
4453 bool vector256 = false;
4454 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4455 %}
4456 ins_pipe( pipe_slow );
4457 %}
4458
4459 instruct vsra4I(vecX dst, regF shift) %{
4460 predicate(n->as_Vector()->length() == 4);
4461 match(Set dst (RShiftVI dst shift));
4462 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
4463 ins_encode %{
4464 __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
4465 %}
4466 ins_pipe( pipe_slow );
4467 %}
4468
4469 instruct vsra4I_imm(vecX dst, immI8 shift) %{
4470 predicate(n->as_Vector()->length() == 4);
4471 match(Set dst (RShiftVI dst shift));
4472 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
4473 ins_encode %{
4474 __ psrad($dst$$XMMRegister, (int)$shift$$constant);
4475 %}
4476 ins_pipe( pipe_slow );
4477 %}
4478
4479 instruct vsra4I_reg(vecX dst, vecX src, regF shift) %{
4480 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4481 match(Set dst (RShiftVI src shift));
4482 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
4483 ins_encode %{
4484 bool vector256 = false;
4485 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4486 %}
4487 ins_pipe( pipe_slow );
4488 %}
4489
4490 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
4491 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4492 match(Set dst (RShiftVI src shift));
4493 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
4494 ins_encode %{
4495 bool vector256 = false;
4496 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4497 %}
4498 ins_pipe( pipe_slow );
4499 %}
4500
4501 instruct vsra8I_reg(vecY dst, vecY src, regF shift) %{
4502 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4503 match(Set dst (RShiftVI src shift));
4504 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
4505 ins_encode %{
4506 bool vector256 = true;
4507 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4508 %}
4509 ins_pipe( pipe_slow );
4510 %}
4511
4512 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4513 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4514 match(Set dst (RShiftVI src shift));
4515 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
4516 ins_encode %{
4517 bool vector256 = true;
4518 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4519 %}
4520 ins_pipe( pipe_slow );
4521 %}
4522
4523 // There is no x86 instruction for vector arithmetic right shift of longs.
4524
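SSE and AVX (through AVX2) provide psllq/psrlq but no psraq, hence the gap. Were one needed, an arithmetic right shift of packed longs by a constant 0 < n < 64 can be emulated from the logical shift with the usual sign-extension identity (x ^ m) - m; a hedged C++ sketch (illustrative only, not part of this change):

    #include <emmintrin.h>   // SSE2

    // After the logical shift the original sign bit sits at bit (63 - n);
    // XOR then SUB with a mask at that position sign-extends it.
    static inline __m128i psraq_emul(__m128i v, int n) {
      __m128i t = _mm_srli_epi64(v, n);
      __m128i m = _mm_set1_epi64x(1LL << (63 - n));
      return _mm_sub_epi64(_mm_xor_si128(t, m), m);
    }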
4525
4526 // --------------------------------- AND --------------------------------------
4527
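The 4- and 8-byte forms below still execute a full 128-bit pand; that is harmless for bitwise operations because the lanes above the vector's logical length are never read back.
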
4528 instruct vand4B(vecS dst, vecS src) %{
4529 predicate(n->as_Vector()->length_in_bytes() == 4);
4530 match(Set dst (AndV dst src));
4531 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
4532 ins_encode %{
4533 __ pand($dst$$XMMRegister, $src$$XMMRegister);
4534 %}
4535 ins_pipe( pipe_slow );
4536 %}
4537
4538 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
4539 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
4540 match(Set dst (AndV src1 src2));
4541 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
4542 ins_encode %{
4543 bool vector256 = false;
4544 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4545 %}
4546 ins_pipe( pipe_slow );
4547 %}
4548
4549 instruct vand8B(vecD dst, vecD src) %{
4550 predicate(n->as_Vector()->length_in_bytes() == 8);
4551 match(Set dst (AndV dst src));
4552 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
4553 ins_encode %{
4554 __ pand($dst$$XMMRegister, $src$$XMMRegister);
4555 %}
4556 ins_pipe( pipe_slow );
4557 %}
4558
4559 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
4560 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
4561 match(Set dst (AndV src1 src2));
4562 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
4563 ins_encode %{
4564 bool vector256 = false;
4565 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4566 %}
4567 ins_pipe( pipe_slow );
4568 %}
4569
4570 instruct vand16B(vecX dst, vecX src) %{
4571 predicate(n->as_Vector()->length_in_bytes() == 16);
4572 match(Set dst (AndV dst src));
4573 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
4574 ins_encode %{
4575 __ pand($dst$$XMMRegister, $src$$XMMRegister);
4576 %}
4577 ins_pipe( pipe_slow );
4578 %}
4579
4580 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
4581 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4582 match(Set dst (AndV src1 src2));
4583 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
4584 ins_encode %{
4585 bool vector256 = false;
4586 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4587 %}
4588 ins_pipe( pipe_slow );
4589 %}
4590
4591 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
4592 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4593 match(Set dst (AndV src (LoadVector mem)));
4594 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
4595 ins_encode %{
4596 bool vector256 = false;
4597 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4598 %}
4599 ins_pipe( pipe_slow );
4600 %}
4601
4602 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
4603 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4604 match(Set dst (AndV src1 src2));
4605 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
4606 ins_encode %{
4607 bool vector256 = true;
4608 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4609 %}
4610 ins_pipe( pipe_slow );
4611 %}
4612
4613 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
4614 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4615 match(Set dst (AndV src (LoadVector mem)));
4616 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
4617 ins_encode %{
4618 bool vector256 = true;
4619 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4620 %}
4621 ins_pipe( pipe_slow );
4622 %}
4623
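The _mem rules above fold the LoadVector directly into the AVX instruction's second source operand, avoiding a separate vector load and the register to hold it. A C++ intrinsics sketch of the same folding (illustrative; an AVX-enabled compiler performs it automatically):

    #include <emmintrin.h>

    // With -mavx this typically emits "vpand xmm0, xmm0, [mem]":
    // the load is folded into the AND instead of being issued separately.
    __m128i and_mem(__m128i src, const __m128i* p) {
      return _mm_and_si128(src, _mm_load_si128(p));
    }
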
4624 // --------------------------------- OR ---------------------------------------
4625
4626 instruct vor4B(vecS dst, vecS src) %{
4627 predicate(n->as_Vector()->length_in_bytes() == 4);
4628 match(Set dst (OrV dst src));
4629 format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
4630 ins_encode %{
4631 __ por($dst$$XMMRegister, $src$$XMMRegister);
4632 %}
4633 ins_pipe( pipe_slow );
4634 %}
4635
4636 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
4637 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
4638 match(Set dst (OrV src1 src2));
4639 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
4640 ins_encode %{
4641 bool vector256 = false;
4642 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4643 %}
4644 ins_pipe( pipe_slow );
4645 %}
4646
4647 instruct vor8B(vecD dst, vecD src) %{
4648 predicate(n->as_Vector()->length_in_bytes() == 8);
4649 match(Set dst (OrV dst src));
4650 format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
4651 ins_encode %{
4652 __ por($dst$$XMMRegister, $src$$XMMRegister);
4653 %}
4654 ins_pipe( pipe_slow );
4655 %}
4656
4657 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
4658 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
4659 match(Set dst (OrV src1 src2));
4660 format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
4661 ins_encode %{
4662 bool vector256 = false;
4663 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4664 %}
4665 ins_pipe( pipe_slow );
4666 %}
4667
4668 instruct vor16B(vecX dst, vecX src) %{
4669 predicate(n->as_Vector()->length_in_bytes() == 16);
4670 match(Set dst (OrV dst src));
4671 format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
4672 ins_encode %{
4673 __ por($dst$$XMMRegister, $src$$XMMRegister);
4674 %}
4675 ins_pipe( pipe_slow );
4676 %}
4677
4678 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
4679 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4680 match(Set dst (OrV src1 src2));
4681 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
4682 ins_encode %{
4683 bool vector256 = false;
4684 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4685 %}
4686 ins_pipe( pipe_slow );
4687 %}
4688
4689 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
4690 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4691 match(Set dst (OrV src (LoadVector mem)));
4692 format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
4693 ins_encode %{
4694 bool vector256 = false;
4695 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4696 %}
4697 ins_pipe( pipe_slow );
4698 %}
4699
4700 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
4701 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4702 match(Set dst (OrV src1 src2));
4703 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
4704 ins_encode %{
4705 bool vector256 = true;
4706 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4707 %}
4708 ins_pipe( pipe_slow );
4709 %}
4710
4711 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
4712 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4713 match(Set dst (OrV src (LoadVector mem)));
4714 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
4715 ins_encode %{
4716 bool vector256 = true;
4717 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4718 %}
4719 ins_pipe( pipe_slow );
4720 %}
4721
4722 // --------------------------------- XOR --------------------------------------
4723
4724 instruct vxor4B(vecS dst, vecS src) %{
4725 predicate(n->as_Vector()->length_in_bytes() == 4);
4726 match(Set dst (XorV dst src));
4727 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
4728 ins_encode %{
4729 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
4730 %}
4731 ins_pipe( pipe_slow );
4732 %}
4733
4734 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
4735 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
4736 match(Set dst (XorV src1 src2));
4737 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
4738 ins_encode %{
4739 bool vector256 = false;
4740 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4741 %}
4742 ins_pipe( pipe_slow );
4743 %}
4744
4745 instruct vxor8B(vecD dst, vecD src) %{
4746 predicate(n->as_Vector()->length_in_bytes() == 8);
4747 match(Set dst (XorV dst src));
4748 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
4749 ins_encode %{
4750 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
4751 %}
4752 ins_pipe( pipe_slow );
4753 %}
4754
4755 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
4756 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
4757 match(Set dst (XorV src1 src2));
4758 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
4759 ins_encode %{
4760 bool vector256 = false;
4761 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4762 %}
4763 ins_pipe( pipe_slow );
4764 %}
4765
4766 instruct vxor16B(vecX dst, vecX src) %{
4767 predicate(n->as_Vector()->length_in_bytes() == 16);
4768 match(Set dst (XorV dst src));
4769 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
4770 ins_encode %{
4771 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
4772 %}
4773 ins_pipe( pipe_slow );
4774 %}
4775
4776 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
4777 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4778 match(Set dst (XorV src1 src2));
4779 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
4780 ins_encode %{
4781 bool vector256 = false;
4782 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4783 %}
4784 ins_pipe( pipe_slow );
4785 %}
4786
4787 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
4788 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4789 match(Set dst (XorV src (LoadVector mem)));
4790 format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
4791 ins_encode %{
4792 bool vector256 = false;
4793 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4794 %}
4795 ins_pipe( pipe_slow );
4796 %}
4797
4798 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
4799 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4800 match(Set dst (XorV src1 src2));
4801 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
4802 ins_encode %{
4803 bool vector256 = true;
4804 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4805 %}
4806 ins_pipe( pipe_slow );
4807 %}
4808
4809 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
4810 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4811 match(Set dst (XorV src (LoadVector mem)));
4812 format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
4813 ins_encode %{
4814 bool vector256 = true;
4815 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4816 %}
4817 ins_pipe( pipe_slow );
4818 %}
4819