Mercurial > hg > truffle
comparison src/cpu/x86/vm/x86.ad @ 6614:006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
Summary: Added asm encoding and mach nodes for vector arithmetic instructions on x86.
Reviewed-by: roland
author | kvn |
---|---|
date | Mon, 20 Aug 2012 09:07:21 -0700 |
parents | 2c368ea3e844 |
children | da91efe96a93 |
comparison
equal
deleted
inserted
replaced
6594:d5ec46c7da5c | 6614:006050192a5a |
---|---|
497 0, 0/*abio*/, | 497 0, 0/*abio*/, |
498 Op_RegP /* Return address */, 0, /* the memories */ | 498 Op_RegP /* Return address */, 0, /* the memories */ |
499 Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD, | 499 Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD, |
500 0 /*bottom*/ | 500 0 /*bottom*/ |
501 }; | 501 }; |
502 | |
503 const bool Matcher::match_rule_supported(int opcode) { | |
504 if (!has_match_rule(opcode)) | |
505 return false; | |
506 | |
507 switch (opcode) { | |
508 case Op_PopCountI: | |
509 case Op_PopCountL: | |
510 if (!UsePopCountInstruction) | |
511 return false; | |
512 case Op_MulVI: | |
513 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX | |
514 return false; | |
515 break; | |
516 } | |
517 | |
518 return true; // Per default match rules are supported. | |
519 } | |
502 | 520 |
503 // Max vector size in bytes. 0 if not supported. | 521 // Max vector size in bytes. 0 if not supported. |
504 const int Matcher::vector_width_in_bytes(BasicType bt) { | 522 const int Matcher::vector_width_in_bytes(BasicType bt) { |
505 assert(is_java_primitive(bt), "only primitive type vectors"); | 523 assert(is_java_primitive(bt), "only primitive type vectors"); |
506 if (UseSSE < 2) return 0; | 524 if (UseSSE < 2) return 0; |
1437 predicate(UseAVX > 0); | 1455 predicate(UseAVX > 0); |
1438 match(Set dst (AbsF src)); | 1456 match(Set dst (AbsF src)); |
1439 ins_cost(150); | 1457 ins_cost(150); |
1440 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} | 1458 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} |
1441 ins_encode %{ | 1459 ins_encode %{ |
1460 bool vector256 = false; | |
1442 __ vandps($dst$$XMMRegister, $src$$XMMRegister, | 1461 __ vandps($dst$$XMMRegister, $src$$XMMRegister, |
1443 ExternalAddress(float_signmask())); | 1462 ExternalAddress(float_signmask()), vector256); |
1444 %} | 1463 %} |
1445 ins_pipe(pipe_slow); | 1464 ins_pipe(pipe_slow); |
1446 %} | 1465 %} |
1447 | 1466 |
1448 instruct absD_reg(regD dst) %{ | 1467 instruct absD_reg(regD dst) %{ |
1462 match(Set dst (AbsD src)); | 1481 match(Set dst (AbsD src)); |
1463 ins_cost(150); | 1482 ins_cost(150); |
1464 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" | 1483 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" |
1465 "# abs double by sign masking" %} | 1484 "# abs double by sign masking" %} |
1466 ins_encode %{ | 1485 ins_encode %{ |
1486 bool vector256 = false; | |
1467 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, | 1487 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, |
1468 ExternalAddress(double_signmask())); | 1488 ExternalAddress(double_signmask()), vector256); |
1469 %} | 1489 %} |
1470 ins_pipe(pipe_slow); | 1490 ins_pipe(pipe_slow); |
1471 %} | 1491 %} |
1472 | 1492 |
1473 instruct negF_reg(regF dst) %{ | 1493 instruct negF_reg(regF dst) %{ |
1485 predicate(UseAVX > 0); | 1505 predicate(UseAVX > 0); |
1486 match(Set dst (NegF src)); | 1506 match(Set dst (NegF src)); |
1487 ins_cost(150); | 1507 ins_cost(150); |
1488 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} | 1508 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} |
1489 ins_encode %{ | 1509 ins_encode %{ |
1510 bool vector256 = false; | |
1490 __ vxorps($dst$$XMMRegister, $src$$XMMRegister, | 1511 __ vxorps($dst$$XMMRegister, $src$$XMMRegister, |
1491 ExternalAddress(float_signflip())); | 1512 ExternalAddress(float_signflip()), vector256); |
1492 %} | 1513 %} |
1493 ins_pipe(pipe_slow); | 1514 ins_pipe(pipe_slow); |
1494 %} | 1515 %} |
1495 | 1516 |
1496 instruct negD_reg(regD dst) %{ | 1517 instruct negD_reg(regD dst) %{ |
1510 match(Set dst (NegD src)); | 1531 match(Set dst (NegD src)); |
1511 ins_cost(150); | 1532 ins_cost(150); |
1512 format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" | 1533 format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" |
1513 "# neg double by sign flipping" %} | 1534 "# neg double by sign flipping" %} |
1514 ins_encode %{ | 1535 ins_encode %{ |
1536 bool vector256 = false; | |
1515 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, | 1537 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, |
1516 ExternalAddress(double_signflip())); | 1538 ExternalAddress(double_signflip()), vector256); |
1517 %} | 1539 %} |
1518 ins_pipe(pipe_slow); | 1540 ins_pipe(pipe_slow); |
1519 %} | 1541 %} |
1520 | 1542 |
1521 instruct sqrtF_reg(regF dst, regF src) %{ | 1543 instruct sqrtF_reg(regF dst, regF src) %{ |
2380 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); | 2402 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); |
2381 %} | 2403 %} |
2382 ins_pipe( fpu_reg_reg ); | 2404 ins_pipe( fpu_reg_reg ); |
2383 %} | 2405 %} |
2384 | 2406 |
2407 // ====================VECTOR ARITHMETIC======================================= | |
2408 | |
2409 // --------------------------------- ADD -------------------------------------- | |
2410 | |
2411 // Bytes vector add | |
2412 instruct vadd4B(vecS dst, vecS src) %{ | |
2413 predicate(n->as_Vector()->length() == 4); | |
2414 match(Set dst (AddVB dst src)); | |
2415 format %{ "paddb $dst,$src\t! add packed4B" %} | |
2416 ins_encode %{ | |
2417 __ paddb($dst$$XMMRegister, $src$$XMMRegister); | |
2418 %} | |
2419 ins_pipe( pipe_slow ); | |
2420 %} | |
2421 | |
2422 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ | |
2423 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
2424 match(Set dst (AddVB src1 src2)); | |
2425 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} | |
2426 ins_encode %{ | |
2427 bool vector256 = false; | |
2428 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2429 %} | |
2430 ins_pipe( pipe_slow ); | |
2431 %} | |
2432 | |
2433 instruct vadd8B(vecD dst, vecD src) %{ | |
2434 predicate(n->as_Vector()->length() == 8); | |
2435 match(Set dst (AddVB dst src)); | |
2436 format %{ "paddb $dst,$src\t! add packed8B" %} | |
2437 ins_encode %{ | |
2438 __ paddb($dst$$XMMRegister, $src$$XMMRegister); | |
2439 %} | |
2440 ins_pipe( pipe_slow ); | |
2441 %} | |
2442 | |
2443 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ | |
2444 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
2445 match(Set dst (AddVB src1 src2)); | |
2446 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} | |
2447 ins_encode %{ | |
2448 bool vector256 = false; | |
2449 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2450 %} | |
2451 ins_pipe( pipe_slow ); | |
2452 %} | |
2453 | |
2454 instruct vadd16B(vecX dst, vecX src) %{ | |
2455 predicate(n->as_Vector()->length() == 16); | |
2456 match(Set dst (AddVB dst src)); | |
2457 format %{ "paddb $dst,$src\t! add packed16B" %} | |
2458 ins_encode %{ | |
2459 __ paddb($dst$$XMMRegister, $src$$XMMRegister); | |
2460 %} | |
2461 ins_pipe( pipe_slow ); | |
2462 %} | |
2463 | |
2464 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ | |
2465 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); | |
2466 match(Set dst (AddVB src1 src2)); | |
2467 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} | |
2468 ins_encode %{ | |
2469 bool vector256 = false; | |
2470 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2471 %} | |
2472 ins_pipe( pipe_slow ); | |
2473 %} | |
2474 | |
2475 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ | |
2476 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); | |
2477 match(Set dst (AddVB src (LoadVector mem))); | |
2478 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} | |
2479 ins_encode %{ | |
2480 bool vector256 = false; | |
2481 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
2482 %} | |
2483 ins_pipe( pipe_slow ); | |
2484 %} | |
2485 | |
2486 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ | |
2487 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); | |
2488 match(Set dst (AddVB src1 src2)); | |
2489 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} | |
2490 ins_encode %{ | |
2491 bool vector256 = true; | |
2492 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2493 %} | |
2494 ins_pipe( pipe_slow ); | |
2495 %} | |
2496 | |
2497 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ | |
2498 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); | |
2499 match(Set dst (AddVB src (LoadVector mem))); | |
2500 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} | |
2501 ins_encode %{ | |
2502 bool vector256 = true; | |
2503 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
2504 %} | |
2505 ins_pipe( pipe_slow ); | |
2506 %} | |
2507 | |
2508 // Shorts/Chars vector add | |
2509 instruct vadd2S(vecS dst, vecS src) %{ | |
2510 predicate(n->as_Vector()->length() == 2); | |
2511 match(Set dst (AddVS dst src)); | |
2512 format %{ "paddw $dst,$src\t! add packed2S" %} | |
2513 ins_encode %{ | |
2514 __ paddw($dst$$XMMRegister, $src$$XMMRegister); | |
2515 %} | |
2516 ins_pipe( pipe_slow ); | |
2517 %} | |
2518 | |
2519 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ | |
2520 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
2521 match(Set dst (AddVS src1 src2)); | |
2522 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} | |
2523 ins_encode %{ | |
2524 bool vector256 = false; | |
2525 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2526 %} | |
2527 ins_pipe( pipe_slow ); | |
2528 %} | |
2529 | |
2530 instruct vadd4S(vecD dst, vecD src) %{ | |
2531 predicate(n->as_Vector()->length() == 4); | |
2532 match(Set dst (AddVS dst src)); | |
2533 format %{ "paddw $dst,$src\t! add packed4S" %} | |
2534 ins_encode %{ | |
2535 __ paddw($dst$$XMMRegister, $src$$XMMRegister); | |
2536 %} | |
2537 ins_pipe( pipe_slow ); | |
2538 %} | |
2539 | |
2540 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ | |
2541 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
2542 match(Set dst (AddVS src1 src2)); | |
2543 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} | |
2544 ins_encode %{ | |
2545 bool vector256 = false; | |
2546 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2547 %} | |
2548 ins_pipe( pipe_slow ); | |
2549 %} | |
2550 | |
2551 instruct vadd8S(vecX dst, vecX src) %{ | |
2552 predicate(n->as_Vector()->length() == 8); | |
2553 match(Set dst (AddVS dst src)); | |
2554 format %{ "paddw $dst,$src\t! add packed8S" %} | |
2555 ins_encode %{ | |
2556 __ paddw($dst$$XMMRegister, $src$$XMMRegister); | |
2557 %} | |
2558 ins_pipe( pipe_slow ); | |
2559 %} | |
2560 | |
2561 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ | |
2562 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
2563 match(Set dst (AddVS src1 src2)); | |
2564 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} | |
2565 ins_encode %{ | |
2566 bool vector256 = false; | |
2567 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2568 %} | |
2569 ins_pipe( pipe_slow ); | |
2570 %} | |
2571 | |
2572 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ | |
2573 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
2574 match(Set dst (AddVS src (LoadVector mem))); | |
2575 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} | |
2576 ins_encode %{ | |
2577 bool vector256 = false; | |
2578 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
2579 %} | |
2580 ins_pipe( pipe_slow ); | |
2581 %} | |
2582 | |
2583 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ | |
2584 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); | |
2585 match(Set dst (AddVS src1 src2)); | |
2586 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} | |
2587 ins_encode %{ | |
2588 bool vector256 = true; | |
2589 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2590 %} | |
2591 ins_pipe( pipe_slow ); | |
2592 %} | |
2593 | |
2594 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ | |
2595 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); | |
2596 match(Set dst (AddVS src (LoadVector mem))); | |
2597 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} | |
2598 ins_encode %{ | |
2599 bool vector256 = true; | |
2600 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
2601 %} | |
2602 ins_pipe( pipe_slow ); | |
2603 %} | |
2604 | |
2605 // Integers vector add | |
2606 instruct vadd2I(vecD dst, vecD src) %{ | |
2607 predicate(n->as_Vector()->length() == 2); | |
2608 match(Set dst (AddVI dst src)); | |
2609 format %{ "paddd $dst,$src\t! add packed2I" %} | |
2610 ins_encode %{ | |
2611 __ paddd($dst$$XMMRegister, $src$$XMMRegister); | |
2612 %} | |
2613 ins_pipe( pipe_slow ); | |
2614 %} | |
2615 | |
2616 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ | |
2617 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
2618 match(Set dst (AddVI src1 src2)); | |
2619 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} | |
2620 ins_encode %{ | |
2621 bool vector256 = false; | |
2622 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2623 %} | |
2624 ins_pipe( pipe_slow ); | |
2625 %} | |
2626 | |
2627 instruct vadd4I(vecX dst, vecX src) %{ | |
2628 predicate(n->as_Vector()->length() == 4); | |
2629 match(Set dst (AddVI dst src)); | |
2630 format %{ "paddd $dst,$src\t! add packed4I" %} | |
2631 ins_encode %{ | |
2632 __ paddd($dst$$XMMRegister, $src$$XMMRegister); | |
2633 %} | |
2634 ins_pipe( pipe_slow ); | |
2635 %} | |
2636 | |
2637 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ | |
2638 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
2639 match(Set dst (AddVI src1 src2)); | |
2640 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} | |
2641 ins_encode %{ | |
2642 bool vector256 = false; | |
2643 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2644 %} | |
2645 ins_pipe( pipe_slow ); | |
2646 %} | |
2647 | |
2648 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ | |
2649 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
2650 match(Set dst (AddVI src (LoadVector mem))); | |
2651 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} | |
2652 ins_encode %{ | |
2653 bool vector256 = false; | |
2654 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
2655 %} | |
2656 ins_pipe( pipe_slow ); | |
2657 %} | |
2658 | |
2659 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ | |
2660 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); | |
2661 match(Set dst (AddVI src1 src2)); | |
2662 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} | |
2663 ins_encode %{ | |
2664 bool vector256 = true; | |
2665 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2666 %} | |
2667 ins_pipe( pipe_slow ); | |
2668 %} | |
2669 | |
2670 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ | |
2671 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); | |
2672 match(Set dst (AddVI src (LoadVector mem))); | |
2673 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} | |
2674 ins_encode %{ | |
2675 bool vector256 = true; | |
2676 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
2677 %} | |
2678 ins_pipe( pipe_slow ); | |
2679 %} | |
2680 | |
2681 // Longs vector add | |
2682 instruct vadd2L(vecX dst, vecX src) %{ | |
2683 predicate(n->as_Vector()->length() == 2); | |
2684 match(Set dst (AddVL dst src)); | |
2685 format %{ "paddq $dst,$src\t! add packed2L" %} | |
2686 ins_encode %{ | |
2687 __ paddq($dst$$XMMRegister, $src$$XMMRegister); | |
2688 %} | |
2689 ins_pipe( pipe_slow ); | |
2690 %} | |
2691 | |
2692 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ | |
2693 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
2694 match(Set dst (AddVL src1 src2)); | |
2695 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} | |
2696 ins_encode %{ | |
2697 bool vector256 = false; | |
2698 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2699 %} | |
2700 ins_pipe( pipe_slow ); | |
2701 %} | |
2702 | |
2703 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ | |
2704 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
2705 match(Set dst (AddVL src (LoadVector mem))); | |
2706 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} | |
2707 ins_encode %{ | |
2708 bool vector256 = false; | |
2709 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
2710 %} | |
2711 ins_pipe( pipe_slow ); | |
2712 %} | |
2713 | |
2714 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ | |
2715 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); | |
2716 match(Set dst (AddVL src1 src2)); | |
2717 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} | |
2718 ins_encode %{ | |
2719 bool vector256 = true; | |
2720 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2721 %} | |
2722 ins_pipe( pipe_slow ); | |
2723 %} | |
2724 | |
2725 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ | |
2726 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); | |
2727 match(Set dst (AddVL src (LoadVector mem))); | |
2728 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} | |
2729 ins_encode %{ | |
2730 bool vector256 = true; | |
2731 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
2732 %} | |
2733 ins_pipe( pipe_slow ); | |
2734 %} | |
2735 | |
2736 // Floats vector add | |
2737 instruct vadd2F(vecD dst, vecD src) %{ | |
2738 predicate(n->as_Vector()->length() == 2); | |
2739 match(Set dst (AddVF dst src)); | |
2740 format %{ "addps $dst,$src\t! add packed2F" %} | |
2741 ins_encode %{ | |
2742 __ addps($dst$$XMMRegister, $src$$XMMRegister); | |
2743 %} | |
2744 ins_pipe( pipe_slow ); | |
2745 %} | |
2746 | |
2747 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ | |
2748 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
2749 match(Set dst (AddVF src1 src2)); | |
2750 format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} | |
2751 ins_encode %{ | |
2752 bool vector256 = false; | |
2753 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2754 %} | |
2755 ins_pipe( pipe_slow ); | |
2756 %} | |
2757 | |
2758 instruct vadd4F(vecX dst, vecX src) %{ | |
2759 predicate(n->as_Vector()->length() == 4); | |
2760 match(Set dst (AddVF dst src)); | |
2761 format %{ "addps $dst,$src\t! add packed4F" %} | |
2762 ins_encode %{ | |
2763 __ addps($dst$$XMMRegister, $src$$XMMRegister); | |
2764 %} | |
2765 ins_pipe( pipe_slow ); | |
2766 %} | |
2767 | |
2768 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ | |
2769 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
2770 match(Set dst (AddVF src1 src2)); | |
2771 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} | |
2772 ins_encode %{ | |
2773 bool vector256 = false; | |
2774 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2775 %} | |
2776 ins_pipe( pipe_slow ); | |
2777 %} | |
2778 | |
2779 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ | |
2780 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
2781 match(Set dst (AddVF src (LoadVector mem))); | |
2782 format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} | |
2783 ins_encode %{ | |
2784 bool vector256 = false; | |
2785 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
2786 %} | |
2787 ins_pipe( pipe_slow ); | |
2788 %} | |
2789 | |
2790 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ | |
2791 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
2792 match(Set dst (AddVF src1 src2)); | |
2793 format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} | |
2794 ins_encode %{ | |
2795 bool vector256 = true; | |
2796 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2797 %} | |
2798 ins_pipe( pipe_slow ); | |
2799 %} | |
2800 | |
2801 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ | |
2802 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
2803 match(Set dst (AddVF src (LoadVector mem))); | |
2804 format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} | |
2805 ins_encode %{ | |
2806 bool vector256 = true; | |
2807 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
2808 %} | |
2809 ins_pipe( pipe_slow ); | |
2810 %} | |
2811 | |
2812 // Doubles vector add | |
2813 instruct vadd2D(vecX dst, vecX src) %{ | |
2814 predicate(n->as_Vector()->length() == 2); | |
2815 match(Set dst (AddVD dst src)); | |
2816 format %{ "addpd $dst,$src\t! add packed2D" %} | |
2817 ins_encode %{ | |
2818 __ addpd($dst$$XMMRegister, $src$$XMMRegister); | |
2819 %} | |
2820 ins_pipe( pipe_slow ); | |
2821 %} | |
2822 | |
2823 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ | |
2824 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
2825 match(Set dst (AddVD src1 src2)); | |
2826 format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} | |
2827 ins_encode %{ | |
2828 bool vector256 = false; | |
2829 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2830 %} | |
2831 ins_pipe( pipe_slow ); | |
2832 %} | |
2833 | |
2834 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ | |
2835 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
2836 match(Set dst (AddVD src (LoadVector mem))); | |
2837 format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} | |
2838 ins_encode %{ | |
2839 bool vector256 = false; | |
2840 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
2841 %} | |
2842 ins_pipe( pipe_slow ); | |
2843 %} | |
2844 | |
2845 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ | |
2846 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
2847 match(Set dst (AddVD src1 src2)); | |
2848 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} | |
2849 ins_encode %{ | |
2850 bool vector256 = true; | |
2851 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2852 %} | |
2853 ins_pipe( pipe_slow ); | |
2854 %} | |
2855 | |
2856 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ | |
2857 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
2858 match(Set dst (AddVD src (LoadVector mem))); | |
2859 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} | |
2860 ins_encode %{ | |
2861 bool vector256 = true; | |
2862 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
2863 %} | |
2864 ins_pipe( pipe_slow ); | |
2865 %} | |
2866 | |
2867 // --------------------------------- SUB -------------------------------------- | |
2868 | |
2869 // Bytes vector sub | |
2870 instruct vsub4B(vecS dst, vecS src) %{ | |
2871 predicate(n->as_Vector()->length() == 4); | |
2872 match(Set dst (SubVB dst src)); | |
2873 format %{ "psubb $dst,$src\t! sub packed4B" %} | |
2874 ins_encode %{ | |
2875 __ psubb($dst$$XMMRegister, $src$$XMMRegister); | |
2876 %} | |
2877 ins_pipe( pipe_slow ); | |
2878 %} | |
2879 | |
2880 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ | |
2881 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
2882 match(Set dst (SubVB src1 src2)); | |
2883 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} | |
2884 ins_encode %{ | |
2885 bool vector256 = false; | |
2886 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2887 %} | |
2888 ins_pipe( pipe_slow ); | |
2889 %} | |
2890 | |
2891 instruct vsub8B(vecD dst, vecD src) %{ | |
2892 predicate(n->as_Vector()->length() == 8); | |
2893 match(Set dst (SubVB dst src)); | |
2894 format %{ "psubb $dst,$src\t! sub packed8B" %} | |
2895 ins_encode %{ | |
2896 __ psubb($dst$$XMMRegister, $src$$XMMRegister); | |
2897 %} | |
2898 ins_pipe( pipe_slow ); | |
2899 %} | |
2900 | |
2901 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ | |
2902 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
2903 match(Set dst (SubVB src1 src2)); | |
2904 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} | |
2905 ins_encode %{ | |
2906 bool vector256 = false; | |
2907 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2908 %} | |
2909 ins_pipe( pipe_slow ); | |
2910 %} | |
2911 | |
2912 instruct vsub16B(vecX dst, vecX src) %{ | |
2913 predicate(n->as_Vector()->length() == 16); | |
2914 match(Set dst (SubVB dst src)); | |
2915 format %{ "psubb $dst,$src\t! sub packed16B" %} | |
2916 ins_encode %{ | |
2917 __ psubb($dst$$XMMRegister, $src$$XMMRegister); | |
2918 %} | |
2919 ins_pipe( pipe_slow ); | |
2920 %} | |
2921 | |
2922 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ | |
2923 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); | |
2924 match(Set dst (SubVB src1 src2)); | |
2925 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} | |
2926 ins_encode %{ | |
2927 bool vector256 = false; | |
2928 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2929 %} | |
2930 ins_pipe( pipe_slow ); | |
2931 %} | |
2932 | |
2933 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{ | |
2934 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); | |
2935 match(Set dst (SubVB src (LoadVector mem))); | |
2936 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} | |
2937 ins_encode %{ | |
2938 bool vector256 = false; | |
2939 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
2940 %} | |
2941 ins_pipe( pipe_slow ); | |
2942 %} | |
2943 | |
2944 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{ | |
2945 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); | |
2946 match(Set dst (SubVB src1 src2)); | |
2947 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} | |
2948 ins_encode %{ | |
2949 bool vector256 = true; | |
2950 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2951 %} | |
2952 ins_pipe( pipe_slow ); | |
2953 %} | |
2954 | |
2955 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{ | |
2956 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); | |
2957 match(Set dst (SubVB src (LoadVector mem))); | |
2958 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} | |
2959 ins_encode %{ | |
2960 bool vector256 = true; | |
2961 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
2962 %} | |
2963 ins_pipe( pipe_slow ); | |
2964 %} | |
2965 | |
2966 // Shorts/Chars vector sub | |
2967 instruct vsub2S(vecS dst, vecS src) %{ | |
2968 predicate(n->as_Vector()->length() == 2); | |
2969 match(Set dst (SubVS dst src)); | |
2970 format %{ "psubw $dst,$src\t! sub packed2S" %} | |
2971 ins_encode %{ | |
2972 __ psubw($dst$$XMMRegister, $src$$XMMRegister); | |
2973 %} | |
2974 ins_pipe( pipe_slow ); | |
2975 %} | |
2976 | |
2977 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ | |
2978 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
2979 match(Set dst (SubVS src1 src2)); | |
2980 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} | |
2981 ins_encode %{ | |
2982 bool vector256 = false; | |
2983 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
2984 %} | |
2985 ins_pipe( pipe_slow ); | |
2986 %} | |
2987 | |
2988 instruct vsub4S(vecD dst, vecD src) %{ | |
2989 predicate(n->as_Vector()->length() == 4); | |
2990 match(Set dst (SubVS dst src)); | |
2991 format %{ "psubw $dst,$src\t! sub packed4S" %} | |
2992 ins_encode %{ | |
2993 __ psubw($dst$$XMMRegister, $src$$XMMRegister); | |
2994 %} | |
2995 ins_pipe( pipe_slow ); | |
2996 %} | |
2997 | |
2998 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ | |
2999 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3000 match(Set dst (SubVS src1 src2)); | |
3001 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} | |
3002 ins_encode %{ | |
3003 bool vector256 = false; | |
3004 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3005 %} | |
3006 ins_pipe( pipe_slow ); | |
3007 %} | |
3008 | |
3009 instruct vsub8S(vecX dst, vecX src) %{ | |
3010 predicate(n->as_Vector()->length() == 8); | |
3011 match(Set dst (SubVS dst src)); | |
3012 format %{ "psubw $dst,$src\t! sub packed8S" %} | |
3013 ins_encode %{ | |
3014 __ psubw($dst$$XMMRegister, $src$$XMMRegister); | |
3015 %} | |
3016 ins_pipe( pipe_slow ); | |
3017 %} | |
3018 | |
3019 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ | |
3020 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
3021 match(Set dst (SubVS src1 src2)); | |
3022 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} | |
3023 ins_encode %{ | |
3024 bool vector256 = false; | |
3025 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3026 %} | |
3027 ins_pipe( pipe_slow ); | |
3028 %} | |
3029 | |
3030 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ | |
3031 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
3032 match(Set dst (SubVS src (LoadVector mem))); | |
3033 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} | |
3034 ins_encode %{ | |
3035 bool vector256 = false; | |
3036 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3037 %} | |
3038 ins_pipe( pipe_slow ); | |
3039 %} | |
3040 | |
3041 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ | |
3042 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); | |
3043 match(Set dst (SubVS src1 src2)); | |
3044 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} | |
3045 ins_encode %{ | |
3046 bool vector256 = true; | |
3047 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3048 %} | |
3049 ins_pipe( pipe_slow ); | |
3050 %} | |
3051 | |
3052 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ | |
3053 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); | |
3054 match(Set dst (SubVS src (LoadVector mem))); | |
3055 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} | |
3056 ins_encode %{ | |
3057 bool vector256 = true; | |
3058 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3059 %} | |
3060 ins_pipe( pipe_slow ); | |
3061 %} | |
3062 | |
// Integer (32-bit lane) vector subtraction, SubVI.  Three variant shapes:
//  - base form (vsubNI):      two-operand destructive SSE2 psubd, dst -= src;
//  - _reg form (vsubNI_reg):  three-operand non-destructive AVX vpsubd,
//                             dst = src1 - src2 (predicate UseAVX > 0);
//  - _mem form (vsubNI_mem):  AVX vpsubd with the second operand folded
//                             from memory via (LoadVector mem).
// vector256 chooses VEX.128 (false) vs VEX.256 (true) encoding; the 8-lane
// 256-bit forms require UseAVX > 1 (AVX2) for 256-bit integer ops.
3063 // Integers vector sub | |
3064 instruct vsub2I(vecD dst, vecD src) %{ | |
3065 predicate(n->as_Vector()->length() == 2); | |
3066 match(Set dst (SubVI dst src)); | |
3067 format %{ "psubd $dst,$src\t! sub packed2I" %} | |
3068 ins_encode %{ | |
3069 __ psubd($dst$$XMMRegister, $src$$XMMRegister); | |
3070 %} | |
3071 ins_pipe( pipe_slow ); | |
3072 %} | |
3073 | |
3074 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ | |
3075 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
3076 match(Set dst (SubVI src1 src2)); | |
3077 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} | |
3078 ins_encode %{ | |
3079 bool vector256 = false; | |
3080 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3081 %} | |
3082 ins_pipe( pipe_slow ); | |
3083 %} | |
3084 | |
3085 instruct vsub4I(vecX dst, vecX src) %{ | |
3086 predicate(n->as_Vector()->length() == 4); | |
3087 match(Set dst (SubVI dst src)); | |
3088 format %{ "psubd $dst,$src\t! sub packed4I" %} | |
3089 ins_encode %{ | |
3090 __ psubd($dst$$XMMRegister, $src$$XMMRegister); | |
3091 %} | |
3092 ins_pipe( pipe_slow ); | |
3093 %} | |
3094 | |
3095 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ | |
3096 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3097 match(Set dst (SubVI src1 src2)); | |
3098 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} | |
3099 ins_encode %{ | |
3100 bool vector256 = false; | |
3101 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3102 %} | |
3103 ins_pipe( pipe_slow ); | |
3104 %} | |
3105 | |
3106 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ | |
3107 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3108 match(Set dst (SubVI src (LoadVector mem))); | |
3109 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} | |
3110 ins_encode %{ | |
3111 bool vector256 = false; | |
3112 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3113 %} | |
3114 ins_pipe( pipe_slow ); | |
3115 %} | |
3116 | |
// 256-bit (8 x int) forms: AVX2 only, hence UseAVX > 1.
3117 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ | |
3118 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); | |
3119 match(Set dst (SubVI src1 src2)); | |
3120 format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %} | |
3121 ins_encode %{ | |
3122 bool vector256 = true; | |
3123 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3124 %} | |
3125 ins_pipe( pipe_slow ); | |
3126 %} | |
3127 | |
3128 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ | |
3129 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); | |
3130 match(Set dst (SubVI src (LoadVector mem))); | |
3131 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} | |
3132 ins_encode %{ | |
3133 bool vector256 = true; | |
3134 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3135 %} | |
3136 ins_pipe( pipe_slow ); | |
3137 %} | |
3138 | |
// Long (64-bit lane) vector subtraction, SubVL, via psubq/vpsubq.
// Same variant scheme as the int forms above: destructive SSE2 base form,
// AVX three-operand _reg form, AVX memory-folded _mem form; the 4-lane
// 256-bit forms require UseAVX > 1 (AVX2) for 256-bit integer arithmetic.
3139 // Longs vector sub | |
3140 instruct vsub2L(vecX dst, vecX src) %{ | |
3141 predicate(n->as_Vector()->length() == 2); | |
3142 match(Set dst (SubVL dst src)); | |
3143 format %{ "psubq $dst,$src\t! sub packed2L" %} | |
3144 ins_encode %{ | |
3145 __ psubq($dst$$XMMRegister, $src$$XMMRegister); | |
3146 %} | |
3147 ins_pipe( pipe_slow ); | |
3148 %} | |
3149 | |
3150 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ | |
3151 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
3152 match(Set dst (SubVL src1 src2)); | |
3153 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} | |
3154 ins_encode %{ | |
3155 bool vector256 = false; | |
3156 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3157 %} | |
3158 ins_pipe( pipe_slow ); | |
3159 %} | |
3160 | |
3161 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ | |
3162 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
3163 match(Set dst (SubVL src (LoadVector mem))); | |
3164 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} | |
3165 ins_encode %{ | |
3166 bool vector256 = false; | |
3167 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3168 %} | |
3169 ins_pipe( pipe_slow ); | |
3170 %} | |
3171 | |
3172 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ | |
3173 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); | |
3174 match(Set dst (SubVL src1 src2)); | |
3175 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} | |
3176 ins_encode %{ | |
3177 bool vector256 = true; | |
3178 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3179 %} | |
3180 ins_pipe( pipe_slow ); | |
3181 %} | |
3182 | |
3183 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ | |
3184 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); | |
3185 match(Set dst (SubVL src (LoadVector mem))); | |
3186 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} | |
3187 ins_encode %{ | |
3188 bool vector256 = true; | |
3189 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3190 %} | |
3191 ins_pipe( pipe_slow ); | |
3192 %} | |
3193 | |
// Single-precision float vector subtraction, SubVF, via subps/vsubps.
// Unlike the integer forms, the 8-lane 256-bit float forms need only
// UseAVX > 0: 256-bit *floating-point* arithmetic is already in AVX1.
// Note there is no vsub2F_mem form and the 2-lane AVX form is
// register-only (operands live in the low half of an XMM register).
3194 // Floats vector sub | |
3195 instruct vsub2F(vecD dst, vecD src) %{ | |
3196 predicate(n->as_Vector()->length() == 2); | |
3197 match(Set dst (SubVF dst src)); | |
3198 format %{ "subps $dst,$src\t! sub packed2F" %} | |
3199 ins_encode %{ | |
3200 __ subps($dst$$XMMRegister, $src$$XMMRegister); | |
3201 %} | |
3202 ins_pipe( pipe_slow ); | |
3203 %} | |
3204 | |
3205 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ | |
3206 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
3207 match(Set dst (SubVF src1 src2)); | |
3208 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} | |
3209 ins_encode %{ | |
3210 bool vector256 = false; | |
3211 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3212 %} | |
3213 ins_pipe( pipe_slow ); | |
3214 %} | |
3215 | |
3216 instruct vsub4F(vecX dst, vecX src) %{ | |
3217 predicate(n->as_Vector()->length() == 4); | |
3218 match(Set dst (SubVF dst src)); | |
3219 format %{ "subps $dst,$src\t! sub packed4F" %} | |
3220 ins_encode %{ | |
3221 __ subps($dst$$XMMRegister, $src$$XMMRegister); | |
3222 %} | |
3223 ins_pipe( pipe_slow ); | |
3224 %} | |
3225 | |
3226 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ | |
3227 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3228 match(Set dst (SubVF src1 src2)); | |
3229 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} | |
3230 ins_encode %{ | |
3231 bool vector256 = false; | |
3232 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3233 %} | |
3234 ins_pipe( pipe_slow ); | |
3235 %} | |
3236 | |
3237 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ | |
3238 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3239 match(Set dst (SubVF src (LoadVector mem))); | |
3240 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} | |
3241 ins_encode %{ | |
3242 bool vector256 = false; | |
3243 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3244 %} | |
3245 ins_pipe( pipe_slow ); | |
3246 %} | |
3247 | |
3248 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ | |
3249 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
3250 match(Set dst (SubVF src1 src2)); | |
3251 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} | |
3252 ins_encode %{ | |
3253 bool vector256 = true; | |
3254 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3255 %} | |
3256 ins_pipe( pipe_slow ); | |
3257 %} | |
3258 | |
3259 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ | |
3260 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
3261 match(Set dst (SubVF src (LoadVector mem))); | |
3262 format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %} | |
3263 ins_encode %{ | |
3264 bool vector256 = true; | |
3265 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3266 %} | |
3267 ins_pipe( pipe_slow ); | |
3268 %} | |
3269 | |
// Double-precision vector subtraction, SubVD, via subpd/vsubpd.
// 256-bit (4 x double) forms need only UseAVX > 0 since AVX1 provides
// 256-bit floating-point arithmetic.  Variant scheme matches the float
// section: destructive SSE2 base, AVX 3-operand _reg, AVX _mem fold.
3270 // Doubles vector sub | |
3271 instruct vsub2D(vecX dst, vecX src) %{ | |
3272 predicate(n->as_Vector()->length() == 2); | |
3273 match(Set dst (SubVD dst src)); | |
3274 format %{ "subpd $dst,$src\t! sub packed2D" %} | |
3275 ins_encode %{ | |
3276 __ subpd($dst$$XMMRegister, $src$$XMMRegister); | |
3277 %} | |
3278 ins_pipe( pipe_slow ); | |
3279 %} | |
3280 | |
3281 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ | |
3282 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
3283 match(Set dst (SubVD src1 src2)); | |
3284 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} | |
3285 ins_encode %{ | |
3286 bool vector256 = false; | |
3287 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3288 %} | |
3289 ins_pipe( pipe_slow ); | |
3290 %} | |
3291 | |
3292 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ | |
3293 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
3294 match(Set dst (SubVD src (LoadVector mem))); | |
3295 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} | |
3296 ins_encode %{ | |
3297 bool vector256 = false; | |
3298 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3299 %} | |
3300 ins_pipe( pipe_slow ); | |
3301 %} | |
3302 | |
3303 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ | |
3304 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3305 match(Set dst (SubVD src1 src2)); | |
3306 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} | |
3307 ins_encode %{ | |
3308 bool vector256 = true; | |
3309 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3310 %} | |
3311 ins_pipe( pipe_slow ); | |
3312 %} | |
3313 | |
3314 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ | |
3315 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3316 match(Set dst (SubVD src (LoadVector mem))); | |
3317 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} | |
3318 ins_encode %{ | |
3319 bool vector256 = true; | |
3320 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3321 %} | |
3322 ins_pipe( pipe_slow ); | |
3323 %} | |
3324 | |
// --- MUL section: short/char (16-bit lane) vector multiply, MulVS, via
// pmullw/vpmullw (low 16 bits of each 16x16 product).  pmullw is SSE2, so
// the base forms carry no UseSSE predicate; AVX forms are 3-operand, and
// the 16-lane 256-bit forms require UseAVX > 1 (AVX2 integer ops).
3325 // --------------------------------- MUL -------------------------------------- | |
3326 | |
3327 // Shorts/Chars vector mul | |
3328 instruct vmul2S(vecS dst, vecS src) %{ | |
3329 predicate(n->as_Vector()->length() == 2); | |
3330 match(Set dst (MulVS dst src)); | |
3331 format %{ "pmullw $dst,$src\t! mul packed2S" %} | |
3332 ins_encode %{ | |
3333 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); | |
3334 %} | |
3335 ins_pipe( pipe_slow ); | |
3336 %} | |
3337 | |
3338 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ | |
3339 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
3340 match(Set dst (MulVS src1 src2)); | |
3341 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} | |
3342 ins_encode %{ | |
3343 bool vector256 = false; | |
3344 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3345 %} | |
3346 ins_pipe( pipe_slow ); | |
3347 %} | |
3348 | |
3349 instruct vmul4S(vecD dst, vecD src) %{ | |
3350 predicate(n->as_Vector()->length() == 4); | |
3351 match(Set dst (MulVS dst src)); | |
3352 format %{ "pmullw $dst,$src\t! mul packed4S" %} | |
3353 ins_encode %{ | |
3354 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); | |
3355 %} | |
3356 ins_pipe( pipe_slow ); | |
3357 %} | |
3358 | |
3359 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ | |
3360 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3361 match(Set dst (MulVS src1 src2)); | |
3362 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} | |
3363 ins_encode %{ | |
3364 bool vector256 = false; | |
3365 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3366 %} | |
3367 ins_pipe( pipe_slow ); | |
3368 %} | |
3369 | |
3370 instruct vmul8S(vecX dst, vecX src) %{ | |
3371 predicate(n->as_Vector()->length() == 8); | |
3372 match(Set dst (MulVS dst src)); | |
3373 format %{ "pmullw $dst,$src\t! mul packed8S" %} | |
3374 ins_encode %{ | |
3375 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); | |
3376 %} | |
3377 ins_pipe( pipe_slow ); | |
3378 %} | |
3379 | |
3380 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ | |
3381 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
3382 match(Set dst (MulVS src1 src2)); | |
3383 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} | |
3384 ins_encode %{ | |
3385 bool vector256 = false; | |
3386 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3387 %} | |
3388 ins_pipe( pipe_slow ); | |
3389 %} | |
3390 | |
3391 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ | |
3392 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
3393 match(Set dst (MulVS src (LoadVector mem))); | |
3394 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} | |
3395 ins_encode %{ | |
3396 bool vector256 = false; | |
3397 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3398 %} | |
3399 ins_pipe( pipe_slow ); | |
3400 %} | |
3401 | |
3402 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ | |
3403 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); | |
3404 match(Set dst (MulVS src1 src2)); | |
3405 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} | |
3406 ins_encode %{ | |
3407 bool vector256 = true; | |
3408 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3409 %} | |
3410 ins_pipe( pipe_slow ); | |
3411 %} | |
3412 | |
3413 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ | |
3414 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); | |
3415 match(Set dst (MulVS src (LoadVector mem))); | |
3416 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} | |
3417 ins_encode %{ | |
3418 bool vector256 = true; | |
3419 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3420 %} | |
3421 ins_pipe( pipe_slow ); | |
3422 %} | |
3423 | |
// Integer (32-bit lane) vector multiply, MulVI, via pmulld/vpmulld.
// pmulld was introduced in SSE4.1, so the non-AVX base forms require
// UseSSE > 3 (this matches Matcher::match_rule_supported rejecting
// Op_MulVI below SSE4.1/AVX).  AVX _reg/_mem forms are 3-operand; the
// 8-lane 256-bit forms need UseAVX > 1 (AVX2 integer arithmetic).
3424 // Integers vector mul (sse4_1) | |
3425 instruct vmul2I(vecD dst, vecD src) %{ | |
3426 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); | |
3427 match(Set dst (MulVI dst src)); | |
3428 format %{ "pmulld $dst,$src\t! mul packed2I" %} | |
3429 ins_encode %{ | |
3430 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); | |
3431 %} | |
3432 ins_pipe( pipe_slow ); | |
3433 %} | |
3434 | |
3435 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ | |
3436 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
3437 match(Set dst (MulVI src1 src2)); | |
3438 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} | |
3439 ins_encode %{ | |
3440 bool vector256 = false; | |
3441 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3442 %} | |
3443 ins_pipe( pipe_slow ); | |
3444 %} | |
3445 | |
3446 instruct vmul4I(vecX dst, vecX src) %{ | |
3447 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); | |
3448 match(Set dst (MulVI dst src)); | |
3449 format %{ "pmulld $dst,$src\t! mul packed4I" %} | |
3450 ins_encode %{ | |
3451 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); | |
3452 %} | |
3453 ins_pipe( pipe_slow ); | |
3454 %} | |
3455 | |
3456 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ | |
3457 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3458 match(Set dst (MulVI src1 src2)); | |
3459 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} | |
3460 ins_encode %{ | |
3461 bool vector256 = false; | |
3462 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3463 %} | |
3464 ins_pipe( pipe_slow ); | |
3465 %} | |
3466 | |
3467 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ | |
3468 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3469 match(Set dst (MulVI src (LoadVector mem))); | |
3470 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} | |
3471 ins_encode %{ | |
3472 bool vector256 = false; | |
3473 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3474 %} | |
3475 ins_pipe( pipe_slow ); | |
3476 %} | |
3477 | |
3478 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ | |
3479 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); | |
3480 match(Set dst (MulVI src1 src2)); | |
3481 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} | |
3482 ins_encode %{ | |
3483 bool vector256 = true; | |
3484 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3485 %} | |
3486 ins_pipe( pipe_slow ); | |
3487 %} | |
3488 | |
3489 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ | |
3490 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); | |
3491 match(Set dst (MulVI src (LoadVector mem))); | |
3492 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} | |
3493 ins_encode %{ | |
3494 bool vector256 = true; | |
3495 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3496 %} | |
3497 ins_pipe( pipe_slow ); | |
3498 %} | |
3499 | |
// Single-precision float vector multiply, MulVF, via mulps/vmulps.
// 256-bit (8 x float) forms need only UseAVX > 0 — AVX1 covers 256-bit
// FP arithmetic.  Same variant scheme as the other FP sections.
3500 // Floats vector mul | |
3501 instruct vmul2F(vecD dst, vecD src) %{ | |
3502 predicate(n->as_Vector()->length() == 2); | |
3503 match(Set dst (MulVF dst src)); | |
3504 format %{ "mulps $dst,$src\t! mul packed2F" %} | |
3505 ins_encode %{ | |
3506 __ mulps($dst$$XMMRegister, $src$$XMMRegister); | |
3507 %} | |
3508 ins_pipe( pipe_slow ); | |
3509 %} | |
3510 | |
3511 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ | |
3512 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
3513 match(Set dst (MulVF src1 src2)); | |
3514 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} | |
3515 ins_encode %{ | |
3516 bool vector256 = false; | |
3517 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3518 %} | |
3519 ins_pipe( pipe_slow ); | |
3520 %} | |
3521 | |
3522 instruct vmul4F(vecX dst, vecX src) %{ | |
3523 predicate(n->as_Vector()->length() == 4); | |
3524 match(Set dst (MulVF dst src)); | |
3525 format %{ "mulps $dst,$src\t! mul packed4F" %} | |
3526 ins_encode %{ | |
3527 __ mulps($dst$$XMMRegister, $src$$XMMRegister); | |
3528 %} | |
3529 ins_pipe( pipe_slow ); | |
3530 %} | |
3531 | |
3532 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ | |
3533 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3534 match(Set dst (MulVF src1 src2)); | |
3535 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} | |
3536 ins_encode %{ | |
3537 bool vector256 = false; | |
3538 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3539 %} | |
3540 ins_pipe( pipe_slow ); | |
3541 %} | |
3542 | |
3543 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ | |
3544 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3545 match(Set dst (MulVF src (LoadVector mem))); | |
3546 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} | |
3547 ins_encode %{ | |
3548 bool vector256 = false; | |
3549 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3550 %} | |
3551 ins_pipe( pipe_slow ); | |
3552 %} | |
3553 | |
3554 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ | |
3555 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
3556 match(Set dst (MulVF src1 src2)); | |
3557 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} | |
3558 ins_encode %{ | |
3559 bool vector256 = true; | |
3560 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3561 %} | |
3562 ins_pipe( pipe_slow ); | |
3563 %} | |
3564 | |
3565 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ | |
3566 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
3567 match(Set dst (MulVF src (LoadVector mem))); | |
3568 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} | |
3569 ins_encode %{ | |
3570 bool vector256 = true; | |
3571 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3572 %} | |
3573 ins_pipe( pipe_slow ); | |
3574 %} | |
3575 | |
// Double-precision vector multiply, MulVD, via mulpd/vmulpd.
// 256-bit (4 x double) forms need only UseAVX > 0 (AVX1 FP arithmetic).
3576 // Doubles vector mul | |
3577 instruct vmul2D(vecX dst, vecX src) %{ | |
3578 predicate(n->as_Vector()->length() == 2); | |
3579 match(Set dst (MulVD dst src)); | |
3580 format %{ "mulpd $dst,$src\t! mul packed2D" %} | |
3581 ins_encode %{ | |
3582 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); | |
3583 %} | |
3584 ins_pipe( pipe_slow ); | |
3585 %} | |
3586 | |
3587 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ | |
3588 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
3589 match(Set dst (MulVD src1 src2)); | |
3590 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} | |
3591 ins_encode %{ | |
3592 bool vector256 = false; | |
3593 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3594 %} | |
3595 ins_pipe( pipe_slow ); | |
3596 %} | |
3597 | |
3598 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ | |
3599 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
3600 match(Set dst (MulVD src (LoadVector mem))); | |
3601 format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %} | |
3602 ins_encode %{ | |
3603 bool vector256 = false; | |
3604 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3605 %} | |
3606 ins_pipe( pipe_slow ); | |
3607 %} | |
3608 | |
3609 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ | |
3610 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3611 match(Set dst (MulVD src1 src2)); | |
3612 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} | |
3613 ins_encode %{ | |
3614 bool vector256 = true; | |
3615 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3616 %} | |
3617 ins_pipe( pipe_slow ); | |
3618 %} | |
3619 | |
3620 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ | |
3621 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3622 match(Set dst (MulVD src (LoadVector mem))); | |
3623 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} | |
3624 ins_encode %{ | |
3625 bool vector256 = true; | |
3626 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3627 %} | |
3628 ins_pipe( pipe_slow ); | |
3629 %} | |
3630 | |
// --- DIV section: single-precision float vector divide, DivVF, via
// divps/vdivps.  Only FP division is provided here — x86 has no packed
// integer divide instruction, so no DivVI/DivVL rules exist.  256-bit
// forms need only UseAVX > 0 (AVX1 FP arithmetic).
3631 // --------------------------------- DIV -------------------------------------- | |
3632 | |
3633 // Floats vector div | |
3634 instruct vdiv2F(vecD dst, vecD src) %{ | |
3635 predicate(n->as_Vector()->length() == 2); | |
3636 match(Set dst (DivVF dst src)); | |
3637 format %{ "divps $dst,$src\t! div packed2F" %} | |
3638 ins_encode %{ | |
3639 __ divps($dst$$XMMRegister, $src$$XMMRegister); | |
3640 %} | |
3641 ins_pipe( pipe_slow ); | |
3642 %} | |
3643 | |
3644 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ | |
3645 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
3646 match(Set dst (DivVF src1 src2)); | |
3647 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} | |
3648 ins_encode %{ | |
3649 bool vector256 = false; | |
3650 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3651 %} | |
3652 ins_pipe( pipe_slow ); | |
3653 %} | |
3654 | |
3655 instruct vdiv4F(vecX dst, vecX src) %{ | |
3656 predicate(n->as_Vector()->length() == 4); | |
3657 match(Set dst (DivVF dst src)); | |
3658 format %{ "divps $dst,$src\t! div packed4F" %} | |
3659 ins_encode %{ | |
3660 __ divps($dst$$XMMRegister, $src$$XMMRegister); | |
3661 %} | |
3662 ins_pipe( pipe_slow ); | |
3663 %} | |
3664 | |
3665 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ | |
3666 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3667 match(Set dst (DivVF src1 src2)); | |
3668 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} | |
3669 ins_encode %{ | |
3670 bool vector256 = false; | |
3671 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3672 %} | |
3673 ins_pipe( pipe_slow ); | |
3674 %} | |
3675 | |
3676 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ | |
3677 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3678 match(Set dst (DivVF src (LoadVector mem))); | |
3679 format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} | |
3680 ins_encode %{ | |
3681 bool vector256 = false; | |
3682 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3683 %} | |
3684 ins_pipe( pipe_slow ); | |
3685 %} | |
3686 | |
3687 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ | |
3688 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
3689 match(Set dst (DivVF src1 src2)); | |
3690 format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} | |
3691 ins_encode %{ | |
3692 bool vector256 = true; | |
3693 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3694 %} | |
3695 ins_pipe( pipe_slow ); | |
3696 %} | |
3697 | |
3698 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ | |
3699 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
3700 match(Set dst (DivVF src (LoadVector mem))); | |
3701 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} | |
3702 ins_encode %{ | |
3703 bool vector256 = true; | |
3704 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3705 %} | |
3706 ins_pipe( pipe_slow ); | |
3707 %} | |
3708 | |
// Double-precision vector divide, DivVD, via divpd/vdivpd.
// 256-bit (4 x double) forms need only UseAVX > 0 (AVX1 FP arithmetic).
3709 // Doubles vector div | |
3710 instruct vdiv2D(vecX dst, vecX src) %{ | |
3711 predicate(n->as_Vector()->length() == 2); | |
3712 match(Set dst (DivVD dst src)); | |
3713 format %{ "divpd $dst,$src\t! div packed2D" %} | |
3714 ins_encode %{ | |
3715 __ divpd($dst$$XMMRegister, $src$$XMMRegister); | |
3716 %} | |
3717 ins_pipe( pipe_slow ); | |
3718 %} | |
3719 | |
3720 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ | |
3721 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
3722 match(Set dst (DivVD src1 src2)); | |
3723 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} | |
3724 ins_encode %{ | |
3725 bool vector256 = false; | |
3726 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3727 %} | |
3728 ins_pipe( pipe_slow ); | |
3729 %} | |
3730 | |
3731 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ | |
3732 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
3733 match(Set dst (DivVD src (LoadVector mem))); | |
3734 format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} | |
3735 ins_encode %{ | |
3736 bool vector256 = false; | |
3737 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3738 %} | |
3739 ins_pipe( pipe_slow ); | |
3740 %} | |
3741 | |
3742 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ | |
3743 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3744 match(Set dst (DivVD src1 src2)); | |
3745 format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} | |
3746 ins_encode %{ | |
3747 bool vector256 = true; | |
3748 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
3749 %} | |
3750 ins_pipe( pipe_slow ); | |
3751 %} | |
3752 | |
3753 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ | |
3754 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3755 match(Set dst (DivVD src (LoadVector mem))); | |
3756 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} | |
3757 ins_encode %{ | |
3758 bool vector256 = true; | |
3759 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
3760 %} | |
3761 ins_pipe( pipe_slow ); | |
3762 %} | |
3763 | |
// --- LeftShift section: short/char (16-bit lane) shift-left, LShiftVS,
// via psllw/vpsllw.  Each width has four variants:
//  - base:        destructive shift, count held in an XMM register
//                 (note the 'regF shift' operand is used as an XMM count
//                 register — all 16-bit lanes shift by the same amount);
//  - _imm:        destructive shift by an 8-bit immediate (immI8);
//  - _reg:        AVX 3-operand, XMM-register count (UseAVX > 0);
//  - _reg_imm:    AVX 3-operand, immediate count.
// The 16-lane 256-bit forms require UseAVX > 1 (AVX2 integer ops) and
// have no destructive variant.  vector256 selects VEX.128 vs VEX.256.
3764 // ------------------------------ LeftShift ----------------------------------- | |
3765 | |
3766 // Shorts/Chars vector left shift | |
3767 instruct vsll2S(vecS dst, regF shift) %{ | |
3768 predicate(n->as_Vector()->length() == 2); | |
3769 match(Set dst (LShiftVS dst shift)); | |
3770 format %{ "psllw $dst,$shift\t! left shift packed2S" %} | |
3771 ins_encode %{ | |
3772 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); | |
3773 %} | |
3774 ins_pipe( pipe_slow ); | |
3775 %} | |
3776 | |
3777 instruct vsll2S_imm(vecS dst, immI8 shift) %{ | |
3778 predicate(n->as_Vector()->length() == 2); | |
3779 match(Set dst (LShiftVS dst shift)); | |
3780 format %{ "psllw $dst,$shift\t! left shift packed2S" %} | |
3781 ins_encode %{ | |
3782 __ psllw($dst$$XMMRegister, (int)$shift$$constant); | |
3783 %} | |
3784 ins_pipe( pipe_slow ); | |
3785 %} | |
3786 | |
3787 instruct vsll2S_reg(vecS dst, vecS src, regF shift) %{ | |
3788 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
3789 match(Set dst (LShiftVS src shift)); | |
3790 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} | |
3791 ins_encode %{ | |
3792 bool vector256 = false; | |
3793 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
3794 %} | |
3795 ins_pipe( pipe_slow ); | |
3796 %} | |
3797 | |
3798 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ | |
3799 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
3800 match(Set dst (LShiftVS src shift)); | |
3801 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} | |
3802 ins_encode %{ | |
3803 bool vector256 = false; | |
3804 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
3805 %} | |
3806 ins_pipe( pipe_slow ); | |
3807 %} | |
3808 | |
3809 instruct vsll4S(vecD dst, regF shift) %{ | |
3810 predicate(n->as_Vector()->length() == 4); | |
3811 match(Set dst (LShiftVS dst shift)); | |
3812 format %{ "psllw $dst,$shift\t! left shift packed4S" %} | |
3813 ins_encode %{ | |
3814 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); | |
3815 %} | |
3816 ins_pipe( pipe_slow ); | |
3817 %} | |
3818 | |
3819 instruct vsll4S_imm(vecD dst, immI8 shift) %{ | |
3820 predicate(n->as_Vector()->length() == 4); | |
3821 match(Set dst (LShiftVS dst shift)); | |
3822 format %{ "psllw $dst,$shift\t! left shift packed4S" %} | |
3823 ins_encode %{ | |
3824 __ psllw($dst$$XMMRegister, (int)$shift$$constant); | |
3825 %} | |
3826 ins_pipe( pipe_slow ); | |
3827 %} | |
3828 | |
3829 instruct vsll4S_reg(vecD dst, vecD src, regF shift) %{ | |
3830 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3831 match(Set dst (LShiftVS src shift)); | |
3832 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} | |
3833 ins_encode %{ | |
3834 bool vector256 = false; | |
3835 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
3836 %} | |
3837 ins_pipe( pipe_slow ); | |
3838 %} | |
3839 | |
3840 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ | |
3841 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3842 match(Set dst (LShiftVS src shift)); | |
3843 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} | |
3844 ins_encode %{ | |
3845 bool vector256 = false; | |
3846 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
3847 %} | |
3848 ins_pipe( pipe_slow ); | |
3849 %} | |
3850 | |
3851 instruct vsll8S(vecX dst, regF shift) %{ | |
3852 predicate(n->as_Vector()->length() == 8); | |
3853 match(Set dst (LShiftVS dst shift)); | |
3854 format %{ "psllw $dst,$shift\t! left shift packed8S" %} | |
3855 ins_encode %{ | |
3856 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); | |
3857 %} | |
3858 ins_pipe( pipe_slow ); | |
3859 %} | |
3860 | |
3861 instruct vsll8S_imm(vecX dst, immI8 shift) %{ | |
3862 predicate(n->as_Vector()->length() == 8); | |
3863 match(Set dst (LShiftVS dst shift)); | |
3864 format %{ "psllw $dst,$shift\t! left shift packed8S" %} | |
3865 ins_encode %{ | |
3866 __ psllw($dst$$XMMRegister, (int)$shift$$constant); | |
3867 %} | |
3868 ins_pipe( pipe_slow ); | |
3869 %} | |
3870 | |
3871 instruct vsll8S_reg(vecX dst, vecX src, regF shift) %{ | |
3872 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
3873 match(Set dst (LShiftVS src shift)); | |
3874 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} | |
3875 ins_encode %{ | |
3876 bool vector256 = false; | |
3877 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
3878 %} | |
3879 ins_pipe( pipe_slow ); | |
3880 %} | |
3881 | |
3882 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ | |
3883 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
3884 match(Set dst (LShiftVS src shift)); | |
3885 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} | |
3886 ins_encode %{ | |
3887 bool vector256 = false; | |
3888 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
3889 %} | |
3890 ins_pipe( pipe_slow ); | |
3891 %} | |
3892 | |
3893 instruct vsll16S_reg(vecY dst, vecY src, regF shift) %{ | |
3894 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); | |
3895 match(Set dst (LShiftVS src shift)); | |
3896 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} | |
3897 ins_encode %{ | |
3898 bool vector256 = true; | |
3899 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
3900 %} | |
3901 ins_pipe( pipe_slow ); | |
3902 %} | |
3903 | |
3904 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ | |
3905 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); | |
3906 match(Set dst (LShiftVS src shift)); | |
3907 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} | |
3908 ins_encode %{ | |
3909 bool vector256 = true; | |
3910 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
3911 %} | |
3912 ins_pipe( pipe_slow ); | |
3913 %} | |
3914 | |
3915 // Integers vector left shift | |
3916 instruct vsll2I(vecD dst, regF shift) %{ | |
3917 predicate(n->as_Vector()->length() == 2); | |
3918 match(Set dst (LShiftVI dst shift)); | |
3919 format %{ "pslld $dst,$shift\t! left shift packed2I" %} | |
3920 ins_encode %{ | |
3921 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); | |
3922 %} | |
3923 ins_pipe( pipe_slow ); | |
3924 %} | |
3925 | |
3926 instruct vsll2I_imm(vecD dst, immI8 shift) %{ | |
3927 predicate(n->as_Vector()->length() == 2); | |
3928 match(Set dst (LShiftVI dst shift)); | |
3929 format %{ "pslld $dst,$shift\t! left shift packed2I" %} | |
3930 ins_encode %{ | |
3931 __ pslld($dst$$XMMRegister, (int)$shift$$constant); | |
3932 %} | |
3933 ins_pipe( pipe_slow ); | |
3934 %} | |
3935 | |
3936 instruct vsll2I_reg(vecD dst, vecD src, regF shift) %{ | |
3937 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
3938 match(Set dst (LShiftVI src shift)); | |
3939 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} | |
3940 ins_encode %{ | |
3941 bool vector256 = false; | |
3942 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
3943 %} | |
3944 ins_pipe( pipe_slow ); | |
3945 %} | |
3946 | |
3947 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ | |
3948 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
3949 match(Set dst (LShiftVI src shift)); | |
3950 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} | |
3951 ins_encode %{ | |
3952 bool vector256 = false; | |
3953 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
3954 %} | |
3955 ins_pipe( pipe_slow ); | |
3956 %} | |
3957 | |
3958 instruct vsll4I(vecX dst, regF shift) %{ | |
3959 predicate(n->as_Vector()->length() == 4); | |
3960 match(Set dst (LShiftVI dst shift)); | |
3961 format %{ "pslld $dst,$shift\t! left shift packed4I" %} | |
3962 ins_encode %{ | |
3963 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); | |
3964 %} | |
3965 ins_pipe( pipe_slow ); | |
3966 %} | |
3967 | |
3968 instruct vsll4I_imm(vecX dst, immI8 shift) %{ | |
3969 predicate(n->as_Vector()->length() == 4); | |
3970 match(Set dst (LShiftVI dst shift)); | |
3971 format %{ "pslld $dst,$shift\t! left shift packed4I" %} | |
3972 ins_encode %{ | |
3973 __ pslld($dst$$XMMRegister, (int)$shift$$constant); | |
3974 %} | |
3975 ins_pipe( pipe_slow ); | |
3976 %} | |
3977 | |
3978 instruct vsll4I_reg(vecX dst, vecX src, regF shift) %{ | |
3979 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3980 match(Set dst (LShiftVI src shift)); | |
3981 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} | |
3982 ins_encode %{ | |
3983 bool vector256 = false; | |
3984 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
3985 %} | |
3986 ins_pipe( pipe_slow ); | |
3987 %} | |
3988 | |
3989 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ | |
3990 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
3991 match(Set dst (LShiftVI src shift)); | |
3992 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} | |
3993 ins_encode %{ | |
3994 bool vector256 = false; | |
3995 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
3996 %} | |
3997 ins_pipe( pipe_slow ); | |
3998 %} | |
3999 | |
4000 instruct vsll8I_reg(vecY dst, vecY src, regF shift) %{ | |
4001 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); | |
4002 match(Set dst (LShiftVI src shift)); | |
4003 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} | |
4004 ins_encode %{ | |
4005 bool vector256 = true; | |
4006 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
4007 %} | |
4008 ins_pipe( pipe_slow ); | |
4009 %} | |
4010 | |
4011 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ | |
4012 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); | |
4013 match(Set dst (LShiftVI src shift)); | |
4014 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} | |
4015 ins_encode %{ | |
4016 bool vector256 = true; | |
4017 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
4018 %} | |
4019 ins_pipe( pipe_slow ); | |
4020 %} | |
4021 | |
4022 // Longs vector left shift | |
4023 instruct vsll2L(vecX dst, regF shift) %{ | |
4024 predicate(n->as_Vector()->length() == 2); | |
4025 match(Set dst (LShiftVL dst shift)); | |
4026 format %{ "psllq $dst,$shift\t! left shift packed2L" %} | |
4027 ins_encode %{ | |
4028 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); | |
4029 %} | |
4030 ins_pipe( pipe_slow ); | |
4031 %} | |
4032 | |
4033 instruct vsll2L_imm(vecX dst, immI8 shift) %{ | |
4034 predicate(n->as_Vector()->length() == 2); | |
4035 match(Set dst (LShiftVL dst shift)); | |
4036 format %{ "psllq $dst,$shift\t! left shift packed2L" %} | |
4037 ins_encode %{ | |
4038 __ psllq($dst$$XMMRegister, (int)$shift$$constant); | |
4039 %} | |
4040 ins_pipe( pipe_slow ); | |
4041 %} | |
4042 | |
4043 instruct vsll2L_reg(vecX dst, vecX src, regF shift) %{ | |
4044 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
4045 match(Set dst (LShiftVL src shift)); | |
4046 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} | |
4047 ins_encode %{ | |
4048 bool vector256 = false; | |
4049 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
4050 %} | |
4051 ins_pipe( pipe_slow ); | |
4052 %} | |
4053 | |
4054 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ | |
4055 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
4056 match(Set dst (LShiftVL src shift)); | |
4057 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} | |
4058 ins_encode %{ | |
4059 bool vector256 = false; | |
4060 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
4061 %} | |
4062 ins_pipe( pipe_slow ); | |
4063 %} | |
4064 | |
4065 instruct vsll4L_reg(vecY dst, vecY src, regF shift) %{ | |
4066 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); | |
4067 match(Set dst (LShiftVL src shift)); | |
4068 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} | |
4069 ins_encode %{ | |
4070 bool vector256 = true; | |
4071 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
4072 %} | |
4073 ins_pipe( pipe_slow ); | |
4074 %} | |
4075 | |
4076 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ | |
4077 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); | |
4078 match(Set dst (LShiftVL src shift)); | |
4079 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} | |
4080 ins_encode %{ | |
4081 bool vector256 = true; | |
4082 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
4083 %} | |
4084 ins_pipe( pipe_slow ); | |
4085 %} | |
4086 | |
4087 // ----------------------- LogicalRightShift ----------------------------------- | |
4088 | |
4089 // Shorts/Chars vector logical right shift produces incorrect Java result | |
4090 // for negative data because java code convert short value into int with | |
4091 // sign extension before a shift. | |
4092 | |
4093 // Integers vector logical right shift | |
4094 instruct vsrl2I(vecD dst, regF shift) %{ | |
4095 predicate(n->as_Vector()->length() == 2); | |
4096 match(Set dst (URShiftVI dst shift)); | |
4097 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} | |
4098 ins_encode %{ | |
4099 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); | |
4100 %} | |
4101 ins_pipe( pipe_slow ); | |
4102 %} | |
4103 | |
4104 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ | |
4105 predicate(n->as_Vector()->length() == 2); | |
4106 match(Set dst (URShiftVI dst shift)); | |
4107 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} | |
4108 ins_encode %{ | |
4109 __ psrld($dst$$XMMRegister, (int)$shift$$constant); | |
4110 %} | |
4111 ins_pipe( pipe_slow ); | |
4112 %} | |
4113 | |
4114 instruct vsrl2I_reg(vecD dst, vecD src, regF shift) %{ | |
4115 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
4116 match(Set dst (URShiftVI src shift)); | |
4117 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} | |
4118 ins_encode %{ | |
4119 bool vector256 = false; | |
4120 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
4121 %} | |
4122 ins_pipe( pipe_slow ); | |
4123 %} | |
4124 | |
4125 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ | |
4126 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
4127 match(Set dst (URShiftVI src shift)); | |
4128 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} | |
4129 ins_encode %{ | |
4130 bool vector256 = false; | |
4131 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
4132 %} | |
4133 ins_pipe( pipe_slow ); | |
4134 %} | |
4135 | |
4136 instruct vsrl4I(vecX dst, regF shift) %{ | |
4137 predicate(n->as_Vector()->length() == 4); | |
4138 match(Set dst (URShiftVI dst shift)); | |
4139 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} | |
4140 ins_encode %{ | |
4141 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); | |
4142 %} | |
4143 ins_pipe( pipe_slow ); | |
4144 %} | |
4145 | |
4146 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ | |
4147 predicate(n->as_Vector()->length() == 4); | |
4148 match(Set dst (URShiftVI dst shift)); | |
4149 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} | |
4150 ins_encode %{ | |
4151 __ psrld($dst$$XMMRegister, (int)$shift$$constant); | |
4152 %} | |
4153 ins_pipe( pipe_slow ); | |
4154 %} | |
4155 | |
4156 instruct vsrl4I_reg(vecX dst, vecX src, regF shift) %{ | |
4157 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
4158 match(Set dst (URShiftVI src shift)); | |
4159 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} | |
4160 ins_encode %{ | |
4161 bool vector256 = false; | |
4162 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
4163 %} | |
4164 ins_pipe( pipe_slow ); | |
4165 %} | |
4166 | |
4167 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ | |
4168 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
4169 match(Set dst (URShiftVI src shift)); | |
4170 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} | |
4171 ins_encode %{ | |
4172 bool vector256 = false; | |
4173 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
4174 %} | |
4175 ins_pipe( pipe_slow ); | |
4176 %} | |
4177 | |
4178 instruct vsrl8I_reg(vecY dst, vecY src, regF shift) %{ | |
4179 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); | |
4180 match(Set dst (URShiftVI src shift)); | |
4181 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} | |
4182 ins_encode %{ | |
4183 bool vector256 = true; | |
4184 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
4185 %} | |
4186 ins_pipe( pipe_slow ); | |
4187 %} | |
4188 | |
4189 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ | |
4190 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); | |
4191 match(Set dst (URShiftVI src shift)); | |
4192 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} | |
4193 ins_encode %{ | |
4194 bool vector256 = true; | |
4195 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
4196 %} | |
4197 ins_pipe( pipe_slow ); | |
4198 %} | |
4199 | |
4200 // Longs vector logical right shift | |
4201 instruct vsrl2L(vecX dst, regF shift) %{ | |
4202 predicate(n->as_Vector()->length() == 2); | |
4203 match(Set dst (URShiftVL dst shift)); | |
4204 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} | |
4205 ins_encode %{ | |
4206 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); | |
4207 %} | |
4208 ins_pipe( pipe_slow ); | |
4209 %} | |
4210 | |
4211 instruct vsrl2L_imm(vecX dst, immI8 shift) %{ | |
4212 predicate(n->as_Vector()->length() == 2); | |
4213 match(Set dst (URShiftVL dst shift)); | |
4214 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} | |
4215 ins_encode %{ | |
4216 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); | |
4217 %} | |
4218 ins_pipe( pipe_slow ); | |
4219 %} | |
4220 | |
4221 instruct vsrl2L_reg(vecX dst, vecX src, regF shift) %{ | |
4222 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
4223 match(Set dst (URShiftVL src shift)); | |
4224 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} | |
4225 ins_encode %{ | |
4226 bool vector256 = false; | |
4227 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
4228 %} | |
4229 ins_pipe( pipe_slow ); | |
4230 %} | |
4231 | |
4232 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ | |
4233 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
4234 match(Set dst (URShiftVL src shift)); | |
4235 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} | |
4236 ins_encode %{ | |
4237 bool vector256 = false; | |
4238 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
4239 %} | |
4240 ins_pipe( pipe_slow ); | |
4241 %} | |
4242 | |
4243 instruct vsrl4L_reg(vecY dst, vecY src, regF shift) %{ | |
4244 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); | |
4245 match(Set dst (URShiftVL src shift)); | |
4246 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} | |
4247 ins_encode %{ | |
4248 bool vector256 = true; | |
4249 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
4250 %} | |
4251 ins_pipe( pipe_slow ); | |
4252 %} | |
4253 | |
4254 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ | |
4255 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); | |
4256 match(Set dst (URShiftVL src shift)); | |
4257 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} | |
4258 ins_encode %{ | |
4259 bool vector256 = true; | |
4260 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
4261 %} | |
4262 ins_pipe( pipe_slow ); | |
4263 %} | |
4264 | |
4265 // ------------------- ArithmeticRightShift ----------------------------------- | |
4266 | |
4267 // Shorts/Chars vector arithmetic right shift | |
4268 instruct vsra2S(vecS dst, regF shift) %{ | |
4269 predicate(n->as_Vector()->length() == 2); | |
4270 match(Set dst (RShiftVS dst shift)); | |
4271 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} | |
4272 ins_encode %{ | |
4273 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); | |
4274 %} | |
4275 ins_pipe( pipe_slow ); | |
4276 %} | |
4277 | |
4278 instruct vsra2S_imm(vecS dst, immI8 shift) %{ | |
4279 predicate(n->as_Vector()->length() == 2); | |
4280 match(Set dst (RShiftVS dst shift)); | |
4281 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} | |
4282 ins_encode %{ | |
4283 __ psraw($dst$$XMMRegister, (int)$shift$$constant); | |
4284 %} | |
4285 ins_pipe( pipe_slow ); | |
4286 %} | |
4287 | |
4288 instruct vsra2S_reg(vecS dst, vecS src, regF shift) %{ | |
4289 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
4290 match(Set dst (RShiftVS src shift)); | |
4291 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} | |
4292 ins_encode %{ | |
4293 bool vector256 = false; | |
4294 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
4295 %} | |
4296 ins_pipe( pipe_slow ); | |
4297 %} | |
4298 | |
4299 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ | |
4300 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
4301 match(Set dst (RShiftVS src shift)); | |
4302 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} | |
4303 ins_encode %{ | |
4304 bool vector256 = false; | |
4305 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
4306 %} | |
4307 ins_pipe( pipe_slow ); | |
4308 %} | |
4309 | |
4310 instruct vsra4S(vecD dst, regF shift) %{ | |
4311 predicate(n->as_Vector()->length() == 4); | |
4312 match(Set dst (RShiftVS dst shift)); | |
4313 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} | |
4314 ins_encode %{ | |
4315 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); | |
4316 %} | |
4317 ins_pipe( pipe_slow ); | |
4318 %} | |
4319 | |
4320 instruct vsra4S_imm(vecD dst, immI8 shift) %{ | |
4321 predicate(n->as_Vector()->length() == 4); | |
4322 match(Set dst (RShiftVS dst shift)); | |
4323 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} | |
4324 ins_encode %{ | |
4325 __ psraw($dst$$XMMRegister, (int)$shift$$constant); | |
4326 %} | |
4327 ins_pipe( pipe_slow ); | |
4328 %} | |
4329 | |
4330 instruct vsra4S_reg(vecD dst, vecD src, regF shift) %{ | |
4331 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
4332 match(Set dst (RShiftVS src shift)); | |
4333 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} | |
4334 ins_encode %{ | |
4335 bool vector256 = false; | |
4336 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
4337 %} | |
4338 ins_pipe( pipe_slow ); | |
4339 %} | |
4340 | |
4341 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ | |
4342 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
4343 match(Set dst (RShiftVS src shift)); | |
4344 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} | |
4345 ins_encode %{ | |
4346 bool vector256 = false; | |
4347 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
4348 %} | |
4349 ins_pipe( pipe_slow ); | |
4350 %} | |
4351 | |
4352 instruct vsra8S(vecX dst, regF shift) %{ | |
4353 predicate(n->as_Vector()->length() == 8); | |
4354 match(Set dst (RShiftVS dst shift)); | |
4355 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} | |
4356 ins_encode %{ | |
4357 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); | |
4358 %} | |
4359 ins_pipe( pipe_slow ); | |
4360 %} | |
4361 | |
4362 instruct vsra8S_imm(vecX dst, immI8 shift) %{ | |
4363 predicate(n->as_Vector()->length() == 8); | |
4364 match(Set dst (RShiftVS dst shift)); | |
4365 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} | |
4366 ins_encode %{ | |
4367 __ psraw($dst$$XMMRegister, (int)$shift$$constant); | |
4368 %} | |
4369 ins_pipe( pipe_slow ); | |
4370 %} | |
4371 | |
4372 instruct vsra8S_reg(vecX dst, vecX src, regF shift) %{ | |
4373 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
4374 match(Set dst (RShiftVS src shift)); | |
4375 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} | |
4376 ins_encode %{ | |
4377 bool vector256 = false; | |
4378 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
4379 %} | |
4380 ins_pipe( pipe_slow ); | |
4381 %} | |
4382 | |
4383 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ | |
4384 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); | |
4385 match(Set dst (RShiftVS src shift)); | |
4386 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} | |
4387 ins_encode %{ | |
4388 bool vector256 = false; | |
4389 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
4390 %} | |
4391 ins_pipe( pipe_slow ); | |
4392 %} | |
4393 | |
4394 instruct vsra16S_reg(vecY dst, vecY src, regF shift) %{ | |
4395 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); | |
4396 match(Set dst (RShiftVS src shift)); | |
4397 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} | |
4398 ins_encode %{ | |
4399 bool vector256 = true; | |
4400 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
4401 %} | |
4402 ins_pipe( pipe_slow ); | |
4403 %} | |
4404 | |
4405 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ | |
4406 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); | |
4407 match(Set dst (RShiftVS src shift)); | |
4408 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} | |
4409 ins_encode %{ | |
4410 bool vector256 = true; | |
4411 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
4412 %} | |
4413 ins_pipe( pipe_slow ); | |
4414 %} | |
4415 | |
4416 // Integers vector arithmetic right shift | |
4417 instruct vsra2I(vecD dst, regF shift) %{ | |
4418 predicate(n->as_Vector()->length() == 2); | |
4419 match(Set dst (RShiftVI dst shift)); | |
4420 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} | |
4421 ins_encode %{ | |
4422 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); | |
4423 %} | |
4424 ins_pipe( pipe_slow ); | |
4425 %} | |
4426 | |
4427 instruct vsra2I_imm(vecD dst, immI8 shift) %{ | |
4428 predicate(n->as_Vector()->length() == 2); | |
4429 match(Set dst (RShiftVI dst shift)); | |
4430 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} | |
4431 ins_encode %{ | |
4432 __ psrad($dst$$XMMRegister, (int)$shift$$constant); | |
4433 %} | |
4434 ins_pipe( pipe_slow ); | |
4435 %} | |
4436 | |
4437 instruct vsra2I_reg(vecD dst, vecD src, regF shift) %{ | |
4438 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
4439 match(Set dst (RShiftVI src shift)); | |
4440 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} | |
4441 ins_encode %{ | |
4442 bool vector256 = false; | |
4443 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
4444 %} | |
4445 ins_pipe( pipe_slow ); | |
4446 %} | |
4447 | |
4448 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ | |
4449 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); | |
4450 match(Set dst (RShiftVI src shift)); | |
4451 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} | |
4452 ins_encode %{ | |
4453 bool vector256 = false; | |
4454 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
4455 %} | |
4456 ins_pipe( pipe_slow ); | |
4457 %} | |
4458 | |
4459 instruct vsra4I(vecX dst, regF shift) %{ | |
4460 predicate(n->as_Vector()->length() == 4); | |
4461 match(Set dst (RShiftVI dst shift)); | |
4462 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} | |
4463 ins_encode %{ | |
4464 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); | |
4465 %} | |
4466 ins_pipe( pipe_slow ); | |
4467 %} | |
4468 | |
4469 instruct vsra4I_imm(vecX dst, immI8 shift) %{ | |
4470 predicate(n->as_Vector()->length() == 4); | |
4471 match(Set dst (RShiftVI dst shift)); | |
4472 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} | |
4473 ins_encode %{ | |
4474 __ psrad($dst$$XMMRegister, (int)$shift$$constant); | |
4475 %} | |
4476 ins_pipe( pipe_slow ); | |
4477 %} | |
4478 | |
4479 instruct vsra4I_reg(vecX dst, vecX src, regF shift) %{ | |
4480 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
4481 match(Set dst (RShiftVI src shift)); | |
4482 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} | |
4483 ins_encode %{ | |
4484 bool vector256 = false; | |
4485 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
4486 %} | |
4487 ins_pipe( pipe_slow ); | |
4488 %} | |
4489 | |
4490 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ | |
4491 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); | |
4492 match(Set dst (RShiftVI src shift)); | |
4493 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} | |
4494 ins_encode %{ | |
4495 bool vector256 = false; | |
4496 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
4497 %} | |
4498 ins_pipe( pipe_slow ); | |
4499 %} | |
4500 | |
4501 instruct vsra8I_reg(vecY dst, vecY src, regF shift) %{ | |
4502 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); | |
4503 match(Set dst (RShiftVI src shift)); | |
4504 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} | |
4505 ins_encode %{ | |
4506 bool vector256 = true; | |
4507 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); | |
4508 %} | |
4509 ins_pipe( pipe_slow ); | |
4510 %} | |
4511 | |
4512 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ | |
4513 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); | |
4514 match(Set dst (RShiftVI src shift)); | |
4515 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} | |
4516 ins_encode %{ | |
4517 bool vector256 = true; | |
4518 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); | |
4519 %} | |
4520 ins_pipe( pipe_slow ); | |
4521 %} | |
4522 | |
4523 // There are no longs vector arithmetic right shift instructions. | |
4524 | |
4525 | |
4526 // --------------------------------- AND -------------------------------------- | |
4527 | |
4528 instruct vand4B(vecS dst, vecS src) %{ | |
4529 predicate(n->as_Vector()->length_in_bytes() == 4); | |
4530 match(Set dst (AndV dst src)); | |
4531 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} | |
4532 ins_encode %{ | |
4533 __ pand($dst$$XMMRegister, $src$$XMMRegister); | |
4534 %} | |
4535 ins_pipe( pipe_slow ); | |
4536 %} | |
4537 | |
4538 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ | |
4539 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); | |
4540 match(Set dst (AndV src1 src2)); | |
4541 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} | |
4542 ins_encode %{ | |
4543 bool vector256 = false; | |
4544 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
4545 %} | |
4546 ins_pipe( pipe_slow ); | |
4547 %} | |
4548 | |
4549 instruct vand8B(vecD dst, vecD src) %{ | |
4550 predicate(n->as_Vector()->length_in_bytes() == 8); | |
4551 match(Set dst (AndV dst src)); | |
4552 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %} | |
4553 ins_encode %{ | |
4554 __ pand($dst$$XMMRegister, $src$$XMMRegister); | |
4555 %} | |
4556 ins_pipe( pipe_slow ); | |
4557 %} | |
4558 | |
4559 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ | |
4560 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); | |
4561 match(Set dst (AndV src1 src2)); | |
4562 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} | |
4563 ins_encode %{ | |
4564 bool vector256 = false; | |
4565 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
4566 %} | |
4567 ins_pipe( pipe_slow ); | |
4568 %} | |
4569 | |
4570 instruct vand16B(vecX dst, vecX src) %{ | |
4571 predicate(n->as_Vector()->length_in_bytes() == 16); | |
4572 match(Set dst (AndV dst src)); | |
4573 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} | |
4574 ins_encode %{ | |
4575 __ pand($dst$$XMMRegister, $src$$XMMRegister); | |
4576 %} | |
4577 ins_pipe( pipe_slow ); | |
4578 %} | |
4579 | |
4580 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ | |
4581 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); | |
4582 match(Set dst (AndV src1 src2)); | |
4583 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} | |
4584 ins_encode %{ | |
4585 bool vector256 = false; | |
4586 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
4587 %} | |
4588 ins_pipe( pipe_slow ); | |
4589 %} | |
4590 | |
4591 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ | |
4592 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); | |
4593 match(Set dst (AndV src (LoadVector mem))); | |
4594 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %} | |
4595 ins_encode %{ | |
4596 bool vector256 = false; | |
4597 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
4598 %} | |
4599 ins_pipe( pipe_slow ); | |
4600 %} | |
4601 | |
4602 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ | |
4603 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); | |
4604 match(Set dst (AndV src1 src2)); | |
4605 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %} | |
4606 ins_encode %{ | |
4607 bool vector256 = true; | |
4608 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
4609 %} | |
4610 ins_pipe( pipe_slow ); | |
4611 %} | |
4612 | |
4613 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{ | |
4614 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); | |
4615 match(Set dst (AndV src (LoadVector mem))); | |
4616 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %} | |
4617 ins_encode %{ | |
4618 bool vector256 = true; | |
4619 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
4620 %} | |
4621 ins_pipe( pipe_slow ); | |
4622 %} | |
4623 | |
4624 // --------------------------------- OR --------------------------------------- | |
4625 | |
4626 instruct vor4B(vecS dst, vecS src) %{ | |
4627 predicate(n->as_Vector()->length_in_bytes() == 4); | |
4628 match(Set dst (OrV dst src)); | |
4629 format %{ "por $dst,$src\t! or vectors (4 bytes)" %} | |
4630 ins_encode %{ | |
4631 __ por($dst$$XMMRegister, $src$$XMMRegister); | |
4632 %} | |
4633 ins_pipe( pipe_slow ); | |
4634 %} | |
4635 | |
4636 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ | |
4637 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); | |
4638 match(Set dst (OrV src1 src2)); | |
4639 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %} | |
4640 ins_encode %{ | |
4641 bool vector256 = false; | |
4642 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
4643 %} | |
4644 ins_pipe( pipe_slow ); | |
4645 %} | |
4646 | |
4647 instruct vor8B(vecD dst, vecD src) %{ | |
4648 predicate(n->as_Vector()->length_in_bytes() == 8); | |
4649 match(Set dst (OrV dst src)); | |
4650 format %{ "por $dst,$src\t! or vectors (8 bytes)" %} | |
4651 ins_encode %{ | |
4652 __ por($dst$$XMMRegister, $src$$XMMRegister); | |
4653 %} | |
4654 ins_pipe( pipe_slow ); | |
4655 %} | |
4656 | |
4657 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ | |
4658 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); | |
4659 match(Set dst (OrV src1 src2)); | |
4660 format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %} | |
4661 ins_encode %{ | |
4662 bool vector256 = false; | |
4663 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
4664 %} | |
4665 ins_pipe( pipe_slow ); | |
4666 %} | |
4667 | |
4668 instruct vor16B(vecX dst, vecX src) %{ | |
4669 predicate(n->as_Vector()->length_in_bytes() == 16); | |
4670 match(Set dst (OrV dst src)); | |
4671 format %{ "por $dst,$src\t! or vectors (16 bytes)" %} | |
4672 ins_encode %{ | |
4673 __ por($dst$$XMMRegister, $src$$XMMRegister); | |
4674 %} | |
4675 ins_pipe( pipe_slow ); | |
4676 %} | |
4677 | |
4678 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{ | |
4679 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); | |
4680 match(Set dst (OrV src1 src2)); | |
4681 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %} | |
4682 ins_encode %{ | |
4683 bool vector256 = false; | |
4684 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); | |
4685 %} | |
4686 ins_pipe( pipe_slow ); | |
4687 %} | |
4688 | |
4689 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{ | |
4690 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); | |
4691 match(Set dst (OrV src (LoadVector mem))); | |
4692 format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %} | |
4693 ins_encode %{ | |
4694 bool vector256 = false; | |
4695 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); | |
4696 %} | |
4697 ins_pipe( pipe_slow ); | |
4698 %} | |
4699 | |
// OR of two 32-byte (256-bit) vectors.  vector256 = true selects the
// VEX.256 encoding of vpor; the predicate requires UseAVX > 1 because
// 256-bit integer operations need AVX2.
instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
4710 | |
// OR of a 32-byte vector with a memory operand (LoadVector folded into
// vpor).  UseAVX > 1: 256-bit integer operations need AVX2.
instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
4721 | |
4722 // --------------------------------- XOR -------------------------------------- | |
4723 | |
// XOR of two 4-byte vectors, SSE two-operand (destructive) form:
// dst ^= src.  Matching (XorV dst src) ties dst to the first input.
// The full 128-bit pxor is used; only the low 4 bytes are meaningful.
instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4733 | |
// XOR of two 4-byte vectors, AVX three-operand (non-destructive) form:
// dst = src1 ^ src2.  vector256 = false selects the 128-bit VEX encoding.
instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
4744 | |
// XOR of two 8-byte vectors, SSE two-operand (destructive) form:
// dst ^= src.  Matching (XorV dst src) ties dst to the first input.
instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4754 | |
// XOR of two 8-byte vectors, AVX three-operand (non-destructive) form:
// dst = src1 ^ src2.  vector256 = false selects the 128-bit VEX encoding.
instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
4765 | |
// XOR of two 16-byte vectors, SSE two-operand (destructive) form:
// dst ^= src.  Matching (XorV dst src) ties dst to the first input.
instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4775 | |
// XOR of two 16-byte vectors, AVX three-operand (non-destructive) form:
// dst = src1 ^ src2.  vector256 = false selects the 128-bit VEX encoding.
instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
4786 | |
// XOR of a 16-byte vector with a vector loaded straight from memory
// (the LoadVector is folded into vpxor's memory operand).  AVX only.
instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
4797 | |
// XOR of two 32-byte (256-bit) vectors.  vector256 = true selects the
// VEX.256 encoding of vpxor; the predicate requires UseAVX > 1 because
// 256-bit integer operations need AVX2.
instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
4808 | |
// XOR of a 32-byte vector with a memory operand (LoadVector folded into
// vpxor).  UseAVX > 1: 256-bit integer operations need AVX2.
instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
4819 |