From ecb229420c39706e6aec3a616588e39be6f91593 Mon Sep 17 00:00:00 2001 From: Jatin Bhateja Date: Sun, 22 Jun 2025 01:02:38 +0530 Subject: [PATCH 1/5] 8360116: Add support for AVX10 floating point minmax instruction --- src/hotspot/cpu/x86/assembler_x86.cpp | 93 +++++++++++++++++++ src/hotspot/cpu/x86/assembler_x86.hpp | 10 ++ src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 11 +++ src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp | 3 + src/hotspot/cpu/x86/macroAssembler_x86.cpp | 16 ++++ src/hotspot/cpu/x86/x86.ad | 51 +++++++++- src/hotspot/cpu/x86/x86_64.ad | 52 ++++++++++- 7 files changed, 229 insertions(+), 7 deletions(-) diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index 897b06e94df8e..6fc2becd8a1ec 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -8257,6 +8257,14 @@ void Assembler::vmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src) { emit_int16(0x5F, (0xC0 | encode)); } +void Assembler::eminmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x53, (0xC0 | encode), imm8); +} + void Assembler::vminsh(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); @@ -8682,6 +8690,7 @@ void Assembler::vpminsq(XMMRegister dst, XMMRegister nds, XMMRegister src, int v emit_int16(0x39, (0xC0 | encode)); } + void Assembler::minps(XMMRegister dst, XMMRegister src) { InstructionAttr attributes(AVX_128bit, /* rex_w */ 
false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); @@ -8771,12 +8780,68 @@ void Assembler::vmaxps(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve emit_int16(0x5F, (0xC0 | encode)); } +void Assembler::evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x52, (0xC0 | encode), imm8); +} + +void Assembler::evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int8(0x52); + emit_operand(dst, src, /* post_addr_length: trailing imm8 */ 1); + emit_int8(imm8); +} + void Assembler::maxpd(XMMRegister dst, XMMRegister src) { InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, 
&attributes); emit_int16(0x5F, (0xC0 | encode)); } +void Assembler::evminmaxpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x52, (0xC0 | encode), imm8); +} + +void Assembler::evminmaxpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int8(0x52); + emit_operand(dst, src, /* post_addr_length: trailing imm8 */ 1); + emit_int8(imm8); +} + void Assembler::vmaxpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(vector_len >= AVX_512bit ? 
VM_Version::supports_evex() : VM_Version::supports_avx(), ""); InstructionAttr attributes(vector_len, /* vex_w */true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); @@ -13119,6 +13184,14 @@ void Assembler::vminss(XMMRegister dst, XMMRegister nds, XMMRegister src) { emit_int16(0x5D, (0xC0 | encode)); } +void Assembler::eminmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x53, (0xC0 | encode), imm8); +} + void Assembler::vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); @@ -13127,6 +13200,14 @@ void Assembler::vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { emit_int16(0x5D, (0xC0 | encode)); } +void Assembler::eminmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x53, (0xC0 | encode), imm8); +} + void Assembler::vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) { assert(VM_Version::supports_avx(), ""); assert(vector_len <= AVX_256bit, ""); @@ -16526,6 +16607,18 @@ void Assembler::evminph(XMMRegister dst, XMMRegister nds, Address src, int vecto 
emit_operand(dst, src, 0); } +void Assembler::evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x52, (0xC0 | encode), imm8); +} + void Assembler::evmaxph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16"); assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), ""); diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index b1959e23722f3..a1f8856757acc 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -2745,6 +2745,16 @@ class Assembler : public AbstractAssembler { void minpd(XMMRegister dst, XMMRegister src); void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + // AVX10.2 floating point minmax instructions + void eminmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); + void eminmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); + void eminmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); + void evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len); + void evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len); + void evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len); + 
void evminmaxpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len); + void evminmaxpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len); + // Maximum of packed integers void pmaxsb(XMMRegister dst, XMMRegister src); void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 4317bb3d01825..e6376c26f5f56 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -1238,6 +1238,17 @@ void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt, } } +void C2_MacroAssembler::vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask, + XMMRegister src1, XMMRegister src2, int vlen_enc) { + assert(opc == Op_MinV || opc == Op_MaxV, ""); + if (elem_bt == T_FLOAT) { + evminmaxps(dst, mask, src1, src2, true, opc == Op_MinV ? 0x4 : 0x5, vlen_enc); + } else { + assert(elem_bt == T_DOUBLE, ""); + evminmaxpd(dst, mask, src1, src2, true, opc == Op_MinV ? 
0x4 : 0x5, vlen_enc); + } +} + // Float/Double signum void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one) { assert(opcode == Op_SignumF || opcode == Op_SignumD, "sanity"); diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp index 713eb73d68f38..ee6fecb9f8854 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp @@ -72,6 +72,9 @@ XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, int vlen_enc); + void vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask, + XMMRegister src1, XMMRegister src2, int vlen_enc); + void vpuminmaxq(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc); void evminmax_fp(int opcode, BasicType elem_bt, diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 803bce4894589..43299f859fd01 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -8854,6 +8854,10 @@ void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XM evpminsd(dst, mask, nds, src, merge, vector_len); break; case T_LONG: evpminsq(dst, mask, nds, src, merge, vector_len); break; + case T_FLOAT: + evminmaxps(dst, mask, nds, src, merge, 0x4, vector_len); break; + case T_DOUBLE: + evminmaxpd(dst, mask, nds, src, merge, 0x4, vector_len); break; default: fatal("Unexpected type argument %s", type2name(type)); break; } @@ -8869,6 +8873,10 @@ void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XM evpmaxsd(dst, mask, nds, src, merge, vector_len); break; case T_LONG: evpmaxsq(dst, mask, nds, src, merge, vector_len); break; + case T_FLOAT: + evminmaxps(dst, mask, nds, src, merge, 0x5, vector_len); break; + case T_DOUBLE: + evminmaxpd(dst, mask, nds, src, merge, 0x5, vector_len); break; default: 
fatal("Unexpected type argument %s", type2name(type)); break; } @@ -8884,6 +8892,10 @@ void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XM evpminsd(dst, mask, nds, src, merge, vector_len); break; case T_LONG: evpminsq(dst, mask, nds, src, merge, vector_len); break; + case T_FLOAT: + evminmaxps(dst, mask, nds, src, merge, 0x4, vector_len); break; + case T_DOUBLE: + evminmaxpd(dst, mask, nds, src, merge, 0x4, vector_len); break; default: fatal("Unexpected type argument %s", type2name(type)); break; } @@ -8899,6 +8911,10 @@ void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XM evpmaxsd(dst, mask, nds, src, merge, vector_len); break; case T_LONG: evpmaxsq(dst, mask, nds, src, merge, vector_len); break; + case T_FLOAT: + evminmaxps(dst, mask, nds, src, merge, 0x5, vector_len); break; + case T_DOUBLE: + evminmaxpd(dst, mask, nds, src, merge, 0x5, vector_len); break; default: fatal("Unexpected type argument %s", type2name(type)); break; } diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index a281331cb2986..92b0498faf45a 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -2024,7 +2024,7 @@ bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { return false; // Implementation limitation } - if (is_floating_point_type(bt)) { + if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) { return false; // Implementation limitation } return true; @@ -6347,9 +6347,25 @@ instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ ins_pipe( pipe_slow ); %} +// Float/Double vector Min/Max +instruct minmaxFP_avx10_reg(vec dst, vec a, vec b) %{ + predicate(VM_Version::supports_avx10_2() && + is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE + match(Set dst (MinV a b)); + match(Set dst (MaxV a b)); + format %{ "vector_minmaxFP $dst, $a, $b" %} + ins_encode 
%{ + int vlen_enc = vector_length_encoding(this); + int opcode = this->ideal_Opcode(); + BasicType elem_bt = Matcher::vector_element_basic_type(this); + __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + // Float/Double vector Min/Max instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ - predicate(Matcher::vector_length_in_bytes(n) <= 32 && + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 && is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE UseAVX > 0); match(Set dst (MinV a b)); @@ -6371,7 +6387,7 @@ instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, l %} instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ - predicate(Matcher::vector_length_in_bytes(n) == 64 && + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 && is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE match(Set dst (MinV a b)); match(Set dst (MaxV a b)); @@ -10686,8 +10702,22 @@ instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2) ins_pipe(pipe_slow); %} +instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2) +%{ + predicate(VM_Version::supports_avx10_2()); + match(Set dst (MaxHF src1 src2)); + match(Set dst (MinHF src1 src2)); + format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %} + ins_encode %{ + int function = this->ideal_Opcode() == Op_MinHF ? 
0x4 : 0x5; + __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function); + %} + ins_pipe( pipe_slow ); +%} + instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2) %{ + predicate(!VM_Version::supports_avx10_2()); match(Set dst (MaxHF src1 src2)); match(Set dst (MinHF src1 src2)); effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2); @@ -10787,8 +10817,23 @@ instruct vector_fma_HF_mem(vec dst, memory src1, vec src2) ins_pipe( pipe_slow ); %} +instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2) +%{ + predicate(VM_Version::supports_avx10_2()); + match(Set dst (MinVHF src1 src2)); + match(Set dst (MaxVHF src1 src2)); + format %{ "vector_min_max_fp16 $dst, $src1, $src2" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + int function = this->ideal_Opcode() == Op_MinVHF ? 0x4 : 0x5; + __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2) %{ + predicate(!VM_Version::supports_avx10_2()); match(Set dst (MinVHF src1 src2)); match(Set dst (MaxVHF src1 src2)); effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2); diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 7b545f0f5f786..017de0aa65f33 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -4450,9 +4450,20 @@ instruct loadD(regD dst, memory mem) ins_pipe(pipe_slow); // XXX %} +// max = java.lang.Math.max(float a, float b) +instruct maxF_avx10_reg(regF dst, regF a, regF b) %{ + predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n)); + match(Set dst (MaxF a b)); + format %{ "maxF $dst, $a, $b" %} + ins_encode %{ + __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 0x5); + %} + ins_pipe( pipe_slow ); +%} + // max = java.lang.Math.max(float a, float b) 
instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{ - predicate(UseAVX > 0 && !VLoopReductions::is_reduction(n)); + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n)); match(Set dst (MaxF a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %} @@ -4475,9 +4486,20 @@ instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRe ins_pipe( pipe_slow ); %} +// max = java.lang.Math.max(double a, double b) +instruct maxD_avx10_reg(regD dst, regD a, regD b) %{ + predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n)); + match(Set dst (MaxD a b)); + format %{ "maxD $dst, $a, $b" %} + ins_encode %{ + __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 0x5); + %} + ins_pipe( pipe_slow ); +%} + // max = java.lang.Math.max(double a, double b) instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{ - predicate(UseAVX > 0 && !VLoopReductions::is_reduction(n)); + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n)); match(Set dst (MaxD a b)); effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp); format %{ "maxD $dst, $a, $b \t! 
using $tmp, $atmp and $btmp as TEMP" %} @@ -4500,9 +4522,20 @@ instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRe ins_pipe( pipe_slow ); %} +// min = java.lang.Math.min(float a, float b) +instruct minF_avx10_reg(regF dst, regF a, regF b) %{ + predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n)); + match(Set dst (MinF a b)); + format %{ "minF $dst, $a, $b" %} + ins_encode %{ + __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 0x4); + %} + ins_pipe( pipe_slow ); +%} + // min = java.lang.Math.min(float a, float b) instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{ - predicate(UseAVX > 0 && !VLoopReductions::is_reduction(n)); + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n)); match(Set dst (MinF a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %} @@ -4527,7 +4560,7 @@ instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRe // min = java.lang.Math.min(double a, double b) instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{ - predicate(UseAVX > 0 && !VLoopReductions::is_reduction(n)); + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n)); match(Set dst (MinD a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); format %{ "minD $dst, $a, $b \t! 
using $tmp, $atmp and $btmp as TEMP" %} @@ -4537,6 +4570,17 @@ instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, ins_pipe( pipe_slow ); %} +// min = java.lang.Math.min(double a, double b) +instruct minD_avx10_reg(regD dst, regD a, regD b) %{ + predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n)); + match(Set dst (MinD a b)); + format %{ "minD $dst, $a, $b" %} + ins_encode %{ + __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 0x4); + %} + ins_pipe( pipe_slow ); +%} + instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{ predicate(UseAVX > 0 && VLoopReductions::is_reduction(n)); match(Set dst (MinD a b)); From e775357183d57ad4e00bd6ca6e6f471ea36b6ce7 Mon Sep 17 00:00:00 2001 From: Jatin Bhateja Date: Wed, 25 Jun 2025 15:10:47 +0530 Subject: [PATCH 2/5] Extending the patch to cover reduction operations --- src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 35 +++- src/hotspot/cpu/x86/x86.ad | 184 +++++++++++++++--- src/hotspot/cpu/x86/x86_64.ad | 36 ++-- 3 files changed, 204 insertions(+), 51 deletions(-) diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index e6376c26f5f56..7a0eb3a81715f 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -1240,12 +1240,13 @@ void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt, void C2_MacroAssembler::vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, int vlen_enc) { - assert(opc == Op_MinV || opc == Op_MaxV, ""); + assert(opc == Op_MinV || opc == Op_MinReductionV || + opc == Op_MaxV || opc == Op_MaxReductionV, "sanity"); if (elem_bt == T_FLOAT) { - evminmaxps(dst, mask, src1, src2, true, opc == Op_MinV ? 0x4 : 0x5, vlen_enc); + evminmaxps(dst, mask, src1, src2, true, opc == Op_MinV || opc == Op_MinReductionV ? 
0x4 : 0x5, vlen_enc); } else { assert(elem_bt == T_DOUBLE, ""); - evminmaxpd(dst, mask, src1, src2, true, opc == Op_MinV ? 0x4 : 0x5, vlen_enc); + evminmaxpd(dst, mask, src1, src2, true, opc == Op_MinV || opc == Op_MinReductionV ? 0x4 : 0x5, vlen_enc); } } @@ -2556,12 +2557,21 @@ void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_vali } else { // i = [0,1] vpermilps(wtmp, wsrc, permconst[i], vlen_enc); } - vminmax_fp(opcode, T_FLOAT, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc); + + if (VM_Version::supports_avx10_2()) { + vminmax_fp(opcode, T_FLOAT, wdst, k0, wtmp, wsrc, vlen_enc); + } else { + vminmax_fp(opcode, T_FLOAT, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc); + } wsrc = wdst; vlen_enc = Assembler::AVX_128bit; } if (is_dst_valid) { - vminmax_fp(opcode, T_FLOAT, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); + if (VM_Version::supports_avx10_2()) { + vminmax_fp(opcode, T_FLOAT, dst, k0, wdst, dst, Assembler::AVX_128bit); + } else { + vminmax_fp(opcode, T_FLOAT, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); + } } } @@ -2587,12 +2597,23 @@ void C2_MacroAssembler::reduceDoubleMinMax(int opcode, int vlen, bool is_dst_val assert(i == 0, "%d", i); vpermilpd(wtmp, wsrc, 1, vlen_enc); } - vminmax_fp(opcode, T_DOUBLE, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc); + + if (VM_Version::supports_avx10_2()) { + vminmax_fp(opcode, T_DOUBLE, wdst, k0, wtmp, wsrc, vlen_enc); + } else { + vminmax_fp(opcode, T_DOUBLE, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc); + } + wsrc = wdst; vlen_enc = Assembler::AVX_128bit; } + if (is_dst_valid) { - vminmax_fp(opcode, T_DOUBLE, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); + if (VM_Version::supports_avx10_2()) { + vminmax_fp(opcode, T_DOUBLE, dst, k0, wdst, dst, Assembler::AVX_128bit); + } else { + vminmax_fp(opcode, T_DOUBLE, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); + } } } diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 
92b0498faf45a..88140f3d50a5e 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -5293,9 +5293,9 @@ instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legV //--------------------Min/Max Float Reduction -------------------- // Float Min Reduction -instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, - legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ - predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && +instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, + legVec btmp, legVec xmm_1, rFlagsReg cr) %{ + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && Matcher::vector_length(n->in(2)) == 2); @@ -5316,7 +5316,7 @@ instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ - predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && Matcher::vector_length(n->in(2)) >= 4); @@ -5335,9 +5335,9 @@ instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legV ins_pipe( pipe_slow ); %} -instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, - legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ - predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && +instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp, + legVec 
btmp, legVec xmm_1, rFlagsReg cr) %{ + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && Matcher::vector_length(n->in(2)) == 2); match(Set dst (MinReductionV dst src)); match(Set dst (MaxReductionV dst src)); @@ -5355,9 +5355,9 @@ instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, %} -instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, - legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ - predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && +instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp, + legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && Matcher::vector_length(n->in(2)) >= 4); match(Set dst (MinReductionV dst src)); match(Set dst (MaxReductionV dst src)); @@ -5374,12 +5374,78 @@ instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, ins_pipe( pipe_slow ); %} +instruct minmax_reduction2F_avx10(regF dst, immF src1, vec src2, vec xtmp1) %{ + predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && + ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || + (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && + Matcher::vector_length(n->in(2)) == 2); + match(Set dst (MinReductionV src1 src2)); + match(Set dst (MaxReductionV src1 src2)); + effect(TEMP dst, TEMP xtmp1); + format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %} + ins_encode %{ + int opcode = this->ideal_Opcode(); + int vlen = Matcher::vector_length(this, $src2); + __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, + xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct minmax_reductionF_avx10(regF dst, immF src1, vec src2, vec xtmp1, 
vec xtmp2) %{ + predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && + ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || + (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && + Matcher::vector_length(n->in(2)) >= 4); + match(Set dst (MinReductionV src1 src2)); + match(Set dst (MaxReductionV src1 src2)); + effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); + format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %} + ins_encode %{ + int opcode = this->ideal_Opcode(); + int vlen = Matcher::vector_length(this, $src2); + __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, + xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct minmax_reduction2F_avx10_av(regF dst, vec src, vec xtmp1) %{ + predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && + Matcher::vector_length(n->in(2)) == 2); + match(Set dst (MinReductionV dst src)); + match(Set dst (MaxReductionV dst src)); + effect(TEMP dst, TEMP xtmp1); + format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %} + ins_encode %{ + int opcode = this->ideal_Opcode(); + int vlen = Matcher::vector_length(this, $src); + __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, + $xtmp1$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct minmax_reductionF_avx10_av(regF dst, vec src, vec xtmp1, vec xtmp2) %{ + predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && + Matcher::vector_length(n->in(2)) >= 4); + match(Set dst (MinReductionV dst src)); + match(Set dst (MaxReductionV dst src)); + effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); + format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %} + ins_encode %{ + int 
opcode = this->ideal_Opcode(); + int vlen = Matcher::vector_length(this, $src); + __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, + $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} //--------------------Min Double Reduction -------------------- -instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, - legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs - rFlagsReg cr) %{ - predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && +instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2, + legVec tmp3, legVec tmp4, rFlagsReg cr) %{ + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && Matcher::vector_length(n->in(2)) == 2); @@ -5398,10 +5464,9 @@ instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, ins_pipe( pipe_slow ); %} -instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, - legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs - rFlagsReg cr) %{ - predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && +instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2, + legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{ + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && Matcher::vector_length(n->in(2)) >= 4); @@ -5421,10 +5486,9 @@ instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, %} -instruct minmax_reduction2D_av(legRegD dst, legVec src, - legVec tmp1, legVec tmp2, legVec tmp3, 
legVec tmp4, // TEMPs - rFlagsReg cr) %{ - predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && +instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, + legVec tmp3, legVec tmp4, rFlagsReg cr) %{ + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && Matcher::vector_length(n->in(2)) == 2); match(Set dst (MinReductionV dst src)); match(Set dst (MaxReductionV dst src)); @@ -5441,10 +5505,9 @@ instruct minmax_reduction2D_av(legRegD dst, legVec src, ins_pipe( pipe_slow ); %} -instruct minmax_reductionD_av(legRegD dst, legVec src, - legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs - rFlagsReg cr) %{ - predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && +instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3, + legVec tmp4, legVec tmp5, rFlagsReg cr) %{ + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && Matcher::vector_length(n->in(2)) >= 4); match(Set dst (MinReductionV dst src)); match(Set dst (MaxReductionV dst src)); @@ -5461,6 +5524,75 @@ instruct minmax_reductionD_av(legRegD dst, legVec src, ins_pipe( pipe_slow ); %} +instruct minmax_reduction2D_avx10(regD dst, immD src1, vec src2, vec xtmp1) %{ + predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && + ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || + (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && + Matcher::vector_length(n->in(2)) == 2); + match(Set dst (MinReductionV src1 src2)); + match(Set dst (MaxReductionV src1 src2)); + effect(TEMP dst, TEMP xtmp1); + format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %} + ins_encode %{ + int opcode = this->ideal_Opcode(); + int vlen = Matcher::vector_length(this, $src2); + __ 
reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, + xnoreg, xnoreg, $xtmp1$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct minmax_reductionD_avx10(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{ + predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && + ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || + (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && + Matcher::vector_length(n->in(2)) >= 4); + match(Set dst (MinReductionV src1 src2)); + match(Set dst (MaxReductionV src1 src2)); + effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); + format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %} + ins_encode %{ + int opcode = this->ideal_Opcode(); + int vlen = Matcher::vector_length(this, $src2); + __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, + xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + + +instruct minmax_reduction2D_av_avx10(regD dst, vec src, vec xtmp1) %{ + predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && + Matcher::vector_length(n->in(2)) == 2); + match(Set dst (MinReductionV dst src)); + match(Set dst (MaxReductionV dst src)); + effect(TEMP dst, TEMP xtmp1); + format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %} + ins_encode %{ + int opcode = this->ideal_Opcode(); + int vlen = Matcher::vector_length(this, $src); + __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, + xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct minmax_reductionD_av_avx10(regD dst, vec src, vec xtmp1, vec xtmp2) %{ + predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && + Matcher::vector_length(n->in(2)) >= 
4); + match(Set dst (MinReductionV dst src)); + match(Set dst (MaxReductionV dst src)); + effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); + format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %} + ins_encode %{ + int opcode = this->ideal_Opcode(); + int vlen = Matcher::vector_length(this, $src); + __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, + xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + // ====================VECTOR ARITHMETIC======================================= // --------------------------------- ADD -------------------------------------- diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 017de0aa65f33..2782e7b3f74a0 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -4452,7 +4452,7 @@ instruct loadD(regD dst, memory mem) // max = java.lang.Math.max(float a, float b) instruct maxF_avx10_reg(regF dst, regF a, regF b) %{ - predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n)); + predicate(VM_Version::supports_avx10_2()); match(Set dst (MaxF a b)); format %{ "maxF $dst, $a, $b" %} ins_encode %{ @@ -4474,7 +4474,7 @@ instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, %} instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{ - predicate(UseAVX > 0 && VLoopReductions::is_reduction(n)); + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n)); match(Set dst (MaxF a b)); effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr); @@ -4488,7 +4488,7 @@ instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRe // max = java.lang.Math.max(double a, double b) instruct maxD_avx10_reg(regD dst, regD a, regD b) %{ - predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n)); + predicate(VM_Version::supports_avx10_2()); 
match(Set dst (MaxD a b)); format %{ "maxD $dst, $a, $b" %} ins_encode %{ @@ -4510,7 +4510,7 @@ instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, %} instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{ - predicate(UseAVX > 0 && VLoopReductions::is_reduction(n)); + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n)); match(Set dst (MaxD a b)); effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr); @@ -4524,7 +4524,7 @@ instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRe // max = java.lang.Math.max(float a, float b) instruct minF_avx10_reg(regF dst, regF a, regF b) %{ - predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n)); + predicate(VM_Version::supports_avx10_2()); match(Set dst (MinF a b)); format %{ "maxF $dst, $a, $b" %} ins_encode %{ @@ -4546,7 +4546,7 @@ instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, %} instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{ - predicate(UseAVX > 0 && VLoopReductions::is_reduction(n)); + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n)); match(Set dst (MinF a b)); effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr); @@ -4558,31 +4558,31 @@ instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRe ins_pipe( pipe_slow ); %} -// min = java.lang.Math.min(double a, double b) -instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{ - predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n)); +// max = java.lang.Math.max(double a, double b) +instruct minD_avx10_reg(regD dst, regD a, regD b) %{ + predicate(VM_Version::supports_avx10_2()); match(Set dst (MinD a b)); - effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); - format %{ "minD 
$dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %} + format %{ "maxD $dst, $a, $b" %} ins_encode %{ - __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit); + __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 0x4); %} ins_pipe( pipe_slow ); %} -// max = java.lang.Math.max(double a, double b) -instruct minD_avx10_reg(regD dst, regD a, regD b) %{ - predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n)); +// min = java.lang.Math.min(double a, double b) +instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{ + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n)); match(Set dst (MinD a b)); - format %{ "maxD $dst, $a, $b" %} + effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); + format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %} ins_encode %{ - __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 0x4); + __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit); %} ins_pipe( pipe_slow ); %} instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{ - predicate(UseAVX > 0 && VLoopReductions::is_reduction(n)); + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n)); match(Set dst (MinD a b)); effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr); From b6e55157a68290eb776ac0ca0ecd80980da11307 Mon Sep 17 00:00:00 2001 From: Jatin Bhateja Date: Wed, 25 Jun 2025 15:23:19 +0530 Subject: [PATCH 3/5] Update comments --- src/hotspot/cpu/x86/x86_64.ad | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 2782e7b3f74a0..44c6ff826d092 
100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -4522,7 +4522,7 @@ instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRe ins_pipe( pipe_slow ); %} -// max = java.lang.Math.max(float a, float b) +// min = java.lang.Math.min(float a, float b) instruct minF_avx10_reg(regF dst, regF a, regF b) %{ predicate(VM_Version::supports_avx10_2()); match(Set dst (MinF a b)); @@ -4558,7 +4558,7 @@ instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRe ins_pipe( pipe_slow ); %} -// max = java.lang.Math.max(double a, double b) +// min = java.lang.Math.min(double a, double b) instruct minD_avx10_reg(regD dst, regD a, regD b) %{ predicate(VM_Version::supports_avx10_2()); match(Set dst (MinD a b)); From 382c9b9e849221f65eedda3bf40abe0976ed1871 Mon Sep 17 00:00:00 2001 From: Jatin Bhateja Date: Thu, 26 Jun 2025 13:28:33 +0530 Subject: [PATCH 4/5] Review comments resolutions --- src/hotspot/cpu/x86/assembler_x86.cpp | 19 +++++++++++++++++-- src/hotspot/cpu/x86/assembler_x86.hpp | 1 + src/hotspot/cpu/x86/x86.ad | 16 +++++++++++++++- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index 6fc2becd8a1ec..32aba47f26974 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -8690,7 +8690,6 @@ void Assembler::vpminsq(XMMRegister dst, XMMRegister nds, XMMRegister src, int v emit_int16(0x39, (0xC0 | encode)); } - void Assembler::minps(XMMRegister dst, XMMRegister src) { InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); @@ -8782,7 +8781,7 @@ void Assembler::vmaxps(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve void Assembler::evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister
src, bool merge, int imm8, int vector_len) { assert(VM_Version::supports_avx10_2(), ""); - InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -16619,6 +16618,22 @@ void Assembler::evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, XMM emit_int24(0x52, (0xC0 | encode), imm8); } +void Assembler::evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes); + emit_int8(0x52); + emit_operand(dst, src, 0); + emit_int8(imm8); +} + void Assembler::evmaxph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16"); assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), ""); diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index a1f8856757acc..f814a6e9e794d 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -2750,6 +2750,7 @@ class Assembler : public AbstractAssembler { void eminmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); void 
eminmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); void evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len); + void evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len); void evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len); void evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len); void evminmaxpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len); diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 88140f3d50a5e..58c0bc4494bc4 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -6518,7 +6518,7 @@ instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, l ins_pipe( pipe_slow ); %} -instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ +instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 && is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE match(Set dst (MinV a b)); @@ -10949,6 +10949,20 @@ instruct vector_fma_HF_mem(vec dst, memory src1, vec src2) ins_pipe( pipe_slow ); %} +instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2) +%{ + predicate(VM_Version::supports_avx10_2()); + match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2)))); + match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2)))); + format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + int function = this->ideal_Opcode() == Op_MinVHF ? 
0x4 : 0x5; + __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2) %{ predicate(VM_Version::supports_avx10_2()); From 8969798370f349f757d7cc0c20f1e1d32e08a272 Mon Sep 17 00:00:00 2001 From: Jatin Bhateja Date: Fri, 27 Jun 2025 13:35:39 +0530 Subject: [PATCH 5/5] Review resolutions --- src/hotspot/cpu/x86/assembler_x86.hpp | 11 +++++++++++ src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 7 +++++-- src/hotspot/cpu/x86/macroAssembler_x86.cpp | 16 ++++++++-------- src/hotspot/cpu/x86/x86.ad | 6 +++--- src/hotspot/cpu/x86/x86_64.ad | 8 ++++---- 5 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index f814a6e9e794d..45c24f8c83256 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -441,6 +441,17 @@ class InstructionAttr; // See fxsave and xsave(EVEX enabled) documentation for layout const int FPUStateSizeInWords = 2688 / wordSize; + +// AVX10 new minmax instruction control mask encoding. +// +// imm8[4] = 0 (please refer to Table 11.1 of section 11.2 of AVX10 manual[1] for details) +// imm8[3:2] (sign control) = 01 (select sign, please refer to Table 11.5 of section 11.2 of AVX10 manual[1] for details) +// imm8[1:0] = 00 (min) / 01 (max) +// +// [1] https://www.intel.com/content/www/us/en/content-details/856721/intel-advanced-vector-extensions-10-2-intel-avx10-2-architecture-specification.html?wapkw=AVX10 +const int AVX10_MINMAX_MAX_COMPARE_SIGN = 0x5; +const int AVX10_MINMAX_MIN_COMPARE_SIGN = 0x4; + // The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction // level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write // is what you get. The Assembler is generating code into a CodeBuffer. 
diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index d12a51f43aa57..6d24c145a50f1 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -1234,11 +1234,14 @@ void C2_MacroAssembler::vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc) { assert(opc == Op_MinV || opc == Op_MinReductionV || opc == Op_MaxV || opc == Op_MaxReductionV, "sanity"); + + int imm8 = (opc == Op_MinV || opc == Op_MinReductionV) ? AVX10_MINMAX_MIN_COMPARE_SIGN + : AVX10_MINMAX_MAX_COMPARE_SIGN; if (elem_bt == T_FLOAT) { - evminmaxps(dst, mask, src1, src2, true, opc == Op_MinV || opc == Op_MinReductionV ? 0x4 : 0x5, vlen_enc); + evminmaxps(dst, mask, src1, src2, true, imm8, vlen_enc); } else { assert(elem_bt == T_DOUBLE, ""); - evminmaxpd(dst, mask, src1, src2, true, opc == Op_MinV || opc == Op_MinReductionV ? 0x4 : 0x5, vlen_enc); + evminmaxpd(dst, mask, src1, src2, true, imm8, vlen_enc); } } diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index a0afbf9303b12..c8bf289e9d49e 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -8842,9 +8842,9 @@ void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XM case T_LONG: evpminsq(dst, mask, nds, src, merge, vector_len); break; case T_FLOAT: - evminmaxps(dst, mask, nds, src, merge, 0x4, vector_len); break; + evminmaxps(dst, mask, nds, src, merge, AVX10_MINMAX_MIN_COMPARE_SIGN, vector_len); break; case T_DOUBLE: - evminmaxpd(dst, mask, nds, src, merge, 0x4, vector_len); break; + evminmaxpd(dst, mask, nds, src, merge, AVX10_MINMAX_MIN_COMPARE_SIGN, vector_len); break; default: fatal("Unexpected type argument %s", type2name(type)); break; } @@ -8861,9 +8861,9 @@ void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XM case T_LONG: 
evpmaxsq(dst, mask, nds, src, merge, vector_len); break; case T_FLOAT: - evminmaxps(dst, mask, nds, src, merge, 0x5, vector_len); break; + evminmaxps(dst, mask, nds, src, merge, AVX10_MINMAX_MAX_COMPARE_SIGN, vector_len); break; case T_DOUBLE: - evminmaxpd(dst, mask, nds, src, merge, 0x5, vector_len); break; + evminmaxpd(dst, mask, nds, src, merge, AVX10_MINMAX_MAX_COMPARE_SIGN, vector_len); break; default: fatal("Unexpected type argument %s", type2name(type)); break; } @@ -8880,9 +8880,9 @@ void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XM case T_LONG: evpminsq(dst, mask, nds, src, merge, vector_len); break; case T_FLOAT: - evminmaxps(dst, mask, nds, src, merge, 0x4, vector_len); break; + evminmaxps(dst, mask, nds, src, merge, AVX10_MINMAX_MIN_COMPARE_SIGN, vector_len); break; case T_DOUBLE: - evminmaxpd(dst, mask, nds, src, merge, 0x4, vector_len); break; + evminmaxpd(dst, mask, nds, src, merge, AVX10_MINMAX_MIN_COMPARE_SIGN, vector_len); break; default: fatal("Unexpected type argument %s", type2name(type)); break; } @@ -8899,9 +8899,9 @@ void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XM case T_LONG: evpmaxsq(dst, mask, nds, src, merge, vector_len); break; case T_FLOAT: - evminmaxps(dst, mask, nds, src, merge, 0x5, vector_len); break; + evminmaxps(dst, mask, nds, src, merge, AVX10_MINMAX_MAX_COMPARE_SIGN, vector_len); break; case T_DOUBLE: - evminmaxps(dst, mask, nds, src, merge, 0x5, vector_len); break; + evminmaxpd(dst, mask, nds, src, merge, AVX10_MINMAX_MAX_COMPARE_SIGN, vector_len); break; default: fatal("Unexpected type argument %s", type2name(type)); break; } diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 58c0bc4494bc4..c0a55917a940a 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -10841,7 +10841,7 @@ instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2) match(Set dst (MinHF src1 src2)); format %{ "scalar_min_max_fp16 $dst,
$src1, $src2" %} ins_encode %{ - int function = this->ideal_Opcode() == Op_MinHF ? 0x4 : 0x5; + int function = this->ideal_Opcode() == Op_MinHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN; __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function); %} ins_pipe( pipe_slow ); @@ -10957,7 +10957,7 @@ instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2) format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %} ins_encode %{ int vlen_enc = vector_length_encoding(this); - int function = this->ideal_Opcode() == Op_MinVHF ? 0x4 : 0x5; + int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN; __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc); %} ins_pipe( pipe_slow ); @@ -10971,7 +10971,7 @@ instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2) format %{ "vector_min_max_fp16 $dst, $src1, $src2" %} ins_encode %{ int vlen_enc = vector_length_encoding(this); - int function = this->ideal_Opcode() == Op_MinVHF ? 0x4 : 0x5; + int function = this->ideal_Opcode() == Op_MinVHF ? 
AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN; __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc); %} ins_pipe( pipe_slow ); diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index ffe687b39f9e6..ae216112b520f 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -4456,7 +4456,7 @@ instruct maxF_avx10_reg(regF dst, regF a, regF b) %{ match(Set dst (MaxF a b)); format %{ "maxF $dst, $a, $b" %} ins_encode %{ - __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 0x5); + __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN); %} ins_pipe( pipe_slow ); %} @@ -4492,7 +4492,7 @@ instruct maxD_avx10_reg(regD dst, regD a, regD b) %{ match(Set dst (MaxD a b)); format %{ "maxD $dst, $a, $b" %} ins_encode %{ - __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 0x5); + __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN); %} ins_pipe( pipe_slow ); %} @@ -4528,7 +4528,7 @@ instruct minF_avx10_reg(regF dst, regF a, regF b) %{ match(Set dst (MinF a b)); - format %{ "maxF $dst, $a, $b" %} + format %{ "minF $dst, $a, $b" %} ins_encode %{ - __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 0x4); + __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN); %} ins_pipe( pipe_slow ); %} @@ -4564,7 +4564,7 @@ instruct minD_avx10_reg(regD dst, regD a, regD b) %{ match(Set dst (MinD a b)); - format %{ "maxD $dst, $a, $b" %} + format %{ "minD $dst, $a, $b" %} ins_encode %{ - __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 0x4); + __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN); %} ins_pipe( pipe_slow ); %}