-
Notifications
You must be signed in to change notification settings - Fork 6.1k
8360116: Add support for AVX10 floating point minmax instruction #25914
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
ecb2294
e775357
b6e5515
b8ef2cc
382c9b9
8969798
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1230,6 +1230,21 @@ void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt, | |
} | ||
} | ||
|
||
// Emits a single AVX10 floating-point min/max instruction for a vector min/max
// or min/max-reduction node.
//
//   opc      - must be Op_MinV, Op_MinReductionV, Op_MaxV or Op_MaxReductionV (asserted).
//   elem_bt  - T_FLOAT selects evminmaxps; otherwise T_DOUBLE is asserted and
//              evminmaxpd is used.
//   dst, mask, src1, src2, vlen_enc - forwarded unchanged to the emitted instruction.
void C2_MacroAssembler::vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Line 1122 mentions the differences between There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Details on insturction semantics can be found in section 11.2 of AVX10 manual https://www.intel.com/content/www/us/en/content-details/856721/intel-advanced-vector-extensions-10-2-intel-avx10-2-architecture-specification.html?wapkw=AVX10 |
||
XMMRegister src1, XMMRegister src2, int vlen_enc) { | ||
assert(opc == Op_MinV || opc == Op_MinReductionV || | ||
opc == Op_MaxV || opc == Op_MaxReductionV, "sanity"); | ||
|
||
// The two Min opcodes select the MIN control value; the remaining accepted
// opcodes (the two Max ones) select the MAX control value.
int imm8 = (opc == Op_MinV || opc == Op_MinReductionV) ? AVX10_MINMAX_MIN_COMPARE_SIGN | ||
: AVX10_MINMAX_MAX_COMPARE_SIGN; | ||
// NOTE(review): the literal 'true' passed below is presumably the merge-masking
// flag of evminmaxps/evminmaxpd - confirm against the assembler declarations.
if (elem_bt == T_FLOAT) { | ||
evminmaxps(dst, mask, src1, src2, true, imm8, vlen_enc); | ||
} else { | ||
assert(elem_bt == T_DOUBLE, ""); | ||
evminmaxpd(dst, mask, src1, src2, true, imm8, vlen_enc); | ||
} | ||
} | ||
|
||
// Float/Double signum | ||
void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one) { | ||
assert(opcode == Op_SignumF || opcode == Op_SignumD, "sanity"); | ||
|
@@ -2537,12 +2552,21 @@ void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_vali | |
} else { // i = [0,1] | ||
vpermilps(wtmp, wsrc, permconst[i], vlen_enc); | ||
} | ||
vminmax_fp(opcode, T_FLOAT, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc); | ||
|
||
if (VM_Version::supports_avx10_2()) { | ||
vminmax_fp(opcode, T_FLOAT, wdst, k0, wtmp, wsrc, vlen_enc); | ||
} else { | ||
vminmax_fp(opcode, T_FLOAT, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc); | ||
} | ||
wsrc = wdst; | ||
vlen_enc = Assembler::AVX_128bit; | ||
} | ||
if (is_dst_valid) { | ||
vminmax_fp(opcode, T_FLOAT, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); | ||
if (VM_Version::supports_avx10_2()) { | ||
vminmax_fp(opcode, T_FLOAT, dst, k0, wdst, dst, Assembler::AVX_128bit); | ||
} else { | ||
vminmax_fp(opcode, T_FLOAT, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); | ||
} | ||
} | ||
} | ||
|
||
|
@@ -2568,12 +2592,23 @@ void C2_MacroAssembler::reduceDoubleMinMax(int opcode, int vlen, bool is_dst_val | |
assert(i == 0, "%d", i); | ||
vpermilpd(wtmp, wsrc, 1, vlen_enc); | ||
} | ||
vminmax_fp(opcode, T_DOUBLE, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc); | ||
|
||
if (VM_Version::supports_avx10_2()) { | ||
vminmax_fp(opcode, T_DOUBLE, wdst, k0, wtmp, wsrc, vlen_enc); | ||
} else { | ||
vminmax_fp(opcode, T_DOUBLE, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc); | ||
} | ||
|
||
wsrc = wdst; | ||
vlen_enc = Assembler::AVX_128bit; | ||
} | ||
|
||
if (is_dst_valid) { | ||
vminmax_fp(opcode, T_DOUBLE, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); | ||
if (VM_Version::supports_avx10_2()) { | ||
vminmax_fp(opcode, T_DOUBLE, dst, k0, wdst, dst, Assembler::AVX_128bit); | ||
} else { | ||
vminmax_fp(opcode, T_DOUBLE, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); | ||
} | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a reason `evminmaxph` does not have a version where `src` has type `Address`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Currently, we do not have a matcher pattern to consume it, as the MIN/MAX sequence was a bulky one anyway. I have added a new pattern for the memory-operand flavor, specifically for AVX10, along with this patch.
The patch has been regression-tested with the following tests using Intel SDE https://www.intel.com/content/www/us/en/download/684897/intel-software-development-emulator.html (Version 9.53).
e.g. command line /home/jatinbha/softwares/sde-external-9.53.0-2025-03-16-lin/sde64 -future -ptr_raise -icount -- java