diff --git a/src/hotspot/cpu/loongarch/loongarch_64.ad b/src/hotspot/cpu/loongarch/loongarch_64.ad
index 9a3ef92d0ce..89df7f869c4 100644
--- a/src/hotspot/cpu/loongarch/loongarch_64.ad
+++ b/src/hotspot/cpu/loongarch/loongarch_64.ad
@@ -10849,24 +10849,30 @@ instruct convHF2F_reg_reg(regF dst, mRegI src, regF tmp) %{
   ins_pipe(pipe_slow);
 %}
 
-instruct round_float_reg(mRegI dst, regF src, mRegL tmp)
+instruct round_float_reg(mRegI dst, regF src, regF vtemp1)
 %{
   match(Set dst (RoundF src));
-  effect(TEMP_DEF dst, TEMP tmp);
-  format %{ "round_float $dst, $src\t# @round_float_reg" %}
+  effect(TEMP_DEF dst, TEMP vtemp1);
+  format %{ "round_float $dst, $src\t# "
+            "TEMP($vtemp1) @round_float_reg" %}
   ins_encode %{
-    __ java_round_float($dst$$Register, $src$$FloatRegister, $tmp$$Register);
+    __ java_round_float($dst$$Register,
+                        $src$$FloatRegister,
+                        $vtemp1$$FloatRegister);
   %}
   ins_pipe( pipe_slow );
 %}
 
-instruct round_double_reg(mRegL dst, regD src, mRegL tmp)
+instruct round_double_reg(mRegL dst, regD src, regD vtemp1)
 %{
   match(Set dst (RoundD src));
-  effect(TEMP_DEF dst, TEMP tmp);
-  format %{ "round_double $dst, $src\t# @round_double_reg" %}
+  effect(TEMP_DEF dst, TEMP vtemp1);
+  format %{ "round_double $dst, $src\t# "
+            "TEMP($vtemp1) @round_double_reg" %}
   ins_encode %{
-    __ java_round_double($dst$$Register, $src$$FloatRegister, $tmp$$Register);
+    __ java_round_double($dst$$Register,
+                         $src$$FloatRegister,
+                         $vtemp1$$FloatRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -14407,7 +14413,8 @@ instruct round_float_lsx(vReg dst, vReg src, vReg vtemp1, vReg vtemp2) %{
   predicate(Matcher::vector_length_in_bytes(n) <= 16);
   match(Set dst (RoundVF src));
   effect(TEMP_DEF dst, TEMP vtemp1, TEMP vtemp2);
-  format %{ "round_float_lsx $dst, $src\t# @round_float_lsx" %}
+  format %{ "round_float_lsx $dst, $src\t# "
+            "TEMP($vtemp1, $vtemp2) @round_float_lsx" %}
   ins_encode %{
     __ java_round_float_lsx($dst$$FloatRegister,
                             $src$$FloatRegister,
@@ -14421,7 +14428,8 @@ instruct round_float_lasx(vReg dst, vReg src, vReg vtemp1, vReg vtemp2) %{
   predicate(Matcher::vector_length_in_bytes(n) > 16);
   match(Set dst (RoundVF src));
   effect(TEMP_DEF dst, TEMP vtemp1, TEMP vtemp2);
-  format %{ "round_float_lasx $dst, $src\t# @round_float_lasx" %}
+  format %{ "round_float_lasx $dst, $src\t# "
+            "TEMP($vtemp1, $vtemp2) @round_float_lasx" %}
   ins_encode %{
     __ java_round_float_lasx($dst$$FloatRegister,
                              $src$$FloatRegister,
@@ -14435,7 +14443,8 @@ instruct round_double_lsx(vReg dst, vReg src, vReg vtemp1, vReg vtemp2) %{
   predicate(Matcher::vector_length_in_bytes(n) <= 16);
   match(Set dst (RoundVD src));
   effect(TEMP_DEF dst, TEMP vtemp1, TEMP vtemp2);
-  format %{ "round_double_lsx $dst, $src\t# @round_double_lsx" %}
+  format %{ "round_double_lsx $dst, $src\t# "
+            "TEMP($vtemp1, $vtemp2) @round_double_lsx" %}
   ins_encode %{
     __ java_round_double_lsx($dst$$FloatRegister,
                              $src$$FloatRegister,
@@ -14449,7 +14458,8 @@ instruct round_double_lasx(vReg dst, vReg src, vReg vtemp1, vReg vtemp2) %{
   predicate(Matcher::vector_length_in_bytes(n) > 16);
   match(Set dst (RoundVD src));
   effect(TEMP_DEF dst, TEMP vtemp1, TEMP vtemp2);
-  format %{ "round_double_lasx $dst, $src\t# @round_double_lasx" %}
+  format %{ "round_double_lasx $dst, $src\t# "
+            "TEMP($vtemp1, $vtemp2) @round_double_lasx" %}
   ins_encode %{
     __ java_round_double_lasx($dst$$FloatRegister,
                               $src$$FloatRegister,
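The matcher changes above swap the scalar rules' general-purpose temporary (mRegL tmp) for a floating-point one (regF/regD vtemp1), since the rewritten stubs below stay entirely in FP registers, and the format strings now name the TEMP register as well. For orientation, a minimal C++ reference model of the semantics RoundF must implement (an illustrative sketch under our own name round_f_ref, not HotSpot code): Java's Math.round(float) returns the closest int, with ties rounding toward positive infinity, NaN mapping to 0, and out-of-range values saturating.

    #include <cmath>
    #include <cstdint>

    // Reference model of RoundF (sketch only): Math.round(float) behaves as
    // floor(x + 0.5) computed without intermediate rounding error; doing the
    // add in double is exact for every float small enough to matter here.
    static int32_t round_f_ref(float x) {
      if (std::isnan(x)) return 0;                  // Java: round(NaN) == 0
      double r = std::floor((double)x + 0.5);
      if (r <= (double)INT32_MIN) return INT32_MIN; // saturate, as Java requires
      if (r >= (double)INT32_MAX) return INT32_MAX;
      return (int32_t)r;
    }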
diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp
index b7b7ece47e8..95fa6c906ea 100644
--- a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp
+++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp
@@ -3716,25 +3716,31 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
 // in the IEEE-754-2008. For single-precision floatings,
 // the following algorithm can be used to effectively
 // implement rounding via standard operations.
-//
-// if src >= 0:
-//   dst = floor(src + 0.49999997f)
-// else:
-//   dst = floor(src + 0.5f)
 void MacroAssembler::java_round_float(Register dst,
                                       FloatRegister src,
-                                      Register tmp) {
+                                      FloatRegister vtemp1) {
   block_comment("java_round_float: { ");
+
+  Label L_abnormal, L_done;
+
   li(AT, StubRoutines::la::round_float_imm());
-  movfr2gr_s(tmp, src);
-  bstrpick_w(tmp, tmp, 31, 31);
-  slli_w(tmp, tmp, 2);
-  fldx_s(fscratch, AT, tmp);
-  fadd_s(fscratch, fscratch, src);
+  // if src is -0.5f, return 0 as result
+  fld_s(vtemp1, AT, 0);
+  fcmp_ceq_s(FCC0, vtemp1, src);
+  bceqz(FCC0, L_abnormal);
+  move(dst, R0);
+  b(L_done);
+  // else, floor src with the magic number
+  bind(L_abnormal);
+  fld_s(vtemp1, AT, 4);
+  fadd_s(fscratch, vtemp1, src);
   ftintrm_w_s(fscratch, fscratch);
   movfr2gr_s(dst, fscratch);
+
+  bind(L_done);
+
   block_comment("} java_round_float");
 }
 
@@ -3744,18 +3750,13 @@ void MacroAssembler::java_round_float_lsx(FloatRegister dst,
                                           FloatRegister vtemp2) {
   block_comment("java_round_float_lsx: { ");
   li(AT, StubRoutines::la::round_float_imm());
+  vldrepl_w(vtemp1, AT, 0); // repl -0.5f
+  vldrepl_w(vtemp2, AT, 1); // repl 0.49999997f
 
-  vldrepl_w(vtemp2, AT, 1); // repl 0.5f
-  vslti_w(fscratch, src, 0); // masked add
-  vand_v(vtemp2, fscratch, vtemp2);
-  vfadd_s(dst, src, vtemp2);
-
-  vldrepl_w(vtemp1, AT, 0); // repl 0.49999997f
-  vnor_v(fscratch, fscratch, fscratch); // rev mask
-  vand_v(vtemp1, fscratch, vtemp1);
-  vfadd_s(dst, dst, vtemp1);
-
-  vftintrm_w_s(dst, dst);
+  vfcmp_cne_s(fscratch, src, vtemp1); // generate the mask
+  vand_v(fscratch, fscratch, src); // clear the special
+  vfadd_s(dst, fscratch, vtemp2); // plus the magic
+  vftintrm_w_s(dst, dst); // floor the result
 
   block_comment("} java_round_float_lsx");
 }
 
@@ -3765,18 +3766,13 @@ void MacroAssembler::java_round_float_lasx(FloatRegister dst,
                                            FloatRegister vtemp2) {
   block_comment("java_round_float_lasx: { ");
   li(AT, StubRoutines::la::round_float_imm());
+  xvldrepl_w(vtemp1, AT, 0); // repl -0.5f
+  xvldrepl_w(vtemp2, AT, 1); // repl 0.49999997f
 
-  xvldrepl_w(vtemp2, AT, 1); // repl 0.5f
-  xvslti_w(fscratch, src, 0); // masked add
-  xvand_v(vtemp2, fscratch, vtemp2);
-  xvfadd_s(dst, src, vtemp2);
-
-  xvldrepl_w(vtemp1, AT, 0); // repl 0.49999997f
-  xvnor_v(fscratch, fscratch, fscratch); // rev mask
-  xvand_v(vtemp1, fscratch, vtemp1);
-  xvfadd_s(dst, dst, vtemp1);
-
-  xvftintrm_w_s(dst, dst);
+  xvfcmp_cne_s(fscratch, src, vtemp1); // generate the mask
+  xvand_v(fscratch, fscratch, src); // clear the special
+  xvfadd_s(dst, fscratch, vtemp2); // plus the magic
+  xvftintrm_w_s(dst, dst); // floor the result
 
   block_comment("} java_round_float_lasx");
 }
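The scalar rewrite above drops the old sign-dependent constant selection (sign bit extracted into a GPR, then an indexed fldx_s) in favor of one magic constant plus a single special case. The arithmetic behind it: 0.49999997f is the largest float strictly below 0.5f, and for negative inputs near a tie the float addition rounds the sum back up to the exact halfway point, so the following floor still lands on Java's round-half-up result. The one input where that breaks is -0.5f itself, where the sum is -2^-25 and floor yields -1 instead of the required 0; hence the fcmp_ceq_s early exit. A small stand-alone demonstration (ours, not part of the patch):

    #include <cmath>
    #include <cstdio>

    int main() {
      float magic = 0.49999997f;                   // nextafterf(0.5f, 0.0f)
      // -1.5f + magic rounds back up to -1.0f, so floor gives -1,
      // matching Math.round(-1.5f) (ties go toward positive infinity):
      printf("%.1f\n", std::floor(-1.5f + magic)); // -1.0
      // 0.5f + magic ties to 1.0f under round-to-nearest-even:
      printf("%.1f\n", std::floor(0.5f + magic));  // 1.0
      // The lone counterexample: -0.5f + magic == -2^-25 and floor gives
      // -1.0, but Math.round(-0.5f) == 0; the scalar branch above and
      // the vector masking exist exactly for this case.
      printf("%.1f\n", std::floor(-0.5f + magic)); // -1.0
      return 0;
    }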
@@ -3785,25 +3781,31 @@ void MacroAssembler::java_round_float_lasx(FloatRegister dst,
 // in the IEEE-754-2008. For double-precision floatings,
 // the following algorithm can be used to effectively
 // implement rounding via standard operations.
-//
-// if src >= 0:
-//   dst = floor(src + 0.49999999999999994d)
-// else:
-//   dst = floor(src + 0.5d)
 void MacroAssembler::java_round_double(Register dst,
                                        FloatRegister src,
-                                       Register tmp) {
+                                       FloatRegister vtemp1) {
   block_comment("java_round_double: { ");
+
+  Label L_abnormal, L_done;
+
   li(AT, StubRoutines::la::round_double_imm());
-  movfr2gr_d(tmp, src);
-  bstrpick_d(tmp, tmp, 63, 63);
-  slli_d(tmp, tmp, 3);
-  fldx_d(fscratch, AT, tmp);
-  fadd_d(fscratch, fscratch, src);
+  // if src is -0.5d, return 0 as result
+  fld_d(vtemp1, AT, 0);
+  fcmp_ceq_d(FCC0, vtemp1, src);
+  bceqz(FCC0, L_abnormal);
+  move(dst, R0);
+  b(L_done);
+  // else, floor src with the magic number
+  bind(L_abnormal);
+  fld_d(vtemp1, AT, 8);
+  fadd_d(fscratch, vtemp1, src);
   ftintrm_l_d(fscratch, fscratch);
   movfr2gr_d(dst, fscratch);
+
+  bind(L_done);
+
   block_comment("} java_round_double");
 }
 
@@ -3813,18 +3815,13 @@ void MacroAssembler::java_round_double_lsx(FloatRegister dst,
                                            FloatRegister vtemp2) {
   block_comment("java_round_double_lsx: { ");
   li(AT, StubRoutines::la::round_double_imm());
+  vldrepl_d(vtemp1, AT, 0); // repl -0.5d
+  vldrepl_d(vtemp2, AT, 1); // repl 0.49999999999999994d
 
-  vldrepl_d(vtemp2, AT, 1); // repl 0.5d
-  vslti_d(fscratch, src, 0); // masked add
-  vand_v(vtemp2, fscratch, vtemp2);
-  vfadd_d(dst, src, vtemp2);
-
-  vldrepl_d(vtemp1, AT, 0); // repl 0.49999999999999994d
-  vnor_v(fscratch, fscratch, fscratch); // rev mask
-  vand_v(vtemp1, fscratch, vtemp1);
-  vfadd_d(dst, dst, vtemp1);
-
-  vftintrm_l_d(dst, dst);
+  vfcmp_cne_d(fscratch, src, vtemp1); // generate the mask
+  vand_v(fscratch, fscratch, src); // clear the special
+  vfadd_d(dst, fscratch, vtemp2); // plus the magic
+  vftintrm_l_d(dst, dst); // floor the result
 
   block_comment("} java_round_double_lsx");
 }
 
@@ -3834,18 +3831,13 @@ void MacroAssembler::java_round_double_lasx(FloatRegister dst,
                                             FloatRegister vtemp2) {
   block_comment("java_round_double_lasx: { ");
   li(AT, StubRoutines::la::round_double_imm());
+  xvldrepl_d(vtemp1, AT, 0); // repl -0.5d
+  xvldrepl_d(vtemp2, AT, 1); // repl 0.49999999999999994d
 
-  xvldrepl_d(vtemp2, AT, 1); // repl 0.5d
-  xvslti_d(fscratch, src, 0); // masked add
-  xvand_v(vtemp2, fscratch, vtemp2);
-  xvfadd_d(dst, src, vtemp2);
-
-  xvldrepl_d(vtemp1, AT, 0); // repl 0.49999999999999994d
-  xvnor_v(fscratch, fscratch, fscratch); // rev mask
-  xvand_v(vtemp1, fscratch, vtemp1);
-  xvfadd_d(dst, dst, vtemp1);
-
-  xvftintrm_l_d(dst, dst);
+  xvfcmp_cne_d(fscratch, src, vtemp1); // generate the mask
+  xvand_v(fscratch, fscratch, src); // clear the special
+  xvfadd_d(dst, fscratch, vtemp2); // plus the magic
+  xvftintrm_l_d(dst, dst); // floor the result
 
   block_comment("} java_round_double_lasx");
 }
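The vector paths reach the same result without a branch: the cne compare builds an all-ones mask for ordinary lanes, the AND zeroes the -0.5 lanes, and floor(0.0 + magic) is 0, which is exactly what the scalar special case returns. Since cne is an ordered compare, NaN lanes also get a zero mask and come out as 0, matching Java's round(NaN). A scalar emulation of one float lane (an illustrative sketch under our own name round_lane_ref, not HotSpot code; the saturation Java requires of the final convert is written out as explicit clamps):

    #include <cmath>
    #include <cstdint>
    #include <cstring>

    static int32_t round_lane_ref(float src) {
      // vfcmp.cne.s: all-ones where ordered and not equal to -0.5f;
      // NaN compares unordered, so its mask is zero as well.
      uint32_t mask = (src < -0.5f || src > -0.5f) ? 0xFFFFFFFFu : 0u;
      uint32_t bits;
      std::memcpy(&bits, &src, sizeof bits);
      bits &= mask;                                // vand.v: -0.5f/NaN -> +0.0f
      float cleared;
      std::memcpy(&cleared, &bits, sizeof cleared);
      float f = std::floor(cleared + 0.49999997f); // vfadd.s, then round down
      if (f <= (float)INT32_MIN) return INT32_MIN; // vftintrm.w.s saturates
      if (f >= (float)INT32_MAX) return INT32_MAX;
      return (int32_t)f;
    }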
diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp
index 25db54ac4b9..4ea7aaa459f 100644
--- a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp
+++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp
@@ -316,12 +316,14 @@ class MacroAssembler: public Assembler {
   void sign_extend_byte(Register reg) { ext_w_b(reg, reg); }
 
   // java.lang.Math::round intrinsics
-  void java_round_float(Register dst, FloatRegister src, Register tmp);
+  void java_round_float(Register dst, FloatRegister src,
+                        FloatRegister vtemp1);
   void java_round_float_lsx(FloatRegister dst, FloatRegister src,
                             FloatRegister vtemp1, FloatRegister vtemp2);
   void java_round_float_lasx(FloatRegister dst, FloatRegister src,
                              FloatRegister vtemp1, FloatRegister vtemp2);
-  void java_round_double(Register dst, FloatRegister src, Register tmp);
+  void java_round_double(Register dst, FloatRegister src,
+                         FloatRegister vtemp1);
   void java_round_double_lsx(FloatRegister dst, FloatRegister src,
                              FloatRegister vtemp1, FloatRegister vtemp2);
   void java_round_double_lasx(FloatRegister dst, FloatRegister src,
diff --git a/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp
index 0986b40627a..4d8fe415483 100644
--- a/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp
+++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp
@@ -188,11 +188,9 @@ ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_pio2[] = {
 };
 
 ATTRIBUTE_ALIGNED(64) jfloat StubRoutines::la::_round_float_imm[] = {
-  0.49999997f, // round positive
-  0.5f, // round negative
+  -0.5f, 0.49999997f // magic number for ties
 };
 
 ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_round_double_imm[] = {
-  0.49999999999999994d, // round positive
-  0.5d, // round negative
+  -0.5d, 0.49999999999999994d // magic number for ties
 };
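With the table reordered to {-0.5, magic}, the scalar code loads both entries at fixed offsets (fld_s at bytes 0 and 4, fld_d at bytes 0 and 8) and the vector code splats them with vldrepl/xvldrepl, whose immediate is scaled by the element size, so indices 0 and 1 name the same two slots. A last stand-alone check (ours, not part of the patch) that each magic number is the largest representable value strictly below one half in its format:

    #include <cmath>
    #include <cstdio>

    int main() {
      // Both lines print 1: the constants are 0.5 - 2^-25 (float) and
      // 0.5 - 2^-54 (double), i.e. nextafter(0.5) toward zero.
      printf("%d\n", 0.49999997f == std::nextafterf(0.5f, 0.0f));
      printf("%d\n", 0.49999999999999994 == std::nextafter(0.5, 0.0));
      return 0;
    }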