diff --git a/src/hotspot/cpu/loongarch/loongarch_64.ad b/src/hotspot/cpu/loongarch/loongarch_64.ad
index 9a3ef92d0ce..89df7f869c4 100644
--- a/src/hotspot/cpu/loongarch/loongarch_64.ad
+++ b/src/hotspot/cpu/loongarch/loongarch_64.ad
@@ -10849,24 +10849,30 @@ instruct convHF2F_reg_reg(regF dst, mRegI src, regF tmp) %{
   ins_pipe(pipe_slow);
 %}
 
-instruct round_float_reg(mRegI dst, regF src, mRegL tmp)
+instruct round_float_reg(mRegI dst, regF src, regF vtemp1)
 %{
   match(Set dst (RoundF src));
-  effect(TEMP_DEF dst, TEMP tmp);
-  format %{ "round_float $dst, $src\t# @round_float_reg" %}
+  effect(TEMP_DEF dst, TEMP vtemp1);
+  format %{ "round_float $dst, $src\t# "
+            "TEMP($vtemp1) @round_float_reg" %}
   ins_encode %{
-    __ java_round_float($dst$$Register, $src$$FloatRegister, $tmp$$Register);
+    __ java_round_float($dst$$Register,
+                        $src$$FloatRegister,
+                        $vtemp1$$FloatRegister);
   %}
   ins_pipe( pipe_slow );
 %}
 
-instruct round_double_reg(mRegL dst, regD src, mRegL tmp)
+instruct round_double_reg(mRegL dst, regD src, regD vtemp1)
 %{
   match(Set dst (RoundD src));
-  effect(TEMP_DEF dst, TEMP tmp);
-  format %{ "round_double $dst, $src\t# @round_double_reg" %}
+  effect(TEMP_DEF dst, TEMP vtemp1);
+  format %{ "round_double $dst, $src\t# "
+            "TEMP($vtemp1) @round_double_reg" %}
   ins_encode %{
-    __ java_round_double($dst$$Register, $src$$FloatRegister, $tmp$$Register);
+    __ java_round_double($dst$$Register,
+                         $src$$FloatRegister,
+                         $vtemp1$$FloatRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -14407,7 +14413,8 @@ instruct round_float_lsx(vReg dst, vReg src, vReg vtemp1, vReg vtemp2) %{
   predicate(Matcher::vector_length_in_bytes(n) <= 16);
   match(Set dst (RoundVF src));
   effect(TEMP_DEF dst, TEMP vtemp1, TEMP vtemp2);
-  format %{ "round_float_lsx $dst, $src\t# @round_float_lsx" %}
+  format %{ "round_float_lsx $dst, $src\t# "
+            "TEMP($vtemp1, $vtemp2) @round_float_lsx" %}
   ins_encode %{
     __ java_round_float_lsx($dst$$FloatRegister,
                             $src$$FloatRegister,
@@ -14421,7 +14428,8 @@ instruct round_float_lasx(vReg dst, vReg src, vReg vtemp1, vReg vtemp2) %{
   predicate(Matcher::vector_length_in_bytes(n) > 16);
   match(Set dst (RoundVF src));
   effect(TEMP_DEF dst, TEMP vtemp1, TEMP vtemp2);
-  format %{ "round_float_lasx $dst, $src\t# @round_float_lasx" %}
+  format %{ "round_float_lasx $dst, $src\t# "
+            "TEMP($vtemp1, $vtemp2) @round_float_lasx" %}
   ins_encode %{
     __ java_round_float_lasx($dst$$FloatRegister,
                              $src$$FloatRegister,
@@ -14435,7 +14443,8 @@ instruct round_double_lsx(vReg dst, vReg src, vReg vtemp1, vReg vtemp2) %{
   predicate(Matcher::vector_length_in_bytes(n) <= 16);
   match(Set dst (RoundVD src));
   effect(TEMP_DEF dst, TEMP vtemp1, TEMP vtemp2);
-  format %{ "round_double_lsx $dst, $src\t# @round_double_lsx" %}
+  format %{ "round_double_lsx $dst, $src\t# "
+            "TEMP($vtemp1, $vtemp2) @round_double_lsx" %}
   ins_encode %{
     __ java_round_double_lsx($dst$$FloatRegister,
                              $src$$FloatRegister,
@@ -14449,7 +14458,8 @@ instruct round_double_lasx(vReg dst, vReg src, vReg vtemp1, vReg vtemp2) %{
   predicate(Matcher::vector_length_in_bytes(n) > 16);
   match(Set dst (RoundVD src));
   effect(TEMP_DEF dst, TEMP vtemp1, TEMP vtemp2);
-  format %{ "round_double_lasx $dst, $src\t# @round_double_lasx" %}
+  format %{ "round_double_lasx $dst, $src\t# "
+            "TEMP($vtemp1, $vtemp2) @round_double_lasx" %}
   ins_encode %{
     __ java_round_double_lasx($dst$$FloatRegister,
                               $src$$FloatRegister,
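The matcher changes above swap the scalar rules' general-purpose temporary (mRegL tmp) for a floating-point one (regF/regD vtemp1), since the rewritten stubs below stay entirely in FP registers, and the format strings now name the TEMP register as well. For orientation, a minimal C++ reference model of the semantics RoundF must implement (an illustrative sketch under our own name round_f_ref, not HotSpot code): Java's Math.round(float) returns the closest int, with ties rounding toward positive infinity, NaN mapping to 0, and out-of-range values saturating.

    #include <cmath>
    #include <cstdint>

    // Reference model of RoundF (sketch only): Math.round(float) behaves as
    // floor(x + 0.5) computed without intermediate rounding error; doing the
    // add in double is exact for every float small enough to matter here.
    static int32_t round_f_ref(float x) {
      if (std::isnan(x)) return 0;                  // Java: round(NaN) == 0
      double r = std::floor((double)x + 0.5);
      if (r <= (double)INT32_MIN) return INT32_MIN; // saturate, as Java requires
      if (r >= (double)INT32_MAX) return INT32_MAX;
      return (int32_t)r;
    }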
diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp
index b7b7ece47e8..95fa6c906ea 100644
--- a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp
+++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp
@@ -3716,25 +3716,31 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
 // in the IEEE-754-2008. For single-precision floatings,
 // the following algorithm can be used to effectively
 // implement rounding via standard operations.
-//
-// if src >= 0:
-//   dst = floor(src + 0.49999997f)
-// else:
-//   dst = floor(src + 0.5f)
 void MacroAssembler::java_round_float(Register dst,
                                       FloatRegister src,
-                                      Register tmp) {
+                                      FloatRegister vtemp1) {
   block_comment("java_round_float: { ");
+
+  Label L_abnormal, L_done;
+
   li(AT, StubRoutines::la::round_float_imm());
-  movfr2gr_s(tmp, src);
-  bstrpick_w(tmp, tmp, 31, 31);
-  slli_w(tmp, tmp, 2);
-  fldx_s(fscratch, AT, tmp);
-  fadd_s(fscratch, fscratch, src);
+  // if src is -0.5f, return 0 as result
+  fld_s(vtemp1, AT, 0);
+  fcmp_ceq_s(FCC0, vtemp1, src);
+  bceqz(FCC0, L_abnormal);
+  move(dst, R0);
+  b(L_done);
+  // else, floor src with the magic number
+  bind(L_abnormal);
+  fld_s(vtemp1, AT, 4);
+  fadd_s(fscratch, vtemp1, src);
   ftintrm_w_s(fscratch, fscratch);
   movfr2gr_s(dst, fscratch);
+
+  bind(L_done);
+
   block_comment("} java_round_float");
 }
 
@@ -3744,18 +3750,13 @@ void MacroAssembler::java_round_float_lsx(FloatRegister dst,
                                           FloatRegister vtemp2) {
   block_comment("java_round_float_lsx: { ");
   li(AT, StubRoutines::la::round_float_imm());
+  vldrepl_w(vtemp1, AT, 0); // repl -0.5f
+  vldrepl_w(vtemp2, AT, 1); // repl 0.49999997f
 
-  vldrepl_w(vtemp2, AT, 1); // repl 0.5f
-  vslti_w(fscratch, src, 0); // masked add
-  vand_v(vtemp2, fscratch, vtemp2);
-  vfadd_s(dst, src, vtemp2);
-
-  vldrepl_w(vtemp1, AT, 0); // repl 0.49999997f
-  vnor_v(fscratch, fscratch, fscratch); // rev mask
-  vand_v(vtemp1, fscratch, vtemp1);
-  vfadd_s(dst, dst, vtemp1);
-
-  vftintrm_w_s(dst, dst);
+  vfcmp_cne_s(fscratch, src, vtemp1); // generate the mask
+  vand_v(fscratch, fscratch, src); // clear the special
+  vfadd_s(dst, fscratch, vtemp2); // plus the magic
+  vftintrm_w_s(dst, dst); // floor the result
 
   block_comment("} java_round_float_lsx");
 }
 
@@ -3765,18 +3766,13 @@ void MacroAssembler::java_round_float_lasx(FloatRegister dst,
                                            FloatRegister vtemp2) {
   block_comment("java_round_float_lasx: { ");
   li(AT, StubRoutines::la::round_float_imm());
+  xvldrepl_w(vtemp1, AT, 0); // repl -0.5f
+  xvldrepl_w(vtemp2, AT, 1); // repl 0.49999997f
 
-  xvldrepl_w(vtemp2, AT, 1); // repl 0.5f
-  xvslti_w(fscratch, src, 0); // masked add
-  xvand_v(vtemp2, fscratch, vtemp2);
-  xvfadd_s(dst, src, vtemp2);
-
-  xvldrepl_w(vtemp1, AT, 0); // repl 0.49999997f
-  xvnor_v(fscratch, fscratch, fscratch); // rev mask
-  xvand_v(vtemp1, fscratch, vtemp1);
-  xvfadd_s(dst, dst, vtemp1);
-
-  xvftintrm_w_s(dst, dst);
+  xvfcmp_cne_s(fscratch, src, vtemp1); // generate the mask
+  xvand_v(fscratch, fscratch, src); // clear the special
+  xvfadd_s(dst, fscratch, vtemp2); // plus the magic
+  xvftintrm_w_s(dst, dst); // floor the result
 
   block_comment("} java_round_float_lasx");
 }
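The scalar rewrite above drops the old sign-dependent constant selection (sign bit extracted into a GPR, then an indexed fldx_s) in favor of one magic constant plus a single special case. The arithmetic behind it: 0.49999997f is the largest float strictly below 0.5f, and for negative inputs near a tie the float addition rounds the sum back up to the exact halfway point, so the following floor still lands on Java's round-half-up result. The one input where that breaks is -0.5f itself, where the sum is -2^-25 and floor yields -1 instead of the required 0; hence the fcmp_ceq_s early exit. A small stand-alone demonstration (ours, not part of the patch):

    #include <cmath>
    #include <cstdio>

    int main() {
      float magic = 0.49999997f;                   // nextafterf(0.5f, 0.0f)
      // -1.5f + magic rounds back up to -1.0f, so floor gives -1,
      // matching Math.round(-1.5f) (ties go toward positive infinity):
      printf("%.1f\n", std::floor(-1.5f + magic)); // -1.0
      // 0.5f + magic ties to 1.0f under round-to-nearest-even:
      printf("%.1f\n", std::floor(0.5f + magic));  // 1.0
      // The lone counterexample: -0.5f + magic == -2^-25 and floor gives
      // -1.0, but Math.round(-0.5f) == 0; the scalar branch above and
      // the vector masking exist exactly for this case.
      printf("%.1f\n", std::floor(-0.5f + magic)); // -1.0
      return 0;
    }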
@@ -3785,25 +3781,31 @@ void MacroAssembler::java_round_float_lasx(FloatRegister dst,
 // in the IEEE-754-2008. For double-precision floatings,
 // the following algorithm can be used to effectively
 // implement rounding via standard operations.
-//
-// if src >= 0:
-//   dst = floor(src + 0.49999999999999994d)
-// else:
-//   dst = floor(src + 0.5d)
 void MacroAssembler::java_round_double(Register dst,
                                        FloatRegister src,
-                                       Register tmp) {
+                                       FloatRegister vtemp1) {
   block_comment("java_round_double: { ");
+
+  Label L_abnormal, L_done;
+
   li(AT, StubRoutines::la::round_double_imm());
-  movfr2gr_d(tmp, src);
-  bstrpick_d(tmp, tmp, 63, 63);
-  slli_d(tmp, tmp, 3);
-  fldx_d(fscratch, AT, tmp);
-  fadd_d(fscratch, fscratch, src);
+  // if src is -0.5d, return 0 as result
+  fld_d(vtemp1, AT, 0);
+  fcmp_ceq_d(FCC0, vtemp1, src);
+  bceqz(FCC0, L_abnormal);
+  move(dst, R0);
+  b(L_done);
+  // else, floor src with the magic number
+  bind(L_abnormal);
+  fld_d(vtemp1, AT, 8);
+  fadd_d(fscratch, vtemp1, src);
   ftintrm_l_d(fscratch, fscratch);
   movfr2gr_d(dst, fscratch);
+
+  bind(L_done);
+
   block_comment("} java_round_double");
 }
 
@@ -3813,18 +3815,13 @@ void MacroAssembler::java_round_double_lsx(FloatRegister dst,
                                            FloatRegister vtemp2) {
   block_comment("java_round_double_lsx: { ");
   li(AT, StubRoutines::la::round_double_imm());
+  vldrepl_d(vtemp1, AT, 0); // repl -0.5d
+  vldrepl_d(vtemp2, AT, 1); // repl 0.49999999999999994d
 
-  vldrepl_d(vtemp2, AT, 1); // repl 0.5d
-  vslti_d(fscratch, src, 0); // masked add
-  vand_v(vtemp2, fscratch, vtemp2);
-  vfadd_d(dst, src, vtemp2);
-
-  vldrepl_d(vtemp1, AT, 0); // repl 0.49999999999999994d
-  vnor_v(fscratch, fscratch, fscratch); // rev mask
-  vand_v(vtemp1, fscratch, vtemp1);
-  vfadd_d(dst, dst, vtemp1);
-
-  vftintrm_l_d(dst, dst);
+  vfcmp_cne_d(fscratch, src, vtemp1); // generate the mask
+  vand_v(fscratch, fscratch, src); // clear the special
+  vfadd_d(dst, fscratch, vtemp2); // plus the magic
+  vftintrm_l_d(dst, dst); // floor the result
 
   block_comment("} java_round_double_lsx");
 }
 
@@ -3834,18 +3831,13 @@ void MacroAssembler::java_round_double_lasx(FloatRegister dst,
                                             FloatRegister vtemp2) {
   block_comment("java_round_double_lasx: { ");
   li(AT, StubRoutines::la::round_double_imm());
+  xvldrepl_d(vtemp1, AT, 0); // repl -0.5d
+  xvldrepl_d(vtemp2, AT, 1); // repl 0.49999999999999994d
 
-  xvldrepl_d(vtemp2, AT, 1); // repl 0.5d
-  xvslti_d(fscratch, src, 0); // masked add
-  xvand_v(vtemp2, fscratch, vtemp2);
-  xvfadd_d(dst, src, vtemp2);
-
-  xvldrepl_d(vtemp1, AT, 0); // repl 0.49999999999999994d
-  xvnor_v(fscratch, fscratch, fscratch); // rev mask
-  xvand_v(vtemp1, fscratch, vtemp1);
-  xvfadd_d(dst, dst, vtemp1);
-
-  xvftintrm_l_d(dst, dst);
+  xvfcmp_cne_d(fscratch, src, vtemp1); // generate the mask
+  xvand_v(fscratch, fscratch, src); // clear the special
+  xvfadd_d(dst, fscratch, vtemp2); // plus the magic
+  xvftintrm_l_d(dst, dst); // floor the result
 
   block_comment("} java_round_double_lasx");
 }
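The vector paths reach the same result without a branch: the cne compare builds an all-ones mask for ordinary lanes, the AND zeroes the -0.5 lanes, and floor(0.0 + magic) is 0, which is exactly what the scalar special case returns. Since cne is an ordered compare, NaN lanes also get a zero mask and come out as 0, matching Java's round(NaN). A scalar emulation of one float lane (an illustrative sketch under our own name round_lane_ref, not HotSpot code; the saturation Java requires of the final convert is written out as explicit clamps):

    #include <cmath>
    #include <cstdint>
    #include <cstring>

    static int32_t round_lane_ref(float src) {
      // vfcmp.cne.s: all-ones where ordered and not equal to -0.5f;
      // NaN compares unordered, so its mask is zero as well.
      uint32_t mask = (src < -0.5f || src > -0.5f) ? 0xFFFFFFFFu : 0u;
      uint32_t bits;
      std::memcpy(&bits, &src, sizeof bits);
      bits &= mask;                                // vand.v: -0.5f/NaN -> +0.0f
      float cleared;
      std::memcpy(&cleared, &bits, sizeof cleared);
      float f = std::floor(cleared + 0.49999997f); // vfadd.s, then round down
      if (f <= (float)INT32_MIN) return INT32_MIN; // vftintrm.w.s saturates
      if (f >= (float)INT32_MAX) return INT32_MAX;
      return (int32_t)f;
    }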
diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp
index 25db54ac4b9..4ea7aaa459f 100644
--- a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp
+++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp
@@ -316,12 +316,14 @@ class MacroAssembler: public Assembler {
   void sign_extend_byte(Register reg) { ext_w_b(reg, reg); }
 
   // java.lang.Math::round intrinsics
-  void java_round_float(Register dst, FloatRegister src, Register tmp);
+  void java_round_float(Register dst, FloatRegister src,
+                        FloatRegister vtemp1);
   void java_round_float_lsx(FloatRegister dst, FloatRegister src,
                             FloatRegister vtemp1, FloatRegister vtemp2);
   void java_round_float_lasx(FloatRegister dst, FloatRegister src,
                              FloatRegister vtemp1, FloatRegister vtemp2);
-  void java_round_double(Register dst, FloatRegister src, Register tmp);
+  void java_round_double(Register dst, FloatRegister src,
+                         FloatRegister vtemp1);
   void java_round_double_lsx(FloatRegister dst, FloatRegister src,
                              FloatRegister vtemp1, FloatRegister vtemp2);
   void java_round_double_lasx(FloatRegister dst, FloatRegister src,
diff --git a/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp
index 0986b40627a..4d8fe415483 100644
--- a/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp
+++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp
@@ -188,11 +188,9 @@ ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_pio2[] = {
 };
 
 ATTRIBUTE_ALIGNED(64) jfloat StubRoutines::la::_round_float_imm[] = {
-  0.49999997f, // round positive
-  0.5f, // round negative
+  -0.5f, 0.49999997f // magic number for ties
 };
 
 ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_round_double_imm[] = {
-  0.49999999999999994d, // round positive
-  0.5d, // round negative
+  -0.5d, 0.49999999999999994d // magic number for ties
 };
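With the table reordered to {-0.5, magic}, the scalar code loads both entries at fixed offsets (fld_s at bytes 0 and 4, fld_d at bytes 0 and 8) and the vector code splats them with vldrepl/xvldrepl, whose immediate is scaled by the element size, so indices 0 and 1 name the same two slots. A last stand-alone check (ours, not part of the patch) that each magic number is the largest representable value strictly below one half in its format:

    #include <cmath>
    #include <cstdio>

    int main() {
      // Both lines print 1: the constants are 0.5 - 2^-25 (float) and
      // 0.5 - 2^-54 (double), i.e. nextafter(0.5) toward zero.
      printf("%d\n", 0.49999997f == std::nextafterf(0.5f, 0.0f));
      printf("%d\n", 0.49999999999999994 == std::nextafter(0.5, 0.0));
      return 0;
    }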