Skip to content

Commit

Permalink
riscv64: Add fmsub/fnmsub/fnmadd instruction lowerings (#8588)
Browse files Browse the repository at this point in the history
  • Loading branch information
afonso360 authored May 9, 2024
1 parent aa2beae commit fe433b7
Show file tree
Hide file tree
Showing 5 changed files with 249 additions and 40 deletions.
15 changes: 15 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1372,6 +1372,21 @@
(rule (rv_fmadd $F32 frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FmaddS) $F32 frm rs1 rs2 rs3))
(rule (rv_fmadd $F64 frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FmaddD) $F64 frm rs1 rs2 rs3))

;; Helper for emitting the `fmsub` instruction.
;;
;; `fmsub` is the fused multiply-subtract: rs1 * rs2 - rs3.
;;
;; Arguments: the float type, the rounding mode (`frm`), and the three source
;; registers `rs1`, `rs2`, `rs3`. Returns the `FReg` produced by `fpu_rrrr`.
;; The type selects the opcode: `$F32` -> `FmsubS`, `$F64` -> `FmsubD`.
;; Only these two types have a rule here.
(decl rv_fmsub (Type FRM FReg FReg FReg) FReg)
(rule (rv_fmsub $F32 frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FmsubS) $F32 frm rs1 rs2 rs3))
(rule (rv_fmsub $F64 frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FmsubD) $F64 frm rs1 rs2 rs3))

;; Helper for emitting the `fnmadd` instruction.
;;
;; `fnmadd` is the negated fused multiply-add: -(rs1 * rs2) - rs3.
;;
;; Arguments: the float type, the rounding mode (`frm`), and the three source
;; registers `rs1`, `rs2`, `rs3`. Returns the `FReg` produced by `fpu_rrrr`.
;; The type selects the opcode: `$F32` -> `FnmaddS`, `$F64` -> `FnmaddD`.
;; Only these two types have a rule here.
(decl rv_fnmadd (Type FRM FReg FReg FReg) FReg)
(rule (rv_fnmadd $F32 frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FnmaddS) $F32 frm rs1 rs2 rs3))
(rule (rv_fnmadd $F64 frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FnmaddD) $F64 frm rs1 rs2 rs3))

;; Helper for emitting the `fnmsub` instruction.
;;
;; `fnmsub` is the negated fused multiply-subtract: -(rs1 * rs2) + rs3.
;;
;; Arguments: the float type, the rounding mode (`frm`), and the three source
;; registers `rs1`, `rs2`, `rs3`. Returns the `FReg` produced by `fpu_rrrr`.
;; The type selects the opcode: `$F32` -> `FnmsubS`, `$F64` -> `FnmsubD`.
;; Only these two types have a rule here.
(decl rv_fnmsub (Type FRM FReg FReg FReg) FReg)
(rule (rv_fnmsub $F32 frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FnmsubS) $F32 frm rs1 rs2 rs3))
(rule (rv_fnmsub $F64 frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FnmsubD) $F64 frm rs1 rs2 rs3))

;; Helper for emitting the `fmv.x.w` instruction.
;;
;; Takes a float register and returns an integer register (`XReg`), emitted
;; via `fpu_rr` with the `FmvXW` opcode and `$I32` as the type argument.
;; NOTE(review): `FRM.RNE` is passed as the rounding mode; presumably it is
;; only a placeholder since this is a register move -- confirm.
(decl rv_fmvxw (FReg) XReg)
(rule (rv_fmvxw r) (fpu_rr (FpuOPRR.FmvXW) $I32 (FRM.RNE) r))
Expand Down
30 changes: 22 additions & 8 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1508,41 +1508,55 @@
(rv_vfsgnj_vf x y (unmasked) ty))

;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; fmadd: rs1 * rs2 + rs3
(rule 0 (lower (has_type (ty_scalar_float ty) (fma x y z)))
(rv_fmadd ty (FRM.RNE) x y z))

;; fmsub: rs1 * rs2 - rs3
(rule 1 (lower (has_type (ty_scalar_float ty) (fma x y (fneg z))))
(rv_fmsub ty (FRM.RNE) x y z))

;; fnmsub: -(rs1 * rs2) + rs3
(rule 2 (lower (has_type (ty_scalar_float ty) (fma (fneg x) y z)))
(rv_fnmsub ty (FRM.RNE) x y z))

;; fnmadd: -(rs1 * rs2) - rs3
(rule 3 (lower (has_type (ty_scalar_float ty) (fma (fneg x) y (fneg z))))
(rv_fnmadd ty (FRM.RNE) x y z))

;; (fma x y z) computes x * y + z
;; vfmacc computes vd[i] = +(vs1[i] * vs2[i]) + vd[i]
;; The addend `z` therefore has to be passed first, as the accumulator (vd),
;; so the arguments are given in reverse order: z y x.

(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fma x y z)))
(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (fma x y z)))
(rv_vfmacc_vv z y x (unmasked) ty))

(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fma (splat x) y z)))
(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (fma (splat x) y z)))
(rv_vfmacc_vf z y x (unmasked) ty))

;; vfmsac computes vd[i] = +(vs1[i] * vs2[i]) - vd[i]

(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fma x y (fneg z))))
(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (fma x y (fneg z))))
(rv_vfmsac_vv z y x (unmasked) ty))

(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (fma (splat x) y (fneg z))))
(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (fma (splat x) y (fneg z))))
(rv_vfmsac_vf z y x (unmasked) ty))

;; vfnmacc computes vd[i] = -(vs1[i] * vs2[i]) - vd[i]

(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg x) y (fneg z))))
(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg x) y (fneg z))))
(rv_vfnmacc_vv z y x (unmasked) ty))

(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg (splat x)) y (fneg z))))
(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg (splat x)) y (fneg z))))
(rv_vfnmacc_vf z y x (unmasked) ty))

;; vfnmsac computes vd[i] = -(vs1[i] * vs2[i]) + vd[i]

(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg x) y z)))
(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg x) y z)))
(rv_vfnmsac_vv z y x (unmasked) ty))

(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg (splat x)) y z)))
(rule 8 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg (splat x)) y z)))
(rv_vfnmsac_vf z y x (unmasked) ty))


Expand Down
32 changes: 0 additions & 32 deletions cranelift/filetests/filetests/isa/riscv64/float.clif
Original file line number Diff line number Diff line change
Expand Up @@ -259,38 +259,6 @@ block0(v0: f64):
; ret


function %f29(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fma v0, v1, v2
return v3
}

; VCode:
; block0:
; fmadd.s fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fmadd.s fa0, fa0, fa1, fa2, rne
; ret

function %f30(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fma v0, v1, v2
return v3
}

; VCode:
; block0:
; fmadd.d fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fmadd.d fa0, fa0, fa1, fa2, rne
; ret

function %f31(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fcopysign v0, v1
Expand Down
142 changes: 142 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/fma.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
test compile precise-output
set unwind_info=false
target riscv64


function %fmadd_f32(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fma v0, v1, v2
return v3
}

; VCode:
; block0:
; fmadd.s fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fmadd.s fa0, fa0, fa1, fa2, rne
; ret

function %fmadd_f64(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fma v0, v1, v2
return v3
}

; VCode:
; block0:
; fmadd.d fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fmadd.d fa0, fa0, fa1, fa2, rne
; ret


function %fmsub_f32(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fneg v2
v4 = fma v0, v1, v3
return v4
}

; VCode:
; block0:
; fmsub.s fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fmsub.s fa0, fa0, fa1, fa2, rne
; ret

function %fmsub_f64(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fneg v2
v4 = fma v0, v1, v3
return v4
}

; VCode:
; block0:
; fmsub.d fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fmsub.d fa0, fa0, fa1, fa2, rne
; ret

function %fnmsub_f32(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fneg v0
v4 = fma v3, v1, v2
return v4
}

; VCode:
; block0:
; fnmsub.s fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fnmsub.s fa0, fa0, fa1, fa2, rne
; ret

function %fnmsub_f64(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fneg v0
v4 = fma v3, v1, v2
return v4
}

; VCode:
; block0:
; fnmsub.d fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fnmsub.d fa0, fa0, fa1, fa2, rne
; ret

function %fnmadd_f32(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fneg v0
v4 = fneg v2
v5 = fma v3, v1, v4
return v5
}

; VCode:
; block0:
; fnmadd.s fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fnmadd.s fa0, fa0, fa1, fa2, rne
; ret

function %fnmadd_f64(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fneg v0
v4 = fneg v2
v5 = fma v3, v1, v4
return v5
}

; VCode:
; block0:
; fnmadd.d fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fnmadd.d fa0, fa0, fa1, fa2, rne
; ret

70 changes: 70 additions & 0 deletions cranelift/filetests/filetests/runtests/fma.clif
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,73 @@ block0(v0: f32, v1: f32, v2: f32):
return v4
}
; run: %fma_load_f32(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6




; Computes v0 * v1 - v2 as an `fma` whose addend is negated with `fneg`.
; On riscv64 this shape is matched by the scalar `fmsub` lowering rule.
; The run lines cover different sign combinations of v0 and v2.
function %fmsub_f32(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fneg v2
v4 = fma v0, v1, v3
return v4
}
; run: %fmsub_f32(0x83.0, 0x2.68091p6, 0x9.88721p1) == 0x1.3af060p14
; run: %fmsub_f32(-0x83.0, 0x2.68091p6, 0x9.88721p1) == -0x1.3b88e6p14
; run: %fmsub_f32(-0x83.0, 0x2.68091p6, -0x9.88721p1) == -0x1.3af060p14


; f64 variant: computes v0 * v1 - v2 as an `fma` whose addend is negated with
; `fneg`. On riscv64 this shape is matched by the scalar `fmsub` lowering rule.
; The run lines cover different sign combinations of v0 and v2.
function %fmsub_f64(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fneg v2
v4 = fma v0, v1, v3
return v4
}
; run: %fmsub_f64(0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == 0x1.7ba5badfd4333p21
; run: %fmsub_f64(-0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == -0x1.7ba6ebee17417p21
; run: %fmsub_f64(-0x1.3b88ea148dd4ap14, 0x2.680916809121p6, -0x9.887218721837p1) == -0x1.7ba5badfd4333p21



; Computes -(v0 * v1) + v2 as an `fma` whose first multiplicand is negated with
; `fneg`. On riscv64 this shape is matched by the scalar `fnmsub` lowering rule.
; The run lines cover different sign combinations of v0 and v2.
function %fnmsub_f32(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fneg v0
v4 = fma v3, v1, v2
return v4
}
; run: %fnmsub_f32(0x83.0, 0x2.68091p6, 0x9.88721p1) == -0x1.3af060p14
; run: %fnmsub_f32(-0x83.0, 0x2.68091p6, 0x9.88721p1) == 0x1.3b88e6p14
; run: %fnmsub_f32(-0x83.0, 0x2.68091p6, -0x9.88721p1) == 0x1.3af060p14

; f64 variant: computes -(v0 * v1) + v2 as an `fma` whose first multiplicand is
; negated with `fneg`. On riscv64 this shape is matched by the scalar `fnmsub`
; lowering rule. The run lines cover different sign combinations of v0 and v2.
function %fnmsub_f64(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fneg v0
v4 = fma v3, v1, v2
return v4
}
; run: %fnmsub_f64(0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == -0x1.7ba5badfd4333p21
; run: %fnmsub_f64(-0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == 0x1.7ba6ebee17417p21
; run: %fnmsub_f64(-0x1.3b88ea148dd4ap14, 0x2.680916809121p6, -0x9.887218721837p1) == 0x1.7ba5badfd4333p21



; Computes -(v0 * v1) - v2 as an `fma` with both the first multiplicand and the
; addend negated with `fneg`. On riscv64 this shape is matched by the scalar
; `fnmadd` lowering rule. The run lines cover different sign combinations of
; v0 and v2.
function %fnmadd_f32(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fneg v0
v4 = fneg v2
v5 = fma v3, v1, v4
return v5
}
; run: %fnmadd_f32(0x83.0, 0x2.68091p6, 0x9.88721p1) == -0x1.3b88e6p14
; run: %fnmadd_f32(-0x83.0, 0x2.68091p6, 0x9.88721p1) == 0x1.3af060p14
; run: %fnmadd_f32(-0x83.0, 0x2.68091p6, -0x9.88721p1) == 0x1.3b88e6p14

; f64 variant: computes -(v0 * v1) - v2 as an `fma` with both the first
; multiplicand and the addend negated with `fneg`. On riscv64 this shape is
; matched by the scalar `fnmadd` lowering rule. The run lines cover different
; sign combinations of v0 and v2.
function %fnmadd_f64(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fneg v0
v4 = fneg v2
v5 = fma v3, v1, v4
return v5
}
; run: %fnmadd_f64(0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == -0x1.7ba6ebee17417p21
; run: %fnmadd_f64(-0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == 0x1.7ba5badfd4333p21
; run: %fnmadd_f64(-0x1.3b88ea148dd4ap14, 0x2.680916809121p6, -0x9.887218721837p1) == 0x1.7ba6ebee17417p21

0 comments on commit fe433b7

Please sign in to comment.