Skip to content

Commit

Permalink
aarch64: Add support for load+extends patterns
Browse files Browse the repository at this point in the history
This commit adds support for merging a load with a `{u,s}extend` instruction. On AArch64 the load instructions already do this by default, so we can just emit the regular loads.

See also #8765, which does a similar thing for RISC-V.
  • Loading branch information
afonso360 committed Jun 11, 2024
1 parent 9e34bde commit 6c781d7
Show file tree
Hide file tree
Showing 2 changed files with 230 additions and 0 deletions.
22 changes: 22 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1139,6 +1139,17 @@
(u8_from_uimm8 lane)))))
(value_regs (mov_from_vec (put_in_reg vec) lane (lane_size in)) (imm $I64 (ImmExtend.Zero) 0)))

;; Merge a load into the `uextend` that consumes it: when the extended result
;; fits in 64 bits and the operand is produced by a load that can be sunk, the
;; load is sunk and a single unsigned load of the source type is emitted, so
;; the zero extension is encoded in the load itself.
(rule (lower (has_type (fits_in_64 _)
                       (uextend val @ (has_type src_ty (load mflags addr off)))))
      (if-let sinkable (is_sinkable_inst val))
      (let ((_ Unit (sink_inst sinkable)))
        (aarch64_uload src_ty (amode src_ty addr off) mflags)))

;; Dispatch on the loaded type to the unsigned load helper of matching width.
;; Only $I8, $I16 and $I32 have rules here.
(decl aarch64_uload (Type AMode MemFlags) Reg)
(rule (aarch64_uload $I8 addr mflags)
      (aarch64_uload8 addr mflags))
(rule (aarch64_uload $I16 addr mflags)
      (aarch64_uload16 addr mflags))
(rule (aarch64_uload $I32 addr mflags)
      (aarch64_uload32 addr mflags))

;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; General rule for extending input to an output which fits in a single
Expand Down Expand Up @@ -1187,6 +1198,17 @@
(hi Reg (asr_imm $I64 lo (imm_shift_from_u8 63))))
(value_regs lo hi)))

;; Merge a load into the `sextend` that consumes it: when the extended result
;; fits in 64 bits and the operand is produced by a load that can be sunk, the
;; load is sunk and a single signed load of the source type is emitted, so the
;; sign extension is encoded in the load itself.
(rule (lower (has_type (fits_in_64 _)
                       (sextend val @ (has_type src_ty (load mflags addr off)))))
      (if-let sinkable (is_sinkable_inst val))
      (let ((_ Unit (sink_inst sinkable)))
        (aarch64_sload src_ty (amode src_ty addr off) mflags)))

;; Dispatch on the loaded type to the signed load helper of matching width.
;; Only $I8, $I16 and $I32 have rules here.
(decl aarch64_sload (Type AMode MemFlags) Reg)
(rule (aarch64_sload $I8 addr mflags)
      (aarch64_sload8 addr mflags))
(rule (aarch64_sload $I16 addr mflags)
      (aarch64_sload16 addr mflags))
(rule (aarch64_sload $I32 addr mflags)
      (aarch64_sload32 addr mflags))

;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Base case using `orn` between two registers.
Expand Down
208 changes: 208 additions & 0 deletions cranelift/filetests/filetests/isa/aarch64/load-extends.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
test compile precise-output
set unwind_info=false
target aarch64

;; Loads an i8 from the address in v0 and zero-extends it to i16.
;; The expectation that follows is a single `ldrb` with no separate extend.
function %load_uextend_i8_i16(i64) -> i16 {
block0(v0: i64):
v1 = load.i8 v0
v2 = uextend.i16 v1
return v2
}

; VCode:
; block0:
; ldrb w0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrb w0, [x0] ; trap: heap_oob
; ret

;; Loads an i8 from the address in v0 and zero-extends it to i32.
;; The expectation that follows is a single `ldrb` with no separate extend.
function %load_uextend_i8_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i8 v0
v2 = uextend.i32 v1
return v2
}

; VCode:
; block0:
; ldrb w0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrb w0, [x0] ; trap: heap_oob
; ret

;; Loads an i8 from the address in v0 and zero-extends it to i64.
;; The expectation that follows is a single `ldrb` with no separate extend.
function %load_uextend_i8_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i8 v0
v2 = uextend.i64 v1
return v2
}

; VCode:
; block0:
; ldrb w0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrb w0, [x0] ; trap: heap_oob
; ret

;; Loads an i16 from the address in v0 and zero-extends it to i32.
;; The expectation that follows is a single `ldrh` with no separate extend.
function %load_uextend_i16_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i16 v0
v2 = uextend.i32 v1
return v2
}

; VCode:
; block0:
; ldrh w0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrh w0, [x0] ; trap: heap_oob
; ret

;; Loads an i16 from the address in v0 and zero-extends it to i64.
;; The expectation that follows is a single `ldrh` with no separate extend.
function %load_uextend_i16_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i16 v0
v2 = uextend.i64 v1
return v2
}

; VCode:
; block0:
; ldrh w0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrh w0, [x0] ; trap: heap_oob
; ret

;; Loads an i32 from the address in v0 and zero-extends it to i64.
;; The expectation that follows is a single `ldr` into a `w` register with no
;; separate extend.
function %load_uextend_i32_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i32 v0
v2 = uextend.i64 v1
return v2
}

; VCode:
; block0:
; ldr w0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr w0, [x0] ; trap: heap_oob
; ret

;; Loads an i8 from the address in v0 and sign-extends it to i16.
;; The expectation that follows is a single `ldrsb` with no separate extend.
function %load_sextend_i8_i16(i64) -> i16 {
block0(v0: i64):
v1 = load.i8 v0
v2 = sextend.i16 v1
return v2
}

; VCode:
; block0:
; ldrsb x0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrsb x0, [x0] ; trap: heap_oob
; ret

;; Loads an i8 from the address in v0 and sign-extends it to i32.
;; The expectation that follows is a single `ldrsb` with no separate extend.
function %load_sextend_i8_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i8 v0
v2 = sextend.i32 v1
return v2
}

; VCode:
; block0:
; ldrsb x0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrsb x0, [x0] ; trap: heap_oob
; ret

;; Loads an i8 from the address in v0 and sign-extends it to i64.
;; The expectation that follows is a single `ldrsb` with no separate extend.
function %load_sextend_i8_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i8 v0
v2 = sextend.i64 v1
return v2
}

; VCode:
; block0:
; ldrsb x0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrsb x0, [x0] ; trap: heap_oob
; ret

;; Loads an i16 from the address in v0 and sign-extends it to i32.
;; The expectation that follows is a single `ldrsh` with no separate extend.
function %load_sextend_i16_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i16 v0
v2 = sextend.i32 v1
return v2
}

; VCode:
; block0:
; ldrsh x0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrsh x0, [x0] ; trap: heap_oob
; ret

;; Loads an i16 from the address in v0 and sign-extends it to i64.
;; The expectation that follows is a single `ldrsh` with no separate extend.
function %load_sextend_i16_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i16 v0
v2 = sextend.i64 v1
return v2
}

; VCode:
; block0:
; ldrsh x0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrsh x0, [x0] ; trap: heap_oob
; ret

;; Loads an i32 from the address in v0 and sign-extends it to i64.
;; The expectation that follows is a single `ldrsw` with no separate extend.
function %load_sextend_i32_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i32 v0
v2 = sextend.i64 v1
return v2
}

; VCode:
; block0:
; ldrsw x0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrsw x0, [x0] ; trap: heap_oob
; ret

0 comments on commit 6c781d7

Please sign in to comment.