diff --git a/sljit_src/sljitLir.c b/sljit_src/sljitLir.c index 7b2c00a6..d760f995 100644 --- a/sljit_src/sljitLir.c +++ b/sljit_src/sljitLir.c @@ -1078,14 +1078,16 @@ static const char* op1_names[] = { "mov", "mov", "mov", "mov", "mov", "mov", "mov", "mov", "mov", "clz", "ctz", "rev", - "rev", "rev", "rev", "rev" + "rev", "rev", "rev", "rev", + "rev16w", "rev32w" }; static const char* op1_types[] = { "", ".u8", ".s8", ".u16", ".s16", ".u32", ".s32", "32", ".p", "", "", "", - ".u16", ".s16", ".u32", ".s32" + ".u16", ".s16", ".u32", ".s32", + "", "" }; static const char* op2_names[] = { @@ -1413,7 +1415,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler } #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_REV_S32); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_REV32PACK); switch (GET_OPCODE(op)) { case SLJIT_MOV: @@ -1423,6 +1425,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler case SLJIT_MOV_P: case SLJIT_REV_U32: case SLJIT_REV_S32: + case SLJIT_REV32PACK: /* Nothing allowed */ CHECK_ARGUMENT(!(op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK))); break; diff --git a/sljit_src/sljitLir.h b/sljit_src/sljitLir.h index 414267ae..a9893440 100644 --- a/sljit_src/sljitLir.h +++ b/sljit_src/sljitLir.h @@ -671,13 +671,15 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler two separate 32 bit floating point registers (e.g. ARM32). The second 32 bit part can be accessed by SLJIT_F64_SECOND. */ #define SLJIT_HAS_F64_AS_F32_PAIR 11 +/* [Not emulated] There is support for word size bulk byte swaps. */ +#define SLJIT_HAS_REVPACK 12 /* [Not emulated] Some SIMD operations are supported by the compiler. */ -#define SLJIT_HAS_SIMD 12 +#define SLJIT_HAS_SIMD 13 /* [Not emulated] SIMD registers are mapped to a pair of double precision floating point registers. E.g. 
passing either SLJIT_FR0 or SLJIT_FR1 to a simd operation represents the same 128 bit register, and both SLJIT_FR0 and SLJIT_FR1 are overwritten. */ -#define SLJIT_SIMD_REGS_ARE_PAIRS 13 +#define SLJIT_SIMD_REGS_ARE_PAIRS 14 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) /* [Not emulated] SSE2 support is available on x86. */ @@ -1127,6 +1129,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile Note: converts between little and big endian formats Note: immediate source argument is not supported */ #define SLJIT_REV_S32 (SLJIT_OP1_BASE + 15) +/* Reverse the order of bytes in each 16 bit halfword + Flags: - (may destroy flags) + Note: converts between little and big endian formats in bulk + Note: immediate source argument is not supported */ +#define SLJIT_REV16PACK (SLJIT_OP1_BASE + 16) +#define SLJIT_REV16PACK32 (SLJIT_REV16PACK | SLJIT_32) +/* Reverse the order of bytes in each 32 bit word + Flags: - (may destroy flags) + Note: converts between little and big endian formats in bulk + Note: immediate source argument is not supported */ +#define SLJIT_REV32PACK (SLJIT_OP1_BASE + 17) /* The following unary operations are supported by using sljit_emit_op2: - binary not: SLJIT_XOR with immedate -1 as src1 or src2 diff --git a/sljit_src/sljitNativeARM_32.c b/sljit_src/sljitNativeARM_32.c index 58fd0366..d4801fc2 100644 --- a/sljit_src/sljitNativeARM_32.c +++ b/sljit_src/sljitNativeARM_32.c @@ -127,6 +127,7 @@ static const sljit_u8 freg_ebit_map[(SLJIT_NUMBER_OF_FLOAT_REGISTERS << 1) + 3] #define POP 0xe8bd0000 #define REV 0xe6bf0f30 #define REV16 0xe6bf0fb0 +#define REVSH 0xe6ff0fb0 #define RSB 0xe0600000 #define RSC 0xe0e00000 #define SBC 0xe0c00000 @@ -1027,6 +1028,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_ROT: case SLJIT_HAS_CMOV: case SLJIT_HAS_REV: + case SLJIT_HAS_REVPACK: case SLJIT_HAS_PREFETCH: case SLJIT_HAS_COPY_F32: case SLJIT_HAS_COPY_F64: @@ -1552,16 +1554,22 @@ 
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_REV: case SLJIT_REV_U32: case SLJIT_REV_S32: + case SLJIT_REV32PACK: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); return push_inst(compiler, REV | RD(dst) | RM(src2)); case SLJIT_REV_U16: - case SLJIT_REV_S16: + case SLJIT_REV16PACK: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED) && src2 != TMP_REG1 && dst != TMP_REG1); FAIL_IF(push_inst(compiler, REV16 | RD(dst) | RM(src2))); - if (dst == TMP_REG2 || (src2 == TMP_REG2 && op == SLJIT_REV_U16)) + if (op == SLJIT_REV16PACK || dst == TMP_REG2 || src2 == TMP_REG2) return SLJIT_SUCCESS; - return push_inst(compiler, (op == SLJIT_REV_U16 ? UXTH : SXTH) | RD(dst) | RM(dst)); + return push_inst(compiler, UXTH | RD(dst) | RM(dst)); + + case SLJIT_REV_S16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED) && src2 != TMP_REG1 && dst != TMP_REG1); + return push_inst(compiler, REVSH | RD(dst) | RM(src2)); + case SLJIT_ADD: SLJIT_ASSERT(!(flags & INV_IMM)); @@ -2282,6 +2290,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_REV: case SLJIT_REV_U32: case SLJIT_REV_S32: + case SLJIT_REV16PACK: + case SLJIT_REV32PACK: return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_REV_U16: diff --git a/sljit_src/sljitNativeARM_64.c b/sljit_src/sljitNativeARM_64.c index 8587893e..678f71be 100644 --- a/sljit_src/sljitNativeARM_64.c +++ b/sljit_src/sljitNativeARM_64.c @@ -137,7 +137,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define RBIT 0xdac00000 #define RET 0xd65f0000 #define REV 0xdac00c00 -#define REV16 0xdac00400 +#define REV16 0x5ac00400 +#define REV32 0xdac00800 #define RORV 0x9ac02c00 #define SBC 0xda000000 #define SBFM 0x93400000 @@ -441,6 +442,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_REV: case SLJIT_HAS_ROT: case SLJIT_HAS_CMOV: + case 
SLJIT_HAS_REVPACK: case SLJIT_HAS_PREFETCH: case SLJIT_HAS_COPY_F32: case SLJIT_HAS_COPY_F64: @@ -699,6 +701,8 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s case SLJIT_REV_S16: case SLJIT_REV_U32: case SLJIT_REV_S32: + case SLJIT_REV16PACK: + case SLJIT_REV32PACK: case SLJIT_ADDC: case SLJIT_SUBC: /* No form with immediate operand (except imm 0, which @@ -873,7 +877,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst(compiler, (MOV ^ W_OP) | RD(dst) | RM(arg2)); case SLJIT_MOV_S32: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10)); + return push_inst(compiler, SBFM | RD(dst) | RN(arg2) | (31 << 10)); case SLJIT_CLZ: SLJIT_ASSERT(arg1 == TMP_REG1); return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)); @@ -886,20 +890,33 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s inv_bits |= inv_bits >> 21; return push_inst(compiler, (REV ^ inv_bits) | RD(dst) | RN(arg2)); case SLJIT_REV_U16: + case SLJIT_REV16PACK: + SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2); + inst_bits = REV16; + if (op == SLJIT_REV16PACK && !(op & SLJIT_32)) + inst_bits |= (W_OP | (1 << 10)); + FAIL_IF(push_inst(compiler, inst_bits | RD(dst) | RN(arg2))); + if (op == SLJIT_REV16PACK || dst == TMP_REG1 || arg2 == TMP_REG2) + return SLJIT_SUCCESS; + inv_bits |= inv_bits >> 9; + return push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(dst) | (15 << 10)); case SLJIT_REV_S16: SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2); - FAIL_IF(push_inst(compiler, (REV16 ^ (sljit_ins)0x80000000) | RD(dst) | RN(arg2))); - if (dst == TMP_REG1 || (arg2 == TMP_REG2 && op == SLJIT_REV_U16)) + FAIL_IF(push_inst(compiler, REV16 | RD(dst) | RN(arg2))); + if (dst == TMP_REG1) return SLJIT_SUCCESS; inv_bits |= inv_bits >> 9; - return push_inst(compiler, ((op == SLJIT_REV_U16 ? 
UBFM : SBFM) ^ inv_bits) | RD(dst) | RN(dst) | (15 << 10)); + return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(dst) | (15 << 10)); case SLJIT_REV_U32: case SLJIT_REV_S32: SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2); - FAIL_IF(push_inst(compiler, (REV ^ (sljit_ins)0x80000400) | RD(dst) | RN(arg2))); + FAIL_IF(push_inst(compiler, (REV ^ (W_OP | (1 << 10))) | RD(dst) | RN(arg2))); if (op == SLJIT_REV_U32 || dst == TMP_REG1) return SLJIT_SUCCESS; - return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(dst) | (31 << 10)); + return push_inst(compiler, SBFM | RD(dst) | RN(dst) | (31 << 10)); + case SLJIT_REV32PACK: + SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2); + return push_inst(compiler, REV32 | RD(dst) | RN(arg2)); case SLJIT_ADD: compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; CHECK_FLAGS(1 << 29); diff --git a/sljit_src/sljitNativeARM_T2_32.c b/sljit_src/sljitNativeARM_T2_32.c index adf31702..9c8ae0bf 100644 --- a/sljit_src/sljitNativeARM_T2_32.c +++ b/sljit_src/sljitNativeARM_T2_32.c @@ -179,6 +179,8 @@ static const sljit_u8 freg_ebit_map[(SLJIT_NUMBER_OF_FLOAT_REGISTERS << 1) + 3] #define REV_W 0xfa90f080 #define REV16 0xba40 #define REV16_W 0xfa90f090 +#define REVSH 0xbac0 +#define REVSH_W 0xfa90f0b0 #define RBIT 0xfa90f0a0 #define RORS 0x41c0 #define ROR_W 0xfa60f000 @@ -556,6 +558,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_REV: case SLJIT_HAS_ROT: case SLJIT_HAS_CMOV: + case SLJIT_HAS_REVPACK: case SLJIT_HAS_PREFETCH: case SLJIT_HAS_COPY_F32: case SLJIT_HAS_COPY_F64: @@ -679,6 +682,8 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s case SLJIT_REV_S16: case SLJIT_REV_U32: case SLJIT_REV_S32: + case SLJIT_REV16PACK: + case SLJIT_REV32PACK: case SLJIT_MUL: /* No form with immediate operand. 
*/ break; @@ -902,12 +907,13 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s case SLJIT_REV: case SLJIT_REV_U32: case SLJIT_REV_S32: + case SLJIT_REV32PACK: SLJIT_ASSERT(arg1 == TMP_REG2); if (IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, REV | RD3(dst) | RN3(arg2)); return push_inst32(compiler, REV_W | RN4(arg2) | RD4(dst) | RM4(arg2)); case SLJIT_REV_U16: - case SLJIT_REV_S16: + case SLJIT_REV16PACK: SLJIT_ASSERT(arg1 == TMP_REG2 && dst != TMP_REG2); flags &= 0xffff; @@ -916,12 +922,18 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s else FAIL_IF(push_inst32(compiler, REV16_W | RN4(arg2) | RD4(dst) | RM4(arg2))); - if (dst == TMP_REG1 || (arg2 == TMP_REG1 && flags == SLJIT_REV_U16)) + if (flags == SLJIT_REV16PACK || dst == TMP_REG1 || arg2 == TMP_REG1) return SLJIT_SUCCESS; if (reg_map[dst] <= 7) - return push_inst16(compiler, (flags == SLJIT_REV_U16 ? UXTH : SXTH) | RD3(dst) | RN3(dst)); - return push_inst32(compiler, (flags == SLJIT_REV_U16 ? 
UXTH_W : SXTH_W) | RD4(dst) | RM4(dst)); + return push_inst16(compiler, UXTH | RD3(dst) | RN3(dst)); + return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(dst)); + case SLJIT_REV_S16: + SLJIT_ASSERT(arg1 == TMP_REG2 && dst != TMP_REG2); + + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, REVSH | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, REVSH_W | RN4(arg2) | RD4(dst) | RM4(arg2)); case SLJIT_ADD: compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; if (IS_3_LO_REGS(dst, arg1, arg2)) diff --git a/sljit_src/sljitNativeLOONGARCH_64.c b/sljit_src/sljitNativeLOONGARCH_64.c index d61bb679..45c29d23 100644 --- a/sljit_src/sljitNativeLOONGARCH_64.c +++ b/sljit_src/sljitNativeLOONGARCH_64.c @@ -1221,14 +1221,21 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2))); return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(dst) | (15 << 16)); + case SLJIT_REV16PACK: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + return push_inst(compiler, ((op & SLJIT_32) ? 
REVB_2H : REVB_4H) | RD(dst) | RJ(src2)); + case SLJIT_REV_S32: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && dst != TMP_REG1); FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2))); return push_inst(compiler, SLLI_W | RD(dst) | RJ(dst) | IMM_I12(0)); case SLJIT_REV_U32: + case SLJIT_REV32PACK: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && dst != TMP_REG1); FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2))); + if (GET_OPCODE(op) == SLJIT_REV32PACK) + return SLJIT_SUCCESS; return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(dst) | (31 << 16)); case SLJIT_ADD: diff --git a/sljit_src/sljitNativeMIPS_common.c b/sljit_src/sljitNativeMIPS_common.c index 96b24024..7be192e8 100644 --- a/sljit_src/sljitNativeMIPS_common.c +++ b/sljit_src/sljitNativeMIPS_common.c @@ -895,6 +895,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) return 2; #endif /* SLJIT_MIPS_REV >= 1 */ #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + case SLJIT_HAS_REVPACK: case SLJIT_HAS_REV: case SLJIT_HAS_ROT: return 1; @@ -1635,9 +1636,12 @@ static sljit_s32 emit_rev(struct sljit_compiler *compiler, sljit_s32 op, sljit_s op = GET_OPCODE(op); #if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 #if defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64 - if (!is_32 && (op == SLJIT_REV)) { + if (!is_32 && (op == SLJIT_REV || op == SLJIT_REV32PACK)) { FAIL_IF(push_inst(compiler, DSBH | T(src) | D(dst), DR(dst))); - return push_inst(compiler, DSHD | T(dst) | D(dst), DR(dst)); + FAIL_IF(push_inst(compiler, DSHD | T(dst) | D(dst), DR(dst))); + if (op != SLJIT_REV32PACK) + return SLJIT_SUCCESS; + return push_inst(compiler, DROTR32 | T(dst) | D(dst), DR(dst)); } if (op != SLJIT_REV && src != TMP_REG2) { FAIL_IF(push_inst(compiler, SLL | T(src) | D(TMP_REG1), DR(TMP_REG1))); @@ -1711,10 +1715,13 @@ static sljit_s32 emit_rev16(struct sljit_compiler *compiler, sljit_s32 op, sljit #else /* !SLJIT_CONFIG_MIPS_32 */ FAIL_IF(push_inst(compiler, 
DSBH | T(src) | D(dst), DR(dst))); #endif /* SLJIT_CONFIG_MIPS_32 */ - if (GET_OPCODE(op) == SLJIT_REV_U16) + switch (GET_OPCODE(op)) { + case SLJIT_REV_U16: return push_inst(compiler, ANDI | S(dst) | T(dst) | 0xffff, DR(dst)); - else + case SLJIT_REV_S16: return push_inst(compiler, SEH | T(dst) | D(dst), DR(dst)); + } + return SLJIT_SUCCESS; #else /* SLJIT_MIPS_REV < 2 */ FAIL_IF(push_inst(compiler, SELECT_OP(DSRL, SRL) | T(src) | D(TMP_REG1) | SH_IMM(8), DR(TMP_REG1))); FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | T(src) | D(dst) | SH_IMM(24), DR(dst))); @@ -1851,11 +1858,13 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_REV: case SLJIT_REV_U32: case SLJIT_REV_S32: + case SLJIT_REV32PACK: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && src2 != TMP_REG1 && dst != TMP_REG1); return emit_rev(compiler, op, dst, src2); case SLJIT_REV_U16: case SLJIT_REV_S16: + case SLJIT_REV16PACK: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); return emit_rev16(compiler, op, dst, src2); @@ -2573,6 +2582,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_CLZ: case SLJIT_CTZ: case SLJIT_REV: + case SLJIT_REV16PACK: + case SLJIT_REV32PACK: return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_REV_U16: diff --git a/test_src/sljitTest.c b/test_src/sljitTest.c index adbaf410..02c7dcbb 100644 --- a/test_src/sljitTest.c +++ b/test_src/sljitTest.c @@ -11295,7 +11295,7 @@ static void test90(void) /* Test reverse four bytes. 
*/ executable_code code; struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); - sljit_sw buf[7]; + sljit_sw buf[8]; sljit_s32 ibuf[6]; sljit_s32 i; @@ -11304,49 +11304,53 @@ static void test90(void) FAILED(!compiler, "cannot create compiler\n"); - for (i = 0; i < 7; i++) + for (i = 0; i < 8; i++) buf[i] = -1; for (i = 0; i < 6; i++) ibuf[i] = -1; sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, W, W), 5, 5, 0, 0, 2 * sizeof(sljit_sw)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, WCONST(0xffffa1b2c3d4, 0xa1b2c3d4)); - sljit_emit_op1(compiler, SLJIT_REV_U32, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, WCONST(0xffffa1b2c3d4, 0xa1b2c3d4)); + sljit_emit_op1(compiler, SLJIT_REV_U32, SLJIT_R0, 0, SLJIT_R1, 0); /* buf[0] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_REV_U32, SLJIT_R1, 0, SLJIT_R1, 0); + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R1, 0); + /* Sign extend negative integer. */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, WCONST(0xffffa1b2c3d4, 0xa1b2c3d4)); sljit_emit_op1(compiler, SLJIT_REV_S32, SLJIT_R1, 0, SLJIT_R2, 0); - /* buf[1] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R1, 0); + /* buf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_R1, 0); /* Sign extend positive integer. 
*/ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, WCONST(0xffff1a2b3c4d,0x1a2b3c4d)); sljit_emit_op1(compiler, SLJIT_REV_S32, SLJIT_R1, 0, SLJIT_R2, 0); - /* buf[2] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_R1, 0); + /* buf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_R1, 0); sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_s32), SLJIT_IMM, (sljit_s32)0xf9e8d7c6); sljit_emit_op1(compiler, SLJIT_REV_U32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_s32)); - /* buf[3] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_R2, 0); + /* buf[4] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R2, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_s32)); sljit_emit_op1(compiler, SLJIT_REV_S32, SLJIT_S2, 0, SLJIT_MEM2(SLJIT_S1, SLJIT_R1), 0); - /* buf[4] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_S2, 0); + /* buf[5] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_S2, 0); sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_IMM, (sljit_s32)0xaabbccdd); sljit_emit_op1(compiler, SLJIT_REV_U32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw)); - /* buf[5] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_R2, 0); + /* buf[6] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_R2, 0); sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_IMM, (sljit_s32)0xaabbccdd); sljit_emit_op1(compiler, SLJIT_REV_S32, SLJIT_R4, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw)); - /* buf[6] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_R4, 0); + /* buf[7] */ + sljit_emit_op1(compiler, 
SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_R4, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, WCONST(0xffff01020304, 0x01020304)); /* ibuf[0] */ @@ -11382,18 +11386,19 @@ static void test90(void) code.func2((sljit_sw)buf, (sljit_sw)ibuf); FAILED(buf[0] != WCONST(0xd4c3b2a1, 0xd4c3b2a1), "test90 case 1 failed\n"); - FAILED(buf[1] != WCONST(-0x2b3c4d5f, 0xd4c3b2a1), "test90 case 2 failed\n"); - FAILED(buf[2] != 0x4d3c2b1a, "test90 case 3 failed\n"); - FAILED(buf[3] != WCONST(0xc6d7e8f9, 0xc6d7e8f9), "test90 case 4 failed\n"); - FAILED(buf[4] != WCONST(-0x39281707, 0xc6d7e8f9), "test90 case 5 failed\n"); - FAILED(buf[5] != WCONST(0xddccbbaa, 0xddccbbaa), "test90 case 6 failed\n"); - FAILED(buf[6] != WCONST(-0x22334456, 0xddccbbaa), "test90 case 7 failed\n"); - FAILED(ibuf[0] != 0x04030201, "test90 case 8 failed\n"); - FAILED(ibuf[1] != 0x04030201, "test90 case 9 failed\n"); - FAILED(ibuf[2] != (sljit_s32)0xc0d0e0f0, "test90 case 10 failed\n"); - FAILED(ibuf[3] != (sljit_s32)0xc0d0e0f0, "test90 case 11 failed\n"); - FAILED(ibuf[4] != (sljit_s32)0xb4a39281, "test90 case 12 failed\n"); - FAILED(ibuf[5] != 0x55555555, "test90 case 13 failed\n"); + FAILED(buf[1] != buf[0], "test90 case 2 failed\n"); + FAILED(buf[2] != WCONST(-0x2b3c4d5f, 0xd4c3b2a1), "test90 case 3 failed\n"); + FAILED(buf[3] != 0x4d3c2b1a, "test90 case 4 failed\n"); + FAILED(buf[4] != WCONST(0xc6d7e8f9, 0xc6d7e8f9), "test90 case 5 failed\n"); + FAILED(buf[5] != WCONST(-0x39281707, 0xc6d7e8f9), "test90 case 6 failed\n"); + FAILED(buf[6] != WCONST(0xddccbbaa, 0xddccbbaa), "test90 case 7 failed\n"); + FAILED(buf[7] != WCONST(-0x22334456, 0xddccbbaa), "test90 case 8 failed\n"); + FAILED(ibuf[0] != 0x04030201, "test90 case 9 failed\n"); + FAILED(ibuf[1] != 0x04030201, "test90 case 10 failed\n"); + FAILED(ibuf[2] != (sljit_s32)0xc0d0e0f0, "test90 case 11 failed\n"); + FAILED(ibuf[3] != (sljit_s32)0xc0d0e0f0, "test90 case 12 failed\n"); + FAILED(ibuf[4] != 
(sljit_s32)0xb4a39281, "test90 case 13 failed\n"); + FAILED(ibuf[5] != 0x55555555, "test90 case 14 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; @@ -14048,6 +14053,98 @@ static void test103(void) successful_tests++; } +static void test104(void) +{ + /* Test packed byte swaps: SLJIT_REV16PACK, SLJIT_REV16PACK32 and SLJIT_REV32PACK. */ + executable_code code; + struct sljit_compiler* compiler; + sljit_sw buf[7]; + sljit_s32 ibuf[3]; + int i; + + if (verbose) + printf("Run test104\n"); + + if (!sljit_has_cpu_feature(SLJIT_HAS_REVPACK)) { + if (verbose) + printf("packed ints are not available, test104 skipped\n"); + successful_tests++; + return; + } + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + for (i = 0; i < 7; i++) + buf[i] = -1; + + for (i = 0; i < 3; i++) + ibuf[i] = -1; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, P, P), 3, 3, 0, 0, sizeof(sljit_sw)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, WCONST(0x11aa5566994422,0x11aa55)); + sljit_emit_op1(compiler, SLJIT_REV16PACK, SLJIT_R1, 0, SLJIT_R0, 0); + /* buf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_REV16PACK, SLJIT_R1, 0, SLJIT_R1, 0); + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R1, 0); + + /* buf[2] */ + sljit_emit_op1(compiler, SLJIT_REV16PACK, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_REV16PACK, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw)); + /* buf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_R0, 0); + + /* buf[4] */ + sljit_emit_op1(compiler, SLJIT_REV16PACK, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_MEM0(), (sljit_sw)buf); + + sljit_emit_op1(compiler, SLJIT_MOV32_U8, SLJIT_R2, 0, SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_REV16PACK, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R2), SLJIT_WORD_SHIFT); + /* 
buf[5] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_MEM1(SLJIT_SP), 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, WCONST(0x9f8e7d6c5b4a3928, 0x9f8e7d6c)); + /* buf[6] */ + sljit_emit_op1(compiler, SLJIT_REV32PACK, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)0xaabbccdd); + sljit_emit_op1(compiler, SLJIT_REV16PACK32, SLJIT_R0, 0, SLJIT_R1, 0); + /* ibuf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_R0, 0); + + /* ibuf[1] */ + sljit_emit_op1(compiler, SLJIT_REV16PACK32, SLJIT_MEM0(), (sljit_sw)(ibuf + 1), SLJIT_R0, 0); + + /* ibuf[2] */ + sljit_emit_op2(compiler, SLJIT_ADD32, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_REV16PACK32, SLJIT_MEM2(SLJIT_S1, SLJIT_R2), 2, SLJIT_MEM1(SLJIT_S1), 0); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)buf, (sljit_sw)ibuf); + FAILED(buf[0] != WCONST(0x110055aa99662244, 0x110055aa), "test104 case 1 failed\n"); + FAILED(buf[1] != WCONST(0x11aa5566994422, 0x11aa55), "test104 case 2 failed\n"); + FAILED(buf[2] != buf[0], "test104 case 3 failed\n"); + FAILED(buf[3] != buf[1], "test104 case 4 failed\n"); + FAILED(buf[4] != buf[1], "test104 case 5 failed\n"); + FAILED(buf[5] != buf[0], "test104 case 6 failed\n"); + FAILED(buf[6] != WCONST(0x6c7d8e9f28394a5b, 0x6c7d8e9f), "test104 case 7 failed\n"); + FAILED(ibuf[0] != (sljit_s32)0xbbaaddcc, "test104 case 8 failed\n"); + FAILED(ibuf[1] != (sljit_s32)0xaabbccdd, "test104 case 9 failed\n"); + FAILED(ibuf[2] != ibuf[1], "test104 case 10 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + int sljit_test(int argc, char* argv[]) { sljit_s32 has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0'); @@ -14164,12 +14261,13 @@ int 
sljit_test(int argc, char* argv[]) test101(); test102(); test103(); + test104(); #if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) sljit_free_unused_memory_exec(); #endif -# define TEST_COUNT 103 +# define TEST_COUNT 104 printf("SLJIT tests: "); if (successful_tests == TEST_COUNT)