Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add SLJIT_REV16PACK and SLJIT_REV32PACK #206

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions sljit_src/sljitLir.c
Original file line number Diff line number Diff line change
Expand Up @@ -1078,14 +1078,16 @@ static const char* op1_names[] = {
"mov", "mov", "mov", "mov",
"mov", "mov", "mov", "mov",
"mov", "clz", "ctz", "rev",
"rev", "rev", "rev", "rev"
"rev", "rev", "rev", "rev",
"rev16w", "rev32w"
};

static const char* op1_types[] = {
"", ".u8", ".s8", ".u16",
".s16", ".u32", ".s32", "32",
".p", "", "", "",
".u16", ".s16", ".u32", ".s32"
".u16", ".s16", ".u32", ".s32",
"", ""
};

static const char* op2_names[] = {
Expand Down Expand Up @@ -1413,7 +1415,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler
}

#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_REV_S32);
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_REV32PACK);

switch (GET_OPCODE(op)) {
case SLJIT_MOV:
Expand All @@ -1423,6 +1425,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler
case SLJIT_MOV_P:
case SLJIT_REV_U32:
case SLJIT_REV_S32:
case SLJIT_REV32PACK:
/* Nothing allowed */
CHECK_ARGUMENT(!(op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
break;
Expand Down
17 changes: 15 additions & 2 deletions sljit_src/sljitLir.h
Original file line number Diff line number Diff line change
Expand Up @@ -671,13 +671,15 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler
two separate 32 bit floating point registers (e.g. ARM32). The
second 32 bit part can be accessed by SLJIT_F64_SECOND. */
#define SLJIT_HAS_F64_AS_F32_PAIR 11
/* [Not emulated] There is support for word size bulk byte swaps. */
#define SLJIT_HAS_REVPACK 12
/* [Not emulated] Some SIMD operations are supported by the compiler. */
#define SLJIT_HAS_SIMD 12
#define SLJIT_HAS_SIMD 13
/* [Not emulated] SIMD registers are mapped to a pair of double precision
floating point registers. E.g. passing either SLJIT_FR0 or SLJIT_FR1 to
a simd operation represents the same 128 bit register, and both SLJIT_FR0
and SLJIT_FR1 are overwritten. */
#define SLJIT_SIMD_REGS_ARE_PAIRS 13
#define SLJIT_SIMD_REGS_ARE_PAIRS 14

#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
/* [Not emulated] SSE2 support is available on x86. */
Expand Down Expand Up @@ -1127,6 +1129,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
Note: converts between little and big endian formats
Note: immediate source argument is not supported */
#define SLJIT_REV_S32 (SLJIT_OP1_BASE + 15)
/* Reverse the order of bytes in each 16 bit halfword
Flags: - (may destroy flags)
Note: converts between little and big endian formats in bulk
Note: immediate source argument is not supported */
#define SLJIT_REV16PACK (SLJIT_OP1_BASE + 16)
/* SLJIT_REV16PACK combined with the SLJIT_32 flag */
#define SLJIT_REV16PACK32 (SLJIT_REV16PACK | SLJIT_32)
/* Reverse the order of bytes in each 32 bit word
Flags: - (may destroy flags)
Note: converts between little and big endian formats in bulk
Note: immediate source argument is not supported
Note: unlike SLJIT_REV16PACK, no variant combined with SLJIT_32
      is defined for this opcode */
#define SLJIT_REV32PACK (SLJIT_OP1_BASE + 17)

/* The following unary operations are supported by using sljit_emit_op2:
- binary not: SLJIT_XOR with immediate -1 as src1 or src2
Expand Down
16 changes: 13 additions & 3 deletions sljit_src/sljitNativeARM_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ static const sljit_u8 freg_ebit_map[(SLJIT_NUMBER_OF_FLOAT_REGISTERS << 1) + 3]
#define POP 0xe8bd0000
#define REV 0xe6bf0f30
#define REV16 0xe6bf0fb0
#define REVSH 0xe6ff0fb0
#define RSB 0xe0600000
#define RSC 0xe0e00000
#define SBC 0xe0c00000
Expand Down Expand Up @@ -1027,6 +1028,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
case SLJIT_HAS_ROT:
case SLJIT_HAS_CMOV:
case SLJIT_HAS_REV:
case SLJIT_HAS_REVPACK:
case SLJIT_HAS_PREFETCH:
case SLJIT_HAS_COPY_F32:
case SLJIT_HAS_COPY_F64:
Expand Down Expand Up @@ -1552,16 +1554,22 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
case SLJIT_REV:
case SLJIT_REV_U32:
case SLJIT_REV_S32:
case SLJIT_REV32PACK:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
return push_inst(compiler, REV | RD(dst) | RM(src2));

case SLJIT_REV_U16:
case SLJIT_REV_S16:
case SLJIT_REV16PACK:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED) && src2 != TMP_REG1 && dst != TMP_REG1);
FAIL_IF(push_inst(compiler, REV16 | RD(dst) | RM(src2)));
if (dst == TMP_REG2 || (src2 == TMP_REG2 && op == SLJIT_REV_U16))
if (op == SLJIT_REV16PACK || dst == TMP_REG2 || src2 == TMP_REG2)
return SLJIT_SUCCESS;
return push_inst(compiler, (op == SLJIT_REV_U16 ? UXTH : SXTH) | RD(dst) | RM(dst));
return push_inst(compiler, UXTH | RD(dst) | RM(dst));

case SLJIT_REV_S16:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED) && src2 != TMP_REG1 && dst != TMP_REG1);
return push_inst(compiler, REVSH | RD(dst) | RM(src2));

case SLJIT_ADD:
SLJIT_ASSERT(!(flags & INV_IMM));

Expand Down Expand Up @@ -2282,6 +2290,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
case SLJIT_REV:
case SLJIT_REV_U32:
case SLJIT_REV_S32:
case SLJIT_REV16PACK:
case SLJIT_REV32PACK:
return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);

case SLJIT_REV_U16:
Expand Down
31 changes: 24 additions & 7 deletions sljit_src/sljitNativeARM_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define RBIT 0xdac00000
#define RET 0xd65f0000
#define REV 0xdac00c00
#define REV16 0xdac00400
#define REV16 0x5ac00400
#define REV32 0xdac00800
#define RORV 0x9ac02c00
#define SBC 0xda000000
#define SBFM 0x93400000
Expand Down Expand Up @@ -441,6 +442,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
case SLJIT_HAS_REV:
case SLJIT_HAS_ROT:
case SLJIT_HAS_CMOV:
case SLJIT_HAS_REVPACK:
case SLJIT_HAS_PREFETCH:
case SLJIT_HAS_COPY_F32:
case SLJIT_HAS_COPY_F64:
Expand Down Expand Up @@ -699,6 +701,8 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
case SLJIT_REV_S16:
case SLJIT_REV_U32:
case SLJIT_REV_S32:
case SLJIT_REV16PACK:
case SLJIT_REV32PACK:
case SLJIT_ADDC:
case SLJIT_SUBC:
/* No form with immediate operand (except imm 0, which
Expand Down Expand Up @@ -873,7 +877,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
return push_inst(compiler, (MOV ^ W_OP) | RD(dst) | RM(arg2));
case SLJIT_MOV_S32:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10));
return push_inst(compiler, SBFM | RD(dst) | RN(arg2) | (31 << 10));
case SLJIT_CLZ:
SLJIT_ASSERT(arg1 == TMP_REG1);
return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2));
Expand All @@ -886,20 +890,33 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
inv_bits |= inv_bits >> 21;
return push_inst(compiler, (REV ^ inv_bits) | RD(dst) | RN(arg2));
case SLJIT_REV_U16:
case SLJIT_REV16PACK:
SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2);
inst_bits = REV16;
if (op == SLJIT_REV16PACK && !(op & SLJIT_32))
inst_bits |= (W_OP | (1 << 10));
FAIL_IF(push_inst(compiler, inst_bits | RD(dst) | RN(arg2)));
if (op == SLJIT_REV16PACK || dst == TMP_REG1 || arg2 == TMP_REG2)
return SLJIT_SUCCESS;
inv_bits |= inv_bits >> 9;
return push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(dst) | (15 << 10));
case SLJIT_REV_S16:
SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2);
FAIL_IF(push_inst(compiler, (REV16 ^ (sljit_ins)0x80000000) | RD(dst) | RN(arg2)));
if (dst == TMP_REG1 || (arg2 == TMP_REG2 && op == SLJIT_REV_U16))
FAIL_IF(push_inst(compiler, REV16 | RD(dst) | RN(arg2)));
if (dst == TMP_REG1)
return SLJIT_SUCCESS;
inv_bits |= inv_bits >> 9;
return push_inst(compiler, ((op == SLJIT_REV_U16 ? UBFM : SBFM) ^ inv_bits) | RD(dst) | RN(dst) | (15 << 10));
return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(dst) | (15 << 10));
case SLJIT_REV_U32:
case SLJIT_REV_S32:
SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2);
FAIL_IF(push_inst(compiler, (REV ^ (sljit_ins)0x80000400) | RD(dst) | RN(arg2)));
FAIL_IF(push_inst(compiler, (REV ^ (W_OP | (1 << 10))) | RD(dst) | RN(arg2)));
if (op == SLJIT_REV_U32 || dst == TMP_REG1)
return SLJIT_SUCCESS;
return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(dst) | (31 << 10));
return push_inst(compiler, SBFM | RD(dst) | RN(dst) | (31 << 10));
case SLJIT_REV32PACK:
SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2);
return push_inst(compiler, REV32 | RD(dst) | RN(arg2));
case SLJIT_ADD:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
CHECK_FLAGS(1 << 29);
Expand Down
20 changes: 16 additions & 4 deletions sljit_src/sljitNativeARM_T2_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,8 @@ static const sljit_u8 freg_ebit_map[(SLJIT_NUMBER_OF_FLOAT_REGISTERS << 1) + 3]
#define REV_W 0xfa90f080
#define REV16 0xba40
#define REV16_W 0xfa90f090
#define REVSH 0xbac0
#define REVSH_W 0xfa90f0b0
#define RBIT 0xfa90f0a0
#define RORS 0x41c0
#define ROR_W 0xfa60f000
Expand Down Expand Up @@ -556,6 +558,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
case SLJIT_HAS_REV:
case SLJIT_HAS_ROT:
case SLJIT_HAS_CMOV:
case SLJIT_HAS_REVPACK:
case SLJIT_HAS_PREFETCH:
case SLJIT_HAS_COPY_F32:
case SLJIT_HAS_COPY_F64:
Expand Down Expand Up @@ -679,6 +682,8 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
case SLJIT_REV_S16:
case SLJIT_REV_U32:
case SLJIT_REV_S32:
case SLJIT_REV16PACK:
case SLJIT_REV32PACK:
case SLJIT_MUL:
/* No form with immediate operand. */
break;
Expand Down Expand Up @@ -902,12 +907,13 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
case SLJIT_REV:
case SLJIT_REV_U32:
case SLJIT_REV_S32:
case SLJIT_REV32PACK:
SLJIT_ASSERT(arg1 == TMP_REG2);
if (IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, REV | RD3(dst) | RN3(arg2));
return push_inst32(compiler, REV_W | RN4(arg2) | RD4(dst) | RM4(arg2));
case SLJIT_REV_U16:
case SLJIT_REV_S16:
case SLJIT_REV16PACK:
SLJIT_ASSERT(arg1 == TMP_REG2 && dst != TMP_REG2);

flags &= 0xffff;
Expand All @@ -916,12 +922,18 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
else
FAIL_IF(push_inst32(compiler, REV16_W | RN4(arg2) | RD4(dst) | RM4(arg2)));

if (dst == TMP_REG1 || (arg2 == TMP_REG1 && flags == SLJIT_REV_U16))
if (flags == SLJIT_REV16PACK || dst == TMP_REG1 || arg2 == TMP_REG1)
return SLJIT_SUCCESS;

if (reg_map[dst] <= 7)
return push_inst16(compiler, (flags == SLJIT_REV_U16 ? UXTH : SXTH) | RD3(dst) | RN3(dst));
return push_inst32(compiler, (flags == SLJIT_REV_U16 ? UXTH_W : SXTH_W) | RD4(dst) | RM4(dst));
return push_inst16(compiler, UXTH | RD3(dst) | RN3(dst));
return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(dst));
case SLJIT_REV_S16:
SLJIT_ASSERT(arg1 == TMP_REG2 && dst != TMP_REG2);

if (IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, REVSH | RD3(dst) | RN3(arg2));
return push_inst32(compiler, REVSH_W | RN4(arg2) | RD4(dst) | RM4(arg2));
case SLJIT_ADD:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
if (IS_3_LO_REGS(dst, arg1, arg2))
Expand Down
7 changes: 7 additions & 0 deletions sljit_src/sljitNativeLOONGARCH_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -1221,14 +1221,21 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2)));
return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(dst) | (15 << 16));

case SLJIT_REV16PACK:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
return push_inst(compiler, ((op & SLJIT_32) ? REVB_2H : REVB_4H) | RD(dst) | RJ(src2));

case SLJIT_REV_S32:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && dst != TMP_REG1);
FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2)));
return push_inst(compiler, SLLI_W | RD(dst) | RJ(dst) | IMM_I12(0));

case SLJIT_REV_U32:
case SLJIT_REV32PACK:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && dst != TMP_REG1);
FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2)));
if (GET_OPCODE(op) == SLJIT_REV32PACK)
return SLJIT_SUCCESS;
return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(dst) | (31 << 16));

case SLJIT_ADD:
Expand Down
19 changes: 15 additions & 4 deletions sljit_src/sljitNativeMIPS_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -895,6 +895,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
return 2;
#endif /* SLJIT_MIPS_REV >= 1 */
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
case SLJIT_HAS_REVPACK:
case SLJIT_HAS_REV:
case SLJIT_HAS_ROT:
return 1;
Expand Down Expand Up @@ -1635,9 +1636,12 @@ static sljit_s32 emit_rev(struct sljit_compiler *compiler, sljit_s32 op, sljit_s
op = GET_OPCODE(op);
#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
#if defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64
if (!is_32 && (op == SLJIT_REV)) {
if (!is_32 && (op == SLJIT_REV || op == SLJIT_REV32PACK)) {
FAIL_IF(push_inst(compiler, DSBH | T(src) | D(dst), DR(dst)));
return push_inst(compiler, DSHD | T(dst) | D(dst), DR(dst));
FAIL_IF(push_inst(compiler, DSHD | T(dst) | D(dst), DR(dst)));
if (op != SLJIT_REV32PACK)
return SLJIT_SUCCESS;
return push_inst(compiler, DROTR32 | T(dst) | D(dst), DR(dst));
}
if (op != SLJIT_REV && src != TMP_REG2) {
FAIL_IF(push_inst(compiler, SLL | T(src) | D(TMP_REG1), DR(TMP_REG1)));
Expand Down Expand Up @@ -1711,10 +1715,13 @@ static sljit_s32 emit_rev16(struct sljit_compiler *compiler, sljit_s32 op, sljit
#else /* !SLJIT_CONFIG_MIPS_32 */
FAIL_IF(push_inst(compiler, DSBH | T(src) | D(dst), DR(dst)));
#endif /* SLJIT_CONFIG_MIPS_32 */
if (GET_OPCODE(op) == SLJIT_REV_U16)
switch (GET_OPCODE(op)) {
case SLJIT_REV_U16:
return push_inst(compiler, ANDI | S(dst) | T(dst) | 0xffff, DR(dst));
else
case SLJIT_REV_S16:
return push_inst(compiler, SEH | T(dst) | D(dst), DR(dst));
}
return SLJIT_SUCCESS;
#else /* SLJIT_MIPS_REV < 2 */
FAIL_IF(push_inst(compiler, SELECT_OP(DSRL, SRL) | T(src) | D(TMP_REG1) | SH_IMM(8), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | T(src) | D(dst) | SH_IMM(24), DR(dst)));
Expand Down Expand Up @@ -1851,11 +1858,13 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
case SLJIT_REV:
case SLJIT_REV_U32:
case SLJIT_REV_S32:
case SLJIT_REV32PACK:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && src2 != TMP_REG1 && dst != TMP_REG1);
return emit_rev(compiler, op, dst, src2);

case SLJIT_REV_U16:
case SLJIT_REV_S16:
case SLJIT_REV16PACK:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
return emit_rev16(compiler, op, dst, src2);

Expand Down Expand Up @@ -2573,6 +2582,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
case SLJIT_CLZ:
case SLJIT_CTZ:
case SLJIT_REV:
case SLJIT_REV16PACK:
case SLJIT_REV32PACK:
return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);

case SLJIT_REV_U16:
Expand Down
Loading