Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

arm: refactor test92 to avoid conflicting writes and add possibly conflicting reads #162

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
241 changes: 105 additions & 136 deletions test_src/sljitTest.c
Original file line number Diff line number Diff line change
Expand Up @@ -11395,7 +11395,7 @@ static void test91(void)
sljit_s32 i;

if (verbose)
printf("Run test13\n");
printf("Run test91\n");

if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) {
if (verbose)
Expand Down Expand Up @@ -11520,6 +11520,15 @@ static void test91(void)
successful_tests++;
}

#define M (sljit_uw)(~0ul)
#ifdef SLJIT_BIG_ENDIAN
#define O(w,o) (8 * (8 - (sizeof(w) * (o + 1))))
#else
#define O(w,o) (8 * sizeof(w) * o)
#endif /* BIG_ENDIAN */
#define H(w,o) (((sljit_uw)(w)(~0) << O(w,o)) ^ M)
#define HV(w,o,v) (sljit_sw)(((sljit_uw)v << O(w,o)) | H(w,o))

static void test92(void)
{
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
Expand All @@ -11529,245 +11538,205 @@ static void test92(void)
executable_code code;
struct sljit_compiler *compiler = sljit_create_compiler(NULL, NULL);
struct sljit_label *label;
struct sljit_jump *jump;
sljit_sw buf[32];
sljit_s32 i;
struct sljit_jump *jump, *skip;
sljit_sw buf[12];

if (verbose)
printf("Run test92\n");

FAILED(!compiler, "cannot create compiler\n");

for (i = 0; i < 32; i++)
carenas marked this conversation as resolved.
Show resolved Hide resolved
buf[i] = -1;

buf[0] = -4678;
*(sljit_u8*)(buf + 2) = 178;
*(sljit_u8*)(buf + 5) = 211;
*(sljit_u16*)(buf + 9) = 17897;
*(sljit_u16*)(buf + 12) = 57812;
*(sljit_u32*)(buf + 15) = 1234567890;
*(sljit_u32*)(buf + 17) = 987609876;
*(sljit_u8*)(buf + 20) = 192;
buf[23] = 6359;
((sljit_u8*)(buf + 26))[1] = 105;
((sljit_u8*)(buf + 28))[2] = 13;
((sljit_u16*)(buf + 30))[1] = 14876;
buf[1] = HV(sljit_u8, 0, 178);
buf[2] = HV(sljit_u8, 0, 211);
buf[3] = HV(sljit_u16, 0, 17897);
buf[4] = HV(sljit_u16, 0, 57812);
buf[5] = HV(sljit_u32, 0, 1234567890);
buf[6] = HV(sljit_u32, 0, 987609876);
buf[7] = HV(sljit_u8, 0,192);
buf[8] = 6359;
buf[9] = HV(sljit_u8, 1, 105);
buf[10] = HV(sljit_u8, 2, 13);
buf[11] = HV(sljit_u16, 1, 14876);

sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 5, 5, 0, 0, 2 * sizeof(sljit_sw));

label = sljit_emit_label(compiler);
sljit_emit_atomic_load(compiler, SLJIT_MOV, SLJIT_R1, SLJIT_S0);
/* buf[1] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R1, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_R1, 0);
jump = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_S0), 0);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would keep the original logic of storing the result of loads.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't this require then memory barriers to be added?, moving the store outside of the critical loop seems to work, but without a memory barrier it might get reordered and would cause failures (although not as obvious, as the infinite loop)

When refactoring, I thought about using a second array for the stores, but with a possible granule of 512 words, it will need ugly tricks to keep it safe anyway.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On ARM the doc says in "Load-Acquire, Store-Release" section:

  • A Load-Acquire places no additional ordering constraints on any loads or stores appearing before the
    Load-Acquire.
  • The Store-Release places no additional ordering constraints on any loads or stores appearing after the
    Store-Release instruction.

If barriers needs to be added on some cpus, the instruction should implicitly add them.

Copy link
Contributor Author

@carenas carenas May 2, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that (even before LSE and other future improvements) in aarch64 there are 2 pairs of instructions for its LL/SC.

Our implementation uses the weak (and better performing) pair of ldxr/stxr but their semantics don't correspond to what is used on other processors and that seem to map[1] better to the strong (and slower) pair (ldaxr/stlxr).

Not that anyone should, but if someone would implement a futex or mutex with sljit, it might work fine in x86/s390x and break with ARM (and probably the other RISC CPUs if implemented using LL/SC).

I have to admit that without a clear understanding on how this API is to be used, I am not sure if it is a problem but it might be something worth considering by maybe adding a "weak" parameter that would be a NOOP in strong ordered architectures and allow selecting the right pair in weak ordered ones like ARM.

If this is to be modeled like C11/C++11 atomics then maybe a full set of memory_order options might be needed instead.

[1] https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html

sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -9856);
/* buf[0] */
sljit_emit_atomic_store(compiler, SLJIT_MOV | SLJIT_SET_ATOMIC_STORED, SLJIT_R1, SLJIT_S0, SLJIT_R0);
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label);
sljit_set_label(jump, sljit_emit_label(compiler));

sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
label = sljit_emit_label(compiler);
sljit_emit_atomic_load(compiler, SLJIT_MOV_U8, SLJIT_R2, SLJIT_R1);
/* buf[3] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_R2, 0);
sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
jump = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_R2, 0, SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 203);
/* buf[2] */
/* buf[1] */
sljit_emit_atomic_store(compiler, SLJIT_MOV_U8 | SLJIT_SET_ATOMIC_STORED, SLJIT_R0, SLJIT_R1, SLJIT_R2);
jump = sljit_emit_jump(compiler, SLJIT_ATOMIC_STORED);
skip = sljit_emit_jump(compiler, SLJIT_ATOMIC_STORED);
sljit_set_label(sljit_emit_jump(compiler, SLJIT_JUMP), label);
sljit_set_label(skip, sljit_emit_label(compiler));
sljit_set_label(jump, sljit_emit_label(compiler));
/* buf[4] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R0, 0);

sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_R0, 0);
label = sljit_emit_label(compiler);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 5 * sizeof(sljit_sw));
sljit_emit_atomic_load(compiler, SLJIT_MOV32_U8, SLJIT_R0, SLJIT_R0);
/* buf[6] */
sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_R0, 0);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 5 * sizeof(sljit_sw));
skip = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL | SLJIT_32, SLJIT_R0, 0, SLJIT_IMM, 211);
sljit_emit_op1(compiler, SLJIT_MOV32_U8, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw));
jump = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL | SLJIT_32, SLJIT_S2, 0, SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 97);
/* buf[5] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_R2, 0);
/* buf[2] */
sljit_emit_atomic_store(compiler, SLJIT_MOV_U8 | SLJIT_SET_ATOMIC_STORED, SLJIT_S1, SLJIT_R0, SLJIT_S2);
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label);
/* buf[7] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_R0, 0);
/* buf[8] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_S1, 0);
sljit_set_label(jump, sljit_emit_label(compiler));
sljit_set_label(skip, sljit_emit_label(compiler));

label = sljit_emit_label(compiler);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 9 * sizeof(sljit_sw));
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_R1, 0);
sljit_emit_atomic_load(compiler, SLJIT_MOV_U16, SLJIT_S2, SLJIT_R1);
/* buf[10] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_sw), SLJIT_S2, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_S2, 0);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 9 * sizeof(sljit_sw));
/* buf[9] */
sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw));
jump = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_S2, 0, SLJIT_R1, 0);
/* buf[3] */
sljit_emit_atomic_store(compiler, SLJIT_MOV_U16 | SLJIT_SET_ATOMIC_STORED, SLJIT_R0, SLJIT_R0, SLJIT_R1);
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label);
/* buf[11] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_sw), SLJIT_R0, 0);
sljit_set_label(jump, sljit_emit_label(compiler));

sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 12 * sizeof(sljit_sw));
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 4 * sizeof(sljit_sw));
label = sljit_emit_label(compiler);
sljit_emit_atomic_load(compiler, SLJIT_MOV32_U16, SLJIT_R0, SLJIT_R1);
/* buf[13] */
sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S0), 13 * sizeof(sljit_sw), SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV32_U16, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw));
jump = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL | SLJIT_32, SLJIT_R0, 0, SLJIT_S1, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 41306);
/* buf[12] */
/* buf[4] */
sljit_emit_atomic_store(compiler, SLJIT_MOV32_U16 | SLJIT_SET_ATOMIC_STORED, SLJIT_R0, SLJIT_R1, SLJIT_R3);
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label);
/* buf[14] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 14 * sizeof(sljit_sw), SLJIT_R0, 0);
sljit_set_label(jump, sljit_emit_label(compiler));

sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S2, 0, SLJIT_S0, 0, SLJIT_IMM, 15 * sizeof(sljit_sw));
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S2, 0, SLJIT_S0, 0, SLJIT_IMM, 5 * sizeof(sljit_sw));
label = sljit_emit_label(compiler);
sljit_emit_atomic_load(compiler, SLJIT_MOV_U32, SLJIT_R2, SLJIT_S2);
/* buf[16] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 16 * sizeof(sljit_sw), SLJIT_R2, 0);
sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw));
jump = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_R2, 0, SLJIT_S1, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_R2, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 987654321);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S1, 0, SLJIT_S0, 0, SLJIT_IMM, 15 * sizeof(sljit_sw));
/* buf[15] */
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S1, 0, SLJIT_S0, 0, SLJIT_IMM, 5 * sizeof(sljit_sw));
/* buf[5] */
sljit_emit_atomic_store(compiler, SLJIT_MOV_U32 | SLJIT_SET_ATOMIC_STORED, SLJIT_R1, SLJIT_S1, SLJIT_S3);
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label);
sljit_set_label(jump, sljit_emit_label(compiler));

sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S0, 0, SLJIT_IMM, 17 * sizeof(sljit_sw));
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S0, 0, SLJIT_IMM, 6 * sizeof(sljit_sw));
label = sljit_emit_label(compiler);
sljit_emit_atomic_load(compiler, SLJIT_MOV32, SLJIT_R0, SLJIT_R2);
/* buf[18] */
sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S0), 18 * sizeof(sljit_sw), SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw));
jump = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL | SLJIT_32, SLJIT_R0, 0, SLJIT_S1, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -573621);
sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 678906789);
/* buf[17] */
/* buf[6] */
sljit_emit_atomic_store(compiler, SLJIT_MOV32 | SLJIT_SET_ATOMIC_STORED, SLJIT_R1, SLJIT_R2, SLJIT_S2);
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label);
/* buf[19] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 19 * sizeof(sljit_sw), SLJIT_R0, 0);
sljit_set_label(jump, sljit_emit_label(compiler));

sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 20 * sizeof(sljit_sw));
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 7 * sizeof(sljit_sw));
label = sljit_emit_label(compiler);
sljit_emit_atomic_load(compiler, SLJIT_MOV_U8, SLJIT_R3, SLJIT_R1);
/* buf[21] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 21 * sizeof(sljit_sw), SLJIT_R3, 0);
sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw));
jump = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_R3, 0, SLJIT_S1, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R4, 0, SLJIT_R3, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 240);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -5893);
/* buf[20] */
/* buf[7] */
sljit_emit_atomic_store(compiler, SLJIT_MOV_U8 | SLJIT_SET_ATOMIC_STORED, SLJIT_R3, SLJIT_R1, SLJIT_R4);
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label);
/* buf[22] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 22 * sizeof(sljit_sw), SLJIT_R0, 0);
sljit_set_label(jump, sljit_emit_label(compiler));

sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 23 * sizeof(sljit_sw));
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 8 * sizeof(sljit_sw));
label = sljit_emit_label(compiler);
sljit_emit_atomic_load(compiler, SLJIT_MOV, SLJIT_S3, SLJIT_R0);
/* buf[24] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 24 * sizeof(sljit_sw), SLJIT_S3, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw));
jump = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_S3, 0, SLJIT_S1, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S4, 0, SLJIT_S3, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_IMM, 4059);
/* buf[23] */
/* buf[8] */
sljit_emit_atomic_store(compiler, SLJIT_MOV | SLJIT_SET_ATOMIC_STORED, SLJIT_S3, SLJIT_R0, SLJIT_S4);
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label);
/* buf[25] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 25 * sizeof(sljit_sw), SLJIT_R0, 0);
sljit_set_label(jump, sljit_emit_label(compiler));

sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 26 * sizeof(sljit_sw) + 1);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 9 * sizeof(sljit_sw) + 1);
label = sljit_emit_label(compiler);
sljit_emit_atomic_load(compiler, SLJIT_MOV_U8, SLJIT_R0, SLJIT_R1);
/* buf[27] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 27 * sizeof(sljit_sw), SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_R1), 0);
skip = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_S1, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 204);
/* buf[26] */
/* buf[9] */
sljit_emit_atomic_store(compiler, SLJIT_MOV_U8 | SLJIT_SET_ATOMIC_STORED, SLJIT_R2, SLJIT_R1, SLJIT_R0);
jump = sljit_emit_jump(compiler, SLJIT_ATOMIC_STORED);
sljit_set_label(sljit_emit_jump(compiler, SLJIT_JUMP), label);
sljit_set_label(jump, sljit_emit_label(compiler));
sljit_set_label(skip, sljit_emit_label(compiler));

sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 28 * sizeof(sljit_sw) + 2);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 10 * sizeof(sljit_sw) + 2);
label = sljit_emit_label(compiler);
sljit_emit_atomic_load(compiler, SLJIT_MOV_U8, SLJIT_R0, SLJIT_R1);
/* buf[29] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 29 * sizeof(sljit_sw), SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_R1), 0);
jump = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_S1, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 240);
/* buf[28] */
/* buf[10] */
sljit_emit_atomic_store(compiler, SLJIT_MOV_U8 | SLJIT_SET_ATOMIC_STORED, SLJIT_R2, SLJIT_R1, SLJIT_R0);
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label);
sljit_set_label(jump, sljit_emit_label(compiler));

sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 30 * sizeof(sljit_sw) + 2);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 11 * sizeof(sljit_sw) + 2);
label = sljit_emit_label(compiler);
sljit_emit_atomic_load(compiler, SLJIT_MOV_U16, SLJIT_R0, SLJIT_R1);
/* buf[31] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 31 * sizeof(sljit_sw), SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_R1), 0);
jump = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_S1, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 51403);
/* buf[30] */
/* buf[11] */
sljit_emit_atomic_store(compiler, SLJIT_MOV_U16 | SLJIT_SET_ATOMIC_STORED, SLJIT_R2, SLJIT_R1, SLJIT_R0);
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label);
sljit_set_label(jump, sljit_emit_label(compiler));

sljit_emit_return_void(compiler);

code.code = sljit_generate_code(compiler);
CHECK(compiler);
sljit_free_compiler(compiler);

code.func1((sljit_sw)&buf);

FAILED(buf[0] != -9856, "test92 case 1 failed\n");
FAILED(buf[1] != -4678, "test92 case 2 failed\n");
FAILED(*(sljit_u8*)(buf + 2) != 203, "test92 case 3 failed\n");
FAILED(((sljit_u8*)(buf + 2))[1] != 0xff, "test92 case 4 failed\n");
FAILED(buf[3] != 178, "test92 case 5 failed\n");
FAILED(buf[4] != 203, "test92 case 6 failed\n");
FAILED(*(sljit_u8*)(buf + 5) != 97, "test92 case 7 failed\n");
FAILED(((sljit_u8*)(buf + 5))[1] != 0xff, "test92 case 8 failed\n");
FAILED(*(sljit_u32*)(buf + 6) != 211, "test92 case 9 failed\n");
FAILED(buf[7] != (sljit_sw)(buf + 5), "test92 case 10 failed\n");
FAILED(buf[8] != 97, "test92 case 11 failed\n");
FAILED(*(sljit_u16*)(buf + 9) != (sljit_u16)(sljit_sw)(buf + 9), "test92 case 12 failed\n");
FAILED(((sljit_u8*)(buf + 9))[2] != 0xff, "test92 case 13 failed\n");
FAILED(buf[10] != 17897, "test92 case 14 failed\n");
FAILED(buf[11] != (sljit_sw)(buf + 9), "test92 case 15 failed\n");
FAILED(*(sljit_u16*)(buf + 12) != 41306, "test92 case 16 failed\n");
FAILED(((sljit_u8*)(buf + 12))[2] != 0xff, "test92 case 17 failed\n");
FAILED(*(sljit_u32*)(buf + 13) != 57812, "test92 case 18 failed\n");
FAILED(buf[14] != 41306, "test92 case 19 failed\n");
FAILED(*(sljit_u32*)(buf + 15) != 987654321, "test92 case 20 failed\n");
#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
FAILED(((sljit_u8*)(buf + 15))[4] != 0xff, "test92 case 21 failed\n");
#endif /* SLJIT_64BIT_ARCHITECTURE */
FAILED(buf[16] != 1234567890, "test92 case 22 failed\n");
FAILED(*(sljit_u32*)(buf + 17) != 678906789, "test92 case 23 failed\n");
#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
FAILED(((sljit_u8*)(buf + 17))[4] != 0xff, "test92 case 24 failed\n");
#endif /* SLJIT_64BIT_ARCHITECTURE */
FAILED(*(sljit_u32*)(buf + 18) != 987609876, "test92 case 25 failed\n");
FAILED(buf[19] != -573621, "test92 case 26 failed\n");
FAILED(*(sljit_u8*)(buf + 20) != 240, "test92 case 27 failed\n");
FAILED(((sljit_u8*)(buf + 20))[1] != 0xff, "test92 case 28 failed\n");
FAILED(buf[21] != 192, "test92 case 29 failed\n");
FAILED(buf[22] != -5893, "test92 case 30 failed\n");
FAILED(buf[23] != 4059, "test92 case 31 failed\n");
FAILED(buf[24] != 6359, "test92 case 32 failed\n");
FAILED(buf[25] != (sljit_sw)(buf + 23), "test92 case 33 failed\n");
FAILED(((sljit_u8*)(buf + 26))[0] != 255, "test92 case 34 failed\n");
FAILED(((sljit_u8*)(buf + 26))[1] != 204, "test92 case 35 failed\n");
FAILED(((sljit_u8*)(buf + 26))[2] != 255, "test92 case 36 failed\n");
FAILED(buf[27] != 105, "test92 case 37 failed\n");
FAILED(((sljit_u8*)(buf + 28))[1] != 255, "test92 case 38 failed\n");
FAILED(((sljit_u8*)(buf + 28))[2] != 240, "test92 case 39 failed\n");
FAILED(((sljit_u8*)(buf + 28))[3] != 255, "test92 case 40 failed\n");
FAILED(buf[29] != 13, "test92 case 41 failed\n");
FAILED(((sljit_u16*)(buf + 30))[0] != 65535, "test92 case 42 failed\n");
FAILED(((sljit_u16*)(buf + 30))[1] != 51403, "test92 case 43 failed\n");
FAILED(buf[31] != 14876, "test92 case 44 failed\n");
FAILED(buf[1] != HV(sljit_u8, 0, 203), "test92 case 2 failed\n");
FAILED(buf[2] != HV(sljit_u8, 0, 97), "test92 case 3 failed\n");
FAILED(buf[3] != HV(sljit_u16, 0, (sljit_u16)(sljit_sw)(buf + 3)),
"test92 case 4 failed\n");
FAILED(buf[4] != HV(sljit_u16, 0, 41306), "test92 case 5 failed\n");
FAILED(buf[5] != HV(sljit_u32, 0, 987654321), "test92 case 6 failed\n");
FAILED(buf[6] != HV(sljit_u32, 0, 678906789), "test92 case 7 failed\n");
FAILED(buf[7] != HV(sljit_u8, 0, 240), "test92 case 8 failed\n");
FAILED(buf[8] != 4059, "test92 case 9 failed\n");
FAILED(buf[9] != HV(sljit_u8, 1, 204), "test92 case 10 failed\n");
FAILED(buf[10] != HV(sljit_u8, 2, 240), "test92 case 11 failed\n");
FAILED(buf[11] != HV(sljit_u16, 1, 51403), "test92 case 12 failed\n");

sljit_free_code(code.code, NULL);
#endif
successful_tests++;
}

#undef HV
#undef H
#undef O
#undef M

static void test93(void)
{
/* Floating point set immediate. */
Expand Down