Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AArch64: Implement arrayTranslateTROTNoBreak #7587

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions compiler/aarch64/codegen/OMRCodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,12 @@ OMR::ARM64::CodeGenerator::initialize()
{
cg->setSupportsArrayTranslateTRTO255();
}

static bool disableTROTNoBreak = (feGetEnv("TR_disableTROTNoBreak") != NULL);
if (!disableTROTNoBreak)
{
cg->setSupportsArrayTranslateTROTNoBreak();
}
}

void
Expand Down
71 changes: 51 additions & 20 deletions compiler/aarch64/codegen/OMRTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6426,54 +6426,80 @@ OMR::ARM64::TreeEvaluator::arraytranslateEvaluator(TR::Node *node, TR::CodeGener
// (0) input ptr
// (1) output ptr
// (2) translation table (dummy)
// (3) stop character (terminal character, either 0xff00ff00 (ISO8859) or 0xff80ff80 (ASCII)
// (3) stop character (terminal character)
// TROT: dummy
// TRTO: either 0xff00ff00 (ISO8859) or 0xff80ff80 (ASCII)
// (4) input length (in elements)
// (5) stopping char (dummy)
//
// Number of translated elements is returned

TR::Compilation *comp = cg->comp();
bool arrayTranslateTRTO255 = false;
bool isSourceByteArray = node->isSourceByteArrayTranslate();
TR_RuntimeHelper helper;
bool useX3 = false;
bool useX6 = false;
bool useV2 = false;
bool useV3 = false;

TR_ASSERT_FATAL(!node->isSourceByteArrayTranslate(), "Source is byte[] for arraytranslate");
TR_ASSERT_FATAL(node->isTargetByteArrayTranslate(), "Target is char[] for arraytranslate");
TR_ASSERT_FATAL(node->getChild(3)->getOpCodeValue() == TR::iconst, "Non-constant stop char for arraytranslate");

if (node->getChild(3)->getInt() == 0x0ff00ff00)
if (isSourceByteArray)
{
arrayTranslateTRTO255 = true;
// byte[] to char[]
TR_ASSERT_FATAL(!node->isTargetByteArrayTranslate(), "byte[] to byte[] is not supported in arraytranslate");
helper = TR_ARM64arrayTranslateTROTNoBreak;
}
else
{
TR_ASSERT_FATAL(node->getChild(3)->getInt() == 0x0ff80ff80, "Unknown stop char for arraytranslate");
// char[] to byte[]
TR_ASSERT_FATAL(node->isTargetByteArrayTranslate(), "char[] to char[] is not supported for arraytranslate");
if (node->getChild(3)->getInt() == 0x0ff00ff00)
{
helper = TR_ARM64arrayTranslateTRTO255;
useX6 = true;
useV2 = true;
}
else
{
TR_ASSERT_FATAL(node->getChild(3)->getInt() == 0x0ff80ff80, "Unknown stop char for arraytranslate");

helper = TR_ARM64arrayTranslateTRTO;
useX3 = true;
useX6 = true;
useV2 = true;
useV3 = true;
}
}

int numDeps = 9 + (useX3 ? 1 : 0) + (useX6 ? 1 : 0) + (useV2 ? 1 : 0) + (useV3 ? 1 : 0);

static bool verboseArrayTranslate = (feGetEnv("TR_verboseArrayTranslate") != NULL);
if (verboseArrayTranslate)
{
fprintf(stderr, "arrayTranslateTRTO: %s @ %s [isTO255: %d]\n",
fprintf(stderr, "arrayTranslate: %s @ %s [isSourceByteArray: %d] [child(3): %x] x3=%d x6=%d v2=%d v3=%d\n",
comp->signature(),
comp->getHotnessName(comp->getMethodHotness()),
arrayTranslateTRTO255
isSourceByteArray,
node->getChild(3)->getInt(),
useX3, useX6, useV2, useV3
);
}

TR::Register *inputReg = cg->gprClobberEvaluate(node->getChild(0));
TR::Register *outputReg = cg->gprClobberEvaluate(node->getChild(1));
TR::Register *stopCharReg = arrayTranslateTRTO255 ? NULL : cg->gprClobberEvaluate(node->getChild(3));
TR::Register *stopCharReg = useX3 ? cg->gprClobberEvaluate(node->getChild(3)) : NULL;
TR::Register *inputLenReg = cg->gprClobberEvaluate(node->getChild(4));
TR::Register *outputLenReg = cg->allocateRegister();

int numDeps = arrayTranslateTRTO255 ? 10 : 12;

TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(1, numDeps, cg->trMemory());

deps->addPreCondition(inputReg, TR::RealRegister::x0);

deps->addPostCondition(outputLenReg, TR::RealRegister::x0);
deps->addPostCondition(outputReg, TR::RealRegister::x1);
deps->addPostCondition(inputLenReg, TR::RealRegister::x2);
if (!arrayTranslateTRTO255)
if (useX3)
{
deps->addPostCondition(stopCharReg, TR::RealRegister::x3);
}
Expand All @@ -6484,23 +6510,28 @@ OMR::ARM64::TreeEvaluator::arraytranslateEvaluator(TR::Node *node, TR::CodeGener
cg->stopUsingRegister(clobberedReg);
deps->addPostCondition(clobberedReg = cg->allocateRegister(), TR::RealRegister::x5);
cg->stopUsingRegister(clobberedReg);
deps->addPostCondition(clobberedReg = cg->allocateRegister(), TR::RealRegister::x6);
cg->stopUsingRegister(clobberedReg);
if (useX6)
{
deps->addPostCondition(clobberedReg = cg->allocateRegister(), TR::RealRegister::x6);
cg->stopUsingRegister(clobberedReg);
}

deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v0);
cg->stopUsingRegister(clobberedReg);
deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v1);
cg->stopUsingRegister(clobberedReg);
deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v2);
cg->stopUsingRegister(clobberedReg);
if (!arrayTranslateTRTO255)
if (useV2)
{
deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v2);
cg->stopUsingRegister(clobberedReg);
}
if (useV3)
{
deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v3);
cg->stopUsingRegister(clobberedReg);
}

// Array Translate helper call
TR_RuntimeHelper helper = arrayTranslateTRTO255 ? TR_ARM64arrayTranslateTRTO255 : TR_ARM64arrayTranslateTRTO;
TR::SymbolReference *helperSym = cg->symRefTab()->findOrCreateRuntimeHelper(helper);
uintptr_t addr = reinterpret_cast<uintptr_t>(helperSym->getMethodAddress());
generateImmSymInstruction(cg, TR::InstOpCode::bl, node, addr, deps, helperSym, NULL);
Expand All @@ -6514,7 +6545,7 @@ OMR::ARM64::TreeEvaluator::arraytranslateEvaluator(TR::Node *node, TR::CodeGener
if (outputReg != node->getChild(1)->getRegister())
cg->stopUsingRegister(outputReg);

if (!arrayTranslateTRTO255 && stopCharReg != node->getChild(3)->getRegister())
if (useX3 && stopCharReg != node->getChild(3)->getRegister())
cg->stopUsingRegister(stopCharReg);

if (inputLenReg != node->getChild(4)->getRegister())
Expand Down
70 changes: 70 additions & 0 deletions compiler/aarch64/runtime/ARM64ArrayTranslate.spp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

.globl FUNC_LABEL(__arrayTranslateTRTO)
.globl FUNC_LABEL(__arrayTranslateTRTO255)
.globl FUNC_LABEL(__arrayTranslateTROTNoBreak)

.text
.align 2
Expand Down Expand Up @@ -240,3 +241,72 @@ atTRTO255_Done:
// number of translated elements
sub x0, x1, x6
ret

// ----
// arrayTranslateTROTNoBreak
// ----
// OT stands for One byte to Two bytes.
// "NoBreak": the routine has no stop-character check; every input element
// is zero-extended and copied, so the result is always len.
//
// Equivalent C:
//
// uint8 input[];
// uint16 output[];
// int32 len;
//
// int32 i = 0;
// while (i < len) {
// output[i] = (uint16)input[i];
// i++;
// }
// return i;
//
// in: x0: input
// x1: output
// x2: len
// out: x0: num of translated elements
// trash: x4-x5, v0-v1
// (x1 is post-incremented past the last element written and the
// condition flags are modified as well)
//
// Strategy: process 16 elements per iteration while at least 16 remain,
// then use bits 3 and 2 of len to handle one 8-element and one 4-element
// chunk, and finish the last 0-3 elements one at a time.
//
// NOTE(review): the first comparison is unsigned (b.cc), so a negative len
// would be treated as a very large count -- presumably callers guarantee
// len >= 0; confirm.

FUNC_LABEL(__arrayTranslateTROTNoBreak):
// fewer than 16 elements (unsigned compare): skip the 16-element loop
cmp w2, #16
b.cc atTROTNB_15
// w4 = len / 16 = number of 16-element iterations
lsr w4, w2, #4
atTROTNB_16Loop:
// load 16 elements (16 bytes), advancing the input pointer
ldr q0, [x0], #16
// unsigned extension: low 8 bytes -> v1, high 8 bytes -> v0 (in place)
uxtl v1.8h, v0.8b
uxtl2 v0.8h, v0.16b
// decrement the iteration counter; the flags set here are consumed by
// b.ne below (the intervening stp does not modify the flags)
subs w4, w4, #1
// store 16 elements (32 bytes), low half first, advancing the output pointer
stp q1, q0, [x1], #32
b.ne atTROTNB_16Loop
atTROTNB_15:
// 15 elements or less remaining
// bit 3 of len set => an 8-element chunk is pending
tst w2, #8
b.eq atTROTNB_7
// load 8 elements
ldr d0, [x0], #8
// unsigned extension
uxtl v1.8h, v0.8b
// store 8 elements
str q1, [x1], #16
atTROTNB_7:
// 7 elements or less remaining
// bit 2 of len set => a 4-element chunk is pending
tst w2, #4
b.eq atTROTNB_3
// load 4 elements
ldr s0, [x0], #4
// unsigned extension (widens 8 lanes; only the low 4 halfwords are stored)
uxtl v1.8h, v0.8b
// store 4 elements
str d1, [x1], #8
atTROTNB_3:
// 3 elements or less remaining
// w4 = len & 3; also sets the Z flag tested on first entry to the loop
ands w4, w2, #3
atTROTNB_1Loop:
// Z comes from the ands above on the first pass and from subs afterwards
b.eq atTROTNB_Done
// load one byte (zero-extended into w5)
ldrb w5, [x0], #1
subs w4, w4, #1
// store it as a 16-bit element; strh leaves the flags from subs intact
strh w5, [x1], #2
b atTROTNB_1Loop
atTROTNB_Done:
// return value: all len elements were translated
// NOTE(review): copies the full 64-bit x2 although len is documented as
// int32 -- presumably only the low 32 bits are consumed by the caller; confirm.
mov x0, x2
ret
1 change: 1 addition & 0 deletions compiler/ras/Debug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4131,6 +4131,7 @@ TR_Debug::getRuntimeHelperName(int32_t index)
case TR_ARM64PatchGCRHelper: return "_patchGCRHelper";
case TR_ARM64arrayTranslateTRTO: return "__arrayTranslateTRTO";
case TR_ARM64arrayTranslateTRTO255: return "__arrayTranslateTRTO255";
case TR_ARM64arrayTranslateTROTNoBreak: return "__arrayTranslateTROTNoBreak";
}
}
#endif
Expand Down
3 changes: 2 additions & 1 deletion compiler/runtime/Helpers.inc
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,8 @@ SETVAL(TR_ARM64PatchGCRHelper,TR_FSRH+44)
SETVAL(TR_ARM64fieldWatchHelper,TR_FSRH+45)
SETVAL(TR_ARM64arrayTranslateTRTO,TR_FSRH+46)
SETVAL(TR_ARM64arrayTranslateTRTO255,TR_FSRH+47)
SETVAL(TR_ARM64numRuntimeHelpers,TR_FSRH+48)
SETVAL(TR_ARM64arrayTranslateTROTNoBreak,TR_FSRH+48)
SETVAL(TR_ARM64numRuntimeHelpers,TR_FSRH+49)

SETVAL(TR_S390longDivide,TR_FSRH)
SETVAL(TR_S390interfaceCallHelper,TR_FSRH+1)
Expand Down