diff --git a/compiler/aarch64/codegen/OMRCodeGenerator.cpp b/compiler/aarch64/codegen/OMRCodeGenerator.cpp
index e0dd59702c2..fda192e907b 100644
--- a/compiler/aarch64/codegen/OMRCodeGenerator.cpp
+++ b/compiler/aarch64/codegen/OMRCodeGenerator.cpp
@@ -207,6 +207,12 @@ OMR::ARM64::CodeGenerator::initialize()
       {
       cg->setSupportsArrayTranslateTRTO255();
       }
+
+   static bool disableTROTNoBreak = (feGetEnv("TR_disableTROTNoBreak") != NULL); // set TR_disableTROTNoBreak to opt out
+   if (!disableTROTNoBreak)
+      {
+      cg->setSupportsArrayTranslateTROTNoBreak();
+      }
    }
 
 void
diff --git a/compiler/aarch64/codegen/OMRTreeEvaluator.cpp b/compiler/aarch64/codegen/OMRTreeEvaluator.cpp
index 7332730c994..97ab14d236c 100644
--- a/compiler/aarch64/codegen/OMRTreeEvaluator.cpp
+++ b/compiler/aarch64/codegen/OMRTreeEvaluator.cpp
@@ -6426,46 +6426,72 @@ OMR::ARM64::TreeEvaluator::arraytranslateEvaluator(TR::Node *node, TR::CodeGener
    //    (0) input ptr
    //    (1) output ptr
    //    (2) translation table (dummy)
-   //    (3) stop character (terminal character, either 0xff00ff00 (ISO8859) or 0xff80ff80 (ASCII)
+   //    (3) stop character (terminal character)
+   //          TROT: dummy
+   //          TRTO: either 0xff00ff00 (ISO8859) or 0xff80ff80 (ASCII)
    //    (4) input length (in elements)
    //    (5) stopping char (dummy)
    //
    // Number of translated elements is returned
 
    TR::Compilation *comp = cg->comp();
-   bool arrayTranslateTRTO255 = false;
+   bool isSourceByteArray = node->isSourceByteArrayTranslate();
+   TR_RuntimeHelper helper; // assigned in every branch of the if/else below
+   bool useX3 = false; // the use* flags record which optional registers the selected helper needs
+   bool useX6 = false;
+   bool useV2 = false;
+   bool useV3 = false;
 
-   TR_ASSERT_FATAL(!node->isSourceByteArrayTranslate(), "Source is byte[] for arraytranslate");
-   TR_ASSERT_FATAL(node->isTargetByteArrayTranslate(), "Target is char[] for arraytranslate");
    TR_ASSERT_FATAL(node->getChild(3)->getOpCodeValue() == TR::iconst, "Non-constant stop char for arraytranslate");
 
-   if (node->getChild(3)->getInt() == 0x0ff00ff00)
+   if (isSourceByteArray)
       {
-      arrayTranslateTRTO255 = true;
+      // byte[] to char[]
+      TR_ASSERT_FATAL(!node->isTargetByteArrayTranslate(), "byte[] to byte[] is not supported in arraytranslate");
+      helper = TR_ARM64arrayTranslateTROTNoBreak; // needs none of the optional registers
       }
    else
       {
-      TR_ASSERT_FATAL(node->getChild(3)->getInt() == 0x0ff80ff80, "Unknown stop char for arraytranslate");
+      // char[] to byte[]
+      TR_ASSERT_FATAL(node->isTargetByteArrayTranslate(), "char[] to char[] is not supported for arraytranslate");
+      if (node->getChild(3)->getInt() == 0x0ff00ff00)
+         {
+         helper = TR_ARM64arrayTranslateTRTO255;
+         useX6 = true;
+         useV2 = true;
+         }
+      else
+         {
+         TR_ASSERT_FATAL(node->getChild(3)->getInt() == 0x0ff80ff80, "Unknown stop char for arraytranslate");
+
+         helper = TR_ARM64arrayTranslateTRTO;
+         useX3 = true;
+         useX6 = true;
+         useV2 = true;
+         useV3 = true;
+         }
       }
 
+   int numDeps = 9 + (useX3 ? 1 : 0) + (useX6 ? 1 : 0) + (useV2 ? 1 : 0) + (useV3 ? 1 : 0); // one extra slot per optional register
+
    static bool verboseArrayTranslate = (feGetEnv("TR_verboseArrayTranslate") != NULL);
    if (verboseArrayTranslate)
       {
-      fprintf(stderr, "arrayTranslateTRTO: %s @ %s [isTO255: %d]\n",
+      fprintf(stderr, "arrayTranslate: %s @ %s [isSourceByteArray: %d] [child(3): %x] x3=%d x6=%d v2=%d v3=%d\n",
          comp->signature(),
          comp->getHotnessName(comp->getMethodHotness()),
-         arrayTranslateTRTO255
+         isSourceByteArray,
+         node->getChild(3)->getInt(),
+         useX3, useX6, useV2, useV3
          );
       }
 
    TR::Register *inputReg = cg->gprClobberEvaluate(node->getChild(0));
    TR::Register *outputReg = cg->gprClobberEvaluate(node->getChild(1));
-   TR::Register *stopCharReg = arrayTranslateTRTO255 ? NULL : cg->gprClobberEvaluate(node->getChild(3));
+   TR::Register *stopCharReg = useX3 ? cg->gprClobberEvaluate(node->getChild(3)) : NULL; // child 3 is evaluated only for the helper that takes it in x3
    TR::Register *inputLenReg = cg->gprClobberEvaluate(node->getChild(4));
    TR::Register *outputLenReg = cg->allocateRegister();
 
-   int numDeps = arrayTranslateTRTO255 ? 10 : 12;
-
    TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(1, numDeps, cg->trMemory());
 
    deps->addPreCondition(inputReg, TR::RealRegister::x0);
@@ -6473,7 +6499,7 @@ OMR::ARM64::TreeEvaluator::arraytranslateEvaluator(TR::Node *node, TR::CodeGener
    deps->addPostCondition(outputLenReg, TR::RealRegister::x0);
    deps->addPostCondition(outputReg, TR::RealRegister::x1);
    deps->addPostCondition(inputLenReg, TR::RealRegister::x2);
-   if (!arrayTranslateTRTO255)
+   if (useX3)
       {
       deps->addPostCondition(stopCharReg, TR::RealRegister::x3);
       }
@@ -6484,23 +6510,28 @@ OMR::ARM64::TreeEvaluator::arraytranslateEvaluator(TR::Node *node, TR::CodeGener
    cg->stopUsingRegister(clobberedReg);
    deps->addPostCondition(clobberedReg = cg->allocateRegister(), TR::RealRegister::x5);
    cg->stopUsingRegister(clobberedReg);
-   deps->addPostCondition(clobberedReg = cg->allocateRegister(), TR::RealRegister::x6);
-   cg->stopUsingRegister(clobberedReg);
+   if (useX6)
+      {
+      deps->addPostCondition(clobberedReg = cg->allocateRegister(), TR::RealRegister::x6);
+      cg->stopUsingRegister(clobberedReg);
+      }
 
    deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v0);
    cg->stopUsingRegister(clobberedReg);
    deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v1);
    cg->stopUsingRegister(clobberedReg);
-   deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v2);
-   cg->stopUsingRegister(clobberedReg);
-   if (!arrayTranslateTRTO255)
+   if (useV2)
+      {
+      deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v2);
+      cg->stopUsingRegister(clobberedReg);
+      }
+   if (useV3)
       {
       deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v3);
       cg->stopUsingRegister(clobberedReg);
       }
 
    // Array Translate helper call
-   TR_RuntimeHelper helper = arrayTranslateTRTO255 ? TR_ARM64arrayTranslateTRTO255 : TR_ARM64arrayTranslateTRTO;
    TR::SymbolReference *helperSym = cg->symRefTab()->findOrCreateRuntimeHelper(helper);
    uintptr_t addr = reinterpret_cast<uintptr_t>(helperSym->getMethodAddress());
    generateImmSymInstruction(cg, TR::InstOpCode::bl, node, addr, deps, helperSym, NULL);
@@ -6514,7 +6545,7 @@ OMR::ARM64::TreeEvaluator::arraytranslateEvaluator(TR::Node *node, TR::CodeGener
    if (outputReg != node->getChild(1)->getRegister())
       cg->stopUsingRegister(outputReg);
 
-   if (!arrayTranslateTRTO255 && stopCharReg != node->getChild(3)->getRegister())
+   if (useX3 && stopCharReg != node->getChild(3)->getRegister())
       cg->stopUsingRegister(stopCharReg);
 
    if (inputLenReg != node->getChild(4)->getRegister())
diff --git a/compiler/aarch64/runtime/ARM64ArrayTranslate.spp b/compiler/aarch64/runtime/ARM64ArrayTranslate.spp
index b7b07f00e4e..edc2e9d732b 100644
--- a/compiler/aarch64/runtime/ARM64ArrayTranslate.spp
+++ b/compiler/aarch64/runtime/ARM64ArrayTranslate.spp
@@ -25,6 +25,7 @@
 
     .globl FUNC_LABEL(__arrayTranslateTRTO)
     .globl FUNC_LABEL(__arrayTranslateTRTO255)
+    .globl FUNC_LABEL(__arrayTranslateTROTNoBreak)
 
     .text
     .align 2
@@ -240,3 +241,72 @@ atTRTO255_Done:
     // number of translated elements
     sub x0, x1, x6
     ret
+
+// ----
+// arrayTranslateTROTNoBreak
+// ----
+// OT stands for One byte to Two bytes
+//
+// uint8 input[];
+// uint16 output[];
+// int32 len;
+//
+// int32 i = 0;
+// while (i < len) {
+//     output[i] = (uint16)input[i];
+//     i++;
+// }
+// return i;
+//
+// in:    x0: input
+//        x1: output
+//        x2: len
+// out:   x0: num of translated elements
+// trash: x4-x5, v0-v1
+
+FUNC_LABEL(__arrayTranslateTROTNoBreak):
+    cmp w2, #16
+    b.cc atTROTNB_15 // unsigned lower: fewer than 16 elements, skip the vector loop
+    lsr w4, w2, #4 // w4 = len / 16 = number of 16-element iterations
+atTROTNB_16Loop:
+    // load 16 elements
+    ldr q0, [x0], #16
+    // unsigned extension
+    uxtl v1.8h, v0.8b // low 8 bytes -> v1
+    uxtl2 v0.8h, v0.16b // high 8 bytes -> v0 (overwrites the source register)
+    subs w4, w4, #1 // sets the flags tested by b.ne below; stp leaves them untouched
+    // store 16 elements
+    stp q1, q0, [x1], #32
+    b.ne atTROTNB_16Loop
+atTROTNB_15:
+    // 15 elements or less remaining
+    tst w2, #8 // bit 3 of len set: one 8-element chunk remains
+    b.eq atTROTNB_7
+    // load 8 elements
+    ldr d0, [x0], #8
+    // unsigned extension
+    uxtl v1.8h, v0.8b
+    // store 8 elements
+    str q1, [x1], #16
+atTROTNB_7:
+    // 7 elements or less remaining
+    tst w2, #4 // bit 2 of len set: one 4-element chunk remains
+    b.eq atTROTNB_3
+    // load 4 elements
+    ldr s0, [x0], #4
+    // unsigned extension
+    uxtl v1.8h, v0.8b // only the low 4 halfwords of v1 are stored below
+    // store 4 elements
+    str d1, [x1], #8
+atTROTNB_3:
+    // 3 elements or less remaining
+    ands w4, w2, #3 // w4 = len & 3; the flags feed the b.eq at the loop head
+atTROTNB_1Loop:
+    b.eq atTROTNB_Done // tests flags from ands (first pass) or subs (later passes)
+    ldrb w5, [x0], #1 // ldrb zero-extends the byte into w5
+    subs w4, w4, #1
+    strh w5, [x1], #2
+    b atTROTNB_1Loop
+atTROTNB_Done:
+    mov x0, x2 // no early exit in this routine: the result is len
+    ret
diff --git a/compiler/ras/Debug.cpp b/compiler/ras/Debug.cpp
index 9ec209df8a2..2a98b5e0cef 100644
--- a/compiler/ras/Debug.cpp
+++ b/compiler/ras/Debug.cpp
@@ -4131,6 +4131,7 @@ TR_Debug::getRuntimeHelperName(int32_t index)
          case TR_ARM64PatchGCRHelper:                              return "_patchGCRHelper";
          case TR_ARM64arrayTranslateTRTO:                          return "__arrayTranslateTRTO";
          case TR_ARM64arrayTranslateTRTO255:                       return "__arrayTranslateTRTO255";
+         case TR_ARM64arrayTranslateTROTNoBreak:                   return "__arrayTranslateTROTNoBreak";
          }
       }
 #endif
diff --git a/compiler/runtime/Helpers.inc b/compiler/runtime/Helpers.inc
index fae1b17c348..f0051821e40 100644
--- a/compiler/runtime/Helpers.inc
+++ b/compiler/runtime/Helpers.inc
@@ -499,7 +499,8 @@ SETVAL(TR_ARM64PatchGCRHelper,TR_FSRH+44)
 SETVAL(TR_ARM64fieldWatchHelper,TR_FSRH+45)
 SETVAL(TR_ARM64arrayTranslateTRTO,TR_FSRH+46)
 SETVAL(TR_ARM64arrayTranslateTRTO255,TR_FSRH+47)
-SETVAL(TR_ARM64numRuntimeHelpers,TR_FSRH+48)
+SETVAL(TR_ARM64arrayTranslateTROTNoBreak,TR_FSRH+48)
+SETVAL(TR_ARM64numRuntimeHelpers,TR_FSRH+49)
 
 SETVAL(TR_S390longDivide,TR_FSRH)
 SETVAL(TR_S390interfaceCallHelper,TR_FSRH+1)