Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

x86: Add disableAVX2/512 options and check XCR0 for OS support #7602

Merged
merged 2 commits into from
Jan 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions compiler/control/OMROptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,9 @@ TR::OptionTable OMR::Options::_jitOptions[] = {
{"disableAsyncCheckVersioning", "O\tdisable versioning of loops wrt async checks", SET_OPTION_BIT(TR_DisableAsyncCheckVersioning), "F"},
{"disableAsyncCompilation", "M\tdisable asynchronous compilation", SET_OPTION_BIT(TR_DisableAsyncCompilation), "F"},
{"disableAutoSIMD", "M\tdisable automatic vectorization of loops", SET_OPTION_BIT(TR_DisableAutoSIMD), "F"},
{"disableAVX", "C\tdisable avx and newer on x86", TR::Options::disableCPUFeatures, TR_DisableAVX, 0, "F"},
{"disableAVX2", "C\tdisable avx2 and newer on x86", TR::Options::disableCPUFeatures, TR_DisableAVX2, 0, "F"},
{"disableAVX512", "C\tdisable avx512 on x86", TR::Options::disableCPUFeatures, TR_DisableAVX512, 0, "F"},
{"disableBasicBlockExtension", "O\tdisable basic block extension", TR::Options::disableOptimization, basicBlockExtension, 0, "P"},
{"disableBasicBlockPeepHole", "O\tdisable basic blocks peepHole", SET_OPTION_BIT(TR_DisableBasicBlockPeepHole), "F"},
{"disableBCDArithChildOrdering", "O\tstress testing option -- do not reorder children of BCD arithmetic nodes", SET_OPTION_BIT(TR_DisableBCDArithChildOrdering), "F" },
Expand Down Expand Up @@ -554,6 +557,9 @@ TR::OptionTable OMR::Options::_jitOptions[] = {
{"disableSIMDUTF16BEEncoder", "M\tdisable inlining of SIMD UTF16 Big Endian encoder", SET_OPTION_BIT(TR_DisableSIMDUTF16BEEncoder), "F"},
{"disableSIMDUTF16LEEncoder", "M\tdisable inlining of SIMD UTF16 Little Endian encoder", SET_OPTION_BIT(TR_DisableSIMDUTF16LEEncoder), "F"},
{"disableSmartPlacementOfCodeCaches", "O\tdisable placement of code caches in memory so they are near each other and the DLLs", SET_OPTION_BIT(TR_DisableSmartPlacementOfCodeCaches), "F", NOT_IN_SUBSET},
{"disableSSE3", "C\tdisable sse 3 and newer on x86", TR::Options::disableCPUFeatures, TR_DisableSSE3, 0, "F"},
{"disableSSE4_1", "C\tdisable sse 4.1 and newer on x86", TR::Options::disableCPUFeatures, TR_DisableSSE4_1, 0, "F"},
{"disableSSE4_2", "C\tdisable sse 4.2 and newer on x86", TR::Options::disableCPUFeatures, TR_DisableSSE4_2, 0, "F"},
{"disableStableAnnotations", "M\tdisable recognition of @Stable", SET_OPTION_BIT(TR_DisableStableAnnotations), "F"},
{"disableStaticFinalFieldFolding", "O\tdisable generic static final field folding", TR::Options::disableOptimization, staticFinalFieldFolding, 0, "P"},
{"disableStoreOnCondition", "O\tdisable store on condition (STOC) code gen", SET_OPTION_BIT(TR_DisableStoreOnCondition), "F"},
Expand Down Expand Up @@ -4943,6 +4949,35 @@ OMR::Options::configureOptReporting(const char *option, void *base, TR::OptionTa
return option;
}

const char *
OMR::Options::disableCPUFeatures(const char *option, void *base, TR::OptionTable *entry)
{
TR::Options *options = (TR::Options*)base;
TR_CompilationOptions co = (TR_CompilationOptions)entry->parm1;
options->setOption(co);

/* When disabling SIMD, disable newer features too */

switch (co)
{
case TR_DisableSSE3:
options->setOption(TR_DisableSSE3);
case TR_DisableSSE4_1:
options->setOption(TR_DisableSSE4_1);
case TR_DisableSSE4_2:
options->setOption(TR_DisableSSE4_2);
case TR_DisableAVX:
options->setOption(TR_DisableAVX);
case TR_DisableAVX2:
options->setOption(TR_DisableAVX2);
case TR_DisableAVX512:
options->setOption(TR_DisableAVX512);
default:
break;
}

return option;
}

const char *OMR::Options::_verboseOptionNames[TR_NumVerboseOptions] =
{
Expand Down
13 changes: 7 additions & 6 deletions compiler/control/OMROptions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -400,13 +400,13 @@ enum TR_CompilationOptions
TR_EnableVectorAPIBoxing = 0x00010000 + 10,
TR_EnableSequentialLoadStoreWarm = 0x00020000 + 10,
TR_EnableSequentialLoadStoreCold = 0x00040000 + 10,
// Available = 0x00080000 + 10,
// Available = 0x00100000 + 10,
// Available = 0x00200000 + 10,
TR_DisableAVX = 0x00080000 + 10,
TR_DisableAVX2 = 0x00100000 + 10,
TR_DisableAVX512 = 0x00200000 + 10,
TR_ConservativeCompilation = 0x00400000 + 10,
// Available = 0x00800000 + 10,
// Available = 0x01000000 + 10,
// Available = 0x02000000 + 10,
TR_DisableSSE3 = 0x00800000 + 10,
TR_DisableSSE4_1 = 0x01000000 + 10,
TR_DisableSSE4_2 = 0x02000000 + 10,
TR_DisableNewX86VolatileSupport = 0x04000000 + 10,
// Available = 0x08000000 + 10,
// Available = 0x10000000 + 10,
Expand Down Expand Up @@ -2295,6 +2295,7 @@ class OMR_EXTENSIBLE Options
static const char *clearBitsFromStringSet(const char *option, void *base, TR::OptionTable *entry);

static const char *configureOptReporting(const char *option, void *base, TR::OptionTable *entry);
static const char *disableCPUFeatures(const char *option, void *base, TR::OptionTable *entry);

// Option processing helper functions
//
Expand Down
3 changes: 0 additions & 3 deletions compiler/x/codegen/OMRCodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,6 @@ OMR::X86::CodeGenerator::initializeX86(TR::Compilation *comp)
static bool disableX86TRTO = feGetEnv("TR_disableX86TRTO") != NULL;
if (!disableX86TRTO)
{
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1) == self()->getX86ProcessorInfo().supportsSSE4_1(), "supportsSSE4_1() failed\n");
if (comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1))
{
self()->setSupportsArrayTranslateTRTO();
Expand All @@ -433,8 +432,6 @@ OMR::X86::CodeGenerator::initializeX86(TR::Compilation *comp)
static bool disableX86TROT = feGetEnv("TR_disableX86TROT") != NULL;
if (!disableX86TROT)
{
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1) == self()->getX86ProcessorInfo().supportsSSE4_1(), "supportsSSE4_1() failed\n");
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE2) == self()->getX86ProcessorInfo().supportsSSE2(), "supportsSSE4_1() failed\n");
if (comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1))
{
self()->setSupportsArrayTranslateTROT();
Expand Down
2 changes: 0 additions & 2 deletions compiler/x/codegen/OMRInstOpCode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,6 @@ template <typename TBuffer> typename TBuffer::cursor_t OMR::X86::InstOpCode::OpC
TR::Instruction::REX rex(rexbits);
rex.W = rex_w;

TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.supportsAVX() == TR::CodeGenerator::getX86ProcessorInfo().supportsAVX(), "supportsAVX() failed\n");

if (enc != VEX_L___)
{
if (enc >> 2 && enc != VEX_LZ)
Expand Down
1 change: 0 additions & 1 deletion compiler/x/codegen/OMRTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5818,7 +5818,6 @@ TR::Register* OMR::X86::TreeEvaluator::floatingPointBinaryArithmeticEvaluator(TR
TR::Node* operandNode0 = node->getChild(0);
TR::Node* operandNode1 = node->getChild(1);

TR_ASSERT_FATAL(cg->comp()->compileRelocatableCode() || cg->comp()->isOutOfProcessCompilation() || cg->comp()->compilePortableCode() || cg->comp()->target().cpu.supportsAVX() == TR::CodeGenerator::getX86ProcessorInfo().supportsAVX(), "supportsAVX() failed\n");
bool useRegMemForm = cg->comp()->target().cpu.supportsAVX();

if (useRegMemForm)
Expand Down
2 changes: 0 additions & 2 deletions compiler/x/codegen/X86BinaryEncoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1547,8 +1547,6 @@ TR::X86RegInstruction::enlarge(int32_t requestedEnlargementSize, int32_t maxEnla
if (disableRexExpansion || cg()->comp()->getOption(TR_DisableZealousCodegenOpts))
return OMR::X86::EnlargementResult(0, 0);

TR_ASSERT_FATAL(cg()->comp()->compileRelocatableCode() || cg()->comp()->isOutOfProcessCompilation() || cg()->comp()->compilePortableCode() || cg()->comp()->target().cpu.supportsAVX() == cg()->getX86ProcessorInfo().supportsAVX(), "supportsAVX() failed\n");

if (getOpCode().info().supportsAVX() && cg()->comp()->target().cpu.supportsAVX())
return OMR::X86::EnlargementResult(0, 0); // REX expansion isn't allowed for AVX instructions

Expand Down
90 changes: 84 additions & 6 deletions compiler/x/env/OMRCPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,39 @@ OMR::X86::CPU::detect(OMRPortLibrary * const omrPortLib)
processorDescription.features[i] &= featureMasks.features[i];
}

bool disableAVX = true;
bool disableAVX512 = true;

// Check XCR0 register for OS support of xmm/ymm/zmm
if (TRUE == omrsysinfo_processor_has_feature(&processorDescription, OMR_FEATURE_X86_OSXSAVE))
{
static const bool disableAVX = feGetEnv("TR_DisableAVX") != NULL;
if (((6 & _xgetbv(0)) != 6) || disableAVX) // '6' = mask for XCR0[2:1]='11b' (XMM state and YMM state are enabled)
{
// Unset OSXSAVE if not enabled via CR0
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_OSXSAVE, FALSE);
}
// '6' = mask for XCR0[2:1]='11b' (XMM state and YMM state are enabled)
disableAVX = ((6 & _xgetbv(0)) != 6);
// 'e6' = (mask for XCR0[7:5]='111b' (Opmask, ZMM_Hi256, Hi16_ZMM) + XCR0[2:1]='11b' (XMM/YMM))
disableAVX512 = ((0xe6 & _xgetbv(0)) != 0xe6);
}

if (disableAVX)
{
// Unset AVX/AVX2 if OSXSAVE is unavailable or XMM/YMM state is not enabled in XCR0
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX2, FALSE);
}

if (disableAVX512)
{
// Unset AVX-512 if OSXSAVE is unavailable or Opmask/ZMM state is not enabled in XCR0
// If other AVX-512 extensions are supported in the port library, they need to be disabled here
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512F, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512VL, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512BW, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512CD, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512DQ, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_BITALG, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_VBMI, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_VBMI2, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_VNNI, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_VPOPCNTDQ, FALSE);
}

return TR::CPU(processorDescription);
Expand Down Expand Up @@ -254,9 +279,62 @@ OMR::X86::CPU::is(OMRProcessorArchitecture p)
return _processorDescription.processor == p;
}

/**
 * Reports whether a compilation option disables the given x86 processor feature.
 *
 * Each SSE3 / SSE4.1 / SSE4.2 / AVX / AVX2 feature maps to its matching
 * TR_Disable* option, and every AVX-512 feature flag maps to TR_DisableAVX512.
 * Features without a mapping are never reported as disabled.
 *
 * @param feature an OMR_FEATURE_X86_* processor feature identifier
 * @return true if a compilation object is available and the mapped option is set
 *         on it; false otherwise
 */
bool
OMR::X86::CPU::isFeatureDisabledByOption(uint32_t feature)
   {
   TR_CompilationOptions disablingOption = (TR_CompilationOptions) 0;

   switch (feature)
      {
      case OMR_FEATURE_X86_SSE3:   disablingOption = TR_DisableSSE3;   break;
      case OMR_FEATURE_X86_SSE4_1: disablingOption = TR_DisableSSE4_1; break;
      case OMR_FEATURE_X86_SSE4_2: disablingOption = TR_DisableSSE4_2; break;
      case OMR_FEATURE_X86_AVX:    disablingOption = TR_DisableAVX;    break;
      case OMR_FEATURE_X86_AVX2:   disablingOption = TR_DisableAVX2;   break;

      // Every AVX-512 extension is controlled by the single AVX-512 option
      case OMR_FEATURE_X86_AVX512F:
      case OMR_FEATURE_X86_AVX512VL:
      case OMR_FEATURE_X86_AVX512BW:
      case OMR_FEATURE_X86_AVX512CD:
      case OMR_FEATURE_X86_AVX512DQ:
      case OMR_FEATURE_X86_AVX512ER:
      case OMR_FEATURE_X86_AVX512PF:
      case OMR_FEATURE_X86_AVX512_BITALG:
      case OMR_FEATURE_X86_AVX512_IFMA:
      case OMR_FEATURE_X86_AVX512_VBMI:
      case OMR_FEATURE_X86_AVX512_VBMI2:
      case OMR_FEATURE_X86_AVX512_VNNI:
      case OMR_FEATURE_X86_AVX512_VPOPCNTDQ:
         disablingOption = TR_DisableAVX512;
         break;

      // No option governs this feature; return before touching _comp
      default:
         return false;
      }

   // Lazy initialize thread local compilation object
   if (_comp == NULL)
      _comp = TR::comp();

   // Without a compilation object there are no options to consult
   if (_comp == NULL)
      return false;

   return _comp->getOption(disablingOption);
   }

bool
OMR::X86::CPU::supportsFeature(uint32_t feature)
{
if (isFeatureDisabledByOption(feature))
return false;

if (TR::Compiler->omrPortLib == NULL)
return self()->supports_feature_old_api(feature);

Expand Down
4 changes: 4 additions & 0 deletions compiler/x/env/OMRCPU.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ class OMR_EXTENSIBLE CPU : public OMR::CPU
CPU() : OMR::CPU() {}
CPU(const OMRProcessorDesc& processorDescription) : OMR::CPU(processorDescription) {}

TR::Compilation *_comp = NULL;

public:

static TR::CPU detect(OMRPortLibrary * const omrPortLib);
Expand Down Expand Up @@ -147,6 +149,8 @@ class OMR_EXTENSIBLE CPU : public OMR::CPU
bool supports_feature_old_api(uint32_t feature);
bool supports_feature_test(uint32_t feature);

bool isFeatureDisabledByOption(uint32_t feature);

/**
* @brief Returns name of the current processor
* @returns const char* string representing the name of the current processor
Expand Down
38 changes: 31 additions & 7 deletions compiler/x/runtime/X86Runtime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,39 @@ inline bool jitGetCPUID(TR_X86CPUIDBuffer* pBuffer)
pBuffer->_featureFlags8 = CPUInfo[EBX];
pBuffer->_featureFlags10 = CPUInfo[ECX];

// Check for XSAVE
bool disableAVX = true;
bool disableAVX512 = true;

// Check XCR0 register for OS support of xmm/ymm/zmm
if(pBuffer->_featureFlags2 & TR_OSXSAVE)
{
static const bool disableAVX = feGetEnv("TR_DisableAVX") != NULL;
if(((6 & _xgetbv(0)) != 6) || disableAVX) // '6' = mask for XCR0[2:1]='11b' (XMM state and YMM state are enabled)
{
// Unset OSXSAVE if not enabled via CR0
pBuffer->_featureFlags2 &= ~TR_OSXSAVE;
}
// '6' = mask for XCR0[2:1]='11b' (XMM state and YMM state are enabled)
disableAVX = ((6 & _xgetbv(0)) != 6);
// 'e6' = (mask for XCR0[7:5]='111b' (Opmask, ZMM_Hi256, Hi16_ZMM) + XCR0[2:1]='11b' (XMM/YMM))
disableAVX512 = ((0xe6 & _xgetbv(0)) != 0xe6);
}

if (disableAVX)
{
// Unset AVX/AVX2 if OSXSAVE is unavailable or XMM/YMM state is not enabled in XCR0
pBuffer->_featureFlags2 &= ~TR_AVX;
pBuffer->_featureFlags8 &= ~TR_AVX2;
}

if (disableAVX512)
{
// Unset AVX-512 if OSXSAVE is unavailable or Opmask/ZMM state is not enabled in XCR0
// If other AVX-512 extensions are supported in the old cpuid API, they need to be disabled here
pBuffer->_featureFlags8 &= ~TR_AVX512F;
pBuffer->_featureFlags8 &= ~TR_AVX512VL;
pBuffer->_featureFlags8 &= ~TR_AVX512BW;
pBuffer->_featureFlags8 &= ~TR_AVX512CD;
pBuffer->_featureFlags8 &= ~TR_AVX512DQ;
pBuffer->_featureFlags10 &= ~TR_AVX512_BITALG;
pBuffer->_featureFlags10 &= ~TR_AVX512_VBMI;
pBuffer->_featureFlags10 &= ~TR_AVX512_VBMI2;
pBuffer->_featureFlags10 &= ~TR_AVX512_VNNI;
pBuffer->_featureFlags10 &= ~TR_AVX512_VPOPCNTDQ;
}

/* Mask out the bits the compiler does not care about.
Expand Down