From 1328c86e06f0615061b311995ff5b3b93037d106 Mon Sep 17 00:00:00 2001 From: Bradley Wood Date: Tue, 26 Nov 2024 14:54:18 -0500 Subject: [PATCH 1/2] x86: Add options to disable SIMD features Signed-off-by: Bradley Wood --- compiler/control/OMROptions.cpp | 35 ++++++++++++++++++ compiler/control/OMROptions.hpp | 13 +++---- compiler/x/codegen/OMRCodeGenerator.cpp | 3 -- compiler/x/codegen/OMRInstOpCode.cpp | 2 -- compiler/x/codegen/OMRTreeEvaluator.cpp | 1 - compiler/x/codegen/X86BinaryEncoding.cpp | 2 -- compiler/x/env/OMRCPU.cpp | 46 ++++++++++++++++++++++++ compiler/x/env/OMRCPU.hpp | 1 + 8 files changed, 89 insertions(+), 14 deletions(-) diff --git a/compiler/control/OMROptions.cpp b/compiler/control/OMROptions.cpp index 1ab22341773..162af2aa31a 100644 --- a/compiler/control/OMROptions.cpp +++ b/compiler/control/OMROptions.cpp @@ -269,6 +269,9 @@ TR::OptionTable OMR::Options::_jitOptions[] = { {"disableAsyncCheckVersioning", "O\tdisable versioning of loops wrt async checks", SET_OPTION_BIT(TR_DisableAsyncCheckVersioning), "F"}, {"disableAsyncCompilation", "M\tdisable asynchronous compilation", SET_OPTION_BIT(TR_DisableAsyncCompilation), "F"}, {"disableAutoSIMD", "M\tdisable automatic vectorization of loops", SET_OPTION_BIT(TR_DisableAutoSIMD), "F"}, + {"disableAVX", "C\tdisable avx and newer on x86", TR::Options::disableCPUFeatures, TR_DisableAVX, 0, "F"}, + {"disableAVX2", "C\tdisable avx2 and newer on x86", TR::Options::disableCPUFeatures, TR_DisableAVX2, 0, "F"}, + {"disableAVX512", "C\tdisable avx512 on x86", TR::Options::disableCPUFeatures, TR_DisableAVX512, 0, "F"}, {"disableBasicBlockExtension", "O\tdisable basic block extension", TR::Options::disableOptimization, basicBlockExtension, 0, "P"}, {"disableBasicBlockPeepHole", "O\tdisable basic blocks peepHole", SET_OPTION_BIT(TR_DisableBasicBlockPeepHole), "F"}, {"disableBCDArithChildOrdering", "O\tstress testing option -- do not reorder children of BCD arithmetic nodes", 
SET_OPTION_BIT(TR_DisableBCDArithChildOrdering), "F" }, @@ -554,6 +557,9 @@ TR::OptionTable OMR::Options::_jitOptions[] = { {"disableSIMDUTF16BEEncoder", "M\tdisable inlining of SIMD UTF16 Big Endian encoder", SET_OPTION_BIT(TR_DisableSIMDUTF16BEEncoder), "F"}, {"disableSIMDUTF16LEEncoder", "M\tdisable inlining of SIMD UTF16 Little Endian encoder", SET_OPTION_BIT(TR_DisableSIMDUTF16LEEncoder), "F"}, {"disableSmartPlacementOfCodeCaches", "O\tdisable placement of code caches in memory so they are near each other and the DLLs", SET_OPTION_BIT(TR_DisableSmartPlacementOfCodeCaches), "F", NOT_IN_SUBSET}, + {"disableSSE3", "C\tdisable sse 3 and newer on x86", TR::Options::disableCPUFeatures, TR_DisableSSE3, 0, "F"}, + {"disableSSE4_1", "C\tdisable sse 4.1 and newer on x86", TR::Options::disableCPUFeatures, TR_DisableSSE4_1, 0, "F"}, + {"disableSSE4_2", "C\tdisable sse 4.2 and newer on x86", TR::Options::disableCPUFeatures, TR_DisableSSE4_2, 0, "F"}, {"disableStableAnnotations", "M\tdisable recognition of @Stable", SET_OPTION_BIT(TR_DisableStableAnnotations), "F"}, {"disableStaticFinalFieldFolding", "O\tdisable generic static final field folding", TR::Options::disableOptimization, staticFinalFieldFolding, 0, "P"}, {"disableStoreOnCondition", "O\tdisable store on condition (STOC) code gen", SET_OPTION_BIT(TR_DisableStoreOnCondition), "F"}, @@ -4943,6 +4949,35 @@ OMR::Options::configureOptReporting(const char *option, void *base, TR::OptionTa return option; } +const char * +OMR::Options::disableCPUFeatures(const char *option, void *base, TR::OptionTable *entry) + { + TR::Options *options = (TR::Options*)base; + TR_CompilationOptions co = (TR_CompilationOptions)entry->parm1; + options->setOption(co); + + /* When disabling SIMD, disable newer features too */ + + switch (co) + { + case TR_DisableSSE3: + options->setOption(TR_DisableSSE3); + case TR_DisableSSE4_1: + options->setOption(TR_DisableSSE4_1); + case TR_DisableSSE4_2: + options->setOption(TR_DisableSSE4_2); + case 
TR_DisableAVX: + options->setOption(TR_DisableAVX); + case TR_DisableAVX2: + options->setOption(TR_DisableAVX2); + case TR_DisableAVX512: + options->setOption(TR_DisableAVX512); + default: + break; + } + + return option; + } const char *OMR::Options::_verboseOptionNames[TR_NumVerboseOptions] = { diff --git a/compiler/control/OMROptions.hpp b/compiler/control/OMROptions.hpp index fe6a228d7de..fd6bec07002 100644 --- a/compiler/control/OMROptions.hpp +++ b/compiler/control/OMROptions.hpp @@ -400,13 +400,13 @@ enum TR_CompilationOptions TR_EnableVectorAPIBoxing = 0x00010000 + 10, TR_EnableSequentialLoadStoreWarm = 0x00020000 + 10, TR_EnableSequentialLoadStoreCold = 0x00040000 + 10, - // Available = 0x00080000 + 10, - // Available = 0x00100000 + 10, - // Available = 0x00200000 + 10, + TR_DisableAVX = 0x00080000 + 10, + TR_DisableAVX2 = 0x00100000 + 10, + TR_DisableAVX512 = 0x00200000 + 10, TR_ConservativeCompilation = 0x00400000 + 10, - // Available = 0x00800000 + 10, - // Available = 0x01000000 + 10, - // Available = 0x02000000 + 10, + TR_DisableSSE3 = 0x00800000 + 10, + TR_DisableSSE4_1 = 0x01000000 + 10, + TR_DisableSSE4_2 = 0x02000000 + 10, TR_DisableNewX86VolatileSupport = 0x04000000 + 10, // Available = 0x08000000 + 10, // Available = 0x10000000 + 10, @@ -2295,6 +2295,7 @@ class OMR_EXTENSIBLE Options static const char *clearBitsFromStringSet(const char *option, void *base, TR::OptionTable *entry); static const char *configureOptReporting(const char *option, void *base, TR::OptionTable *entry); + static const char *disableCPUFeatures(const char *option, void *base, TR::OptionTable *entry); // Option processing helper functions // diff --git a/compiler/x/codegen/OMRCodeGenerator.cpp b/compiler/x/codegen/OMRCodeGenerator.cpp index 9ea35594895..d8367502983 100644 --- a/compiler/x/codegen/OMRCodeGenerator.cpp +++ b/compiler/x/codegen/OMRCodeGenerator.cpp @@ -424,7 +424,6 @@ OMR::X86::CodeGenerator::initializeX86(TR::Compilation *comp) static bool disableX86TRTO = 
feGetEnv("TR_disableX86TRTO") != NULL; if (!disableX86TRTO) { - TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1) == self()->getX86ProcessorInfo().supportsSSE4_1(), "supportsSSE4_1() failed\n"); if (comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1)) { self()->setSupportsArrayTranslateTRTO(); @@ -433,8 +432,6 @@ OMR::X86::CodeGenerator::initializeX86(TR::Compilation *comp) static bool disableX86TROT = feGetEnv("TR_disableX86TROT") != NULL; if (!disableX86TROT) { - TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1) == self()->getX86ProcessorInfo().supportsSSE4_1(), "supportsSSE4_1() failed\n"); - TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE2) == self()->getX86ProcessorInfo().supportsSSE2(), "supportsSSE4_1() failed\n"); if (comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1)) { self()->setSupportsArrayTranslateTROT(); diff --git a/compiler/x/codegen/OMRInstOpCode.cpp b/compiler/x/codegen/OMRInstOpCode.cpp index 925712630f0..088e9e79158 100644 --- a/compiler/x/codegen/OMRInstOpCode.cpp +++ b/compiler/x/codegen/OMRInstOpCode.cpp @@ -103,8 +103,6 @@ template typename TBuffer::cursor_t OMR::X86::InstOpCode::OpC TR::Instruction::REX rex(rexbits); rex.W = rex_w; - TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.supportsAVX() == TR::CodeGenerator::getX86ProcessorInfo().supportsAVX(), "supportsAVX() failed\n"); - if (enc != VEX_L___) { if (enc >> 2 && enc != VEX_LZ) diff --git a/compiler/x/codegen/OMRTreeEvaluator.cpp b/compiler/x/codegen/OMRTreeEvaluator.cpp index 3de371978d9..dc93d962373 100644 
--- a/compiler/x/codegen/OMRTreeEvaluator.cpp +++ b/compiler/x/codegen/OMRTreeEvaluator.cpp @@ -5818,7 +5818,6 @@ TR::Register* OMR::X86::TreeEvaluator::floatingPointBinaryArithmeticEvaluator(TR TR::Node* operandNode0 = node->getChild(0); TR::Node* operandNode1 = node->getChild(1); - TR_ASSERT_FATAL(cg->comp()->compileRelocatableCode() || cg->comp()->isOutOfProcessCompilation() || cg->comp()->compilePortableCode() || cg->comp()->target().cpu.supportsAVX() == TR::CodeGenerator::getX86ProcessorInfo().supportsAVX(), "supportsAVX() failed\n"); bool useRegMemForm = cg->comp()->target().cpu.supportsAVX(); if (useRegMemForm) diff --git a/compiler/x/codegen/X86BinaryEncoding.cpp b/compiler/x/codegen/X86BinaryEncoding.cpp index 3b2798b9bce..db2c51bb98c 100644 --- a/compiler/x/codegen/X86BinaryEncoding.cpp +++ b/compiler/x/codegen/X86BinaryEncoding.cpp @@ -1547,8 +1547,6 @@ TR::X86RegInstruction::enlarge(int32_t requestedEnlargementSize, int32_t maxEnla if (disableRexExpansion || cg()->comp()->getOption(TR_DisableZealousCodegenOpts)) return OMR::X86::EnlargementResult(0, 0); - TR_ASSERT_FATAL(cg()->comp()->compileRelocatableCode() || cg()->comp()->isOutOfProcessCompilation() || cg()->comp()->compilePortableCode() || cg()->comp()->target().cpu.supportsAVX() == cg()->getX86ProcessorInfo().supportsAVX(), "supportsAVX() failed\n"); - if (getOpCode().info().supportsAVX() && cg()->comp()->target().cpu.supportsAVX()) return OMR::X86::EnlargementResult(0, 0); // REX expansion isn't allowed for AVX instructions diff --git a/compiler/x/env/OMRCPU.cpp b/compiler/x/env/OMRCPU.cpp index 7c203c46fe1..036dc85d322 100644 --- a/compiler/x/env/OMRCPU.cpp +++ b/compiler/x/env/OMRCPU.cpp @@ -254,9 +254,55 @@ OMR::X86::CPU::is(OMRProcessorArchitecture p) return _processorDescription.processor == p; } +bool +OMR::X86::CPU::is_feature_disabled(uint32_t feature) + { + TR_CompilationOptions option = (TR_CompilationOptions) 0; + + switch (feature) + { + case OMR_FEATURE_X86_SSE3: + option = 
TR_DisableSSE3; + break; + case OMR_FEATURE_X86_SSE4_1: + option = TR_DisableSSE4_1; + break; + case OMR_FEATURE_X86_SSE4_2: + option = TR_DisableSSE4_2; + break; + case OMR_FEATURE_X86_AVX: + option = TR_DisableAVX; + break; + case OMR_FEATURE_X86_AVX2: + option = TR_DisableAVX2; + break; + case OMR_FEATURE_X86_AVX512F: + case OMR_FEATURE_X86_AVX512VL: + case OMR_FEATURE_X86_AVX512BW: + case OMR_FEATURE_X86_AVX512CD: + case OMR_FEATURE_X86_AVX512DQ: + case OMR_FEATURE_X86_AVX512ER: + case OMR_FEATURE_X86_AVX512PF: + case OMR_FEATURE_X86_AVX512_BITALG: + case OMR_FEATURE_X86_AVX512_IFMA: + case OMR_FEATURE_X86_AVX512_VBMI: + case OMR_FEATURE_X86_AVX512_VBMI2: + case OMR_FEATURE_X86_AVX512_VNNI: + case OMR_FEATURE_X86_AVX512_VPOPCNTDQ: + option = TR_DisableAVX512; + default: + break; + } + + return option && compilation && compilation->getOption(option); + } + bool OMR::X86::CPU::supportsFeature(uint32_t feature) { + if (is_feature_disabled(feature)) + return false; + if (TR::Compiler->omrPortLib == NULL) return self()->supports_feature_old_api(feature); diff --git a/compiler/x/env/OMRCPU.hpp b/compiler/x/env/OMRCPU.hpp index d2b842ce3c9..ac9291c36aa 100644 --- a/compiler/x/env/OMRCPU.hpp +++ b/compiler/x/env/OMRCPU.hpp @@ -146,6 +146,7 @@ class OMR_EXTENSIBLE CPU : public OMR::CPU bool supportsFeature(uint32_t feature); bool supports_feature_old_api(uint32_t feature); bool supports_feature_test(uint32_t feature); + bool is_feature_disabled(uint32_t feature); /** * @brief Returns name of the current processor From 86e7af6ae37f4563eb0ab9b2977c4f9d60be576d Mon Sep 17 00:00:00 2001 From: Bradley Wood Date: Wed, 27 Nov 2024 12:34:06 -0500 Subject: [PATCH 2/2] x86: Disable AVX-512 if zmm XCR0 flags are not set Signed-off-by: Bradley Wood --- compiler/x/env/OMRCPU.cpp | 40 +++++++++++++++++++++++++------ compiler/x/runtime/X86Runtime.hpp | 38 +++++++++++++++++++++++------ 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/compiler/x/env/OMRCPU.cpp 
b/compiler/x/env/OMRCPU.cpp index 036dc85d322..cd16535112a 100644 --- a/compiler/x/env/OMRCPU.cpp +++ b/compiler/x/env/OMRCPU.cpp @@ -61,14 +61,39 @@ OMR::X86::CPU::detect(OMRPortLibrary * const omrPortLib) processorDescription.features[i] &= featureMasks.features[i]; } + bool disableAVX = true; + bool disableAVX512 = true; + + // Check XCR0 register for OS support of xmm/ymm/zmm if (TRUE == omrsysinfo_processor_has_feature(&processorDescription, OMR_FEATURE_X86_OSXSAVE)) { - static const bool disableAVX = feGetEnv("TR_DisableAVX") != NULL; - if (((6 & _xgetbv(0)) != 6) || disableAVX) // '6' = mask for XCR0[2:1]='11b' (XMM state and YMM state are enabled) - { - // Unset OSXSAVE if not enabled via CR0 - omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_OSXSAVE, FALSE); - } + // '6' = mask for XCR0[2:1]='11b' (XMM state and YMM state are enabled) + disableAVX = ((6 & _xgetbv(0)) != 6); + // 'e6' = (mask for XCR0[7:5]='111b' (Opmask, ZMM_Hi256, Hi16_ZMM) + XCR0[2:1]='11b' (XMM/YMM)) + disableAVX512 = ((0xe6 & _xgetbv(0)) != 0xe6); + } + + if(disableAVX) + { + // Unset AVX/AVX2 if not enabled via XCR0 or otherwise disabled + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX, FALSE); + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX2, FALSE); + } + + if (disableAVX512) + { + // Unset AVX-512 if not enabled via XCR0 or otherwise disabled + // If other AVX-512 extensions are supported in the port library, they need to be disabled here + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512F, FALSE); + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512VL, FALSE); + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512BW, FALSE); + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512CD, FALSE); + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512DQ, FALSE); + 
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_BITALG, FALSE); + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_VBMI, FALSE); + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_VBMI2, FALSE); + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_VNNI, FALSE); + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_VPOPCNTDQ, FALSE); } return TR::CPU(processorDescription); @@ -258,6 +283,7 @@ bool OMR::X86::CPU::is_feature_disabled(uint32_t feature) { TR_CompilationOptions option = (TR_CompilationOptions) 0; + TR::Compilation *comp = TR::comp(); switch (feature) { @@ -294,7 +320,7 @@ OMR::X86::CPU::is_feature_disabled(uint32_t feature) break; } - return option && compilation && compilation->getOption(option); + return option && comp && comp->getOption(option); } bool diff --git a/compiler/x/runtime/X86Runtime.hpp b/compiler/x/runtime/X86Runtime.hpp index 7a59d52bbb4..4205baaebcf 100644 --- a/compiler/x/runtime/X86Runtime.hpp +++ b/compiler/x/runtime/X86Runtime.hpp @@ -87,15 +87,39 @@ inline bool jitGetCPUID(TR_X86CPUIDBuffer* pBuffer) pBuffer->_featureFlags8 = CPUInfo[EBX]; pBuffer->_featureFlags10 = CPUInfo[ECX]; - // Check for XSAVE + bool disableAVX = true; + bool disableAVX512 = true; + + // Check XCR0 register for OS support of xmm/ymm/zmm if(pBuffer->_featureFlags2 & TR_OSXSAVE) { - static const bool disableAVX = feGetEnv("TR_DisableAVX") != NULL; - if(((6 & _xgetbv(0)) != 6) || disableAVX) // '6' = mask for XCR0[2:1]='11b' (XMM state and YMM state are enabled) - { - // Unset OSXSAVE if not enabled via CR0 - pBuffer->_featureFlags2 &= ~TR_OSXSAVE; - } + // '6' = mask for XCR0[2:1]='11b' (XMM state and YMM state are enabled) + disableAVX = ((6 & _xgetbv(0)) != 6); + // 'e6' = (mask for XCR0[7:5]='111b' (Opmask, ZMM_Hi256, Hi16_ZMM) + XCR0[2:1]='11b' (XMM/YMM)) + disableAVX512 = ((0xe6 & _xgetbv(0)) != 0xe6); + } + + 
if(disableAVX) + { + // Unset AVX/AVX2 if not enabled via XCR0 or otherwise disabled + pBuffer->_featureFlags2 &= ~TR_AVX; + pBuffer->_featureFlags8 &= ~TR_AVX2; + } + + if (disableAVX512) + { + // Unset AVX-512 if not enabled via XCR0 or otherwise disabled + // If other AVX-512 extensions are supported in the old cpuid API, they need to be disabled here + pBuffer->_featureFlags8 &= ~TR_AVX512F; + pBuffer->_featureFlags8 &= ~TR_AVX512VL; + pBuffer->_featureFlags8 &= ~TR_AVX512BW; + pBuffer->_featureFlags8 &= ~TR_AVX512CD; + pBuffer->_featureFlags8 &= ~TR_AVX512DQ; + pBuffer->_featureFlags10 &= ~TR_AVX512_BITALG; + pBuffer->_featureFlags10 &= ~TR_AVX512_VBMI; + pBuffer->_featureFlags10 &= ~TR_AVX512_VBMI2; + pBuffer->_featureFlags10 &= ~TR_AVX512_VNNI; + pBuffer->_featureFlags10 &= ~TR_AVX512_VPOPCNTDQ; } /* Mask out the bits the compiler does not care about.