diff --git a/src/encoder_avx_base.h b/src/encoder_avx_base.h index d663f92..7185f74 100644 --- a/src/encoder_avx_base.h +++ b/src/encoder_avx_base.h @@ -217,12 +217,12 @@ HEDLEY_ALWAYS_INLINE void do_encode_avx2(int line_size, int* colOffset, const ui // duplicate halves data1A = _mm256_inserti128_si256(dataA, _mm256_castsi256_si128(dataA), 1); data1B = _mm256_inserti128_si256(dataB, _mm256_castsi256_si128(dataB), 1); -#if defined(__tune_znver2__) || defined(__tune_znver3__) || defined(__tune_znver4__) - data2A = _mm256_permute2x128_si256(dataA, dataA, 0x11); - data2B = _mm256_permute2x128_si256(dataB, dataB, 0x11); -#else +#if defined(__tune_znver1__) || defined(__tune_bdver4__) data2A = _mm256_permute4x64_epi64(dataA, 0xee); data2B = _mm256_permute4x64_epi64(dataB, 0xee); +#else + data2A = _mm256_permute2x128_si256(dataA, dataA, 0x11); + data2B = _mm256_permute2x128_si256(dataB, dataB, 0x11); #endif shuf1A = _mm256_load_si256(lookupsAVX2->shufExpand + m1); diff --git a/src/encoder_sse_base.h b/src/encoder_sse_base.h index f6c18b4..2eb6ca3 100644 --- a/src/encoder_sse_base.h +++ b/src/encoder_sse_base.h @@ -351,7 +351,7 @@ HEDLEY_ALWAYS_INLINE void do_encode_sse(int line_size, int* colOffset, const uin #if defined(__POPCNT__) && !defined(__tune_btver1__) if(use_isa & ISA_FEATURE_POPCNT) { shuf2Len = popcnt32(maskA) + 16; -# if defined(__tune_znver4__) || defined(__tune_znver3__) || defined(__tune_znver2__) || defined(__tune_znver1__) || defined(__tune_btver2__) +# if defined(__tune_znver6__) || defined(__tune_znver5__) || defined(__tune_znver4__) || defined(__tune_znver3__) || defined(__tune_znver2__) || defined(__tune_znver1__) || defined(__tune_btver2__) shuf1Len = popcnt32(m1) + 8; shuf3Len = popcnt32(m3) + shuf2Len + 8; # else