Skip to content

Commit

Permalink
Add Zen5 tunings
Browse files Browse the repository at this point in the history
  • Loading branch information
animetosho committed Aug 6, 2024
1 parent dc89435 commit 54fd976
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
8 changes: 4 additions & 4 deletions src/encoder_avx_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,12 +217,12 @@ HEDLEY_ALWAYS_INLINE void do_encode_avx2(int line_size, int* colOffset, const ui
// duplicate halves
data1A = _mm256_inserti128_si256(dataA, _mm256_castsi256_si128(dataA), 1);
data1B = _mm256_inserti128_si256(dataB, _mm256_castsi256_si128(dataB), 1);
-#if defined(__tune_znver2__) || defined(__tune_znver3__) || defined(__tune_znver4__)
-data2A = _mm256_permute2x128_si256(dataA, dataA, 0x11);
-data2B = _mm256_permute2x128_si256(dataB, dataB, 0x11);
-#else
+#if defined(__tune_znver1__) || defined(__tune_bdver4__)
 data2A = _mm256_permute4x64_epi64(dataA, 0xee);
 data2B = _mm256_permute4x64_epi64(dataB, 0xee);
+#else
+data2A = _mm256_permute2x128_si256(dataA, dataA, 0x11);
+data2B = _mm256_permute2x128_si256(dataB, dataB, 0x11);
 #endif

shuf1A = _mm256_load_si256(lookupsAVX2->shufExpand + m1);
Expand Down
2 changes: 1 addition & 1 deletion src/encoder_sse_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ HEDLEY_ALWAYS_INLINE void do_encode_sse(int line_size, int* colOffset, const uin
#if defined(__POPCNT__) && !defined(__tune_btver1__)
if(use_isa & ISA_FEATURE_POPCNT) {
shuf2Len = popcnt32(maskA) + 16;
-# if defined(__tune_znver4__) || defined(__tune_znver3__) || defined(__tune_znver2__) || defined(__tune_znver1__) || defined(__tune_btver2__)
+# if defined(__tune_znver6__) || defined(__tune_znver5__) || defined(__tune_znver4__) || defined(__tune_znver3__) || defined(__tune_znver2__) || defined(__tune_znver1__) || defined(__tune_btver2__)
shuf1Len = popcnt32(m1) + 8;
shuf3Len = popcnt32(m3) + shuf2Len + 8;
# else
Expand Down

0 comments on commit 54fd976

Please sign in to comment.