Skip to content

Commit

Permalink
sse4.2: bug fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
masterchef2209 committed May 20, 2020
1 parent 6713f1d commit dd9be1d
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 1,424 deletions.
124 changes: 85 additions & 39 deletions simde/x86/sse4.2.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -105,8 +105,8 @@ simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int i
const int upper_bound = (128 / 8) - 1;
int a_invalid = 0;
int b_invalid = 0;
for(int i = 0 ; i < upper_bound ; i++) {
for(int j = 0; j< upper_bound ; j++){
for(int i = 0 ; i <= upper_bound ; i++) {
for(int j = 0; j <= upper_bound ; j++){
int bitvalue = ((a_.i8[i] == b_.i8[j]) ? 1 : 0);
if(i == la)
a_invalid = 1;
Expand All @@ -132,70 +132,93 @@ simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int i
bitvalue = 0;
break;
}
bool_res_.i8[i] |= (bitvalue << j);
if(bitvalue)
bool_res_.i16[i] |= (1UL << j);
else
bool_res_.i16[i] &= ~(1UL << j);
}
}
int32_t int_res_1 = 0;
int32_t int_res_2 = 0;
switch(cmp_op) {
case SIMDE_SIDD_CMP_EQUAL_ANY:
for(int i = 0 ; i < upper_bound ; i++){
for(int i = 0 ; i <= upper_bound ; i++){
SIMDE_VECTORIZE_REDUCTION(|:int_res_1)
for(int j = 0 ; j < upper_bound ; j++){
for(int j = 0 ; j <= upper_bound ; j++){
int_res_1 |= (((bool_res_.i8[i] >> j) & 1) << i);
}
}
break;
case SIMDE_SIDD_CMP_RANGES:
for(int i = 0 ; i < upper_bound ; i++){
for(int i = 0 ; i <= upper_bound ; i++){
SIMDE_VECTORIZE_REDUCTION(|:int_res_1)
for(int j = 0 ; j < upper_bound ; j++){
for(int j = 0 ; j <= upper_bound ; j++){
int_res_1 |= ((((bool_res_.i8[i] >> j) & 1) & ((bool_res_.i8[i] >> (j + 1)) & 1)) << i);
j += 2;
}
}
break;
case SIMDE_SIDD_CMP_EQUAL_EACH:
for(int i = 0 ; i < upper_bound ; i++){
SIMDE_VECTORIZE_REDUCTION(|:int_res_1)
for(int j = 0 ; j < upper_bound ; j++){
int_res_1 |= (((bool_res_.i8[i] >> i) & 1) << i);
for(int i = 0 ; i <= upper_bound ; i++){
//SIMDE_VECTORIZE_REDUCTION(|:int_res_1)
for(int j = 0 ; j <= upper_bound ; j++){
int bitvalue = ((bool_res_.i8[i] >> i) & 1);
if(bitvalue)
int_res_1 |= (1UL << i);
else
int_res_1 &= ~(1UL << i);
}
}
break;
case SIMDE_SIDD_CMP_EQUAL_ORDERED:
int_res_1 = 0xff;
for(int i = 0 ; i < upper_bound ; i++){
for(int i = 0 ; i <= upper_bound ; i++){
int k = i;
HEDLEY_DIAGNOSTIC_PUSH
#if defined(SIMDE_BUG_CLANG_45959)
#pragma clang diagnostic ignored "-Wsign-conversion"
#endif
SIMDE_VECTORIZE_REDUCTION(&:int_res_1)
for(int j = 0 ; j < (upper_bound-i) ; j++){
for(int j = 0 ; j <= (upper_bound-i) ; j++){
int_res_1 &= (((bool_res_.i8[k] >> j) & 1 ) << i) ;
k += 1;
}
HEDLEY_DIAGNOSTIC_POP
}
break;
}
for(int i = 0; i < upper_bound ; i++){
for(int i = 0; i <= upper_bound ; i++){
if(polarity & SIMDE_SIDD_NEGATIVE_POLARITY){
if(polarity & SIMDE_SIDD_MASKED_POSITIVE_POLARITY) {
if (i >= lb) {
int_res_2 |= (((int_res_1 >> i) & 1) << i);
int bitvalue = ((int_res_1 >> i) & 1);
if(bitvalue)
int_res_2 |= (1UL << i);
else
int_res_2 &= ~(1UL << i);
}
else {
int_res_2 |= ((((int_res_1 >> i) & 1) ^ (-1)) << i);
int bitvalue = (((int_res_1 >> i) & 1) ^ (-1));
if(bitvalue)
int_res_2 |= (1UL << i);
else
int_res_2 &= ~(1UL << i);
}
}
else{
int_res_2 |= ((((int_res_1 >> i) & 1) ^ (-1)) << i);
int bitvalue = (((int_res_1 >> i) & 1) ^ (-1));
if(bitvalue)
int_res_2 |= (1UL << i);
else
int_res_2 &= ~(1UL << i);
}
}
else{
int_res_2 |= ( ((int_res_1 >> i) & 1) << i);
int bitvalue = ((int_res_1 >> i) & 1);
if(bitvalue)
int_res_2 |= (1UL << i);
else
int_res_2 &= ~(1UL << i);
}
}
return !int_res_2 & (lb > upper_bound);
Expand All @@ -214,8 +237,8 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int
const int upper_bound = (128 / 16) - 1;
int a_invalid = 0;
int b_invalid = 0;
for(int i = 0 ; i < upper_bound ; i++) {
for(int j = 0; j< upper_bound ; j++)
for(int i = 0 ; i <= upper_bound ; i++) {
for(int j = 0; j <= upper_bound ; j++)
{
int bitvalue = ((a_.i16[i] == b_.i16[j]) ? 1 : 0);
if(i == la)
Expand All @@ -242,70 +265,93 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int
bitvalue = 0;
break;
}
bool_res_.i16[i] |= (bitvalue << j);
if(bitvalue)
bool_res_.i16[i] |= (1UL << j);
else
bool_res_.i16[i] &= ~(1UL << j);
}
}
int32_t int_res_1 = 0;
int32_t int_res_2 = 0;
switch(cmp_op) {
case SIMDE_SIDD_CMP_EQUAL_ANY:
for(int i = 0 ; i < upper_bound ; i++){
for(int i = 0 ; i <= upper_bound ; i++){
SIMDE_VECTORIZE_REDUCTION(|:int_res_1)
for (int j = 0 ; j < upper_bound ; j++){
for (int j = 0 ; j <= upper_bound ; j++){
int_res_1 |= (((bool_res_.i16[i] >> j) & 1) << i) ;
}
}
break;
case SIMDE_SIDD_CMP_RANGES:
for(int i = 0 ; i < upper_bound ; i++){
for(int i = 0 ; i <= upper_bound ; i++){
SIMDE_VECTORIZE_REDUCTION(|:int_res_1)
for(int j = 0 ; j < upper_bound ; j++){
for(int j = 0 ; j <= upper_bound ; j++){
int_res_1 |= ((((bool_res_.i16[i] >> j) & 1) & ((bool_res_.i16[i] >> (j + 1)) & 1)) << i);
j += 2;
}
}
break;
case SIMDE_SIDD_CMP_EQUAL_EACH:
for(int i = 0 ; i < upper_bound ; i++){
for(int i = 0 ; i <= upper_bound ; i++){
SIMDE_VECTORIZE_REDUCTION(|:int_res_1)
for(int j = 0 ; j < upper_bound ; j++){
int_res_1 |= (((bool_res_.i16[i] >> i) & 1) << i);
for(int j = 0 ; j <= upper_bound ; j++){
int bitvalue = ((bool_res_.i16[i] >> i) & 1);
if(bitvalue)
int_res_1 |= (1UL << i);
else
int_res_1 &= ~(1UL << i);
}
}
break;
case SIMDE_SIDD_CMP_EQUAL_ORDERED:
int_res_1 = 0xffff;
for(int i = 0 ; i < upper_bound ; i++){
for(int i = 0 ; i <= upper_bound ; i++){
int k = i;
HEDLEY_DIAGNOSTIC_PUSH
#if defined(SIMDE_BUG_CLANG_45959)
#pragma clang diagnostic ignored "-Wsign-conversion"
#endif
SIMDE_VECTORIZE_REDUCTION(&:int_res_1)
for(int j = 0 ; j < (upper_bound-i) ; j++){
for(int j = 0 ; j <= (upper_bound-i) ; j++){
int_res_1 &= (((bool_res_.i16[k] >> j) & 1) << i) ;
k += 1;
}
HEDLEY_DIAGNOSTIC_POP
}
break;
}
for(int i = 0; i < upper_bound ; i++){
for(int i = 0; i <= upper_bound ; i++){
if(polarity & SIMDE_SIDD_NEGATIVE_POLARITY){
if(polarity & SIMDE_SIDD_MASKED_POSITIVE_POLARITY) {
if (i >= lb) {
int_res_2 |= (((int_res_1 >> i) & 1) << i);
int bitvalue = ((int_res_1 >> i) & 1);
if(bitvalue)
int_res_2 |= (1UL << i);
else
int_res_2 &= ~(1UL << i);
}
else {
int_res_2 |= ((((int_res_1 >> i) & 1) ^ (-1)) << i);
int bitvalue = (((int_res_1 >> i) & 1) ^ (-1));
if(bitvalue)
int_res_2 |= (1UL << i);
else
int_res_2 &= ~(1UL << i);
}
}
else{
int_res_2 |= ((((int_res_1 >> i) & 1) ^ (-1)) << i);
int bitvalue = (((int_res_1 >> i) & 1) ^ (-1));
if(bitvalue)
int_res_2 |= (1UL << i);
else
int_res_2 &= ~(1UL << i);
}
}
else{
int_res_2 |= (((int_res_1 >> i) & 1) << i);
int bitvalue = ((int_res_1 >> i) & 1);
if(bitvalue)
int_res_2 |= (1UL << i);
else
int_res_2 &= ~(1UL << i);
}
}
return !int_res_2 & (lb > upper_bound);
Expand Down Expand Up @@ -399,7 +445,7 @@ simde_mm_cmpistrs_8_(simde__m128i a) {
const int upper_bound = (128 / 8) - 1;
int a_invalid = 0;
SIMDE_VECTORIZE
for (int i = 0 ; i < upper_bound ; i++) {
for (int i = 0 ; i <= upper_bound ; i++) {
if(!a_.i8[i])
a_invalid = 1;
}
Expand All @@ -413,7 +459,7 @@ simde_mm_cmpistrs_16_(simde__m128i a) {
const int upper_bound = (128 / 16) - 1;
int a_invalid = 0;
SIMDE_VECTORIZE
for (int i = 0 ; i < upper_bound ; i++) {
for (int i = 0 ; i <= upper_bound ; i++) {
if(!a_.i16[i])
a_invalid = 1;
}
Expand All @@ -439,7 +485,7 @@ simde_mm_cmpistrz_8_(simde__m128i b) {
const int upper_bound = (128 / 8) - 1;
int b_invalid = 0;
SIMDE_VECTORIZE
for (int i = 0 ; i < upper_bound ; i++) {
for (int i = 0 ; i <= upper_bound ; i++) {
if(!b_.i8[i])
b_invalid = 1;
}
Expand All @@ -453,7 +499,7 @@ simde_mm_cmpistrz_16_(simde__m128i b) {
const int upper_bound = (128 / 16) - 1;
int b_invalid = 0;
SIMDE_VECTORIZE
for (int i = 0 ; i < upper_bound ; i++) {
for (int i = 0 ; i <= upper_bound ; i++) {
if(!b_.i16[i])
b_invalid = 1;
}
Expand Down
Loading

0 comments on commit dd9be1d

Please sign in to comment.