Skip to content

Commit

Permalink
sse4.2: added the implementation for mm_cmpestra
Browse files Browse the repository at this point in the history
  • Loading branch information
masterchef2209 committed May 21, 2020
1 parent 6ee040c commit b46c1c3
Show file tree
Hide file tree
Showing 3 changed files with 279 additions and 0 deletions.
1 change: 1 addition & 0 deletions simde/simde-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,7 @@ typedef SIMDE_FLOAT64_TYPE simde_float64;
# if defined(SIMDE_ARCH_AARCH64)
# define SIMDE_BUG_CLANG_45541
# endif
# define SIMDE_BUG_CLANG_45959
# endif
# if defined(HEDLEY_EMSCRIPTEN_VERSION)
# define SIMDE_BUG_EMSCRIPTEN_MISSING_IMPL /* Placeholder for (as yet) unfiled issues. */
Expand Down
239 changes: 239 additions & 0 deletions simde/x86/sse4.2.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,245 @@ SIMDE_BEGIN_DECLS_
#define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK
#endif

/* Portable fallback for _mm_cmpestra on 8-bit elements.
 *
 * Follows the PCMPESTRI/PCMPESTRM description in the Intel SDM:
 *   - `a` (with explicit length `la`) is the set / range list / needle,
 *     `b` (with explicit length `lb`) is the string being scanned, and
 *     bit j of the intermediate result refers to element j of `b`.
 *   - Lengths are taken as absolute values and saturated to the number
 *     of elements in the vector (16 here).
 *   - imm8 bit 1 selects signed comparison (only observable in the
 *     ranges mode), imm8[3:2] selects the aggregation, imm8[5:4] the
 *     polarity.
 *
 * Returns 1 when the (polarity-adjusted) result mask is zero AND `b`
 * contains no invalid element (|lb| >= 16); returns 0 otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
  const int cmp_op = imm8 & 0x0c;
  const int polarity = imm8 & 0x30;
  const int is_signed = imm8 & 0x02;
  const int upper_bound = (128 / 8) - 1;
  const int count = upper_bound + 1;
  /* |length| saturated to `count`; written so that -INT_MIN is never evaluated. */
  const int a_len = (la < 0) ? ((la < -count) ? count : -la) : ((la > count) ? count : la);
  const int b_len = (lb < 0) ? ((lb < -count) ? count : -lb) : ((lb > count) ? count : lb);
  simde__m128i_private
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);
  int av[128 / 8];
  int bv[128 / 8];

  /* Widen every element to int once; unsigned mode drops the sign extension. */
  for (int i = 0 ; i < count ; i++) {
    av[i] = a_.i8[i];
    bv[i] = b_.i8[i];
    if (!is_signed) {
      av[i] &= 0xff;
      bv[i] &= 0xff;
    }
  }

  /* IntRes1: one bit per element of b. */
  int32_t int_res_1 = 0;
  for (int j = 0 ; j < count ; j++) {
    const int b_valid = (j < b_len);
    int bit = 0;

    switch (cmp_op) {
      case SIMDE_SIDD_CMP_EQUAL_ANY:
        /* b[j] matches any valid element of a; any invalid operand forces false. */
        for (int i = 0 ; i < a_len ; i++) {
          bit |= (b_valid && (av[i] == bv[j]));
        }
        break;
      case SIMDE_SIDD_CMP_RANGES:
        /* a holds (low, high) pairs; a pair only counts when both of its
         * elements are valid, hence the (i + 1) < a_len bound. */
        for (int i = 0 ; (i + 1) < a_len ; i += 2) {
          bit |= (b_valid && (bv[j] >= av[i]) && (bv[j] <= av[i + 1]));
        }
        break;
      case SIMDE_SIDD_CMP_EQUAL_EACH:
        /* Position-wise compare: both invalid -> true, exactly one invalid -> false. */
        if ((j < a_len) && b_valid) {
          bit = (av[j] == bv[j]);
        } else {
          bit = ((j >= a_len) && !b_valid);
        }
        break;
      case SIMDE_SIDD_CMP_EQUAL_ORDERED:
        /* Does a (the needle) occur in b starting at position j?  Elements of a
         * past its length are forced true, so only i < a_len can clear the bit;
         * a valid a[i] against an invalid b[j + i] is forced false. */
        bit = 1;
        for (int i = 0 ; (i < a_len) && (i < (count - j)) ; i++) {
          const int k = j + i;
          if ((k >= b_len) || (av[i] != bv[k])) {
            bit = 0;
          }
        }
        break;
      default:
        break;
    }

    int_res_1 |= (bit << j);
  }

  /* IntRes2: apply polarity.  Plain negative flips all `count` bits; masked
   * negative flips only the bits that belong to valid elements of b. */
  int32_t int_res_2 = int_res_1;
  if (polarity & SIMDE_SIDD_NEGATIVE_POLARITY) {
    const int32_t negate_mask = (polarity & SIMDE_SIDD_MASKED_POSITIVE_POLARITY)
      ? ((INT32_C(1) << b_len) - 1)
      : ((INT32_C(1) << count) - 1);
    int_res_2 ^= negate_mask;
  }

  return (int_res_2 == 0) && (b_len > upper_bound);
}

/* Portable fallback for _mm_cmpestra on 16-bit elements.
 *
 * Follows the PCMPESTRI/PCMPESTRM description in the Intel SDM:
 *   - `a` (with explicit length `la`) is the set / range list / needle,
 *     `b` (with explicit length `lb`) is the string being scanned, and
 *     bit j of the intermediate result refers to element j of `b`.
 *   - Lengths are taken as absolute values and saturated to the number
 *     of elements in the vector (8 here).
 *   - imm8 bit 1 selects signed comparison (only observable in the
 *     ranges mode), imm8[3:2] selects the aggregation, imm8[5:4] the
 *     polarity.
 *
 * Returns 1 when the (polarity-adjusted) result mask is zero AND `b`
 * contains no invalid element (|lb| >= 8); returns 0 otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
  const int cmp_op = imm8 & 0x0c;
  const int polarity = imm8 & 0x30;
  const int is_signed = imm8 & 0x02;
  const int upper_bound = (128 / 16) - 1;
  const int count = upper_bound + 1;
  /* |length| saturated to `count`; written so that -INT_MIN is never evaluated. */
  const int a_len = (la < 0) ? ((la < -count) ? count : -la) : ((la > count) ? count : la);
  const int b_len = (lb < 0) ? ((lb < -count) ? count : -lb) : ((lb > count) ? count : lb);
  simde__m128i_private
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);
  int av[128 / 16];
  int bv[128 / 16];

  /* Widen every element to int once; unsigned mode drops the sign extension. */
  for (int i = 0 ; i < count ; i++) {
    av[i] = a_.i16[i];
    bv[i] = b_.i16[i];
    if (!is_signed) {
      av[i] &= 0xffff;
      bv[i] &= 0xffff;
    }
  }

  /* IntRes1: one bit per element of b. */
  int32_t int_res_1 = 0;
  for (int j = 0 ; j < count ; j++) {
    const int b_valid = (j < b_len);
    int bit = 0;

    switch (cmp_op) {
      case SIMDE_SIDD_CMP_EQUAL_ANY:
        /* b[j] matches any valid element of a; any invalid operand forces false. */
        for (int i = 0 ; i < a_len ; i++) {
          bit |= (b_valid && (av[i] == bv[j]));
        }
        break;
      case SIMDE_SIDD_CMP_RANGES:
        /* a holds (low, high) pairs; a pair only counts when both of its
         * elements are valid, hence the (i + 1) < a_len bound. */
        for (int i = 0 ; (i + 1) < a_len ; i += 2) {
          bit |= (b_valid && (bv[j] >= av[i]) && (bv[j] <= av[i + 1]));
        }
        break;
      case SIMDE_SIDD_CMP_EQUAL_EACH:
        /* Position-wise compare: both invalid -> true, exactly one invalid -> false. */
        if ((j < a_len) && b_valid) {
          bit = (av[j] == bv[j]);
        } else {
          bit = ((j >= a_len) && !b_valid);
        }
        break;
      case SIMDE_SIDD_CMP_EQUAL_ORDERED:
        /* Does a (the needle) occur in b starting at position j?  Elements of a
         * past its length are forced true, so only i < a_len can clear the bit;
         * a valid a[i] against an invalid b[j + i] is forced false. */
        bit = 1;
        for (int i = 0 ; (i < a_len) && (i < (count - j)) ; i++) {
          const int k = j + i;
          if ((k >= b_len) || (av[i] != bv[k])) {
            bit = 0;
          }
        }
        break;
      default:
        break;
    }

    int_res_1 |= (bit << j);
  }

  /* IntRes2: apply polarity.  Plain negative flips all `count` bits; masked
   * negative flips only the bits that belong to valid elements of b. */
  int32_t int_res_2 = int_res_1;
  if (polarity & SIMDE_SIDD_NEGATIVE_POLARITY) {
    const int32_t negate_mask = (polarity & SIMDE_SIDD_MASKED_POSITIVE_POLARITY)
      ? ((INT32_C(1) << b_len) - 1)
      : ((INT32_C(1) << count) - 1);
    int_res_2 ^= negate_mask;
  }

  return (int_res_2 == 0) && (b_len > upper_bound);
}

/* simde_mm_cmpestra(a, la, b, lb, imm8)
 *
 * With SIMDE_X86_SSE4_2_NATIVE defined this forwards directly to
 * _mm_cmpestra.  Otherwise it picks a portable helper based on
 * (imm8 & SIMDE_SIDD_UWORD_OPS): non-zero selects the 16-bit variant,
 * zero selects the 8-bit variant.
 *
 * Note: the portable form expands `imm8` more than once, so the
 * argument should be a side-effect-free constant expression. */
#if defined(SIMDE_X86_SSE4_2_NATIVE)
#define simde_mm_cmpestra(a, la, b, lb, imm8) _mm_cmpestra(a, la, b, lb, imm8)
#else
#define simde_mm_cmpestra(a, la, b, lb, imm8) \
(((imm8) & SIMDE_SIDD_UWORD_OPS) \
? simde_mm_cmpestra_16_((a), (la), (b), (lb), (imm8)) \
: simde_mm_cmpestra_8_((a), (la), (b), (lb), (imm8)))
#endif
/* When native aliases are requested, expose the function under the
 * _mm_cmpestra name as well. */
#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)
#define _mm_cmpestra(a, la, b, lb, imm8) simde_mm_cmpestra(a, la, b, lb, imm8)
#endif

SIMDE_FUNCTION_ATTRIBUTES
int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8)
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 127) {
Expand Down
39 changes: 39 additions & 0 deletions test/x86/sse4.2.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,43 @@

#if defined(SIMDE_X86_SSE4_2_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)


/* Table-driven check of simde_mm_cmpestra with imm8 == 36 (0x24).
 *
 * Each entry supplies both vectors, their explicit lengths and the
 * expected return value; the loop calls simde_mm_cmpestra once per
 * entry and asserts the result.  Both parameters are unused.
 *
 * NOTE(review): per the test name, 0x24 is expected to select the
 * "ranges" comparison on 8-bit elements -- confirm against the
 * SIMDE_SIDD_* constant definitions. */
static MunitResult
test_simde_mm_cmpestra_ranges_8(const MunitParameter params[], void* data) {
(void) params;
(void) data;

const struct {
simde__m128i a;
int la;
simde__m128i b;
int lb;
int r;
} test_vec[] = {
/* Single case: both lengths (23 and 28) are larger than the 16 bytes
 * held by a 128-bit vector; the expected result is 0. */
{ simde_mm_set_epi8(INT8_C( 45), INT8_C( -94), INT8_C( 38), INT8_C( -11),
INT8_C( 84), INT8_C(-123), INT8_C( -43), INT8_C( -49),
INT8_C( 25), INT8_C( -55), INT8_C(-121), INT8_C( -6),
INT8_C( 57), INT8_C( 108), INT8_C( -55), INT8_C( 69)),
23 ,
simde_mm_set_epi8(INT8_C( -26), INT8_C( -61), INT8_C( -21), INT8_C( -96),
INT8_C( 48), INT8_C(-112), INT8_C( 95), INT8_C( -56),
INT8_C( 29), INT8_C( -55), INT8_C(-121), INT8_C( -6),
INT8_C( 57), INT8_C( 108), INT8_C( -55), INT8_C( 69)),
28 ,
0 }
};

/* The immediate is passed as a literal (36) rather than through a variable. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
int r;
r = simde_mm_cmpestra(test_vec[i].a, test_vec[i].la, test_vec[i].b, test_vec[i].lb, 36);
munit_assert_int(r, ==, test_vec[i].r);
}

return MUNIT_OK;
}



static MunitResult
test_simde_mm_cmpestrs_8(const MunitParameter params[], void* data) {
(void) params;
Expand Down Expand Up @@ -760,6 +797,8 @@ HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL

static MunitTest test_suite_tests[] = {
#if defined(SIMDE_X86_SSE4_2_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
SIMDE_TESTS_DEFINE_TEST(mm_cmpestra_ranges_8),

SIMDE_TESTS_DEFINE_TEST(mm_cmpestrs_8),
SIMDE_TESTS_DEFINE_TEST(mm_cmpestrs_16),
SIMDE_TESTS_DEFINE_TEST(mm_cmpestrz_8),
Expand Down

0 comments on commit b46c1c3

Please sign in to comment.