Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Implement ArrowBitmapUnpackInt8Unsafe #276

Merged
merged 8 commits into from
Aug 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions src/nanoarrow/buffer_inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,17 @@ static inline int64_t _ArrowBytesForBits(int64_t bits) {
return (bits >> 3) + ((bits & 7) != 0);
}

// Expand the eight bits of `word` into out[0..7], one int8_t (0 or 1) per bit.
// out[k] receives bit k of `word`, so the least-significant bit comes first.
static inline void _ArrowBitmapUnpackInt8(const uint8_t word, int8_t* out) {
  for (int bit = 0; bit < 8; bit++) {
    out[bit] = (word >> bit) & 1;
  }
}

static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) {
*out = (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 | values[4] << 4 |
values[5] << 5 | values[6] << 6 | values[7] << 7);
Expand All @@ -236,6 +247,46 @@ static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i) {
return (bits[i >> 3] >> (i & 0x07)) & 1;
}

// Unpack `length` bits of `bits`, beginning at bit index `start_offset`, into
// `out` as one int8_t (0 or 1) per bit. Neither buffer is bounds-checked here;
// a zero `length` writes nothing.
static inline void ArrowBitmapUnpackInt8Unsafe(const uint8_t* bits, int64_t start_offset,
                                               int64_t length, int8_t* out) {
  if (length == 0) {
    return;
  }

  const int64_t end_offset = start_offset + length;
  const int64_t first_byte = start_offset / 8;
  const int64_t last_byte = (end_offset - 1) / 8;
  const int64_t first_bit = start_offset % 8;

  // The whole range lives inside one byte: read its bits one at a time.
  if (first_byte == last_byte) {
    const uint8_t* only_byte = &bits[first_byte];
    for (int k = 0; k < length; k++) {
      out[k] = ArrowBitGet(only_byte, first_bit + k);
    }
    return;
  }

  // Leading byte: from the starting bit up to the end of that byte.
  for (int64_t bit = first_bit; bit < 8; bit++) {
    *out++ = ArrowBitGet(&bits[first_byte], bit);
  }

  // Whole bytes strictly between the first and the last byte.
  int64_t byte_index = first_byte + 1;
  while (byte_index < last_byte) {
    _ArrowBitmapUnpackInt8(bits[byte_index], out);
    out += 8;
    byte_index++;
  }

  // Trailing byte: a range that ends on a byte boundary consumes all 8 bits.
  int tail_bits = end_offset % 8;
  if (tail_bits == 0) {
    tail_bits = 8;
  }
  for (int bit = 0; bit < tail_bits; bit++) {
    *out++ = ArrowBitGet(&bits[last_byte], bit);
  }
}

// OR the mask _ArrowkBitmask[i % 8] into byte i / 8 of `bits`.
// NOTE(review): presumably that table holds the single-bit mask for each bit
// position, making this "set bit i" -- the table is defined outside this view.
static inline void ArrowBitSet(uint8_t* bits, int64_t i) {
  const int64_t byte_index = i / 8;
  const int64_t bit_in_byte = i % 8;
  bits[byte_index] |= _ArrowkBitmask[bit_in_byte];
}
Expand Down
80 changes: 80 additions & 0 deletions src/nanoarrow/buffer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,86 @@ TEST(BitmapTest, BitmapTestElement) {
EXPECT_EQ(ArrowBitGet(bitmap, 16 + 7), 0);
}

// Test helper: unpacks `length` bits starting at bit index `offset` of `bitmap`
// into `out`, then checks the leading expected.size() elements of `out` against
// `expected`. Callers should pass exactly `length` expected values; anything
// beyond that would be compared against whatever `out` already contained.
template <int offset, int length>
void TestArrowBitmapUnpackInt8Unsafe(const uint8_t* bitmap, int8_t* out,
                                     const std::vector<uint8_t>& expected) {
  ArrowBitmapUnpackInt8Unsafe(bitmap, offset, length, out);
  for (size_t i = 0; i < expected.size(); i++) {
    // Compare as int so a failure prints 0/1 rather than a raw character.
    EXPECT_EQ(static_cast<int>(out[i]), static_cast<int>(expected[i]))
        << "offset " << offset << ", length " << length << ", index " << i;
  }
}

// Exercises ArrowBitmapUnpackInt8Unsafe over combinations of byte-aligned and
// unaligned start offsets and range ends, for ranges contained in one byte and
// ranges spanning several bytes. In the case labels below, "length boundary"
// means the requested range ends on a byte boundary.
TEST(BitmapTest, BitmapTestBitmapUnpackInt8Unsafe) {
  uint8_t bitmap[3];
  int8_t result[sizeof(bitmap) * 8];

  // Start from a known state so the all-ones check below can only pass if the
  // function actually wrote every element.
  memset(result, 0, sizeof(result));
  memset(bitmap, 0xff, sizeof(bitmap));
  ArrowBitmapUnpackInt8Unsafe(bitmap, 0, sizeof(result), result);
  for (size_t i = 0; i < sizeof(result); i++) {
    EXPECT_EQ(result[i], 1);
  }

  // Binary shown most-significant bit first; unpacked values come out
  // least-significant bit first.
  bitmap[0] = 0x93;  // 10010011
  bitmap[1] = 0x55;  // 01010101
  bitmap[2] = 0xaa;  // 10101010

  // offset 0, length boundary, one byte
  TestArrowBitmapUnpackInt8Unsafe<0, 8>(bitmap, result, {1, 1, 0, 0, 1, 0, 0, 1});

  // offset 0, length boundary, different bytes
  TestArrowBitmapUnpackInt8Unsafe<0, 16>(
      bitmap, result, {1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0});

  // offset 0, length non-boundary, one byte
  TestArrowBitmapUnpackInt8Unsafe<0, 5>(bitmap, result, {1, 1, 0, 0, 1});

  // offset boundary, length boundary, one byte
  TestArrowBitmapUnpackInt8Unsafe<8, 8>(bitmap, result, {1, 0, 1, 0, 1, 0, 1, 0});

  // offset boundary, length boundary, different bytes
  TestArrowBitmapUnpackInt8Unsafe<8, 16>(
      bitmap, result, {1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1});

  // offset boundary, length non-boundary, one byte
  TestArrowBitmapUnpackInt8Unsafe<8, 5>(bitmap, result, {1, 0, 1, 0, 1});

  // offset boundary, length non-boundary, different bytes
  TestArrowBitmapUnpackInt8Unsafe<8, 13>(bitmap, result,
                                         {1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0});

  // offset non-boundary, length boundary, one byte
  TestArrowBitmapUnpackInt8Unsafe<3, 5>(bitmap, result, {0, 1, 0, 0, 1});

  // offset non-boundary, length boundary, different bytes
  TestArrowBitmapUnpackInt8Unsafe<3, 13>(bitmap, result,
                                         {0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0});

  // offset non-boundary, length non-boundary, one byte
  TestArrowBitmapUnpackInt8Unsafe<3, 3>(bitmap, result, {0, 1, 0});

  // offset non-boundary, length non-boundary, different bytes
  TestArrowBitmapUnpackInt8Unsafe<3, 11>(bitmap, result,
                                         {0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0});

  // offset non-boundary non-first byte, length boundary, one byte
  TestArrowBitmapUnpackInt8Unsafe<11, 5>(bitmap, result, {0, 1, 0, 1, 0});

  // offset non-boundary non-first byte, length boundary, different bytes
  TestArrowBitmapUnpackInt8Unsafe<11, 13>(bitmap, result,
                                          {0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1});

  // offset non-boundary non-first byte, length non-boundary, one byte
  TestArrowBitmapUnpackInt8Unsafe<11, 3>(bitmap, result, {0, 1, 0});

  // offset non-boundary non-first byte, length non-boundary, different bytes
  // (exactly 11 expected values: bits 11..21)
  TestArrowBitmapUnpackInt8Unsafe<11, 11>(bitmap, result,
                                          {0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1});

  // non-boundary, three byte span
  TestArrowBitmapUnpackInt8Unsafe<7, 11>(bitmap, result,
                                         {1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1});
}

TEST(BitmapTest, BitmapTestSetTo) {
uint8_t bitmap[10];

Expand Down
4 changes: 4 additions & 0 deletions src/nanoarrow/nanoarrow.h
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,10 @@ static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap,
static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap,
uint8_t bits_are_set, int64_t length);

/// \brief Extract boolean values from a range in a bitmap
///
/// Reads length bits from bits, starting at bit index start_offset, and writes
/// one int8_t per bit (0 or 1) to out. Within each byte, bit 0 is the
/// least-significant bit. Nothing is written when length is 0.
///
/// No bounds checking is performed: the caller must ensure that bits covers
/// the requested range and that out has room for length elements.
static inline void ArrowBitmapUnpackInt8Unsafe(const uint8_t* bits, int64_t start_offset,
int64_t length, int8_t* out);

/// \brief Append boolean values encoded as int8_t to a bitmap
///
/// The values must all be 0 or 1.
Expand Down
Loading