diff --git a/src/nanoarrow/buffer_inline.h b/src/nanoarrow/buffer_inline.h
index b270b3301..efb5ca712 100644
--- a/src/nanoarrow/buffer_inline.h
+++ b/src/nanoarrow/buffer_inline.h
@@ -222,6 +222,17 @@ static inline int64_t _ArrowBytesForBits(int64_t bits) {
   return (bits >> 3) + ((bits & 7) != 0);
 }
 
+static inline void _ArrowBitmapUnpackInt8(const uint8_t word, int8_t* out) {
+  out[0] = (word >> 0) & 1;
+  out[1] = (word >> 1) & 1;
+  out[2] = (word >> 2) & 1;
+  out[3] = (word >> 3) & 1;
+  out[4] = (word >> 4) & 1;
+  out[5] = (word >> 5) & 1;
+  out[6] = (word >> 6) & 1;
+  out[7] = (word >> 7) & 1;
+}
+
 static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) {
   *out = (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 | values[4] << 4 |
           values[5] << 5 | values[6] << 6 | values[7] << 7);
@@ -236,6 +247,46 @@ static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i) {
   return (bits[i >> 3] >> (i & 0x07)) & 1;
 }
 
+static inline void ArrowBitmapUnpackInt8Unsafe(const uint8_t* bits, int64_t start_offset,
+                                               int64_t length, int8_t* out) {
+  if (length == 0) {
+    return;
+  }
+
+  const int64_t i_begin = start_offset;
+  const int64_t i_end = start_offset + length;
+  const int64_t i_last_valid = i_end - 1;
+
+  const int64_t bytes_begin = i_begin / 8;
+  const int64_t bytes_last_valid = i_last_valid / 8;
+
+  if (bytes_begin == bytes_last_valid) {
+    // unpack bits that all live within a single byte
+    for (int i = 0; i < length; i++) {
+      out[i] = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8);
+    }
+
+    return;
+  }
+
+  // first byte
+  for (int i = 0; i < 8 - (i_begin % 8); i++) {
+    *out++ = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8);
+  }
+
+  // middle bytes
+  for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) {
+    _ArrowBitmapUnpackInt8(bits[i], out);
+    out += 8;
+  }
+
+  // last byte
+  const int bits_remaining = i_end % 8 == 0 ? 8 : i_end % 8;
+  for (int i = 0; i < bits_remaining; i++) {
+    *out++ = ArrowBitGet(&bits[bytes_last_valid], i);
+  }
+}
+
 static inline void ArrowBitSet(uint8_t* bits, int64_t i) {
   bits[i / 8] |= _ArrowkBitmask[i % 8];
 }
diff --git a/src/nanoarrow/buffer_test.cc b/src/nanoarrow/buffer_test.cc
index 9d5032cfa..1907d3712 100644
--- a/src/nanoarrow/buffer_test.cc
+++ b/src/nanoarrow/buffer_test.cc
@@ -271,6 +271,90 @@ TEST(BitmapTest, BitmapTestElement) {
   EXPECT_EQ(ArrowBitGet(bitmap, 16 + 7), 0);
 }
 
+template <int offset, int length>
+void TestArrowBitmapUnpackInt8Unsafe(const uint8_t* bitmap, int8_t* out,
+                                     const std::vector<int8_t>& expected) {
+  // Every case must spell out exactly `length` expected values
+  ASSERT_EQ(expected.size(), static_cast<size_t>(length));
+  // Poison the output so stale values from an earlier case cannot match
+  memset(out, 0x7f, length);
+  ArrowBitmapUnpackInt8Unsafe(bitmap, offset, length, out);
+  for (size_t i = 0; i < expected.size(); i++) {
+    EXPECT_EQ(out[i], expected[i]);
+  }
+}
+
+TEST(BitmapTest, BitmapTestBitmapUnpackInt8Unsafe) {
+  uint8_t bitmap[3];
+  int8_t result[sizeof(bitmap) * 8];
+
+  memset(bitmap, 0xff, sizeof(bitmap));
+  ArrowBitmapUnpackInt8Unsafe(bitmap, 0, sizeof(result), result);
+  for (size_t i = 0; i < sizeof(result); i++) {
+    EXPECT_EQ(result[i], 1);
+  }
+
+  bitmap[0] = 0x93;  // 10010011
+  bitmap[1] = 0x55;  // 01010101
+  bitmap[2] = 0xaa;  // 10101010
+
+  // offset 0, length boundary, one byte
+  TestArrowBitmapUnpackInt8Unsafe<0, 8>(bitmap, result, {1, 1, 0, 0, 1, 0, 0, 1});
+
+  // offset 0, length boundary, different bytes
+  TestArrowBitmapUnpackInt8Unsafe<0, 16>(
+      bitmap, result, {1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0});
+
+  // offset 0, length non-boundary, one byte
+  TestArrowBitmapUnpackInt8Unsafe<0, 5>(bitmap, result, {1, 1, 0, 0, 1});
+
+  // offset boundary, length boundary, one byte
+  TestArrowBitmapUnpackInt8Unsafe<8, 8>(bitmap, result, {1, 0, 1, 0, 1, 0, 1, 0});
+
+  // offset boundary, length boundary, different bytes
+  TestArrowBitmapUnpackInt8Unsafe<8, 16>(
+      bitmap, result, {1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1});
+
+  // offset boundary, length non-boundary, one byte
+  TestArrowBitmapUnpackInt8Unsafe<8, 5>(bitmap, result, {1, 0, 1, 0, 1});
+
+  // offset boundary, length non-boundary, different bytes
+  TestArrowBitmapUnpackInt8Unsafe<8, 13>(bitmap, result,
+                                         {1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0});
+
+  // offset non-boundary, length boundary, one byte
+  TestArrowBitmapUnpackInt8Unsafe<3, 5>(bitmap, result, {0, 1, 0, 0, 1});
+
+  // offset non-boundary, length boundary, different bytes
+  TestArrowBitmapUnpackInt8Unsafe<3, 13>(bitmap, result,
+                                         {0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0});
+
+  // offset non-boundary, length non-boundary, one byte
+  TestArrowBitmapUnpackInt8Unsafe<3, 3>(bitmap, result, {0, 1, 0});
+
+  // offset non-boundary, length non-boundary, different bytes
+  TestArrowBitmapUnpackInt8Unsafe<3, 11>(bitmap, result,
+                                         {0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0});
+
+  // offset non-boundary non-first byte, length boundary, one byte
+  TestArrowBitmapUnpackInt8Unsafe<11, 5>(bitmap, result, {0, 1, 0, 1, 0});
+
+  // offset non-boundary non-first byte, length boundary, different bytes
+  TestArrowBitmapUnpackInt8Unsafe<11, 13>(bitmap, result,
+                                          {0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1});
+
+  // offset non-boundary non-first byte, length non-boundary, one byte
+  TestArrowBitmapUnpackInt8Unsafe<11, 3>(bitmap, result, {0, 1, 0});
+
+  // offset non-boundary non-first byte, length non-boundary, different bytes
+  TestArrowBitmapUnpackInt8Unsafe<11, 11>(bitmap, result,
+                                          {0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1});
+
+  // non-boundary, three byte span
+  TestArrowBitmapUnpackInt8Unsafe<7, 11>(bitmap, result,
+                                         {1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1});
+}
+
 TEST(BitmapTest, BitmapTestSetTo) {
   uint8_t bitmap[10];
 
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index 930841152..09a80a859 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -716,6 +716,14 @@ static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap,
 static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap,
                                            uint8_t bits_are_set, int64_t length);
 
+/// \brief Extract boolean values from a range in a bitmap
+///
+/// Writes one int8_t (0 or 1) to out for each of the length bits beginning at
+/// bit start_offset of bits. No bounds checking is performed: out must have
+/// room for at least length values.
+static inline void ArrowBitmapUnpackInt8Unsafe(const uint8_t* bits, int64_t start_offset,
+                                               int64_t length, int8_t* out);
+
 /// \brief Append boolean values encoded as int8_t to a bitmap
 ///
 /// The values must all be 0 or 1.