diff --git a/src/nanoarrow/common/array.c b/src/nanoarrow/common/array.c index 3f296dbd0..dbb902df6 100644 --- a/src/nanoarrow/common/array.c +++ b/src/nanoarrow/common/array.c @@ -115,7 +115,7 @@ static ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array, break; case NANOARROW_TYPE_BINARY_VIEW: case NANOARROW_TYPE_STRING_VIEW: - array->n_buffers = NANOARROW_BINARY_VIEW_FIXED_BUFFERS; + array->n_buffers = NANOARROW_BINARY_VIEW_FIXED_BUFFERS + 1; break; case NANOARROW_TYPE_STRING: case NANOARROW_TYPE_LARGE_STRING: @@ -765,11 +765,9 @@ static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view, const int32_t nfixed_buf = NANOARROW_BINARY_VIEW_FIXED_BUFFERS; const int32_t nvariadic_buf = (int32_t)(n_buffers - nfixed_buf - 1); - if (nvariadic_buf > 0) { - array_view->n_variadic_buffers = nvariadic_buf; - buffers_required += nvariadic_buf + 1; - array_view->variadic_buffer_sizes = (int64_t*)array->buffers[n_buffers - 1]; - } + array_view->n_variadic_buffers = nvariadic_buf; + buffers_required += nvariadic_buf + 1; + array_view->variadic_buffer_sizes = (int64_t*)array->buffers[n_buffers - 1]; } if (buffers_required != array->n_buffers) { diff --git a/src/nanoarrow/common/array_test.cc b/src/nanoarrow/common/array_test.cc index b3e72e445..128c7ea47 100644 --- a/src/nanoarrow/common/array_test.cc +++ b/src/nanoarrow/common/array_test.cc @@ -895,9 +895,48 @@ TEST(ArrayTest, ArrayTestAppendToLargeStringArray) { ArrowArrayRelease(&array); } -template -void TestAppendToDataViewArray() { +template +void TestAppendToInlinedDataViewArray( + std::function AppendFunc) { + struct ArrowArray array; + + ASSERT_EQ(ArrowArrayInitFromType(&array, ArrowT), NANOARROW_OK); + EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK); + + // Check that we can reserve + ASSERT_EQ(ArrowArrayReserve(&array, 5), NANOARROW_OK); + EXPECT_EQ(ArrowArrayBuffer(&array, 1)->capacity_bytes, + 5 * sizeof(union ArrowBinaryView)); + + EXPECT_EQ(AppendFunc(&array, ValueT{{"inlinestring"}, 12}), NANOARROW_OK); + EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK); + EXPECT_EQ(ArrowArrayAppendEmpty(&array, 1), NANOARROW_OK); + EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK); + + EXPECT_EQ(array.length, 4); + EXPECT_EQ(array.null_count, 2); + EXPECT_EQ(array.n_buffers, 3); + auto validity_buffer = reinterpret_cast(array.buffers[0]); + auto inline_buffer = reinterpret_cast(array.buffers[1]); + auto sizes_buffer = reinterpret_cast(array.buffers[2]); + + EXPECT_EQ(validity_buffer[0], 0b00001001); + EXPECT_EQ(memcmp(inline_buffer[0].inlined.data, "inlinestring", 12), 0); + EXPECT_EQ(inline_buffer[0].inlined.size, 12); + + EXPECT_EQ(sizes_buffer, nullptr); + + // TODO: issue #633 + /* + EXPECT_THAT(nanoarrow::ViewArrayAsBytes<64>(&array), + ElementsAre("1234"_asv, NA, NA, "56789"_asv, ""_asv)); + */ + ArrowArrayRelease(&array); +}; + +template +void TestAppendToDataViewArray( + std::function AppendFunc) { struct ArrowArray array; ASSERT_EQ(ArrowArrayInitFromType(&array, ArrowT), NANOARROW_OK); @@ -925,6 +964,7 @@ void TestAppendToDataViewArray() { EXPECT_EQ(array.length, 7); EXPECT_EQ(array.null_count, 2); + EXPECT_EQ(array.n_buffers, 5); auto validity_buffer = reinterpret_cast(array.buffers[0]); auto inline_buffer = reinterpret_cast(array.buffers[1]); auto vbuf1 = reinterpret_cast(array.buffers[2]); @@ -964,13 +1004,17 @@ void TestAppendToDataViewArray() { }; TEST(ArrayTest, ArrayTestAppendToBinaryViewArray) { - TestAppendToDataViewArray(); + TestAppendToInlinedDataViewArray( + ArrowArrayAppendString); + TestAppendToDataViewArray( + ArrowArrayAppendString); }; TEST(ArrayTest, ArrayTestAppendToStringViewArray) { - TestAppendToDataViewArray(); + TestAppendToInlinedDataViewArray( + ArrowArrayAppendBytes); + TestAppendToDataViewArray( + ArrowArrayAppendBytes); }; TEST(ArrayTest, ArrayTestAppendToFixedSizeBinaryArray) { @@ -3343,8 +3387,49 @@ TEST(ArrayViewTest, ArrayViewTestGetString) { TestGetFromBinary(fixed_size_builder); } -template -void TestGetFromBinaryView(BuilderClass& builder) { +template +void TestGetFromInlinedBinaryView( + BuilderClass& builder, + std::function GetValueFunc, + std::function GetValueDataFunc) { + struct ArrowArray array; + struct ArrowSchema schema; + struct ArrowArrayView array_view; + struct ArrowError error; + + auto type = builder.type(); + ARROW_EXPECT_OK(builder.Append("1234")); + ARROW_EXPECT_OK(builder.AppendNulls(2)); + ARROW_EXPECT_OK(builder.Append("four")); + + auto maybe_arrow_array = builder.Finish(); + ARROW_EXPECT_OK(maybe_arrow_array); + auto arrow_array = maybe_arrow_array.ValueUnsafe(); + + ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema)); + ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), NANOARROW_OK); + ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK); + EXPECT_EQ(ArrowArrayViewValidate(&array_view, NANOARROW_VALIDATION_LEVEL_FULL, &error), + NANOARROW_OK); + + EXPECT_EQ(array_view.n_variadic_buffers, 0); + EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 2), 1); + EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 3), 0); + + const auto value = GetValueFunc(&array_view, 3); + EXPECT_EQ(value.size_bytes, strlen("four")); + EXPECT_EQ(memcmp(GetValueDataFunc(&value), "four", value.size_bytes), 0); + + ArrowArrayViewReset(&array_view); + ArrowArrayRelease(&array); + ArrowSchemaRelease(&schema); +} + +template +void TestGetFromBinaryView( + BuilderClass& builder, + std::function GetValueFunc, + std::function GetValueDataFunc) { struct ArrowArray array; struct ArrowSchema schema; struct ArrowArrayView array_view; @@ -3380,29 +3465,17 @@ void TestGetFromBinaryView(BuilderClass& builder) { EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 2), 1); EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 3), 0); - auto string_view = ArrowArrayViewGetStringUnsafe(&array_view, 3); - EXPECT_EQ(string_view.size_bytes, strlen("four")); - EXPECT_EQ(memcmp(string_view.data, "four", string_view.size_bytes), 0); - - auto buffer_view = ArrowArrayViewGetBytesUnsafe(&array_view, 3); - EXPECT_EQ(buffer_view.size_bytes, strlen("four")); - EXPECT_EQ(memcmp(buffer_view.data.as_char, "four", buffer_view.size_bytes), 0); - - string_view = ArrowArrayViewGetStringUnsafe(&array_view, 4); - EXPECT_EQ(string_view.size_bytes, str1.size()); - EXPECT_EQ(memcmp(string_view.data, str1.c_str(), string_view.size_bytes), 0); + const auto value1 = GetValueFunc(&array_view, 3); + EXPECT_EQ(value1.size_bytes, strlen("four")); + EXPECT_EQ(memcmp(GetValueDataFunc(&value1), "four", value1.size_bytes), 0); - string_view = ArrowArrayViewGetStringUnsafe(&array_view, 6); - EXPECT_EQ(string_view.size_bytes, str2.size()); - EXPECT_EQ(memcmp(string_view.data, str2.c_str(), string_view.size_bytes), 0); + const auto value2 = GetValueFunc(&array_view, 4); + EXPECT_EQ(value2.size_bytes, str1.size()); + EXPECT_EQ(memcmp(GetValueDataFunc(&value2), str1.c_str(), value2.size_bytes), 0); - buffer_view = ArrowArrayViewGetBytesUnsafe(&array_view, 4); - EXPECT_EQ(buffer_view.size_bytes, str1.size()); - EXPECT_EQ(memcmp(buffer_view.data.as_char, str1.c_str(), buffer_view.size_bytes), 0); - - buffer_view = ArrowArrayViewGetBytesUnsafe(&array_view, 6); - EXPECT_EQ(buffer_view.size_bytes, str2.size()); - EXPECT_EQ(memcmp(buffer_view.data.as_char, str2.c_str(), buffer_view.size_bytes), 0); + const auto value3 = GetValueFunc(&array_view, 6); + EXPECT_EQ(value3.size_bytes, str2.size()); + EXPECT_EQ(memcmp(GetValueDataFunc(&value3), str2.c_str(), value3.size_bytes), 0); ArrowArrayViewReset(&array_view); ArrowArrayRelease(&array); @@ -3411,10 +3484,22 @@ void TestGetFromBinaryView(BuilderClass& builder) { TEST(ArrayViewTest, ArrayViewTestGetStringView) { auto string_view_builder = StringViewBuilder(); - TestGetFromBinaryView(string_view_builder); + const auto get_string_view = [](const struct ArrowStringView* sv) { return sv->data; }; + TestGetFromInlinedBinaryView( + string_view_builder, ArrowArrayViewGetStringUnsafe, get_string_view); + TestGetFromBinaryView( + string_view_builder, ArrowArrayViewGetStringUnsafe, get_string_view); +} +TEST(ArrayViewTest, ArrayViewTestGetBinaryView) { auto binary_view_builder = BinaryViewBuilder(); - TestGetFromBinaryView(binary_view_builder); + const auto get_buffer_view = [](const struct ArrowBufferView* bv) { + return bv->data.data; + }; + TestGetFromInlinedBinaryView( + binary_view_builder, ArrowArrayViewGetBytesUnsafe, get_buffer_view); + TestGetFromBinaryView( + binary_view_builder, ArrowArrayViewGetBytesUnsafe, get_buffer_view); } TEST(ArrayViewTest, ArrayViewTestGetIntervalYearMonth) { diff --git a/src/nanoarrow/common/inline_array.h b/src/nanoarrow/common/inline_array.h index 32ff779d1..415e7d928 100644 --- a/src/nanoarrow/common/inline_array.h +++ b/src/nanoarrow/common/inline_array.h @@ -468,6 +468,8 @@ static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array, return NANOARROW_OK; } +// Binary views only have two fixed buffers, but be aware that they must also +// always have more 1 buffer to store variadic buffer sizes (even if there are none) #define NANOARROW_BINARY_VIEW_FIXED_BUFFERS 2 #define NANOARROW_BINARY_VIEW_INLINE_SIZE 12 #define NANOARROW_BINARY_VIEW_PREFIX_SIZE 4 @@ -504,27 +506,28 @@ static inline int32_t ArrowArrayVariadicBufferCount(struct ArrowArray* array) { static inline ArrowErrorCode ArrowArrayAddVariadicBuffers(struct ArrowArray* array, int32_t nbuffers) { const int32_t n_current_bufs = ArrowArrayVariadicBufferCount(array); - const int32_t n_bufs_needed = n_current_bufs + nbuffers; + const int32_t nvariadic_bufs_needed = n_current_bufs + nbuffers; struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; private_data->variadic_buffers = (struct ArrowBuffer*)ArrowRealloc( - private_data->variadic_buffers, sizeof(struct ArrowBuffer) * n_bufs_needed); + private_data->variadic_buffers, sizeof(struct ArrowBuffer) * nvariadic_bufs_needed); if (private_data->variadic_buffers == NULL) { return ENOMEM; } private_data->variadic_buffer_sizes = (int64_t*)ArrowRealloc( - private_data->variadic_buffer_sizes, sizeof(int64_t) * n_bufs_needed); + private_data->variadic_buffer_sizes, sizeof(int64_t) * nvariadic_bufs_needed); if (private_data->variadic_buffer_sizes == NULL) { return ENOMEM; } - for (int32_t i = n_current_bufs; i < n_bufs_needed; i++) { + for (int32_t i = n_current_bufs; i < nvariadic_bufs_needed; i++) { ArrowBufferInit(&private_data->variadic_buffers[i]); private_data->variadic_buffer_sizes[i] = 0; } - private_data->n_variadic_buffers = n_bufs_needed; + private_data->n_variadic_buffers = nvariadic_bufs_needed; + array->n_buffers = NANOARROW_BINARY_VIEW_FIXED_BUFFERS + 1 + nvariadic_bufs_needed; return NANOARROW_OK; }