From 2beaccf4671cd3f68889aef4052e490dda872ed4 Mon Sep 17 00:00:00 2001 From: joel Date: Wed, 24 Jan 2024 19:18:43 -0500 Subject: [PATCH] set max row group length and add note about issue --- go/adbc/driver/snowflake/bulk_ingestion.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/go/adbc/driver/snowflake/bulk_ingestion.go b/go/adbc/driver/snowflake/bulk_ingestion.go index 61b1b1da73..18b9a7b7b4 100644 --- a/go/adbc/driver/snowflake/bulk_ingestion.go +++ b/go/adbc/driver/snowflake/bulk_ingestion.go @@ -27,6 +27,7 @@ import ( "errors" "fmt" "io" + "math" "runtime" "strings" "sync" @@ -276,6 +277,7 @@ func newWriterProps(mem memory.Allocator, opts *ingestOptions) (*parquet.WriterP parquet.WithDictionaryDefault(false), // Stats won't be used since the file is dropped after ingestion completes parquet.WithStats(false), + parquet.WithMaxRowGroupLength(math.MaxInt64), ) arrowProps := pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(mem)) @@ -560,6 +562,7 @@ func (bp *bufferPool) PutBuffer(buf *bytes.Buffer) { // Wraps an io.Writer and specifies a limit. // Keeps track of how many bytes have been written and can report whether the limit has been exceeded. +// TODO(ARROW-39789): We prefer to use RowGroupTotalBytesWritten on the ParquetWriter, but there seems to be a discrepancy with the count. type limitWriter struct { w io.Writer limit int