From 5f825db8cf4265eada32fe625781cdb75193e6f7 Mon Sep 17 00:00:00 2001
From: Gabriel Vasile
Date: Tue, 13 Aug 2024 01:56:10 +0900
Subject: [PATCH] use a pool of buffers to alleviate memory allocs in csv; related to #553

When iterating over multiple files, csv detector allocated a new buffer
for each file. This change adds a pool of buffers that can be reused
between detections. The same pool is shared between csv and tsv
detectors.
---
 internal/magic/text_csv.go | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/internal/magic/text_csv.go b/internal/magic/text_csv.go
index af256438..6083ba8c 100644
--- a/internal/magic/text_csv.go
+++ b/internal/magic/text_csv.go
@@ -1,12 +1,28 @@
 package magic
 
 import (
+	"bufio"
 	"bytes"
 	"encoding/csv"
 	"errors"
 	"io"
+	"sync"
 )
 
+// A bufio.Reader pool to alleviate problems with memory allocations.
+var readerPool = sync.Pool{
+	New: func() any {
+		// Initiate with empty source reader.
+		return bufio.NewReader(nil)
+	},
+}
+
+func newReader(r io.Reader) *bufio.Reader {
+	br := readerPool.Get().(*bufio.Reader)
+	br.Reset(r)
+	return br
+}
+
 // Csv matches a comma-separated values file.
 func Csv(raw []byte, limit uint32) bool {
 	return sv(raw, ',', limit)
@@ -18,7 +34,11 @@ func Tsv(raw []byte, limit uint32) bool {
 }
 
 func sv(in []byte, comma rune, limit uint32) bool {
-	r := csv.NewReader(bytes.NewReader(dropLastLine(in, limit)))
+	in = dropLastLine(in, limit)
+
+	br := newReader(bytes.NewReader(in))
+	defer readerPool.Put(br)
+	r := csv.NewReader(br)
 	r.Comma = comma
 	r.ReuseRecord = true
 	r.LazyQuotes = true