From 386a88607a8baeb80bb11ea51b7a33d82972f4c6 Mon Sep 17 00:00:00 2001 From: conneroisu Date: Mon, 5 Aug 2024 18:31:53 -0400 Subject: [PATCH] add parallel processing functions to slices --- slices.go | 83 ++++++++++++++++++++++++++++++++++++++++ tools/seltabls/README.md | 1 - 2 files changed, 83 insertions(+), 1 deletion(-) diff --git a/slices.go b/slices.go index 8fcb8649..f3c4508a 100644 --- a/slices.go +++ b/slices.go @@ -623,3 +623,86 @@ func NewFromBytesChFn[ } return NewChFn(doc, ch, fn) } + +// NewPl parses a reader into a slice of structs with parallel processing. +func NewPl[T any](doc *goquery.Document) ([]T, error) { + dType := reflect.TypeOf((*T)(nil)).Elem() + if dType.Kind() != reflect.Struct && dType.Kind() != reflect.Ptr { + return nil, fmt.Errorf("expected struct, got %s", dType.Kind()) + } + results := make([]T, 0) + var cfg *SelectorConfig + errCh := make(chan error) + for i := 0; i < dType.NumField(); i++ { + go func(i int) { + cfg = NewSelectorConfig(dType.Field(i).Tag) + if cfg.DataSelector == "" { + errCh <- ErrSelectorNotFound{ + Typ: dType, + Field: dType.Field(i), + Cfg: cfg, + } + } + dataRows := doc.Find(cfg.DataSelector) + if dataRows.Length() <= 0 { + return + } + if cfg.HeadSelector != "" && cfg.HeadSelector != "-" { + _ = dataRows.RemoveFiltered(cfg.HeadSelector) + } + if len(results) < dataRows.Length() { + results = make([]T, dataRows.Length()) + } + for j := 0; j < dataRows.Length(); j++ { + err := SetStructField( + &results[j], + dType.Field(i), // name of the field to set + dataRows.Eq(j), // goquery selection for cell + &selector{ + control: cfg.ControlTag, + query: cfg.QuerySelector, + }, // selector for the inner cell + ) + if err != nil { + errCh <- fmt.Errorf( + "failed to set field %s: %s", + dType.Field(i).Name, + err, + ) + } + } + }(i) + } + if len(results) < 1 { + return nil, fmt.Errorf("no data found") + } + return results, nil +} + +// NewFromReaderPl parses a reader into a slice of structs with parallel processing. +func NewFromReaderPl[T any](r io.Reader) ([]T, error) { + doc, err := goquery.NewDocumentFromReader(r) + if err != nil { + return nil, fmt.Errorf("failed to parse html: %w", err) + } + return NewPl[T](doc) +} + +// NewFromStringPl parses a string into a slice of structs with parallel processing. +func NewFromStringPl[T any](htmlInput string) ([]T, error) { + reader := strings.NewReader(htmlInput) + doc, err := goquery.NewDocumentFromReader(reader) + if err != nil { + return nil, fmt.Errorf("failed to parse html: %w", err) + } + return NewPl[T](doc) +} + +// NewFromBytesPl parses a byte slice into a slice of structs with parallel processing. +func NewFromBytesPl[T any](b []byte) ([]T, error) { + doc, err := goquery.NewDocumentFromReader(bytes.NewReader(b)) + if err != nil { + return nil, fmt.Errorf("failed to parse html: %w", err) + } + return NewPl[T](doc) +} diff --git a/tools/seltabls/README.md b/tools/seltabls/README.md index 07b0ee33..5193554f 100644 --- a/tools/seltabls/README.md +++ b/tools/seltabls/README.md @@ -70,4 +70,3 @@ Manually, one can run the tests with: ```sh go test ./... ``` -