Skip to content
This repository has been archived by the owner on Oct 30, 2024. It is now read-only.

Commit

Permalink
add: postprocessors in retrieval flow
Browse files Browse the repository at this point in the history
  • Loading branch information
iwilltry42 committed May 29, 2024
1 parent 19837ab commit 3be046a
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 2 deletions.
26 changes: 26 additions & 0 deletions pkg/datastore/postprocessors/postprocessors.go
Original file line number Diff line number Diff line change
@@ -1 +1,27 @@
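// Package postprocessors provides document transformers that run on retrieved
// documents, i.e. in the retrieval flow of the RAG pipeline. It mirrors package
// transformers: any transformer registered there can also be looked up as a
// postprocessor.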
package postprocessors

import (
"fmt"
"github.com/gptscript-ai/knowledge/pkg/datastore/transformers"
"github.com/gptscript-ai/knowledge/pkg/datastore/types"
)

// Postprocessor is a document transformer used in the retrieval flow. It has
// the same underlying type as types.DocumentTransformer, so a "normal"
// transformer may be used as a Postprocessor as well.
type Postprocessor types.DocumentTransformer

// PostprocessorMap is the registry of dedicated postprocessors, keyed by name.
// It starts out empty; GetPostprocessor consults it before falling back to the
// transformers package.
var PostprocessorMap = make(map[string]Postprocessor)

// GetPostprocessor resolves name to a Postprocessor.
//
// PostprocessorMap is consulted first. If name is not registered there, the
// lookup falls back to transformers.GetTransformer, so a transformer known
// under that name can be used as a postprocessor.
//
// If neither lookup succeeds, a nil Postprocessor and an "unknown
// postprocessor" error are returned. The error wraps the one reported by the
// transformer lookup, so callers can still inspect the cause with
// errors.Is / errors.As.
func GetPostprocessor(name string) (Postprocessor, error) {
	if postprocessor, ok := PostprocessorMap[name]; ok {
		return postprocessor, nil
	}

	// Not a dedicated postprocessor: try the regular transformers.
	transformer, err := transformers.GetTransformer(name)
	if err != nil {
		return nil, fmt.Errorf("unknown postprocessor %q: %w", name, err)
	}
	return transformer, nil
}
17 changes: 17 additions & 0 deletions pkg/flows/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"github.com/gptscript-ai/knowledge/pkg/datastore/documentloader"
"github.com/gptscript-ai/knowledge/pkg/datastore/postprocessors"
"github.com/gptscript-ai/knowledge/pkg/datastore/querymodifiers"
"github.com/gptscript-ai/knowledge/pkg/datastore/retrievers"
"github.com/gptscript-ai/knowledge/pkg/datastore/textsplitter"
Expand Down Expand Up @@ -250,5 +251,21 @@ func (r *RetrievalFlowConfig) AsRetrievalFlow() (*flows.RetrievalFlow, error) {
flow.Retriever = ret
}

if len(r.Postprocessors) > 0 {
for _, pp := range r.Postprocessors {
postprocessor, err := postprocessors.GetPostprocessor(pp.Name)
if err != nil {
return nil, err
}
if len(pp.Options) > 0 {
if err := mapstructure.Decode(pp.Options, &postprocessor); err != nil {
return nil, fmt.Errorf("failed to decode postprocessor configuration: %w", err)
}
slog.Debug("Postprocessor custom configuration", "name", pp.Name, "config", postprocessor)
}
flow.Postprocessors = append(flow.Postprocessors, postprocessor)
}
}

return flow, nil
}
10 changes: 8 additions & 2 deletions pkg/flows/flows.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"github.com/gptscript-ai/knowledge/pkg/datastore/defaults"
"github.com/gptscript-ai/knowledge/pkg/datastore/documentloader"
"github.com/gptscript-ai/knowledge/pkg/datastore/postprocessors"
"github.com/gptscript-ai/knowledge/pkg/datastore/querymodifiers"
"github.com/gptscript-ai/knowledge/pkg/datastore/retrievers"
"github.com/gptscript-ai/knowledge/pkg/datastore/textsplitter"
Expand Down Expand Up @@ -98,7 +99,7 @@ func (f *IngestionFlow) Run(ctx context.Context, reader io.Reader) ([]vs.Documen
// RetrievalFlow bundles the configurable stages of a retrieval run.
type RetrievalFlow struct {
// QueryModifiers presumably rewrite the query before retrieval; their use
// is not visible in this excerpt (TODO confirm against Run).
QueryModifiers []querymodifiers.QueryModifier
Retriever retrievers.Retriever
// Postprocessors are applied to the retrieved documents in slice order;
// each one receives the output of the previous one.
Postprocessors []postprocessors.Postprocessor
}

func (f *RetrievalFlow) FillDefaults() {
Expand All @@ -121,7 +122,12 @@ func (f *RetrievalFlow) Run(ctx context.Context, store vs.VectorStore, query str
return nil, err
}

// Run the postprocessors in order, feeding each the previous stage's output.
for _, pp := range f.Postprocessors {
docs, err = pp.Transform(ctx, docs)
if err != nil {
return nil, err
}
}

slog.Debug("Retrieved documents", "num_documents", len(docs), "query", query, "dataset", datasetID)
return docs, nil
Expand Down

0 comments on commit 3be046a

Please sign in to comment.