Skip to content
This repository has been archived by the owner on Oct 30, 2024. It is now read-only.

Commit

Permalink
add: blueprint flows file (#80)
Browse files Browse the repository at this point in the history
  • Loading branch information
iwilltry42 authored Aug 20, 2024
1 parent afb9d38 commit b164f86
Show file tree
Hide file tree
Showing 8 changed files with 100 additions and 12 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ ifeq ($(GIT_TAG),)
GIT_TAG := $(shell git describe --always)
endif

GO_TAGS := netgo
GO_TAGS := netgo mupdf
LD_FLAGS := -s -w -X github.com/gptscript-ai/knowledge/version.Version=${GIT_TAG}
build:
go build -mod=mod -o bin/knowledge -tags "${GO_TAGS}" -ldflags '$(LD_FLAGS) ' .
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/askdir.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func (s *ClientAskDir) Run(cmd *cobra.Command, args []string) error {
datasetID := client.HashPath(abspath)

slog.Debug("Loading ingestion flows from config", "flows_file", s.FlowsFile, "dataset", datasetID)
flowCfg, err := flowconfig.FromFile(s.FlowsFile)
flowCfg, err := flowconfig.Load(s.FlowsFile)
if err != nil {
return err
}
Expand Down
9 changes: 5 additions & 4 deletions pkg/cmd/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@ package cmd
import (
"archive/zip"
"fmt"
"io"
"os"
"path/filepath"

"github.com/gptscript-ai/knowledge/pkg/client"
"github.com/gptscript-ai/knowledge/pkg/config"
"github.com/gptscript-ai/knowledge/pkg/datastore"
"github.com/gptscript-ai/knowledge/pkg/datastore/embeddings"
"github.com/gptscript-ai/knowledge/pkg/datastore/types"
"io"
"os"
"path/filepath"
)

type Client struct {
Expand All @@ -25,7 +26,7 @@ type Client struct {
}

type ClientFlowsConfig struct {
FlowsFile string `usage:"Path to a YAML/JSON file containing ingestion/retrieval flows" env:"KNOW_FLOWS_FILE"`
FlowsFile string `usage:"Path to a YAML/JSON file containing ingestion/retrieval flows" env:"KNOW_FLOWS_FILE" default:""`
Flow string `usage:"Flow name" env:"KNOW_FLOW"`
}

Expand Down
8 changes: 5 additions & 3 deletions pkg/cmd/ingest.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ package cmd

import (
"fmt"
"github.com/acorn-io/z"
"github.com/spf13/cobra"
"log/slog"
"strings"

"github.com/acorn-io/z"
"github.com/spf13/cobra"

"github.com/gptscript-ai/knowledge/pkg/client"
"github.com/gptscript-ai/knowledge/pkg/datastore/textsplitter"
flowconfig "github.com/gptscript-ai/knowledge/pkg/flows/config"
Expand Down Expand Up @@ -68,7 +69,8 @@ func (s *ClientIngest) Run(cmd *cobra.Command, args []string) error {

if s.FlowsFile != "" {
slog.Debug("Loading ingestion flows from config", "flows_file", s.FlowsFile, "dataset", datasetID)
flowCfg, err := flowconfig.FromFile(s.FlowsFile)

flowCfg, err := flowconfig.Load(s.FlowsFile)
if err != nil {
return err
}
Expand Down
5 changes: 3 additions & 2 deletions pkg/cmd/retrieve.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ import (
"encoding/json"
"errors"
"fmt"
"log/slog"

"github.com/gptscript-ai/knowledge/pkg/datastore"
flowconfig "github.com/gptscript-ai/knowledge/pkg/flows/config"
vserr "github.com/gptscript-ai/knowledge/pkg/vectorstore/errors"
"github.com/spf13/cobra"
"log/slog"
)

type ClientRetrieve struct {
Expand Down Expand Up @@ -49,7 +50,7 @@ func (s *ClientRetrieve) Run(cmd *cobra.Command, args []string) error {

if s.FlowsFile != "" {
slog.Debug("Loading retrieval flows from config", "flows_file", s.FlowsFile, "dataset", datasetIDs)
flowCfg, err := flowconfig.FromFile(s.FlowsFile)
flowCfg, err := flowconfig.Load(s.FlowsFile)
if err != nil {
return err
}
Expand Down
21 changes: 21 additions & 0 deletions pkg/flows/config/blueprints.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package config

import (
_ "embed"
"fmt"
)

// BlueprintDefault holds the contents of blueprints/default.yaml, embedded
// into the binary at build time.
//
//go:embed blueprints/default.yaml
var BlueprintDefault []byte

// Blueprints maps a blueprint name to its raw bytes. GetBlueprint performs
// its lookups against this map; "default" is the only entry registered here.
var Blueprints = map[string][]byte{
"default": BlueprintDefault,
}

// GetBlueprint looks up name in Blueprints and returns the raw bytes stored
// under it. An unknown name yields an error that quotes the requested name.
func GetBlueprint(name string) ([]byte, error) {
	if data, found := Blueprints[name]; found {
		return data, nil
	}
	return nil, fmt.Errorf("blueprint %q not found", name)
}
42 changes: 42 additions & 0 deletions pkg/flows/config/blueprints/default.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
flows:
knowledge:
default: true
globals:
ingestion:
textsplitter:
chunkSize: 800
chunkOverlap: 400
ingestion:
- filetypes: ["*"]
retrieval:
querymodifiers:
# Enhance
- name: enhance
options:
model:
openai:
apiKey: "${OPENAI_API_KEY}"
model: gpt-4o
apiType: OPEN_AI
baseURL: https://api.openai.com/v1
retriever:
name: subquery
options:
limit: 3 # max. 3 subqueries
topK: 10 # topK per search
model:
openai:
apiKey: "${OPENAI_API_KEY}"
model: gpt-4o
apiType: OPEN_AI
baseURL: https://api.openai.com/v1
postprocessors:
- name: similarity
options:
threshold: 0.6
- name: reduce
options:
topK: 10



23 changes: 22 additions & 1 deletion pkg/flows/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ package config
import (
"encoding/json"
"fmt"
"github.com/gptscript-ai/knowledge/pkg/output"
"log/slog"
"os"
"strings"

"github.com/gptscript-ai/knowledge/pkg/output"

"github.com/gptscript-ai/knowledge/pkg/datastore/documentloader"
"github.com/gptscript-ai/knowledge/pkg/datastore/postprocessors"
"github.com/gptscript-ai/knowledge/pkg/datastore/querymodifiers"
Expand Down Expand Up @@ -82,16 +83,36 @@ type TransformerConfig struct {
GenericBaseConfig
}

// FromBlueprint fetches the blueprint registered under name via GetBlueprint
// and parses its bytes with FromBytes. A lookup failure is returned unchanged.
func FromBlueprint(name string) (*FlowConfig, error) {
	raw, lookupErr := GetBlueprint(name)
	if lookupErr != nil {
		return nil, lookupErr
	}

	return FromBytes(raw)
}

// Load resolves reference to a FlowConfig. A reference that begins with
// "blueprint:" has that prefix removed and the remainder is passed to
// FromBlueprint; any other reference is passed unchanged to FromFile.
func Load(reference string) (*FlowConfig, error) {
	const blueprintPrefix = "blueprint:"

	if name, isBlueprint := strings.CutPrefix(reference, blueprintPrefix); isBlueprint {
		return FromBlueprint(name)
	}
	return FromFile(reference)
}

// FromFile loads a FlowConfig from the file at filename. The file is read in
// full with os.ReadFile and its bytes are handed to FromBytes; a read error is
// returned as-is.
func FromFile(filename string) (*FlowConfig, error) {
	raw, readErr := os.ReadFile(filename)
	if readErr != nil {
		return nil, readErr
	}

	return FromBytes(raw)
}

func FromBytes(content []byte) (*FlowConfig, error) {
// Expand environment variables in config
content = []byte(os.ExpandEnv(string(content)))

var err error

var config FlowConfig
jsondata := content
if !json.Valid(content) {
Expand Down

0 comments on commit b164f86

Please sign in to comment.