Skip to content

Commit

Permalink
Feat: rwkv improvements (#937)
Browse files Browse the repository at this point in the history
  • Loading branch information
dave-gray101 authored Aug 22, 2023
1 parent 0d6165e commit 901f070
Show file tree
Hide file tree
Showing 7 changed files with 208 additions and 150 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# go-llama build artifacts
go-llama
go-llama-stable
/gpt4all
go-stable-diffusion
go-piper
Expand Down
2 changes: 2 additions & 0 deletions api/backend/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
Device: c.AutoGPTQ.Device,
UseTriton: c.AutoGPTQ.Triton,
UseFastTokenizer: c.AutoGPTQ.UseFastTokenizer,
// RWKV
Tokenizer: c.Tokenizer,
}
}

Expand Down
3 changes: 3 additions & 0 deletions api/config/prediction.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,7 @@ type PredictionOptions struct {

// Diffusers
ClipSkip int `json:"clip_skip" yaml:"clip_skip"`

// RWKV (?)
Tokenizer string `json:"tokenizer" yaml:"tokenizer"`
}
29 changes: 27 additions & 2 deletions pkg/backend/llm/rwkv/rwkv.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,15 @@ type LLM struct {
}

func (llm *LLM) Load(opts *pb.ModelOptions) error {
tokenizerFile := opts.Tokenizer
if tokenizerFile == "" {
modelFile := filepath.Base(opts.ModelFile)
tokenizerFile = modelFile + tokenizerSuffix
}
modelPath := filepath.Dir(opts.ModelFile)
modelFile := filepath.Base(opts.ModelFile)
model := rwkv.LoadFiles(opts.ModelFile, filepath.Join(modelPath, modelFile+tokenizerSuffix), uint32(opts.GetThreads()))
tokenizerPath := filepath.Join(modelPath, tokenizerFile)

model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads()))

if model == nil {
return fmt.Errorf("could not load model")
Expand Down Expand Up @@ -68,3 +74,22 @@ func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) erro

return nil
}

// TokenizeString encodes opts.Prompt with the model's tokenizer and returns
// the resulting token IDs (widened to int32) together with their count.
func (llm *LLM) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) {
	tokens, err := llm.rwkv.Tokenizer.Encode(opts.Prompt)
	if err != nil {
		// On failure, return the zero response alongside the tokenizer error.
		return pb.TokenizationResponse{}, err
	}

	// Widen each token ID to int32 for the protobuf response.
	ids := make([]int32, 0, len(tokens))
	for _, tok := range tokens {
		ids = append(ids, int32(tok.ID))
	}

	return pb.TokenizationResponse{
		Length: int32(len(ids)),
		Tokens: ids,
	}, nil
}
265 changes: 138 additions & 127 deletions pkg/grpc/proto/backend.pb.go

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions pkg/grpc/proto/backend.proto
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ message ModelOptions {
string CLIPModel = 31;
string CLIPSubfolder = 32;
int32 CLIPSkip = 33;

// RWKV
string Tokenizer = 34;
}

message Result {
Expand Down
55 changes: 34 additions & 21 deletions pkg/grpc/proto/backend_grpc.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 901f070

Please sign in to comment.