From 35ec9ea4598fa534481c3e98c6a77fb7417f6df6 Mon Sep 17 00:00:00 2001 From: synw Date: Tue, 9 Apr 2024 19:00:39 +0200 Subject: [PATCH] Updates in packages --- README.md | 1 - packages/api/package.json | 4 +- packages/api/src/providers/koboldcpp.ts | 16 ++--- packages/api/src/providers/ollama.ts | 67 +++++++++++++------ packages/evaluate/package.json | 14 ++-- packages/evaluate/src/evaluate.ts | 50 ++++++++++++-- .../src/evaluators/code/containsCodeBlock.ts | 2 +- .../src/evaluators/code/isOnlyCodeBlock.ts | 19 ++++++ .../evaluators/code/js/containsValidJson.ts | 2 + packages/evaluate/src/evaluators/code/main.ts | 2 + .../src/evaluators/text/containsOnlyOne.ts | 43 ++++++++++++ .../src/evaluators/text/containsText.ts | 2 +- .../evaluate/src/evaluators/text/isText.ts | 27 ++++++++ packages/evaluate/src/evaluators/text/main.ts | 4 ++ .../evaluate/src/evaluators/text/maxLength.ts | 19 ++++++ .../src/evaluators/text/startsWith.ts | 32 +++++++++ packages/evaluate/src/interfaces.ts | 16 +++-- packages/evaluate/src/main.ts | 4 +- packages/evaluate/src/testcase.ts | 48 +++++++------ packages/evaluate/src/testrunner.ts | 12 ++-- packages/evaluate/src/utils.ts | 31 ++++++++- packages/evaluate/tsconfig.json | 6 +- packages/evaluations/package.json | 14 ++-- packages/evaluations/src/code/js/fix_json.ts | 6 +- .../evaluations/src/code/js/generate_js.ts | 4 +- packages/evaluations/src/code/js/index.ts | 3 +- .../evaluations/src/code/js/optimize_ts.ts | 4 +- .../evaluations/src/code/js/pydantic_to_ts.ts | 4 +- .../src/code/python/create_docstring.ts | 2 +- packages/evaluations/src/code/python/index.ts | 3 +- .../src/instructions/format/cot.ts | 35 ++++++++++ .../src/instructions/format/cot_points.ts | 37 ++++++++++ .../src/instructions/format/index.ts | 11 +++ .../src/instructions/format/json.ts | 27 ++++++++ .../src/instructions/format/simple.ts | 29 ++++++++ packages/types/package.json | 2 +- packages/types/src/interfaces.ts | 2 + 37 files changed, 500 insertions(+), 104 deletions(-) create mode 100644 packages/evaluate/src/evaluators/code/isOnlyCodeBlock.ts create mode 100644 packages/evaluate/src/evaluators/text/containsOnlyOne.ts create mode 100644 packages/evaluate/src/evaluators/text/isText.ts create mode 100644 packages/evaluate/src/evaluators/text/maxLength.ts create mode 100644 packages/evaluate/src/evaluators/text/startsWith.ts create mode 100644 packages/evaluations/src/instructions/format/cot.ts create mode 100644 packages/evaluations/src/instructions/format/cot_points.ts create mode 100644 packages/evaluations/src/instructions/format/index.ts create mode 100644 packages/evaluations/src/instructions/format/json.ts create mode 100644 packages/evaluations/src/instructions/format/simple.ts diff --git a/README.md b/README.md index a6d3dd7..0e3adb9 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,6 @@ An api to query local language models using different backends - [Llama.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/server) - [Koboldcpp](https://github.com/LostRuins/koboldcpp) - [Ollama](https://github.com/jmorganca/ollama) -- [Goinfer](https://github.com/synw/goinfer) ## Quickstart diff --git a/packages/api/package.json b/packages/api/package.json index ce8c4de..c827d56 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -1,6 +1,6 @@ { "name": "@locallm/api", - "version": "0.0.29", + "version": "0.0.30", "description": "An api to query local language models using different backends", "repository": "https://github.com/synw/locallm", "scripts": { @@ 
-12,7 +12,7 @@ "restmix": "^0.4.0" }, "devDependencies": { - "@locallm/types": "^0.0.14", + "@locallm/types": "^0.0.15", "@rollup/plugin-node-resolve": "^15.2.3", "@rollup/plugin-terser": "^0.4.4", "@rollup/plugin-typescript": "^11.1.5", diff --git a/packages/api/src/providers/koboldcpp.ts b/packages/api/src/providers/koboldcpp.ts index 74471e8..eafb783 100644 --- a/packages/api/src/providers/koboldcpp.ts +++ b/packages/api/src/providers/koboldcpp.ts @@ -146,18 +146,18 @@ class KoboldcppProvider implements LmProvider { if (this.apiKey.length > 0) { headers["Authorization"] = `Bearer ${this.apiKey}` } - const response = await fetch(url, { - method: 'POST', - headers: headers, - body: body, - }); - if (!response.body) { - throw new Error("No response body") - } let text = ""; let data = {}; if (inferenceParams?.stream == true) { + const response = await fetch(url, { + method: 'POST', + headers: headers, + body: body, + }); + if (!response.body) { + throw new Error("No response body") + } let i = 1; let buf = new Array(); const eventStream = response.body diff --git a/packages/api/src/providers/ollama.ts b/packages/api/src/providers/ollama.ts index 9026b77..e9d60cd 100644 --- a/packages/api/src/providers/ollama.ts +++ b/packages/api/src/providers/ollama.ts @@ -1,4 +1,7 @@ import { useApi } from "restmix"; +import { type ParsedEvent } from 'eventsource-parser' +// @ts-ignore +import { EventSourceParserStream } from 'eventsource-parser/stream'; import { InferenceParams, InferenceResult, LmProvider, LmProviderParams, ModelConf } from "@locallm/types"; //import { InferenceParams, InferenceResult, LmProvider, LmProviderParams, ModelConf } from "@/packages/types/interfaces.js"; import { parseJson as parseJsonUtil } from './utils'; @@ -49,7 +52,11 @@ class OllamaProvider implements LmProvider { if (res.ok) { //console.log("RES", res.data); for (const m of res.data["models"]) { - this.models.push({ name: m.name, ctx: 2048 }); + const info = { + size: m.details.parameter_size, + quant: m.details.quantization_level, + } + this.models.push({ name: m.name, ctx: -1, info: info }); } } else { throw new Error(`Error ${res.status} loading models ${res.text}`); @@ -140,6 +147,7 @@ class OllamaProvider implements LmProvider { raw = params.extra.raw; delete params.extra.raw; } + console.log("PARAMS", params); let inferParams: Record = { model: this.model.name, prompt: prompt, @@ -150,34 +158,34 @@ class OllamaProvider implements LmProvider { }, ...params.extra } - if (params.threads) { + if (params.threads !== undefined) { inferParams.options.num_thread = params.threads; } - if (params.gpu_layers) { + if (params.gpu_layers !== undefined) { inferParams.options.gpu_layers = params.gpu_layers; } - if (params.repeat_penalty) { + if (params.repeat_penalty !== undefined) { inferParams.options.repeat_penalty = params.repeat_penalty; } - if (params.stop && params.stop.length > 0) { + if (params.stop !== undefined && params.stop?.length > 0) { inferParams.options.stop = params.stop; } - if (params.temperature) { + if (params.temperature !== undefined) { inferParams.options.temperature = params.temperature; } - if (params.tfs) { + if (params.tfs !== undefined) { inferParams.options.tfs_z = params.tfs; } - if (params.top_k) { + if (params.top_k !== undefined) { inferParams.options.top_k = params.top_k; } - if (params.top_p) { + if (params.top_p !== undefined) { inferParams.options.top_p = params.top_p; } - if (params.max_tokens) { + if (params.max_tokens !== undefined) { inferParams.options.num_predict = 
params.max_tokens; } - if (params.extra?.format) { + if (params.extra?.format !== undefined) { inferParams["format"] = params.extra.format; delete params.extra.format } @@ -185,9 +193,10 @@ class OllamaProvider implements LmProvider { if (params.extra && Object.keys(params.extra).length > 0) { inferParams = { ...inferParams, ...params.extra }; } - //console.log("Params", inferParams); + //console.log("INFER PARAMS", inferParams); let text = ""; let data = {}; + let stats: Record = {}; if (inferParams?.stream == true) { const body = JSON.stringify(inferParams); const buf = new Array(); @@ -196,30 +205,42 @@ class OllamaProvider implements LmProvider { headers: { 'Content-Type': 'application/json' }, body: body, }); - if (!response.body) { throw new Error("No response body") } const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let lastBatch: Record = {}; while (true) { const { done, value } = await reader.read(); if (done) break; - const raw = new TextDecoder().decode(value); - const d = JSON.parse(raw); - if (d["done"]) { - break + let raw = decoder.decode(value); + //console.log("RAW", raw); + const parts = raw.split('\n'); + let pbuf = new Array(); + for (const part of parts) { + try { + //console.log(part); + const p = JSON.parse(part); + lastBatch = p; + pbuf.push(p["response"]); + } catch (error) { + console.warn('invalid json: ', part) + } } - const t = d["response"]; + const t = pbuf.join(""); buf.push(t); if (this.onToken) { this.onToken(t); } } - text = buf.join("") + text = buf.join(""); + stats = lastBatch; } else { const res = await this.api.post>("/api/generate", inferParams); if (res.ok) { - text = res.data.response + text = res.data.response; + stats = res.data; } else { throw new Error(`Error ${res.status} posting inference query ${res.data}`) } @@ -227,10 +248,14 @@ class OllamaProvider implements LmProvider { if (parseJson) { data = parseJsonUtil(text, parseJsonFunc); } + delete stats.response; + delete stats.context; + delete stats.done; + //console.log("STATS", stats); const ir: InferenceResult = { text: text, data: data, - stats: {}, + stats: stats, }; return ir } diff --git a/packages/evaluate/package.json b/packages/evaluate/package.json index 60dfeb9..42db23a 100644 --- a/packages/evaluate/package.json +++ b/packages/evaluate/package.json @@ -8,19 +8,19 @@ "build": "rm -f dist/* && rollup -c" }, "dependencies": { - "@locallm/api": "^0.0.21", - "modprompt": "^0.3.6" + "@locallm/api": "^0.0.29", + "modprompt": "^0.6.1" }, "devDependencies": { - "@locallm/types": "^0.0.13", + "@locallm/types": "^0.0.14", "@rollup/plugin-node-resolve": "^15.2.3", "@rollup/plugin-terser": "^0.4.4", - "@rollup/plugin-typescript": "^11.1.5", - "@types/node": "^20.10.5", - "rollup": "^4.9.1", + "@rollup/plugin-typescript": "^11.1.6", + "@types/node": "^20.12.2", + "rollup": "^4.13.2", "ts-node": "^10.9.2", "tslib": "^2.6.2", - "typescript": "^5.3.3" + "typescript": "^5.4.3" }, "type": "module", "files": [ diff --git a/packages/evaluate/src/evaluate.ts b/packages/evaluate/src/evaluate.ts index 83b12a6..43c05d3 100644 --- a/packages/evaluate/src/evaluate.ts +++ b/packages/evaluate/src/evaluate.ts @@ -1,7 +1,12 @@ -import { EvaluationFunction, EvaluationResult, TestResult } from "./interfaces.js"; -import { containsCodeBlock } from "./evaluators/code/main.js"; +import { InferenceParams } from "@locallm/types"; +import { EvaluationFunction, EvaluationResult, FinalEvaluationResult, TestResult } from "./interfaces.js"; +import { containsCodeBlock, isOnlyCodeBlock } 
from "./evaluators/code/main.js"; import { containsValidJavascript, containsValidJson } from "./evaluators/code/js/main.js"; import { containsText } from "./evaluators/text/containsText.js"; +import { isText } from "./evaluators/text/isText.js"; +import { startsWith } from "./evaluators/text/startsWith.js"; +import { maxLength } from "./evaluators/text/maxLength.js"; +import { containsOnlyOne } from "./evaluators/text/containsOnlyOne.js"; class Evaluator { thresold = 100; @@ -11,19 +16,27 @@ class Evaluator { this.thresold = thresold; } - run(response: string): TestResult { + run(name: string, prompt: string, response: string, inferParams: InferenceParams, stats: Record): TestResult { const results: TestResult = { + name: name, pass: false, score: 0, + thresold: this.thresold, + prompt: prompt, output: response, - evaluations: new Array(), + evaluations: new Array(), + inferParams: inferParams, + stats: stats, }; this.evalFuncs.forEach((fp) => { const res = fp.func(response, fp.name, fp.param, fp.error); if (res.pass) { results.score += fp.passScore; } - results.evaluations.push(res); + results.evaluations.push({ + ...res, + points: fp.passScore, + }); }); if (results.score >= this.thresold) { results.pass = true; @@ -36,8 +49,33 @@ class Evaluator { return this } + isText(passScore: number, param: string | Array, error: string | null = null): Evaluator { + this._stackEvalFunc(`Is text: ${param}`, passScore, isText, param, error); + return this + } + containsText(passScore: number, param: string | Array, error: string | null = null): Evaluator { - this._stackEvalFunc("Contains text", passScore, containsText, param, error); + this._stackEvalFunc(`Contains text ${param}`, passScore, containsText, param, error); + return this + } + + containsOnlyOne(passScore: number, param: string | Array, error: string | null = null): Evaluator { + this._stackEvalFunc(`Contains only one occurence of ${param}`, passScore, containsOnlyOne, param, error); + return this + } + + startsWith(passScore: number, param: string | Array, error: string | null = null): Evaluator { + this._stackEvalFunc(`Starts with ${param}`, passScore, startsWith, param, error); + return this + } + + maxLength(passScore: number, param: number, error: string | null = null): Evaluator { + this._stackEvalFunc(`Max length ${param}`, passScore, maxLength, param, error); + return this + } + + isOnlyCodeBlock(passScore: number, param: string | null = null, error: string | null = null): Evaluator { + this._stackEvalFunc("Is only a code block", passScore, isOnlyCodeBlock, param, error); return this } diff --git a/packages/evaluate/src/evaluators/code/containsCodeBlock.ts b/packages/evaluate/src/evaluators/code/containsCodeBlock.ts index 8b94a74..3bdc46b 100644 --- a/packages/evaluate/src/evaluators/code/containsCodeBlock.ts +++ b/packages/evaluate/src/evaluators/code/containsCodeBlock.ts @@ -13,7 +13,7 @@ function containsCodeBlock(response: string, name: string, param: any, error: st if (error) { res.error = error } else { - res.error = "The output is not valid code" + res.error = "The output does not contain a code block" } } else { res.pass = true diff --git a/packages/evaluate/src/evaluators/code/isOnlyCodeBlock.ts b/packages/evaluate/src/evaluators/code/isOnlyCodeBlock.ts new file mode 100644 index 0000000..89b380d --- /dev/null +++ b/packages/evaluate/src/evaluators/code/isOnlyCodeBlock.ts @@ -0,0 +1,19 @@ +import { trimStr } from "../../utils.js"; +import { EvaluationResult } from "../../interfaces.js"; + +function 
isOnlyCodeBlock(response: string, name: string, param: any, error: string | null = null): EvaluationResult { + const res: EvaluationResult = { + name: name, + pass: false, + error: null, + }; + const trimedStr = trimStr(response); + if (trimedStr.startsWith("```") && trimedStr.endsWith("```")) { + res.pass = true; + } else { + res.error = "The output is not only a code block" + } + return res +} + +export { isOnlyCodeBlock } \ No newline at end of file diff --git a/packages/evaluate/src/evaluators/code/js/containsValidJson.ts b/packages/evaluate/src/evaluators/code/js/containsValidJson.ts index 1ba7858..ed779f5 100644 --- a/packages/evaluate/src/evaluators/code/js/containsValidJson.ts +++ b/packages/evaluate/src/evaluators/code/js/containsValidJson.ts @@ -10,6 +10,8 @@ function containsValidJson(response: string, name: string, param: any, error: st let code: string | null = null; if (response.includes("```")) { code = extractCodeBetweenTags(response); + } else { + code = response; } if (!code) { res.error = "The output does not contain a code block" diff --git a/packages/evaluate/src/evaluators/code/main.ts b/packages/evaluate/src/evaluators/code/main.ts index ce8f9e5..4b582ce 100644 --- a/packages/evaluate/src/evaluators/code/main.ts +++ b/packages/evaluate/src/evaluators/code/main.ts @@ -1,5 +1,7 @@ import { containsCodeBlock } from "./containsCodeBlock.js"; +import { isOnlyCodeBlock } from "./isOnlyCodeBlock.js"; export { containsCodeBlock, + isOnlyCodeBlock, } \ No newline at end of file diff --git a/packages/evaluate/src/evaluators/text/containsOnlyOne.ts b/packages/evaluate/src/evaluators/text/containsOnlyOne.ts new file mode 100644 index 0000000..3bca6c6 --- /dev/null +++ b/packages/evaluate/src/evaluators/text/containsOnlyOne.ts @@ -0,0 +1,43 @@ +import { containsOneOccurrence } from "../../utils.js"; +import { EvaluationResult } from "../../interfaces.js"; + +function containsOnlyOne(response: string, name: string, strs: string | Array, error: string | null = null): EvaluationResult { + let conditions = new Array(); + if (typeof strs == "string") { + conditions.push(strs) + } else { + conditions = strs + } + const res: EvaluationResult = { + name: name, + pass: false, + error: null, + }; + const errs = new Array(); + let passN = conditions.length; + for (const condition of conditions) { + const occ = containsOneOccurrence(response, condition); + if (occ == null || occ == false) { + let defaultError = `The response does not contain the "${condition}" string`; + if (occ == false) { + defaultError = `The response contains more than one occurence of the "${condition}" string`; + } + if (error) { + errs.push(error); + } else { + errs.push(defaultError) + } + } else { + passN-- + } + } + if (passN == 0) { + res.pass = true + } + if (errs.length > 0) { + res.error = errs.join("\n") + } + return res +} + +export { containsOnlyOne } \ No newline at end of file diff --git a/packages/evaluate/src/evaluators/text/containsText.ts b/packages/evaluate/src/evaluators/text/containsText.ts index e4a251b..e1dc63a 100644 --- a/packages/evaluate/src/evaluators/text/containsText.ts +++ b/packages/evaluate/src/evaluators/text/containsText.ts @@ -24,7 +24,7 @@ function containsText(response: string, name: string, strs: string | Array, error: string | null = null): EvaluationResult { + let pass = false; + let err: string | null = null; + let conditions = new Array(); + if (typeof strs == "string") { + conditions.push(strs) + } else { + conditions = strs + } + const trimedStr = trimStr(response); + if 
(conditions.includes(trimedStr)) { + pass = true; + } else { + err = `The response is not in: ${conditions.join(", ")}` + } + const res: EvaluationResult = { + name: name, + pass: pass, + error: err, + }; + return res +} + +export { isText } \ No newline at end of file diff --git a/packages/evaluate/src/evaluators/text/main.ts b/packages/evaluate/src/evaluators/text/main.ts index c9005aa..0cc3c78 100644 --- a/packages/evaluate/src/evaluators/text/main.ts +++ b/packages/evaluate/src/evaluators/text/main.ts @@ -1,5 +1,9 @@ import { containsText } from "./containsText.js"; +import { isText } from "./isText.js"; +import { startsWith } from "./startsWith.js"; export { containsText, + isText, + startsWith, } \ No newline at end of file diff --git a/packages/evaluate/src/evaluators/text/maxLength.ts b/packages/evaluate/src/evaluators/text/maxLength.ts new file mode 100644 index 0000000..ecbf7c1 --- /dev/null +++ b/packages/evaluate/src/evaluators/text/maxLength.ts @@ -0,0 +1,19 @@ +import { EvaluationResult } from "../../interfaces.js"; + +function maxLength(response: string, name: string, max: number, error: string | null = null): EvaluationResult { + let pass = false; + let err: string | null = null; + if (response.length <= max) { + pass = true; + } else { + err = `The response length is over ${max} characters` + } + const res: EvaluationResult = { + name: name, + pass: pass, + error: err, + }; + return res +} + +export { maxLength } \ No newline at end of file diff --git a/packages/evaluate/src/evaluators/text/startsWith.ts b/packages/evaluate/src/evaluators/text/startsWith.ts new file mode 100644 index 0000000..cae07e2 --- /dev/null +++ b/packages/evaluate/src/evaluators/text/startsWith.ts @@ -0,0 +1,32 @@ +import { trimStr } from "../../utils.js"; +import { EvaluationResult } from "../../interfaces.js"; + +function startsWith(response: string, name: string, strs: string | Array, error: string | null = null): EvaluationResult { + let pass = false; + let err: string | null = null; + let conditions = new Array(); + if (typeof strs == "string") { + conditions.push(strs) + } else { + conditions = strs + } + const trimedStr = trimStr(response); + //console.log("TRIMED:", "|" + trimedStr + "|"); + for (const str of conditions) { + if (trimedStr.startsWith(str)) { + pass = true; + break; + } + } + if (!pass) { + err = `The response does not start with ${strs}` + } + const res: EvaluationResult = { + name: name, + pass: pass, + error: err, + }; + return res +} + +export { startsWith } \ No newline at end of file diff --git a/packages/evaluate/src/interfaces.ts b/packages/evaluate/src/interfaces.ts index b3ce1e4..21bfe81 100644 --- a/packages/evaluate/src/interfaces.ts +++ b/packages/evaluate/src/interfaces.ts @@ -5,8 +5,6 @@ import { Evaluator } from "./evaluate"; type EvalFunction = (text: string) => TestResult; -// { test: { template: [TestResult]}} -//type TestResultsForModels = Record>>; type TestResults = Record>; interface LmTestParams { @@ -24,14 +22,22 @@ interface EvaluationResult { error: string | null; } +interface FinalEvaluationResult extends EvaluationResult { + points: number; +} + interface TestResult { + name: string; pass: boolean; score: number; + thresold: number; output: string; - evaluations: Array; - error?: string; + prompt: string; + evaluations: Array; + inferParams: InferenceParams; + stats: Record; } type EvaluationFunction = (name: string, response: string, param: any, error: string | null) => EvaluationResult; -export { LmTestParams, EvalFunction, TestResult, 
TestResults, EvaluationResult, EvaluationFunction } \ No newline at end of file +export { LmTestParams, EvalFunction, TestResult, TestResults, FinalEvaluationResult, EvaluationResult, EvaluationFunction } \ No newline at end of file diff --git a/packages/evaluate/src/main.ts b/packages/evaluate/src/main.ts index 348da0f..e7ed7f5 100644 --- a/packages/evaluate/src/main.ts +++ b/packages/evaluate/src/main.ts @@ -1,6 +1,6 @@ -import { LmTestParams, EvalFunction, TestResult } from "./interfaces.js"; +/*import { LmTestParams, EvalFunction, TestResult } from "./interfaces.js"; import { LmTestCase } from "./testcase.js"; import { LmTestRunner } from "./testrunner.js"; import { extractCodeBetweenTags } from "./utils.js"; -export { LmTestParams, EvalFunction, TestResult, LmTestCase, LmTestRunner, extractCodeBetweenTags } \ No newline at end of file +export { LmTestParams, EvalFunction, TestResult, LmTestCase, LmTestRunner, extractCodeBetweenTags }*/ \ No newline at end of file diff --git a/packages/evaluate/src/testcase.ts b/packages/evaluate/src/testcase.ts index 3bc78ec..d194fe3 100644 --- a/packages/evaluate/src/testcase.ts +++ b/packages/evaluate/src/testcase.ts @@ -50,40 +50,46 @@ class LmTestCase { return this }*/ - async run(lm: Lm, templateName?: string, overrideInferenceParams?: InferenceParams): Promise { - if (lm.model.name.length == 0) { - await lm.loadModel(""); - } - // call api - let inferParams: InferenceParams; - if (overrideInferenceParams) { - //console.log("Override params", overrideInferenceParams) - const _tmp = overrideInferenceParams as Record; - const _newp = this.inferParams as Record; - for (const [k, v] of Object.entries(_tmp)) { - _newp[k] = v + async run( + lm: Lm, + templateName?: string, + overrideInferenceParams?: InferenceParams, + onRunTestcase: () => void = () => null + ): Promise { + if (lm.providerType != "ollama") { + if (lm.model.name.length == 0) { + await lm.modelsInfo() } - inferParams = _newp as InferenceParams; - } else { - //console.log("Test params") - inferParams = this.inferParams; } + onRunTestcase(); + // params + const ip = this.inferParams as Record; + const oip = overrideInferenceParams as Record ?? {}; + Object.keys(oip).forEach((k) => ip[k] = oip[k]); + const inferParams = ip as InferenceParams; if (this._modelName) { inferParams.model = { name: this._modelName, ctx: this._ctx, } } + /*console.log("IP", this.inferParams); + console.log("OIP", overrideInferenceParams); + console.log("FIP", inferParams);*/ + // template let tpl = this.template; if (templateName) { - tpl = this.template.cloneTo(templateName) + tpl = this.template.cloneTo(templateName); + } + if (tpl.stop) { + inferParams.stop = [...(inferParams.stop ?? 
[]), ...tpl.stop]; } - //console.log("PARAMS", inferParams) + //console.log("TPL", tpl.name, "/", tpl.stop); //console.log("Running inference with prompt:"); //console.log(this.template.render()); - const res = await lm.infer(tpl.prompt(this.prompt), inferParams); - const result = this.evaluator.run(res.text); - //console.log(result); + const p = tpl.prompt(this.prompt); + const res = await lm.infer(p, inferParams); + const result = this.evaluator.run(this.name, this.prompt, res.text, inferParams, res.stats); return result } } diff --git a/packages/evaluate/src/testrunner.ts b/packages/evaluate/src/testrunner.ts index 2cc1b2d..93df069 100644 --- a/packages/evaluate/src/testrunner.ts +++ b/packages/evaluate/src/testrunner.ts @@ -1,4 +1,5 @@ -import { Lm } from "@locallm/api"; +//import { Lm } from "@locallm/api"; +import { Lm } from "./packages/locallm/api.js"; import { TestResult, TestResults } from "./interfaces.js"; import { LmTestCase } from "./testcase.js"; import { InferenceParams, ModelConf } from "@locallm/types"; @@ -24,7 +25,7 @@ class LmTestRunner { static async init(lm: Lm, testCases: Array, isVerbose?: boolean, showErrors?: boolean): Promise { const runner = new LmTestRunner(lm, isVerbose, showErrors); - await runner.lm.loadModel(""); + //await runner.lm.loadModel(""); let ctx = runner.lm.model.ctx ?? 2048; if (runner.lm.model.ctx === undefined) { console.warn(`The model conf ${runner.lm.model.name} does not have a ctx value, using 2048`) @@ -38,14 +39,14 @@ class LmTestRunner { return runner } - async run(templateName?: string, inferenceParams?: InferenceParams) { + async run(templateName?: string, inferenceParams?: InferenceParams, onRunTestcase: () => void = () => null) { this.results = {}; if (this.isVerbose) { console.log("----------------------------------"); console.log("🎬", this.modelConf.name, "..."); console.log("----------------------------------"); } - await this.runTestCases(templateName, inferenceParams); + await this.runTestCases(templateName, inferenceParams, onRunTestcase); } printTestResult(testname: string, result: TestResult, _showErrors = true) { @@ -90,9 +91,10 @@ class LmTestRunner { } } - async runTestCases(templateName?: string, inferenceParams?: InferenceParams) { + async runTestCases(templateName?: string, inferenceParams?: InferenceParams, onRunTestcase: () => void = () => null) { let n = 0; for (const testCase of this.testCases) { + onRunTestcase(); let res: TestResult; //console.log("RUN", testCase, m, templateName) res = await testCase.run(this.lm, templateName, inferenceParams); diff --git a/packages/evaluate/src/utils.ts b/packages/evaluate/src/utils.ts index 02f70cf..1bee288 100644 --- a/packages/evaluate/src/utils.ts +++ b/packages/evaluate/src/utils.ts @@ -5,4 +5,33 @@ function extractCodeBetweenTags(input: string): string | null { return match ? match[1].trim() : null; } -export { extractCodeBetweenTags } \ No newline at end of file +function trimStr(str: string): string { + const s = str.trim().replace(/^\s*[\r\n]+|[\r\n]+\s*$/g, ''); + //console.log("TRIMED:", "<|START|>" + s + "<|END|>"); + return s +} + +/** + * Checks if the given input string contains one and only one occurrence of the specified search value. + * + * @param str - The input string to search for the search value. + * @param searchValue - The search value to look for in the input string. + * @returns A boolean value indicating whether the input string contains one and only one occurrence of the search value. 
Or + * a null value if the input string does not contain any occurrence of the search value + * + * @example + * const inputString = "abcxyzdefxyz"; + * const searchValue = "xyz"; + * const result = containsOneOccurrence(inputString, searchValue); + * console.log(result); // Output: false + */ +function containsOneOccurrence(str: string, searchValue: string): boolean | null { + const index = str.indexOf(searchValue); + if (index === -1) { + return null; + } + const lastIndex = str.lastIndexOf(searchValue); + return index === lastIndex; +} + +export { extractCodeBetweenTags, trimStr, containsOneOccurrence } \ No newline at end of file diff --git a/packages/evaluate/tsconfig.json b/packages/evaluate/tsconfig.json index baf94a7..9aed700 100644 --- a/packages/evaluate/tsconfig.json +++ b/packages/evaluate/tsconfig.json @@ -1,7 +1,7 @@ { "compilerOptions": { - "target": "es2015", - "module": "es2015", + "target": "ESNext", + "module": "ESNext", "moduleResolution": "node", "strict": true, "sourceMap": false, @@ -17,7 +17,7 @@ "node" ], "lib": [ - "es2015", + "ESNext", "dom" ], "baseUrl": ".", diff --git a/packages/evaluations/package.json b/packages/evaluations/package.json index cf9fe4a..449fc32 100644 --- a/packages/evaluations/package.json +++ b/packages/evaluations/package.json @@ -8,19 +8,19 @@ "build": "rm -f dist/* && rollup -c" }, "dependencies": { - "@locallm/api": "^0.0.21", - "modprompt": "^0.3.6" + "@locallm/api": "^0.0.29", + "modprompt": "^0.6.1" }, "devDependencies": { - "@locallm/types": "^0.0.13", + "@locallm/types": "^0.0.14", "@rollup/plugin-node-resolve": "^15.2.3", "@rollup/plugin-terser": "^0.4.4", - "@rollup/plugin-typescript": "^11.1.5", - "@types/node": "^20.10.5", - "rollup": "^4.9.1", + "@rollup/plugin-typescript": "^11.1.6", + "@types/node": "^20.12.2", + "rollup": "^4.13.2", "ts-node": "^10.9.2", "tslib": "^2.6.2", - "typescript": "^5.3.3" + "typescript": "^5.4.3" }, "type": "module", "files": [ diff --git a/packages/evaluations/src/code/js/fix_json.ts b/packages/evaluations/src/code/js/fix_json.ts index 3991e79..4d69f42 100644 --- a/packages/evaluations/src/code/js/fix_json.ts +++ b/packages/evaluations/src/code/js/fix_json.ts @@ -1,12 +1,12 @@ import { InferenceParams } from "@locallm/types"; import { PromptTemplate } from "modprompt"; import { Evaluator } from "../../../../../packages/evaluate/src/evaluate.js"; -import { LmTestCase } from "../../../../../packages/evaluate/src/main.js"; +import { LmTestCase } from "../../../../../packages/evaluate/src/testcase.js"; const template = new PromptTemplate("alpaca") .afterSystem("You are a javascript expert") - .replacePrompt("fix this invalid json and respond with valid json only:\n\n```json\n{prompt}\n```") + .replacePrompt("fix this invalid json and respond with a markdown code block only:\n\n```json\n{prompt}\n```") .addShot( `{"a":2, "b": text 585,} // a comment`, `\n\n\`\`\`json @@ -31,7 +31,7 @@ const evaluator = new Evaluator() const inferParams: InferenceParams = { temperature: 0, - max_tokens: 120, + max_tokens: 250, }; const fixJsonTest = new LmTestCase({ name: "fix json", diff --git a/packages/evaluations/src/code/js/generate_js.ts b/packages/evaluations/src/code/js/generate_js.ts index f4c6190..a50bcc5 100644 --- a/packages/evaluations/src/code/js/generate_js.ts +++ b/packages/evaluations/src/code/js/generate_js.ts @@ -1,7 +1,7 @@ import { InferenceParams } from "@locallm/types"; import { PromptTemplate } from "modprompt"; import { Evaluator } from "../../../../../packages/evaluate/src/evaluate.js"; -import
{ LmTestCase } from "../../../../../packages/evaluate/src/main.js"; +import { LmTestCase } from "../../../../../packages/evaluate/src/testcase.js"; const template = new PromptTemplate("alpaca") @@ -27,7 +27,7 @@ const evaluator = new Evaluator() const inferParams: InferenceParams = { temperature: 0, - max_tokens: 250, + max_tokens: 512, }; const generateJs = new LmTestCase({ name: "generate js code", diff --git a/packages/evaluations/src/code/js/index.ts b/packages/evaluations/src/code/js/index.ts index f894340..f1cb302 100644 --- a/packages/evaluations/src/code/js/index.ts +++ b/packages/evaluations/src/code/js/index.ts @@ -2,7 +2,8 @@ import { fixJsonTest } from "./fix_json.js"; import { pydantic2TsTest } from "./pydantic_to_ts.js"; import { optimizeTsTest } from "./optimize_ts.js"; import { generateJs } from "./generate_js.js"; +import { LmTestCase } from "../../../../evaluate/src/testcase.js"; -const tests = [fixJsonTest, pydantic2TsTest, optimizeTsTest, generateJs]; +const tests = new Array(fixJsonTest, pydantic2TsTest, optimizeTsTest, generateJs); export { tests } \ No newline at end of file diff --git a/packages/evaluations/src/code/js/optimize_ts.ts b/packages/evaluations/src/code/js/optimize_ts.ts index 85f86f2..bb1b382 100644 --- a/packages/evaluations/src/code/js/optimize_ts.ts +++ b/packages/evaluations/src/code/js/optimize_ts.ts @@ -1,7 +1,7 @@ import { InferenceParams } from "@locallm/types"; import { PromptTemplate } from "modprompt"; import { Evaluator } from "../../../../../packages/evaluate/src/evaluate.js"; -import { LmTestCase } from "../../../../../packages/evaluate/src/main.js"; +import { LmTestCase } from "../../../../../packages/evaluate/src/testcase.js"; const template = new PromptTemplate("alpaca") @@ -22,7 +22,7 @@ const evaluator = new Evaluator() const inferParams: InferenceParams = { temperature: 0, - max_tokens: 150, + max_tokens: 250, }; const optimizeTsTest = new LmTestCase({ name: "optimize Typescript", diff --git a/packages/evaluations/src/code/js/pydantic_to_ts.ts b/packages/evaluations/src/code/js/pydantic_to_ts.ts index 448629f..6dd9c56 100644 --- a/packages/evaluations/src/code/js/pydantic_to_ts.ts +++ b/packages/evaluations/src/code/js/pydantic_to_ts.ts @@ -1,7 +1,7 @@ import { InferenceParams } from "@locallm/types"; import { PromptTemplate } from "modprompt"; import { Evaluator } from "../../../../../packages/evaluate/src/evaluate.js"; -import { LmTestCase } from "../../../../../packages/evaluate/src/main.js"; +import { LmTestCase } from "../../../../../packages/evaluate/src/testcase.js"; const template = new PromptTemplate("alpaca") @@ -67,7 +67,7 @@ const evaluator = new Evaluator() const inferParams: InferenceParams = { temperature: 0, - max_tokens: 150, + max_tokens: 250, }; const pydantic2TsTest = new LmTestCase({ name: "pydantic_to_ts", diff --git a/packages/evaluations/src/code/python/create_docstring.ts b/packages/evaluations/src/code/python/create_docstring.ts index 9ecc497..b0559ad 100644 --- a/packages/evaluations/src/code/python/create_docstring.ts +++ b/packages/evaluations/src/code/python/create_docstring.ts @@ -1,7 +1,7 @@ import { InferenceParams } from "@locallm/types"; import { PromptTemplate } from "modprompt"; import { Evaluator } from "../../../../../packages/evaluate/src/evaluate.js"; -import { LmTestCase } from "../../../../../packages/evaluate/src/main.js"; +import { LmTestCase } from "../../../../../packages/evaluate/src/testcase.js"; const baseprompt = `in Python create a detailed and helpful Google style docstring for 
this code: diff --git a/packages/evaluations/src/code/python/index.ts b/packages/evaluations/src/code/python/index.ts index 33f9b21..18c08c8 100644 --- a/packages/evaluations/src/code/python/index.ts +++ b/packages/evaluations/src/code/python/index.ts @@ -1,5 +1,6 @@ +import { LmTestCase } from "@/packages/evaluate/testcase.js"; import { createDocstringTest } from "./create_docstring.js"; -const tests = [createDocstringTest]; +const tests = new Array(createDocstringTest); export { tests } \ No newline at end of file diff --git a/packages/evaluations/src/instructions/format/cot.ts b/packages/evaluations/src/instructions/format/cot.ts new file mode 100644 index 0000000..7f3cbf8 --- /dev/null +++ b/packages/evaluations/src/instructions/format/cot.ts @@ -0,0 +1,35 @@ +import { InferenceParams } from "@locallm/types"; +import { PromptTemplate } from "modprompt"; +import { Evaluator } from "../../../../evaluate/src/evaluate.js"; +import { LmTestCase } from "../../../../evaluate/src/testcase.js"; + + +const template = new PromptTemplate("alpaca"); +const prompt = `What planet or moon of the solar system would be best for humans to live on, apart from Earth? \ Let's think step by step. Important: answer briefly using bullet points in this format: + +# Observations: +# Thoughts: +# Answer:`; + +const evaluator = new Evaluator() + .containsText(15, "# Observations:") + .containsText(15, "# Thoughts:") + .containsText(15, "# Answer:") + .containsOnlyOne(35, ["# Observations:", "# Thoughts:", "# Answer:"]) + .startsWith(20, "# Observations:") + +const inferParams: InferenceParams = { + temperature: 0, + max_tokens: 500, +}; +const cotFormat = new LmTestCase({ + name: "CoT formatting", + prompt: prompt, + template: template, + evaluator: evaluator, + inferParams: inferParams, +}); + +export { cotFormat } + diff --git a/packages/evaluations/src/instructions/format/cot_points.ts b/packages/evaluations/src/instructions/format/cot_points.ts new file mode 100644 index 0000000..8468f20 --- /dev/null +++ b/packages/evaluations/src/instructions/format/cot_points.ts @@ -0,0 +1,37 @@ +import { InferenceParams } from "@locallm/types"; +import { PromptTemplate } from "modprompt"; +import { Evaluator } from "../../../../evaluate/src/evaluate.js"; +import { LmTestCase } from "../../../../evaluate/src/testcase.js"; + + +const template = new PromptTemplate("alpaca"); +const prompt = `What planet or moon of the solar system would be best for humans to live on, apart from Earth? \ +Let's think step by step. 
Important: answer briefly using this format: + +# Observations: (a numbered bullet points list) +# Thoughts: (a numbered bullet points list) +# Answer:`; + +const evaluator = new Evaluator(80) + .containsText(10, "# Observations:") + .containsText(10, "# Thoughts:") + .containsText(10, "# Answer:") + .startsWith(25, "# Observations:") + .containsOnlyOne(25, ["# Observations:", "# Thoughts:", "# Answer:"]) + .containsText(10, "1.") + .containsText(10, "2.") + +const inferParams: InferenceParams = { + temperature: 0, + max_tokens: 800, +}; +const cotPoints = new LmTestCase({ + name: "CoT bullet points", + prompt: prompt, + template: template, + evaluator: evaluator, + inferParams: inferParams, +}); + +export { cotPoints } + diff --git a/packages/evaluations/src/instructions/format/index.ts b/packages/evaluations/src/instructions/format/index.ts new file mode 100644 index 0000000..49fbd5a --- /dev/null +++ b/packages/evaluations/src/instructions/format/index.ts @@ -0,0 +1,11 @@ +import { cotFormat } from "./cot"; +import { jsonFormat } from "./json"; +import { simpleFormat } from "./simple"; +import { cotPoints } from "./cot_points"; +import { LmTestCase } from "../../../../evaluate/src/testcase.js"; + +const tests = new Array( + simpleFormat, jsonFormat, cotFormat, cotPoints +); + +export { tests } \ No newline at end of file diff --git a/packages/evaluations/src/instructions/format/json.ts b/packages/evaluations/src/instructions/format/json.ts new file mode 100644 index 0000000..e41ee2f --- /dev/null +++ b/packages/evaluations/src/instructions/format/json.ts @@ -0,0 +1,27 @@ +import { InferenceParams } from "@locallm/types"; +import { PromptTemplate } from "modprompt"; +import { Evaluator } from "../../../../evaluate/src/evaluate.js"; +import { LmTestCase } from "../../../../evaluate/src/testcase.js"; + + +const template = new PromptTemplate("alpaca"); +const prompt = `List the planets names of the solar system. Important: output only a json code block, nothing else.`; + +const evaluator = new Evaluator() + .isOnlyCodeBlock(50) + .containsValidJson(50) + +const inferParams: InferenceParams = { + temperature: 0, + max_tokens: 500, +}; +const jsonFormat = new LmTestCase({ + name: "Json code block", + prompt: prompt, + template: template, + evaluator: evaluator, + inferParams: inferParams, +}); + +export { jsonFormat } + diff --git a/packages/evaluations/src/instructions/format/simple.ts b/packages/evaluations/src/instructions/format/simple.ts new file mode 100644 index 0000000..dcb12c2 --- /dev/null +++ b/packages/evaluations/src/instructions/format/simple.ts @@ -0,0 +1,29 @@ +import { InferenceParams } from "@locallm/types"; +import { PromptTemplate } from "modprompt"; +import { Evaluator } from "../../../../evaluate/src/evaluate.js"; +import { LmTestCase } from "../../../../evaluate/src/testcase.js"; + + +const template = new PromptTemplate("alpaca"); +const prompt = `Do the monkeys have wings? 
Important: only answer with this exact format and nothing else: "yes" or "no"`; + +const evaluator = new Evaluator() + .startsWith(30, ["yes", "no"]) + //.containsText(20, ["yes", "no", "Yes", "No"]) + .isText(60, ["yes", "no"]) + .maxLength(10, 4) + +const inferParams: InferenceParams = { + temperature: 0, + max_tokens: 50, +}; +const simpleFormat = new LmTestCase({ + name: "Simple formatting", + prompt: prompt, + template: template, + evaluator: evaluator, + inferParams: inferParams, +}); + +export { simpleFormat } + diff --git a/packages/types/package.json b/packages/types/package.json index ea5ff3e..0acbe80 100644 --- a/packages/types/package.json +++ b/packages/types/package.json @@ -1,6 +1,6 @@ { "name": "@locallm/types", - "version": "0.0.14", + "version": "0.0.15", "description": "Shared data types for the LocalLm api", "repository": "https://github.com/synw/locallm", "scripts": { diff --git a/packages/types/src/interfaces.ts b/packages/types/src/interfaces.ts index 4f99118..bb80d3a 100644 --- a/packages/types/src/interfaces.ts +++ b/packages/types/src/interfaces.ts @@ -8,12 +8,14 @@ import { useApi } from "restmix"; * @property {number | undefined} ctx - The context window length, typically used to define how much of the previous data to consider. * @property {string | undefined} threads - The number of threads to use to run inference. * @property {number | undefined} gpu_layers - The number of layers to offload to the GPU. + * @property {{size: string, quant: string} | undefined} info - Some meta info about the model: parameter size and quantization level */ interface ModelConf { name: string; ctx: number; threads?: number; gpu_layers?: number; + info?: { size: string, quant: string }; } /**
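
Usage sketch (illustration only, not part of the patch): how the new ModelConf.info metadata and the inference stats now returned by the Ollama provider could be consumed. It assumes the Lm wrapper exported by @locallm/api exposes the LmProvider members used below (models, modelsInfo, loadModel, infer), that its constructor accepts a provider type, server url and onToken callback, and that an Ollama server with a pulled "mistral:instruct" model runs on the default port; all of these are assumptions to adapt to your setup.

```ts
// Sketch only: the constructor options and the model name below are assumptions, not part of this patch.
import { Lm } from "@locallm/api";

const lm = new Lm({
  providerType: "ollama",
  serverUrl: "http://localhost:11434",
  onToken: (token) => process.stdout.write(token),
});

async function main() {
  // The Ollama provider now fills ModelConf.info with the parameter size and
  // quantization level reported by the server (ctx is returned as -1).
  await lm.modelsInfo();
  for (const m of lm.models) {
    console.log(m.name, m.info?.size, m.info?.quant);
  }
  await lm.loadModel("mistral:instruct");
  const res = await lm.infer("List the planets of the solar system.", {
    temperature: 0,
    max_tokens: 200,
    stream: true,
  });
  // stats carries the raw metadata of the last streamed batch, with the
  // response, context and done fields stripped by the provider.
  console.log(res.stats);
}

main();
```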