From ccbb352f9d2ba8db06ccba1f356c488231252499 Mon Sep 17 00:00:00 2001 From: da730 Date: Tue, 26 Mar 2024 10:00:33 +0000 Subject: [PATCH 01/62] docs: generate changelog of release v1.2.7 --- docs/assets/changelog/en/release.md | 17 +++++++++++++++++ docs/assets/changelog/zh/release.md | 17 +++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 docs/assets/changelog/en/release.md create mode 100644 docs/assets/changelog/zh/release.md diff --git a/docs/assets/changelog/en/release.md b/docs/assets/changelog/en/release.md new file mode 100644 index 00000000..81d85f40 --- /dev/null +++ b/docs/assets/changelog/en/release.md @@ -0,0 +1,17 @@ +# v1.2.7 + +2024-03-26 + + +**🆕 New feature** + +- **@visactor/vmind**: support data aggregation with skylark` +- **@visactor/vmind**: support rule-based chart generation` +- **@visactor/vmind**: use fold to process the dataset when there are more than 1 y field` + +**🐛 Bug fix** + +fix several badcases in data aggregation and chart generation. + +[more detail about v1.2.7](https://github.com/VisActor/VMind/releases/tag/v1.2.7) + diff --git a/docs/assets/changelog/zh/release.md b/docs/assets/changelog/zh/release.md new file mode 100644 index 00000000..872e4557 --- /dev/null +++ b/docs/assets/changelog/zh/release.md @@ -0,0 +1,17 @@ +# v1.2.7 + +2024-03-26 + + +**🆕 新增功能** + +- **@visactor/vmind**: support data aggregation with skylark` +- **@visactor/vmind**: support rule-based chart generation` +- **@visactor/vmind**: use fold to process the dataset when there are more than 1 y field` + +**🐛 功能修复** + +fix several badcases in data aggregation and chart generation. + +[更多详情请查看 v1.2.7](https://github.com/VisActor/VMind/releases/tag/v1.2.7) + From 80337040f4b2def5a4d88c7ac6da2d517802870b Mon Sep 17 00:00:00 2001 From: da730 Date: Mon, 1 Apr 2024 20:40:32 +0800 Subject: [PATCH 02/62] docs: fix link error --- docs/assets/api/en/generateChart.md | 2 +- docs/assets/api/zh/generateChart.md | 2 +- docs/assets/changelog/en/changelog.md | 0 docs/assets/changelog/zh/changelog.md | 0 4 files changed, 2 insertions(+), 2 deletions(-) delete mode 100644 docs/assets/changelog/en/changelog.md delete mode 100644 docs/assets/changelog/zh/changelog.md diff --git a/docs/assets/api/en/generateChart.md b/docs/assets/api/en/generateChart.md index 2932f37f..df6f58ff 100644 --- a/docs/assets/api/en/generateChart.md +++ b/docs/assets/api/en/generateChart.md @@ -8,7 +8,7 @@ The generateChart function is used for intelligent chart generation. - GPT-3.5 - GPT-4 - [skylark2-pro](https://www.volcengine.com/product/yunque) -- [chart-advisor](../guide/Basic_tutorial/Chart_Advisor) +- [chart-advisor](../guide/Basic_Tutorial/Chart_Advisor) ## Interface Parameters: diff --git a/docs/assets/api/zh/generateChart.md b/docs/assets/api/zh/generateChart.md index 94dbd06c..5bed2b9f 100644 --- a/docs/assets/api/zh/generateChart.md +++ b/docs/assets/api/zh/generateChart.md @@ -8,7 +8,7 @@ generateChart函数用于图表智能生成。 - GPT-3.5 - GPT-4 - [skylark2-pro](https://www.volcengine.com/product/yunque) -- [chart-advisor](../guide/Basic_tutorial/Chart_Advisor) +- [chart-advisor](../guide/Basic_Tutorial/Chart_Advisor) ## 接口参数: diff --git a/docs/assets/changelog/en/changelog.md b/docs/assets/changelog/en/changelog.md deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/assets/changelog/zh/changelog.md b/docs/assets/changelog/zh/changelog.md deleted file mode 100644 index e69de29b..00000000 From 4db958a1fd618c402c4ef3dcf555e244cbcbaaa3 Mon Sep 17 00:00:00 2001 From: da730 Date: Wed, 3 Apr 2024 14:41:49 +0800 Subject: [PATCH 03/62] fix: replace alasql keywords in sql --- .../browser/src/constants/mockData.ts | 55 +++++ .../__tests__/browser/src/pages/DataInput.tsx | 8 +- .../vmind/src/common/dataProcess/constants.ts | 199 ++++++++++++++++++ .../vmind/src/common/dataProcess/utils.ts | 31 +-- packages/vmind/src/core/VMind.ts | 2 +- 5 files changed, 277 insertions(+), 18 deletions(-) create mode 100644 packages/vmind/src/common/dataProcess/constants.ts diff --git a/packages/vmind/__tests__/browser/src/constants/mockData.ts b/packages/vmind/__tests__/browser/src/constants/mockData.ts index f30fe0d7..97eb1fe6 100644 --- a/packages/vmind/__tests__/browser/src/constants/mockData.ts +++ b/packages/vmind/__tests__/browser/src/constants/mockData.ts @@ -3674,3 +3674,58 @@ export const mockUserInput17 = { 2024-03-07,0.024929771518461413,-1,26.099474885828787,653568,25685979,653568,0.00305250127580487,0.00012037169363684177,-1,0.019630556105233156`, input: '使用折线图展示' }; + +export const mockUserInput18 = { + csv: `Jira Key,Planned BP,Count +JIRA-27358,BP1.4.1,1 +JIRA-27356,BP1.4.1,1 +JIRA-27350,BP1.5.0,1 +JIRA-27349,BP1.4.1,1 +JIRA-27348,BP1.4.1,1 +JIRA-27347,BP1.4.1,1 +JIRA-27344,BP1.5.0,1 +JIRA-27343,BP1.4.1,1 +JIRA-27339,BP1.4.1,1 +JIRA-27338,BP1.4.1,1 +JIRA-27336,BP1.4.1,1 +JIRA-27335,BP1.4.1,1 +JIRA-27334,BP1.4.1,1 +JIRA-27333,BP1.5.0,1 +JIRA-27331,BP1.4.1,1 +JIRA-27330,BP1.4.1,1 +JIRA-27322,BP1.4.1,1 +JIRA-27321,BP1.4.1,1 +JIRA-27320,BP1.4.0,1 +JIRA-27319,BP1.4.0,1 +JIRA-27316,BP1.4.0,1 +JIRA-27315,BP1.4.1,1 +JIRA-27314,BP1.5.0,1 +JIRA-27311,BP1.4.1,1 +JIRA-27310,BP1.4.1,1 +JIRA-27309,BP1.4.1,1 +JIRA-27308,BP1.4.1,1 +JIRA-27307,BP1.4.1,1 +JIRA-27306,BP1.4.1,1 +JIRA-27305,BP1.4.1,1 +JIRA-27304,BP1.4.1,1 +JIRA-27303,BP1.4.1,1 +JIRA-27302,BP1.4.1,1 +JIRA-27301,BP1.4.1,1 +JIRA-27300,BP1.4.1,1 +JIRA-27299,BP1.4.1,1 +JIRA-27297,BP1.4.1,1 +JIRA-27296,BP1.4.1,1 +JIRA-27295,BP1.4.1,1 +JIRA-27294,BP1.4.1,1 +JIRA-27293,BP1.4.1,1 +JIRA-27292,BP1.4.1,1 +JIRA-27291,BP1.4.1,1 +JIRA-27290,BP1.5.0,1 +JIRA-27289,BP1.4.1,1 +JIRA-27281,BP1.4.1,1 +JIRA-27279,BP1.4.0,1 +JIRA-27278,BP1.4.1,1 +JIRA-27277,BP1.4.0,1 +JIRA-27276,BP1.4.0,1`, + input: '按照bp分类统计count之和,绘制柱状堆叠图' +}; diff --git a/packages/vmind/__tests__/browser/src/pages/DataInput.tsx b/packages/vmind/__tests__/browser/src/pages/DataInput.tsx index 7b65aef0..b8f6e2b7 100644 --- a/packages/vmind/__tests__/browser/src/pages/DataInput.tsx +++ b/packages/vmind/__tests__/browser/src/pages/DataInput.tsx @@ -34,7 +34,8 @@ import { mockUserInput13, mockUserInput14, mockUserInput16, - mockUserInput17 + mockUserInput17, + mockUserInput18 } from '../constants/mockData'; import VMind from '../../../../src/index'; import { Model } from '../../../../src/index'; @@ -77,7 +78,8 @@ const demoDataList: { [key: string]: any } = { 'Shopping Mall Sales Performance': mallSalesData, 'Global GDP': mockUserInput6Eng, 'Sales of different drinkings': mockUserInput3Eng, - 'Multi measure': mockUserInput17 + 'Multi measure': mockUserInput17, + DataQuery: mockUserInput18 }; const globalVariables = (import.meta as any).env; @@ -104,7 +106,7 @@ export function DataInput(props: IPropsType) { const [loading, setLoading] = useState(false); - const vmind = useMemo(() => { + const vmind: any = useMemo(() => { if (!url || !apiKey) { Message.error('Please set your LLM URL and API Key!!!'); return null; diff --git a/packages/vmind/src/common/dataProcess/constants.ts b/packages/vmind/src/common/dataProcess/constants.ts new file mode 100644 index 00000000..a158d7a7 --- /dev/null +++ b/packages/vmind/src/common/dataProcess/constants.ts @@ -0,0 +1,199 @@ +import { generateRandomString } from './utils'; + +export const alasqlKeywordList = [ + 'ABSOLUTE', + 'ACTION', + 'ADD', + 'AGGR', + 'ALL', + 'ALTER', + 'AND', + 'ANTI', + 'ANY', + 'APPLY', + 'ARRAY', + 'AS', + 'ASSERT', + 'ASC', + 'ATTACH', + 'AUTOINCREMENT', + 'AUTO_INCREMENT', + 'AVG', + 'BEGIN', + 'BETWEEN', + 'BREAK', + 'BY', + 'CALL', + 'CASE', + 'CAST', + 'CHECK', + 'CLASS', + 'CLOSE', + 'COLLATE', + 'COLUMN', + 'COLUMNS', + 'COMMIT', + 'CONSTRAINT', + 'CONTENT', + 'CONTINUE', + 'CONVERT', + 'CORRESPONDING', + 'COUNT', + 'CREATE', + 'CROSS', + 'CUBE', + 'CURRENT_TIMESTAMP', + 'CURSOR', + 'DATABASE', + 'DECLARE', + 'DEFAULT', + 'DELETE', + 'DELETED', + 'DESC', + 'DETACH', + 'DISTINCT', + 'DOUBLEPRECISION', + 'DROP', + 'ECHO', + 'EDGE', + 'END', + 'ENUM', + 'ELSE', + 'EXCEPT', + 'EXISTS', + 'EXPLAIN', + 'FALSE', + 'FETCH', + 'FIRST', + 'FOREIGN', + 'FROM', + 'GO', + 'GRAPH', + 'GROUP', + 'GROUPING', + 'HAVING', + 'HELP', + 'IF', + 'IDENTITY', + 'IS', + 'IN', + 'INDEX', + 'INNER', + 'INSERT', + 'INSERTED', + 'INTERSECT', + 'INTO', + 'JOIN', + 'KEY', + 'LAST', + 'LET', + 'LEFT', + 'LIKE', + 'LIMIT', + 'LOOP', + 'MATCHED', + 'MATRIX', + 'MAX', + 'MERGE', + 'MIN', + 'MINUS', + 'MODIFY', + 'NATURAL', + 'NEXT', + 'NEW', + 'NOCASE', + 'NO', + 'NOT', + 'NULL', + 'OFF', + 'ON', + 'ONLY', + 'OFFSET', + 'OPEN', + 'OPTION', + 'OR', + 'ORDER', + 'OUTER', + 'OVER', + 'PATH', + 'PARTITION', + 'PERCENT', + 'PLAN', + 'PRIMARY', + 'PRINT', + 'PRIOR', + 'QUERY', + 'READ', + 'RECORDSET', + 'REDUCE', + 'REFERENCES', + 'RELATIVE', + 'REPLACE', + 'REMOVE', + 'RENAME', + 'REQUIRE', + 'RESTORE', + 'RETURN', + 'RETURNS', + 'RIGHT', + 'ROLLBACK', + 'ROLLUP', + 'ROW', + 'SCHEMA(S)?', + 'SEARCH', + 'SELECT', + 'SEMI', + 'SET', + 'SETS', + 'SHOW', + 'SOME', + 'SOURCE', + 'STRATEGY', + 'STORE', + 'SUM', + 'total', + 'TABLE', + 'TABLES', + 'TARGET', + 'TEMP', + 'TEMPORARY', + 'TEXTSTRING', + 'THEN', + 'TIMEOUT', + 'TO', + 'TOP', + 'TRAN', + 'TRANSACTION', + 'TRIGGER', + 'TRUE', + 'TRUNCATE', + 'UNION', + 'UNIQUE', + 'UPDATE', + 'USE', + 'USING', + 'VALUE', + 'VERTEX', + 'VIEW', + 'WHEN', + 'WHERE', + 'WHILE', + 'WITH', + 'WORK' +]; + +export const operatorList = [ + ['+', `_${generateRandomString(3)}_PLUS_${generateRandomString(3)}_`], + ['-', `_${generateRandomString(3)}_DASH_${generateRandomString(3)}_`], + ['*', `_${generateRandomString(3)}_ASTERISK_${generateRandomString(3)}_`], + ['/', `_${generateRandomString(3)}_SLASH_${generateRandomString(3)}_`] +]; + +export const operators = operatorList.map(op => op[0]); + +export const RESERVE_REPLACE_MAP = new Map([ + ...operatorList, + ...(alasqlKeywordList.map(keyword => [keyword, generateRandomString(10)]) as any) +]); + +console.log(RESERVE_REPLACE_MAP); diff --git a/packages/vmind/src/common/dataProcess/utils.ts b/packages/vmind/src/common/dataProcess/utils.ts index 4d276980..596adc49 100644 --- a/packages/vmind/src/common/dataProcess/utils.ts +++ b/packages/vmind/src/common/dataProcess/utils.ts @@ -1,8 +1,9 @@ -import { sampleSize, isNumber, isInteger, isString, isArray } from 'lodash'; +import { sampleSize, isNumber, isInteger, isString, isArray, capitalize, startCase } from 'lodash'; import { DataItem, DataType, ROLE, SimpleFieldInfo } from '../../typings'; import dayjs from 'dayjs'; import { uniqArray } from '@visactor/vutils'; import alasql from 'alasql'; +import { RESERVE_REPLACE_MAP, operators } from './constants'; export const readTopNLine = (csvFile: string, n: number) => { // get top n lines of a csv file @@ -239,16 +240,6 @@ export const replaceByMap = (str: string, replaceMap: Map) => { return finalSql; }; -const RESERVE_REPLACE_MAP = new Map([ - ['+', `_${generateRandomString(3)}_PLUS_${generateRandomString(3)}_`], - ['-', `_${generateRandomString(3)}_DASH_${generateRandomString(3)}_`], - ['*', `_${generateRandomString(3)}_ASTERISK_${generateRandomString(3)}_`], - ['/', `_${generateRandomString(3)}_SLASH_${generateRandomString(3)}_`], - ['value', generateRandomString(10)], - ['key', generateRandomString(10)], - ['total', generateRandomString(10)] -]); - /** * replace operator and reserved words inside the column name in the sql str * operators such as +, -, *, / in column names in sql will cause ambiguity and parsing error @@ -262,10 +253,24 @@ const RESERVE_REPLACE_MAP = new Map([ * */ export const replaceInvalidWords = (sql: string, columns: string[]) => { + const operatorReplaceMap = new Map(); + //replace column names according to RESERVED_REPLACE_MAP const validColumnNames = columns.map(column => { const nameWithoutOperator = [...RESERVE_REPLACE_MAP.keys()].reduce((prev, cur) => { - return replaceAll(prev, cur, RESERVE_REPLACE_MAP.get(cur.toLowerCase())); + //try to match the keywords in column names with different style + const replaceStr = [cur.toUpperCase(), cur.toLowerCase(), capitalize(cur)].find(str => { + //operators need to be replaced if it is includes by the column name + //while other reserved words need to be replaced if it is exactly the same as column words + return operators.includes(cur) ? prev.includes(str) : prev === str; + }); + if (replaceStr) { + if (!operatorReplaceMap.has(replaceStr)) { + operatorReplaceMap.set(replaceStr, RESERVE_REPLACE_MAP.get(cur)); + } + return replaceAll(prev, replaceStr, RESERVE_REPLACE_MAP.get(cur)); + } + return prev; }, column); return nameWithoutOperator; @@ -289,8 +294,6 @@ export const replaceInvalidWords = (sql: string, columns: string[]) => { //replace non-ascii characters in sql const { validStr: sqlWithoutAscii, replaceMap: asciiReplaceMap } = replaceNonASCIICharacters(sqlWithoutOperator); - const operatorReplaceMap = new Map(RESERVE_REPLACE_MAP); - return { validStr: sqlWithoutAscii, columnReplaceMap: operatorReplaceMap, sqlReplaceMap: asciiReplaceMap }; }; diff --git a/packages/vmind/src/core/VMind.ts b/packages/vmind/src/core/VMind.ts index 1e6b7acb..8e78e82d 100644 --- a/packages/vmind/src/core/VMind.ts +++ b/packages/vmind/src/core/VMind.ts @@ -37,7 +37,7 @@ class VMind { * @param userPrompt * @returns */ - parseCSVDataWithLLM(csvString: string, userPrompt: string) { + async parseCSVDataWithLLM(csvString: string, userPrompt: string) { if (this.getModelType() === ModelType.GPT) { return parseCSVDataWithGPT(csvString, userPrompt, this._options); } From 65e491723da946d680c3485ff5e187b0de87f33f Mon Sep 17 00:00:00 2001 From: da730 Date: Wed, 3 Apr 2024 14:46:33 +0800 Subject: [PATCH 04/62] fix: replace alasql keywords in sql --- packages/vmind/src/common/dataProcess/constants.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/vmind/src/common/dataProcess/constants.ts b/packages/vmind/src/common/dataProcess/constants.ts index a158d7a7..869f8fb2 100644 --- a/packages/vmind/src/common/dataProcess/constants.ts +++ b/packages/vmind/src/common/dataProcess/constants.ts @@ -195,5 +195,3 @@ export const RESERVE_REPLACE_MAP = new Map([ ...operatorList, ...(alasqlKeywordList.map(keyword => [keyword, generateRandomString(10)]) as any) ]); - -console.log(RESERVE_REPLACE_MAP); From ec3fa20081b506c46aa4bc6cbdb42f89cfa27207 Mon Sep 17 00:00:00 2001 From: da730 Date: Wed, 3 Apr 2024 14:47:51 +0800 Subject: [PATCH 05/62] feat: add rush changelog --- .../fix-dataAggregation-issues_2024-04-03-06-47.json | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 common/changes/@visactor/vmind/fix-dataAggregation-issues_2024-04-03-06-47.json diff --git a/common/changes/@visactor/vmind/fix-dataAggregation-issues_2024-04-03-06-47.json b/common/changes/@visactor/vmind/fix-dataAggregation-issues_2024-04-03-06-47.json new file mode 100644 index 00000000..535e27b0 --- /dev/null +++ b/common/changes/@visactor/vmind/fix-dataAggregation-issues_2024-04-03-06-47.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@visactor/vmind", + "comment": "replace alasql keywords in sql", + "type": "none" + } + ], + "packageName": "@visactor/vmind" +} \ No newline at end of file From 43149868db127aea9f19d29fd344153b6166fafd Mon Sep 17 00:00:00 2001 From: da730 Date: Sun, 7 Apr 2024 15:29:42 +0800 Subject: [PATCH 06/62] docs: update changelog --- docs/assets/changelog/en/release.md | 60 ++++++++++++++++++++++++----- docs/assets/changelog/zh/release.md | 54 ++++++++++++++++++++++---- 2 files changed, 97 insertions(+), 17 deletions(-) diff --git a/docs/assets/changelog/en/release.md b/docs/assets/changelog/en/release.md index 81d85f40..f6ab10b3 100644 --- a/docs/assets/changelog/en/release.md +++ b/docs/assets/changelog/en/release.md @@ -1,17 +1,57 @@ + # v1.2.7 -2024-03-26 +March 26, 2024 + +**🆕 New Features** + +- **@visactor/vmind**: support data aggregation with skylark +- **@visactor/vmind**: support rule-based chart generation +- **@visactor/vmind**: use fold to process the dataset when there are more than 1 y field + +**🐛 Bug Fixes** + +Fixed several badcases in data aggregation and chart generation. + +[For more details, please see v1.2.7](https://github.com/VisActor/VMind/releases/tag/v1.2.7) + +# v1.2.4 + +**Planned Features** + +- GIF, video export function supports node environment +- Further standardize API +- Remove unrelated dependencies +- Optimize data aggregation function, fix a large number of badcases +- Smart Chart Generation fixes badcases + +# v1.2.3 + +**Planned Features** + +- Data aggregation function, access to headless bi calculator module, vutils +- Intelligent data aggregation, generate SQL through LLM, aggregate, filter, and sort data + +# v1.2.0 + +**Planned Features** + +1. Data module restructuring, supports direct external transmission of the dataset, bypassing the interpretation of field types by LLM. +2. Intelligent chart generation accesses skylark pro, skylark2-pro-4k models +3. Optimize chart generation performance +# v1.1.0 -**🆕 New feature** - -- **@visactor/vmind**: support data aggregation with skylark` -- **@visactor/vmind**: support rule-based chart generation` -- **@visactor/vmind**: use fold to process the dataset when there are more than 1 y field` +**Planned Features** -**🐛 Bug fix** - -fix several badcases in data aggregation and chart generation. +1. Support new chart types: Dual Axis Chart, Funnel Chart, Waterfall Chart, Box Plot +2. Refactor the data module, dock with VizSchema +3. Add a fallback mechanism when the chart generation fails, recommend charts using the chart-advisor module. -[more detail about v1.2.7](https://github.com/VisActor/VMind/releases/tag/v1.2.7) +# v1.0.6-alpha.5 +1. Support new chart types: Sankey Diagram, Radar Chart, Rose Chart. +2. Fixed dependency errors. +3. Support for node-side calls. +4. Support for entering url and request parameters during initialization, customize model request methods: you can customize parameters such as LLM service URL, request headers, request methods, model names, maximum tokens, and temperature, etc. +5. Removed the ffmpeg dependency, replaced with external input to reduce the package volume and difficulty of installing dependencies. diff --git a/docs/assets/changelog/zh/release.md b/docs/assets/changelog/zh/release.md index 872e4557..c093d858 100644 --- a/docs/assets/changelog/zh/release.md +++ b/docs/assets/changelog/zh/release.md @@ -1,17 +1,57 @@ + # v1.2.7 2024-03-26 - **🆕 新增功能** - -- **@visactor/vmind**: support data aggregation with skylark` -- **@visactor/vmind**: support rule-based chart generation` -- **@visactor/vmind**: use fold to process the dataset when there are more than 1 y field` + +- **@visactor/vmind**:支持 skylark 进行数据聚合 +- **@visactor/vmind**:支持规则型图表生成 +- **@visactor/vmind**:在存在多个y字段时使用fold来处理数据集 **🐛 功能修复** - -fix several badcases in data aggregation and chart generation. + +修复了在数据聚合和图表生成中的若干问题。 [更多详情请查看 v1.2.7](https://github.com/VisActor/VMind/releases/tag/v1.2.7) +# v1.2.4 + +**规划功能:** + +- GIF、视频导出功能支持node环境 +- 进一步规范API +- 移除无关依赖 +- 数据聚合功能优化,修复大量badcase +- 图表智能生成 badcase修复 + +# v1.2.3 + +**规划功能:** + +- 数据聚合功能,接入headless bi calculator模块、vutils +- 智能数据聚合,通过大模型生成sql,对数据进行聚合、筛选、排序 + +# v1.2.0 + +**规划功能:** + +1. 数据模块改造,支持外部直接传dataset,不使用模型进行字段类型解析 +2. 图表智能生成接入skylark pro,skylark2-pro-4k模型 +3. 图表生成性能优化 + +# v1.1.0 + +**规划功能:** + +1. 新增图表类型支持:双轴图、漏斗图、瀑布图、箱型图 +2. 数据模块重构,对接VizSchema +3. 增加生成图表失败时的兜底机制,使用chart-advisor模块进行图表推荐 + +# v1.0.6-alpha.5 + +1. 新增图表类型支持:桑基图、雷达图、玫瑰图 +2. 修复依赖报错 +3. 支持node端调用 +4. 支持初始化时传入url和请求参数,自定义模型请求方式:可以自定义包括大模型服务URL、请求头、请求方法、模型名称、最大tokens和温度等参数 +5. 移除ffmpeg依赖,改为外部传入,减少了包体积和依赖安装难度 From 6ace60142d74002176777e92d8a5fb63245ec7af Mon Sep 17 00:00:00 2001 From: da730 Date: Mon, 8 Apr 2024 18:34:14 +0800 Subject: [PATCH 07/62] feat: add base tools --- packages/vmind/src/application/base/index.ts | 1 + packages/vmind/src/application/base/types.ts | 3 +++ .../src/application/chartGeneration/index.ts | 0 packages/vmind/src/baseTools/parser/index.ts | 13 +++++++++++ packages/vmind/src/baseTools/parser/types.ts | 5 +++++ packages/vmind/src/baseTools/patcher/index.ts | 22 +++++++++++++++++++ packages/vmind/src/baseTools/patcher/types.ts | 8 +++++++ packages/vmind/src/baseTools/prompt/index.ts | 14 ++++++++++++ packages/vmind/src/baseTools/prompt/types.ts | 5 +++++ .../vmind/src/baseTools/transformer/index.ts | 15 +++++++++++++ .../vmind/src/baseTools/transformer/types.ts | 6 +++++ packages/vmind/src/taskNode/index.ts | 0 packages/vmind/src/types/index.ts | 1 + 13 files changed, 93 insertions(+) create mode 100644 packages/vmind/src/application/base/index.ts create mode 100644 packages/vmind/src/application/base/types.ts create mode 100644 packages/vmind/src/application/chartGeneration/index.ts create mode 100644 packages/vmind/src/baseTools/parser/index.ts create mode 100644 packages/vmind/src/baseTools/parser/types.ts create mode 100644 packages/vmind/src/baseTools/patcher/index.ts create mode 100644 packages/vmind/src/baseTools/patcher/types.ts create mode 100644 packages/vmind/src/baseTools/prompt/index.ts create mode 100644 packages/vmind/src/baseTools/prompt/types.ts create mode 100644 packages/vmind/src/baseTools/transformer/index.ts create mode 100644 packages/vmind/src/baseTools/transformer/types.ts create mode 100644 packages/vmind/src/taskNode/index.ts create mode 100644 packages/vmind/src/types/index.ts diff --git a/packages/vmind/src/application/base/index.ts b/packages/vmind/src/application/base/index.ts new file mode 100644 index 00000000..07b44980 --- /dev/null +++ b/packages/vmind/src/application/base/index.ts @@ -0,0 +1 @@ +export class BaseApplication {} diff --git a/packages/vmind/src/application/base/types.ts b/packages/vmind/src/application/base/types.ts new file mode 100644 index 00000000..b83036fc --- /dev/null +++ b/packages/vmind/src/application/base/types.ts @@ -0,0 +1,3 @@ +export interface IApplication { + tasks: TaskNode[]; +} diff --git a/packages/vmind/src/application/chartGeneration/index.ts b/packages/vmind/src/application/chartGeneration/index.ts new file mode 100644 index 00000000..e69de29b diff --git a/packages/vmind/src/baseTools/parser/index.ts b/packages/vmind/src/baseTools/parser/index.ts new file mode 100644 index 00000000..0ec35366 --- /dev/null +++ b/packages/vmind/src/baseTools/parser/index.ts @@ -0,0 +1,13 @@ +import { IParser } from './types'; + +export class Parser implements IParser { + input: string; + output: T; + constructor(input: string) { + this.input = input; + } + + parse() { + return this.input as T; + } +} diff --git a/packages/vmind/src/baseTools/parser/types.ts b/packages/vmind/src/baseTools/parser/types.ts new file mode 100644 index 00000000..d6373430 --- /dev/null +++ b/packages/vmind/src/baseTools/parser/types.ts @@ -0,0 +1,5 @@ +export interface IParser { + input: string; + output: T; + parse: () => T; +} diff --git a/packages/vmind/src/baseTools/patcher/index.ts b/packages/vmind/src/baseTools/patcher/index.ts new file mode 100644 index 00000000..3506d3b5 --- /dev/null +++ b/packages/vmind/src/baseTools/patcher/index.ts @@ -0,0 +1,22 @@ +import { IPatcher } from './types'; +import { Transformer } from '../transformer'; + +export class Patcher implements IPatcher { + input: T; + output: T; + context: C; + pipelines: Transformer[]; + constructor(input: T, context: C, transformers: Transformer[]) { + this.input = input; + this.context = context; + this.pipelines = transformers; + } + + patch() { + const result = this.pipelines.reduce((pre: Partial, transformer: Transformer) => { + const result = transformer.transform(); + return result; + }, this.input); + return result; + } +} diff --git a/packages/vmind/src/baseTools/patcher/types.ts b/packages/vmind/src/baseTools/patcher/types.ts new file mode 100644 index 00000000..71f3203d --- /dev/null +++ b/packages/vmind/src/baseTools/patcher/types.ts @@ -0,0 +1,8 @@ +import { Transformer } from '../transformer'; +export interface IPatcher { + input: T; + output: T; + pipelines: Transformer[]; + context: C; + patch: () => T; +} diff --git a/packages/vmind/src/baseTools/prompt/index.ts b/packages/vmind/src/baseTools/prompt/index.ts new file mode 100644 index 00000000..eb1c0d55 --- /dev/null +++ b/packages/vmind/src/baseTools/prompt/index.ts @@ -0,0 +1,14 @@ +import { IPrompt } from './types'; + +export class Prompt implements IPrompt { + template: string; + context: T; + + constructor(template: string, context: T) { + this.template = template; + this.context = context; + } + getPrompt() { + return this.template; + } +} diff --git a/packages/vmind/src/baseTools/prompt/types.ts b/packages/vmind/src/baseTools/prompt/types.ts new file mode 100644 index 00000000..cc6baab6 --- /dev/null +++ b/packages/vmind/src/baseTools/prompt/types.ts @@ -0,0 +1,5 @@ +export interface IPrompt { + readonly template: string; + readonly context: T; + getPrompt: () => string; +} diff --git a/packages/vmind/src/baseTools/transformer/index.ts b/packages/vmind/src/baseTools/transformer/index.ts new file mode 100644 index 00000000..aa6369a1 --- /dev/null +++ b/packages/vmind/src/baseTools/transformer/index.ts @@ -0,0 +1,15 @@ +import { ITransformer } from './types'; + +export class Transformer implements ITransformer { + input: I; + context: T; + output: DSL; + constructor(input: I, context: T) { + this.input = input; + this.context = context; + } + + transform() { + return this.output; + } +} diff --git a/packages/vmind/src/baseTools/transformer/types.ts b/packages/vmind/src/baseTools/transformer/types.ts new file mode 100644 index 00000000..a16ffe8f --- /dev/null +++ b/packages/vmind/src/baseTools/transformer/types.ts @@ -0,0 +1,6 @@ +export interface ITransformer { + input: I; + context: T; + output: DSL; + transform: () => DSL; +} diff --git a/packages/vmind/src/taskNode/index.ts b/packages/vmind/src/taskNode/index.ts new file mode 100644 index 00000000..e69de29b diff --git a/packages/vmind/src/types/index.ts b/packages/vmind/src/types/index.ts new file mode 100644 index 00000000..c8ea88b0 --- /dev/null +++ b/packages/vmind/src/types/index.ts @@ -0,0 +1 @@ +export type Context = Record; From 1bd59bb6f4858a26647f02a09c616adb59aab856 Mon Sep 17 00:00:00 2001 From: da730 Date: Mon, 8 Apr 2024 21:42:20 +0800 Subject: [PATCH 08/62] feat: add task nodes --- .../vmind/src/baseTools/chatManager/index.ts | 22 +++++++++ .../vmind/src/baseTools/chatManager/types.ts | 10 ++++ packages/vmind/src/baseTools/parser/index.ts | 10 +--- packages/vmind/src/baseTools/parser/types.ts | 4 +- packages/vmind/src/baseTools/patcher/index.ts | 19 +++----- packages/vmind/src/baseTools/patcher/types.ts | 7 +-- .../vmind/src/baseTools/transformer/index.ts | 14 ++---- .../vmind/src/baseTools/transformer/types.ts | 7 +-- .../src/gpt/chart-generation/NLToChart.ts | 2 +- packages/vmind/src/gpt/dataProcess/index.ts | 2 +- .../src/gpt/dataProcess/query/queryDataset.ts | 2 +- packages/vmind/src/taskNode/base.ts | 13 +++++ packages/vmind/src/taskNode/index.ts | 0 .../vmind/src/taskNode/llmBasedTaskNode.ts | 48 +++++++++++++++++++ .../vmind/src/taskNode/ruleBasedTaskNode.ts | 23 +++++++++ packages/vmind/src/taskNode/types.ts | 37 ++++++++++++++ packages/vmind/src/{gpt => taskNode}/utils.ts | 1 + packages/vmind/src/typings/index.ts | 29 ----------- 18 files changed, 174 insertions(+), 76 deletions(-) create mode 100644 packages/vmind/src/baseTools/chatManager/index.ts create mode 100644 packages/vmind/src/baseTools/chatManager/types.ts create mode 100644 packages/vmind/src/taskNode/base.ts delete mode 100644 packages/vmind/src/taskNode/index.ts create mode 100644 packages/vmind/src/taskNode/llmBasedTaskNode.ts create mode 100644 packages/vmind/src/taskNode/ruleBasedTaskNode.ts create mode 100644 packages/vmind/src/taskNode/types.ts rename packages/vmind/src/{gpt => taskNode}/utils.ts (99%) diff --git a/packages/vmind/src/baseTools/chatManager/index.ts b/packages/vmind/src/baseTools/chatManager/index.ts new file mode 100644 index 00000000..826274c2 --- /dev/null +++ b/packages/vmind/src/baseTools/chatManager/index.ts @@ -0,0 +1,22 @@ +import { Chat, ChatRole } from './types'; + +export class ChatManager { + chatList: Chat[]; + + constructor() { + this.chatList = []; + } + + addChat(content: string, role: ChatRole) { + const chatNum = this.chatList.length; + this.chatList.push({ + content, + role, + index: chatNum + }); + } + + getLatestUserMessage() { + return this.chatList.find(chat => chat.role === ChatRole.USER); + } +} diff --git a/packages/vmind/src/baseTools/chatManager/types.ts b/packages/vmind/src/baseTools/chatManager/types.ts new file mode 100644 index 00000000..9d08b3d6 --- /dev/null +++ b/packages/vmind/src/baseTools/chatManager/types.ts @@ -0,0 +1,10 @@ +export enum ChatRole { + USER = 'user', + ASSISTANT = 'assistant', + SYSTEM = 'system' +} +export type Chat = { + index: number; + role: ChatRole; + content: string; +}; diff --git a/packages/vmind/src/baseTools/parser/index.ts b/packages/vmind/src/baseTools/parser/index.ts index 0ec35366..3bfccca3 100644 --- a/packages/vmind/src/baseTools/parser/index.ts +++ b/packages/vmind/src/baseTools/parser/index.ts @@ -1,13 +1,7 @@ import { IParser } from './types'; export class Parser implements IParser { - input: string; - output: T; - constructor(input: string) { - this.input = input; - } - - parse() { - return this.input as T; + parse(input: string) { + return input as T; } } diff --git a/packages/vmind/src/baseTools/parser/types.ts b/packages/vmind/src/baseTools/parser/types.ts index d6373430..2f81ed15 100644 --- a/packages/vmind/src/baseTools/parser/types.ts +++ b/packages/vmind/src/baseTools/parser/types.ts @@ -1,5 +1,3 @@ export interface IParser { - input: string; - output: T; - parse: () => T; + parse: (input: string) => T; } diff --git a/packages/vmind/src/baseTools/patcher/index.ts b/packages/vmind/src/baseTools/patcher/index.ts index 3506d3b5..ae79560b 100644 --- a/packages/vmind/src/baseTools/patcher/index.ts +++ b/packages/vmind/src/baseTools/patcher/index.ts @@ -1,22 +1,17 @@ import { IPatcher } from './types'; import { Transformer } from '../transformer'; -export class Patcher implements IPatcher { - input: T; - output: T; - context: C; - pipelines: Transformer[]; - constructor(input: T, context: C, transformers: Transformer[]) { - this.input = input; - this.context = context; +export class Patcher implements IPatcher { + pipelines: Transformer, Context, T>[]; + constructor(transformers: Transformer, Context, T>[]) { this.pipelines = transformers; } - patch() { - const result = this.pipelines.reduce((pre: Partial, transformer: Transformer) => { - const result = transformer.transform(); + patch(input: Partial, context: Context) { + const result: T = this.pipelines.reduce((pre: Partial, transformer: Transformer, Context, T>) => { + const result = transformer.transform(pre, context); return result; - }, this.input); + }, input) as T; return result; } } diff --git a/packages/vmind/src/baseTools/patcher/types.ts b/packages/vmind/src/baseTools/patcher/types.ts index 71f3203d..21502ac6 100644 --- a/packages/vmind/src/baseTools/patcher/types.ts +++ b/packages/vmind/src/baseTools/patcher/types.ts @@ -1,8 +1,5 @@ import { Transformer } from '../transformer'; export interface IPatcher { - input: T; - output: T; - pipelines: Transformer[]; - context: C; - patch: () => T; + pipelines: Transformer, C, T>[]; + patch: (input: Partial, context: C) => T; } diff --git a/packages/vmind/src/baseTools/transformer/index.ts b/packages/vmind/src/baseTools/transformer/index.ts index aa6369a1..01bf0fe0 100644 --- a/packages/vmind/src/baseTools/transformer/index.ts +++ b/packages/vmind/src/baseTools/transformer/index.ts @@ -1,15 +1,7 @@ import { ITransformer } from './types'; -export class Transformer implements ITransformer { - input: I; - context: T; - output: DSL; - constructor(input: I, context: T) { - this.input = input; - this.context = context; - } - - transform() { - return this.output; +export class Transformer implements ITransformer { + transform(input: Input, context: Context): DSL { + return input as unknown as DSL; } } diff --git a/packages/vmind/src/baseTools/transformer/types.ts b/packages/vmind/src/baseTools/transformer/types.ts index a16ffe8f..579f0e7c 100644 --- a/packages/vmind/src/baseTools/transformer/types.ts +++ b/packages/vmind/src/baseTools/transformer/types.ts @@ -1,6 +1,3 @@ -export interface ITransformer { - input: I; - context: T; - output: DSL; - transform: () => DSL; +export interface ITransformer { + transform: (input: I, context: Context) => DSL; } diff --git a/packages/vmind/src/gpt/chart-generation/NLToChart.ts b/packages/vmind/src/gpt/chart-generation/NLToChart.ts index dfbd717b..b9b06bfb 100644 --- a/packages/vmind/src/gpt/chart-generation/NLToChart.ts +++ b/packages/vmind/src/gpt/chart-generation/NLToChart.ts @@ -1,7 +1,7 @@ import { SUPPORTED_CHART_LIST } from '../../common/vizDataToSpec/constants'; import { DataItem, GPTChartAdvisorResult, ILLMOptions, LOCATION, SimpleFieldInfo, VizSchema } from '../../typings'; import { checkChartTypeAndCell, vizDataToSpec } from '../../common/vizDataToSpec'; -import { parseGPTResponse, requestGPT } from '../utils'; +import { parseGPTResponse, requestGPT } from '../../taskNode/utils'; import { patchUserInput } from './utils'; import { ChartAdvisorPromptEnglish } from './prompts'; import { chartAdvisorHandler } from '../../common/chartAdvisor'; diff --git a/packages/vmind/src/gpt/dataProcess/index.ts b/packages/vmind/src/gpt/dataProcess/index.ts index 079ed53a..fe8ba32b 100644 --- a/packages/vmind/src/gpt/dataProcess/index.ts +++ b/packages/vmind/src/gpt/dataProcess/index.ts @@ -1,7 +1,7 @@ import { convertNumberField, getDataset, parseCSVData } from '../../common/dataProcess'; import { getFieldDomain, readTopNLine } from '../../common/dataProcess/utils'; import { ILLMOptions, SimpleFieldInfo } from '../../typings'; -import { parseGPTResponse, requestGPT } from '../utils'; +import { parseGPTResponse, requestGPT } from '../../taskNode/utils'; import { DataProcessPromptEnglish } from './prompts'; /* diff --git a/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts b/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts index 64787517..b1855a5d 100644 --- a/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts +++ b/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts @@ -1,7 +1,7 @@ import { DataItem, ILLMOptions, SimpleFieldInfo } from '../../../typings'; import { parseGPTQueryResponse, parseRespondField, patchQueryInput } from './utils'; import { DataQueryResponse } from './type'; -import { parseGPTResponse as parseGPTResponseAsJSON, requestGPT } from '../../utils'; +import { parseGPTResponse as parseGPTResponseAsJSON, requestGPT } from '../../../taskNode/utils'; import { getQueryDatasetPrompt } from '../prompts'; import { queryDataset } from '../../../common/dataProcess/dataQuery'; diff --git a/packages/vmind/src/taskNode/base.ts b/packages/vmind/src/taskNode/base.ts new file mode 100644 index 00000000..5ce7ddf2 --- /dev/null +++ b/packages/vmind/src/taskNode/base.ts @@ -0,0 +1,13 @@ +import { ITaskNode } from './types'; + +export class BaseTaskNode implements ITaskNode { + context: Context; + output: DSL; + constructor(context: Context) { + this.context = context; + } + + executeTask() { + return this.output as any; + } +} diff --git a/packages/vmind/src/taskNode/index.ts b/packages/vmind/src/taskNode/index.ts deleted file mode 100644 index e69de29b..00000000 diff --git a/packages/vmind/src/taskNode/llmBasedTaskNode.ts b/packages/vmind/src/taskNode/llmBasedTaskNode.ts new file mode 100644 index 00000000..6d106c4c --- /dev/null +++ b/packages/vmind/src/taskNode/llmBasedTaskNode.ts @@ -0,0 +1,48 @@ +import { Prompt } from 'src/baseTools/prompt'; +import { BaseTaskNode } from './base'; +import { Parser } from 'src/baseTools/parser'; +import { Patcher } from 'src/baseTools/patcher'; +import { ChatManager } from 'src/baseTools/chatManager'; +import { ILLMOptions, RequestFunc } from './types'; + +export class LLMBasedTaskNode extends BaseTaskNode { + prompt: Prompt; + parser: Parser; + patcher: Patcher; + requester: RequestFunc; + chatManager: ChatManager; + + constructor(context: Context) { + super(context); + this.chatManager = new ChatManager(); + } + + async requestLLM() { + const { llmOptions } = this.context; + const llmResponse = await this.requester( + this.prompt.getPrompt(), + this.chatManager.getLatestUserMessage().content, + llmOptions + ); + return llmResponse; + } + + parseLLMResponse(llmResponse: any): Partial { + //void function + //A parser must be initialized in the subclass. + return this.parser.parse(llmResponse); + } + + patchLLMResponse(input: Partial): DSL { + //void function + //A patcher must be initialized in the subclass. + return this.patcher.patch(input, this.context); + } + + async executeTask() { + const llmResponse = await this.requestLLM(); + const parsedResponse = this.parseLLMResponse(llmResponse); + const patchedResponse = this.patchLLMResponse(parsedResponse); + return patchedResponse; + } +} diff --git a/packages/vmind/src/taskNode/ruleBasedTaskNode.ts b/packages/vmind/src/taskNode/ruleBasedTaskNode.ts new file mode 100644 index 00000000..ec49f27f --- /dev/null +++ b/packages/vmind/src/taskNode/ruleBasedTaskNode.ts @@ -0,0 +1,23 @@ +import { Transformer } from 'src/baseTools/transformer'; +import { BaseTaskNode } from './base'; + +export class RuleBasedTaskNode extends BaseTaskNode { + input: Input; + pipelines: Transformer[]; + constructor(input: Input, context: Context, pipelines: Transformer[]) { + super(context); + this.input = input; + this.pipelines = pipelines; + } + + executeTask() { + const result: DSL = this.pipelines.reduce( + (pre: Partial | Input, transformer: Transformer | Input, Context, DSL>) => { + const result = transformer.transform(pre, this.context); + return result; + }, + this.input + ) as DSL; + return result; + } +} diff --git a/packages/vmind/src/taskNode/types.ts b/packages/vmind/src/taskNode/types.ts new file mode 100644 index 00000000..cfc05bfa --- /dev/null +++ b/packages/vmind/src/taskNode/types.ts @@ -0,0 +1,37 @@ +export interface ITaskNode { + context: Context; + output: DSL; + executeTask: (() => Promise>) | (() => DSL); +} +export type RequestFunc = ( + prompt: string, + userMessage: string, + options: ILLMOptions | undefined +) => Promise; + +//models that VMind support +//more models is under developing +export enum Model { + GPT3_5 = 'gpt-3.5-turbo', + GPT4 = 'gpt-4', + SKYLARK = 'skylark-pro', + SKYLARK2 = 'skylark2-pro-4k', + CHART_ADVISOR = 'chart-advisor' +} + +export interface ILLMOptions { + url?: string; //URL of your LLM service. For gpt, default is openAI API. + /** llm request header, which has higher priority */ + headers?: HeadersInit; // this will be used directly as the header of the LLM request. + method?: 'POST' | 'GET'; //post or get + model?: Model | string; + max_tokens?: number; + temperature?: number; + showThoughts?: boolean; + customRequestFunc?: { + chartAdvisor?: RequestFunc; + dataProcess?: RequestFunc; + dataQuery?: RequestFunc; + }; + [key: string]: any; +} diff --git a/packages/vmind/src/gpt/utils.ts b/packages/vmind/src/taskNode/utils.ts similarity index 99% rename from packages/vmind/src/gpt/utils.ts rename to packages/vmind/src/taskNode/utils.ts index a83b1417..a1d5d088 100644 --- a/packages/vmind/src/gpt/utils.ts +++ b/packages/vmind/src/taskNode/utils.ts @@ -42,6 +42,7 @@ export const requestGPT = async ( return err.response.data; } }; + export const parseGPTJson = (JsonStr: string, prefix?: string) => { const parseNoPrefixStr = (str: string) => { //尝试不带前缀的解析 diff --git a/packages/vmind/src/typings/index.ts b/packages/vmind/src/typings/index.ts index 78e74dd9..bd9861b8 100644 --- a/packages/vmind/src/typings/index.ts +++ b/packages/vmind/src/typings/index.ts @@ -1,25 +1,6 @@ import type { FFmpeg } from '@ffmpeg/ffmpeg'; import type { ManualTicker, DefaultTimeline } from '@visactor/vrender-core'; -export interface ILLMOptions { - url?: string; //URL of your LLM service. For gpt, default is openAI API. - /** llm request header, which has higher priority */ - headers?: HeadersInit; // this will be used directly as the header of the LLM request. - method?: 'POST' | 'GET'; //post or get - model?: Model | string; - max_tokens?: number; - temperature?: number; - showThoughts?: boolean; - customRequestFunc?: { - chartAdvisor?: RequestFunc; - dataProcess?: RequestFunc; - dataQuery?: RequestFunc; - }; - [key: string]: any; -} - -type RequestFunc = (prompt: string, userMessage: string, options: ILLMOptions | undefined) => Promise; - export type SimpleFieldInfo = { fieldName: string; description?: string; //additional description of the field. This will help the model have a more comprehensive understanding of this field, improving the quality of chart generation. @@ -125,16 +106,6 @@ export type VizSchema = { fields: FieldInfo[]; }; -//models that VMind support -//more models is under developing -export enum Model { - GPT3_5 = 'gpt-3.5-turbo', - GPT4 = 'gpt-4', - SKYLARK = 'skylark-pro', - SKYLARK2 = 'skylark2-pro-4k', - CHART_ADVISOR = 'chart-advisor' -} - export enum ModelType { GPT = 'gpt', SKYLARK = 'skylark', From 54763946e4a2fff2abbc6fd18f71def00571a7c9 Mon Sep 17 00:00:00 2001 From: da730 Date: Tue, 9 Apr 2024 11:43:49 +0800 Subject: [PATCH 09/62] feat: add comments --- .../vmind/src/baseTools/chatManager/index.ts | 3 +++ packages/vmind/src/baseTools/parser/index.ts | 16 ++++++++++-- packages/vmind/src/baseTools/parser/types.ts | 7 +++-- packages/vmind/src/baseTools/patcher/index.ts | 26 +++++++++++++------ packages/vmind/src/baseTools/patcher/types.ts | 7 ++--- packages/vmind/src/baseTools/prompt/index.ts | 15 ++++++----- packages/vmind/src/baseTools/prompt/types.ts | 5 ++-- .../vmind/src/baseTools/transformer/index.ts | 5 ++++ .../vmind/src/baseTools/transformer/types.ts | 4 +-- 9 files changed, 62 insertions(+), 26 deletions(-) diff --git a/packages/vmind/src/baseTools/chatManager/index.ts b/packages/vmind/src/baseTools/chatManager/index.ts index 826274c2..3336d060 100644 --- a/packages/vmind/src/baseTools/chatManager/index.ts +++ b/packages/vmind/src/baseTools/chatManager/index.ts @@ -1,5 +1,8 @@ import { Chat, ChatRole } from './types'; +/** + * ChatManager for multiple-rounds dialogue management + */ export class ChatManager { chatList: Chat[]; diff --git a/packages/vmind/src/baseTools/parser/index.ts b/packages/vmind/src/baseTools/parser/index.ts index 3bfccca3..fd5d6604 100644 --- a/packages/vmind/src/baseTools/parser/index.ts +++ b/packages/vmind/src/baseTools/parser/index.ts @@ -1,7 +1,19 @@ import { IParser } from './types'; +import { Transformer } from '../transformer'; + +/** + * Parser is responsible for parsing the string content generated by LLM into DSL in a specific format (JSON or YAML) + * Use a transformer to complete the conversion from string to DSL + * Pass in the transformer during initialization + */ +export class Parser implements IParser { + transformer: Transformer; + + constructor(transformer: Transformer) { + this.transformer = transformer; + } -export class Parser implements IParser { parse(input: string) { - return input as T; + return this.transformer.transform(input, {}); } } diff --git a/packages/vmind/src/baseTools/parser/types.ts b/packages/vmind/src/baseTools/parser/types.ts index 2f81ed15..bb02abc0 100644 --- a/packages/vmind/src/baseTools/parser/types.ts +++ b/packages/vmind/src/baseTools/parser/types.ts @@ -1,3 +1,6 @@ -export interface IParser { - parse: (input: string) => T; +import { Transformer } from '../transformer'; + +export interface IParser { + transformer: Transformer; + parse: (input: string) => DSL; } diff --git a/packages/vmind/src/baseTools/patcher/index.ts b/packages/vmind/src/baseTools/patcher/index.ts index ae79560b..f9a6d68f 100644 --- a/packages/vmind/src/baseTools/patcher/index.ts +++ b/packages/vmind/src/baseTools/patcher/index.ts @@ -1,17 +1,27 @@ import { IPatcher } from './types'; import { Transformer } from '../transformer'; -export class Patcher implements IPatcher { - pipelines: Transformer, Context, T>[]; - constructor(transformers: Transformer, Context, T>[]) { +/** + * Sometimes the DSL generated by LLM may has some errors and need to be patched + * Patcher is responsible for completing the patch of DSL generated by LLM + * pipelines is composed of a series of transformer, completing the conversion of initial DSL to final DSL + * patch method is responsible for executing pipelines, patch the input based on Context, and return the final DSL + * pass the specific pipelines during initialization + */ +export class Patcher implements IPatcher { + pipelines: Transformer, Context, DSL>[]; + constructor(transformers: Transformer, Context, DSL>[]) { this.pipelines = transformers; } - patch(input: Partial, context: Context) { - const result: T = this.pipelines.reduce((pre: Partial, transformer: Transformer, Context, T>) => { - const result = transformer.transform(pre, context); - return result; - }, input) as T; + patch(input: Partial, context: Context) { + const result: DSL = this.pipelines.reduce( + (pre: Partial, transformer: Transformer, Context, DSL>) => { + const result = transformer.transform(pre, context); + return result; + }, + input + ) as DSL; return result; } } diff --git a/packages/vmind/src/baseTools/patcher/types.ts b/packages/vmind/src/baseTools/patcher/types.ts index 21502ac6..833057c4 100644 --- a/packages/vmind/src/baseTools/patcher/types.ts +++ b/packages/vmind/src/baseTools/patcher/types.ts @@ -1,5 +1,6 @@ import { Transformer } from '../transformer'; -export interface IPatcher { - pipelines: Transformer, C, T>[]; - patch: (input: Partial, context: C) => T; + +export interface IPatcher { + pipelines: Transformer, Context, DSL>[]; + patch: (input: Partial, context: Context) => DSL; } diff --git a/packages/vmind/src/baseTools/prompt/index.ts b/packages/vmind/src/baseTools/prompt/index.ts index eb1c0d55..5504a0a9 100644 --- a/packages/vmind/src/baseTools/prompt/index.ts +++ b/packages/vmind/src/baseTools/prompt/index.ts @@ -1,14 +1,17 @@ import { IPrompt } from './types'; - -export class Prompt implements IPrompt { +/** + * Prompt represents a LLM Prompt + * Pass in Template when initialization + * getPrompt method generates specific Prompt according to Template and Context + * The subclass needs to rewrite the getprompt method to generate a specific Prompt + */ +export class Prompt implements IPrompt { template: string; - context: T; - constructor(template: string, context: T) { + constructor(template: string) { this.template = template; - this.context = context; } - getPrompt() { + getPrompt(context: Context) { return this.template; } } diff --git a/packages/vmind/src/baseTools/prompt/types.ts b/packages/vmind/src/baseTools/prompt/types.ts index cc6baab6..fc135e94 100644 --- a/packages/vmind/src/baseTools/prompt/types.ts +++ b/packages/vmind/src/baseTools/prompt/types.ts @@ -1,5 +1,4 @@ -export interface IPrompt { +export interface IPrompt { readonly template: string; - readonly context: T; - getPrompt: () => string; + getPrompt: (context: Context) => string; } diff --git a/packages/vmind/src/baseTools/transformer/index.ts b/packages/vmind/src/baseTools/transformer/index.ts index 01bf0fe0..7069509e 100644 --- a/packages/vmind/src/baseTools/transformer/index.ts +++ b/packages/vmind/src/baseTools/transformer/index.ts @@ -1,5 +1,10 @@ import { ITransformer } from './types'; +/** + * Transformer is to finish the data conversion work + * * Convert the INPUT type to DSL type based on the Context + * The subclass needs to rewrite the transform method to complete the specific data conversion + */ export class Transformer implements ITransformer { transform(input: Input, context: Context): DSL { return input as unknown as DSL; diff --git a/packages/vmind/src/baseTools/transformer/types.ts b/packages/vmind/src/baseTools/transformer/types.ts index 579f0e7c..859528f5 100644 --- a/packages/vmind/src/baseTools/transformer/types.ts +++ b/packages/vmind/src/baseTools/transformer/types.ts @@ -1,3 +1,3 @@ -export interface ITransformer { - transform: (input: I, context: Context) => DSL; +export interface ITransformer { + transform: (input: Input, context: Context) => DSL; } From 499d435a066f2b27485f6d82eae5b842682e870b Mon Sep 17 00:00:00 2001 From: da730 Date: Tue, 9 Apr 2024 14:18:09 +0800 Subject: [PATCH 10/62] feat: optimize file structure --- packages/vmind/src/application/base/index.ts | 1 - packages/vmind/src/application/base/types.ts | 3 -- .../src/application/chartGeneration/index.ts | 0 packages/vmind/src/base/taskNode/base.ts | 12 ++++++++ .../{ => base}/taskNode/llmBasedTaskNode.ts | 28 +++++++++---------- .../{ => base}/taskNode/ruleBasedTaskNode.ts | 2 +- .../vmind/src/{ => base}/taskNode/types.ts | 14 ++++++++-- .../vmind/src/{ => base}/taskNode/utils.ts | 4 +-- .../tools}/chatManager/index.ts | 0 .../tools}/chatManager/types.ts | 0 .../{baseTools => base/tools}/parser/index.ts | 0 .../{baseTools => base/tools}/parser/types.ts | 0 .../tools}/patcher/index.ts | 0 .../tools}/patcher/types.ts | 0 .../{baseTools => base/tools}/prompt/index.ts | 0 .../{baseTools => base/tools}/prompt/types.ts | 0 .../tools}/transformer/index.ts | 0 .../tools}/transformer/types.ts | 0 .../src/gpt/chart-generation/NLToChart.ts | 2 +- packages/vmind/src/gpt/dataProcess/index.ts | 2 +- .../src/gpt/dataProcess/query/queryDataset.ts | 2 +- packages/vmind/src/taskNode/base.ts | 13 --------- packages/vmind/src/types/index.ts | 1 - packages/vmind/src/typings/index.ts | 9 ------ 24 files changed, 43 insertions(+), 50 deletions(-) delete mode 100644 packages/vmind/src/application/base/index.ts delete mode 100644 packages/vmind/src/application/base/types.ts delete mode 100644 packages/vmind/src/application/chartGeneration/index.ts create mode 100644 packages/vmind/src/base/taskNode/base.ts rename packages/vmind/src/{ => base}/taskNode/llmBasedTaskNode.ts (62%) rename packages/vmind/src/{ => base}/taskNode/ruleBasedTaskNode.ts (92%) rename packages/vmind/src/{ => base}/taskNode/types.ts (83%) rename packages/vmind/src/{ => base}/taskNode/utils.ts (95%) rename packages/vmind/src/{baseTools => base/tools}/chatManager/index.ts (100%) rename packages/vmind/src/{baseTools => base/tools}/chatManager/types.ts (100%) rename packages/vmind/src/{baseTools => base/tools}/parser/index.ts (100%) rename packages/vmind/src/{baseTools => base/tools}/parser/types.ts (100%) rename packages/vmind/src/{baseTools => base/tools}/patcher/index.ts (100%) rename packages/vmind/src/{baseTools => base/tools}/patcher/types.ts (100%) rename packages/vmind/src/{baseTools => base/tools}/prompt/index.ts (100%) rename packages/vmind/src/{baseTools => base/tools}/prompt/types.ts (100%) rename packages/vmind/src/{baseTools => base/tools}/transformer/index.ts (100%) rename packages/vmind/src/{baseTools => base/tools}/transformer/types.ts (100%) delete mode 100644 packages/vmind/src/taskNode/base.ts delete mode 100644 packages/vmind/src/types/index.ts diff --git a/packages/vmind/src/application/base/index.ts b/packages/vmind/src/application/base/index.ts deleted file mode 100644 index 07b44980..00000000 --- a/packages/vmind/src/application/base/index.ts +++ /dev/null @@ -1 +0,0 @@ -export class BaseApplication {} diff --git a/packages/vmind/src/application/base/types.ts b/packages/vmind/src/application/base/types.ts deleted file mode 100644 index b83036fc..00000000 --- a/packages/vmind/src/application/base/types.ts +++ /dev/null @@ -1,3 +0,0 @@ -export interface IApplication { - tasks: TaskNode[]; -} diff --git a/packages/vmind/src/application/chartGeneration/index.ts b/packages/vmind/src/application/chartGeneration/index.ts deleted file mode 100644 index e69de29b..00000000 diff --git a/packages/vmind/src/base/taskNode/base.ts b/packages/vmind/src/base/taskNode/base.ts new file mode 100644 index 00000000..c7111731 --- /dev/null +++ b/packages/vmind/src/base/taskNode/base.ts @@ -0,0 +1,12 @@ +import { ITaskNode } from './types'; + +/** + * A task node in VMind application, used to complete a specific task, such as requesting a large model for chart type, DSL parsing and conversion, etc. + * There are 2 types: rule-based or LLM-based, the former completes a series of tasks based on rule algorithm, the latter calls LLM to complete the task. + * Each Node can also be called as a separate function + */ +export class BaseTaskNode implements ITaskNode { + executeTask(context: Context): Promise | DSL { + return null as DSL; + } +} diff --git a/packages/vmind/src/taskNode/llmBasedTaskNode.ts b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts similarity index 62% rename from packages/vmind/src/taskNode/llmBasedTaskNode.ts rename to packages/vmind/src/base/taskNode/llmBasedTaskNode.ts index 6d106c4c..d878df99 100644 --- a/packages/vmind/src/taskNode/llmBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts @@ -1,8 +1,8 @@ -import { Prompt } from 'src/baseTools/prompt'; +import { Prompt } from 'src/base/tools/prompt'; import { BaseTaskNode } from './base'; -import { Parser } from 'src/baseTools/parser'; -import { Patcher } from 'src/baseTools/patcher'; -import { ChatManager } from 'src/baseTools/chatManager'; +import { Parser } from 'src/base/tools/parser'; +import { Patcher } from 'src/base/tools/patcher'; +import { ChatManager } from 'src/base/tools/chatManager'; import { ILLMOptions, RequestFunc } from './types'; export class LLMBasedTaskNode extends BaseTaskNode { @@ -12,15 +12,15 @@ export class LLMBasedTaskNode requester: RequestFunc; chatManager: ChatManager; - constructor(context: Context) { - super(context); + constructor() { + super(); this.chatManager = new ChatManager(); } - async requestLLM() { - const { llmOptions } = this.context; + async requestLLM(context: Context) { + const { llmOptions } = context; const llmResponse = await this.requester( - this.prompt.getPrompt(), + this.prompt.getPrompt(context), this.chatManager.getLatestUserMessage().content, llmOptions ); @@ -33,16 +33,16 @@ export class LLMBasedTaskNode return this.parser.parse(llmResponse); } - patchLLMResponse(input: Partial): DSL { + patchLLMResponse(input: Partial, context: Context): DSL { //void function //A patcher must be initialized in the subclass. - return this.patcher.patch(input, this.context); + return this.patcher.patch(input, context); } - async executeTask() { - const llmResponse = await this.requestLLM(); + async executeTask(context: Context) { + const llmResponse = await this.requestLLM(context); const parsedResponse = this.parseLLMResponse(llmResponse); - const patchedResponse = this.patchLLMResponse(parsedResponse); + const patchedResponse = this.patchLLMResponse(parsedResponse, context); return patchedResponse; } } diff --git a/packages/vmind/src/taskNode/ruleBasedTaskNode.ts b/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts similarity index 92% rename from packages/vmind/src/taskNode/ruleBasedTaskNode.ts rename to packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts index ec49f27f..cd85ca83 100644 --- a/packages/vmind/src/taskNode/ruleBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts @@ -1,4 +1,4 @@ -import { Transformer } from 'src/baseTools/transformer'; +import { Transformer } from 'src/base/tools/transformer'; import { BaseTaskNode } from './base'; export class RuleBasedTaskNode extends BaseTaskNode { diff --git a/packages/vmind/src/taskNode/types.ts b/packages/vmind/src/base/taskNode/types.ts similarity index 83% rename from packages/vmind/src/taskNode/types.ts rename to packages/vmind/src/base/taskNode/types.ts index cfc05bfa..fa39757a 100644 --- a/packages/vmind/src/taskNode/types.ts +++ b/packages/vmind/src/base/taskNode/types.ts @@ -1,8 +1,16 @@ export interface ITaskNode { - context: Context; - output: DSL; - executeTask: (() => Promise>) | (() => DSL); + executeTask: (context: Context) => Promise | DSL; } + +export type LLMResponse = { + choices: { + index: number; + message: any; + }[]; + usage: any; + [key: string]: any; +}; + export type RequestFunc = ( prompt: string, userMessage: string, diff --git a/packages/vmind/src/taskNode/utils.ts b/packages/vmind/src/base/taskNode/utils.ts similarity index 95% rename from packages/vmind/src/taskNode/utils.ts rename to packages/vmind/src/base/taskNode/utils.ts index a1d5d088..5537d909 100644 --- a/packages/vmind/src/taskNode/utils.ts +++ b/packages/vmind/src/base/taskNode/utils.ts @@ -1,8 +1,8 @@ -import { GPTDataProcessResult, ILLMOptions, LLMResponse } from '../typings'; import axios from 'axios'; import JSON5 from 'json5'; import { omit } from 'lodash'; -import { matchJSONStr } from '../common/utils'; +import { matchJSONStr } from '../../common/utils'; +import { ILLMOptions, LLMResponse } from './types'; export const requestGPT = async ( prompt: string, diff --git a/packages/vmind/src/baseTools/chatManager/index.ts b/packages/vmind/src/base/tools/chatManager/index.ts similarity index 100% rename from packages/vmind/src/baseTools/chatManager/index.ts rename to packages/vmind/src/base/tools/chatManager/index.ts diff --git a/packages/vmind/src/baseTools/chatManager/types.ts b/packages/vmind/src/base/tools/chatManager/types.ts similarity index 100% rename from packages/vmind/src/baseTools/chatManager/types.ts rename to packages/vmind/src/base/tools/chatManager/types.ts diff --git a/packages/vmind/src/baseTools/parser/index.ts b/packages/vmind/src/base/tools/parser/index.ts similarity index 100% rename from packages/vmind/src/baseTools/parser/index.ts rename to packages/vmind/src/base/tools/parser/index.ts diff --git a/packages/vmind/src/baseTools/parser/types.ts b/packages/vmind/src/base/tools/parser/types.ts similarity index 100% rename from packages/vmind/src/baseTools/parser/types.ts rename to packages/vmind/src/base/tools/parser/types.ts diff --git a/packages/vmind/src/baseTools/patcher/index.ts b/packages/vmind/src/base/tools/patcher/index.ts similarity index 100% rename from packages/vmind/src/baseTools/patcher/index.ts rename to packages/vmind/src/base/tools/patcher/index.ts diff --git a/packages/vmind/src/baseTools/patcher/types.ts b/packages/vmind/src/base/tools/patcher/types.ts similarity index 100% rename from packages/vmind/src/baseTools/patcher/types.ts rename to packages/vmind/src/base/tools/patcher/types.ts diff --git a/packages/vmind/src/baseTools/prompt/index.ts b/packages/vmind/src/base/tools/prompt/index.ts similarity index 100% rename from packages/vmind/src/baseTools/prompt/index.ts rename to packages/vmind/src/base/tools/prompt/index.ts diff --git a/packages/vmind/src/baseTools/prompt/types.ts b/packages/vmind/src/base/tools/prompt/types.ts similarity index 100% rename from packages/vmind/src/baseTools/prompt/types.ts rename to packages/vmind/src/base/tools/prompt/types.ts diff --git a/packages/vmind/src/baseTools/transformer/index.ts b/packages/vmind/src/base/tools/transformer/index.ts similarity index 100% rename from packages/vmind/src/baseTools/transformer/index.ts rename to packages/vmind/src/base/tools/transformer/index.ts diff --git a/packages/vmind/src/baseTools/transformer/types.ts b/packages/vmind/src/base/tools/transformer/types.ts similarity index 100% rename from packages/vmind/src/baseTools/transformer/types.ts rename to packages/vmind/src/base/tools/transformer/types.ts diff --git a/packages/vmind/src/gpt/chart-generation/NLToChart.ts b/packages/vmind/src/gpt/chart-generation/NLToChart.ts index b9b06bfb..c273de14 100644 --- a/packages/vmind/src/gpt/chart-generation/NLToChart.ts +++ b/packages/vmind/src/gpt/chart-generation/NLToChart.ts @@ -1,7 +1,7 @@ import { SUPPORTED_CHART_LIST } from '../../common/vizDataToSpec/constants'; import { DataItem, GPTChartAdvisorResult, ILLMOptions, LOCATION, SimpleFieldInfo, VizSchema } from '../../typings'; import { checkChartTypeAndCell, vizDataToSpec } from '../../common/vizDataToSpec'; -import { parseGPTResponse, requestGPT } from '../../taskNode/utils'; +import { parseGPTResponse, requestGPT } from '../../base/taskNode/utils'; import { patchUserInput } from './utils'; import { ChartAdvisorPromptEnglish } from './prompts'; import { chartAdvisorHandler } from '../../common/chartAdvisor'; diff --git a/packages/vmind/src/gpt/dataProcess/index.ts b/packages/vmind/src/gpt/dataProcess/index.ts index fe8ba32b..2495f791 100644 --- a/packages/vmind/src/gpt/dataProcess/index.ts +++ b/packages/vmind/src/gpt/dataProcess/index.ts @@ -1,7 +1,7 @@ import { convertNumberField, getDataset, parseCSVData } from '../../common/dataProcess'; import { getFieldDomain, readTopNLine } from '../../common/dataProcess/utils'; import { ILLMOptions, SimpleFieldInfo } from '../../typings'; -import { parseGPTResponse, requestGPT } from '../../taskNode/utils'; +import { parseGPTResponse, requestGPT } from '../../base/taskNode/utils'; import { DataProcessPromptEnglish } from './prompts'; /* diff --git a/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts b/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts index b1855a5d..a8a297f7 100644 --- a/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts +++ b/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts @@ -1,7 +1,7 @@ import { DataItem, ILLMOptions, SimpleFieldInfo } from '../../../typings'; import { parseGPTQueryResponse, parseRespondField, patchQueryInput } from './utils'; import { DataQueryResponse } from './type'; -import { parseGPTResponse as parseGPTResponseAsJSON, requestGPT } from '../../../taskNode/utils'; +import { parseGPTResponse as parseGPTResponseAsJSON, requestGPT } from '../../../base/taskNode/utils'; import { getQueryDatasetPrompt } from '../prompts'; import { queryDataset } from '../../../common/dataProcess/dataQuery'; diff --git a/packages/vmind/src/taskNode/base.ts b/packages/vmind/src/taskNode/base.ts deleted file mode 100644 index 5ce7ddf2..00000000 --- a/packages/vmind/src/taskNode/base.ts +++ /dev/null @@ -1,13 +0,0 @@ -import { ITaskNode } from './types'; - -export class BaseTaskNode implements ITaskNode { - context: Context; - output: DSL; - constructor(context: Context) { - this.context = context; - } - - executeTask() { - return this.output as any; - } -} diff --git a/packages/vmind/src/types/index.ts b/packages/vmind/src/types/index.ts deleted file mode 100644 index c8ea88b0..00000000 --- a/packages/vmind/src/types/index.ts +++ /dev/null @@ -1 +0,0 @@ -export type Context = Record; diff --git a/packages/vmind/src/typings/index.ts b/packages/vmind/src/typings/index.ts index bd9861b8..34712cde 100644 --- a/packages/vmind/src/typings/index.ts +++ b/packages/vmind/src/typings/index.ts @@ -120,15 +120,6 @@ export type ChartGenerationProps = { export type DataItem = Record; -export type LLMResponse = { - choices: { - index: number; - message: any; - }[]; - usage: any; - [key: string]: any; -}; - export type PatchContext = { chartType: string; cell: Cell; From 18db47b6a0a979ae41577f31bf55266eb260c8c3 Mon Sep 17 00:00:00 2001 From: da730 Date: Tue, 9 Apr 2024 15:08:00 +0800 Subject: [PATCH 11/62] feat: add application class --- packages/vmind/src/base/application/index.ts | 31 +++++++++++++++++++ packages/vmind/src/base/application/types.ts | 11 +++++++ .../taskNode/{base.ts => baseTaskNode.ts} | 1 + .../src/base/taskNode/llmBasedTaskNode.ts | 10 +++--- .../src/base/taskNode/ruleBasedTaskNode.ts | 21 +++++++------ packages/vmind/src/base/tools/parser/index.ts | 6 ++-- 6 files changed, 63 insertions(+), 17 deletions(-) create mode 100644 packages/vmind/src/base/application/index.ts create mode 100644 packages/vmind/src/base/application/types.ts rename packages/vmind/src/base/taskNode/{base.ts => baseTaskNode.ts} (84%) diff --git a/packages/vmind/src/base/application/index.ts b/packages/vmind/src/base/application/index.ts new file mode 100644 index 00000000..aaa672eb --- /dev/null +++ b/packages/vmind/src/base/application/index.ts @@ -0,0 +1,31 @@ +import { BaseTaskNode } from '../taskNode/baseTaskNode'; +import { ChatManager } from '../tools/chatManager'; +import { IApplication } from './types'; + +/** + * VMind application class, representing a specific function, such as chart generation, data aggregation, or chart editing, etc. + * An Application consists of a series of TaskNodes. TaskNodes are executed in order, obtaining the final result (Spec). + * Application can be seen as a collection of a series of TaskNodes. Applications can reference each other (equivalent to reusing TaskNodes to complete tasks) + */ +export class BaseApplication implements IApplication { + tasks: BaseTaskNode[]; + context: Context; + chatManager: ChatManager; + constructor(tasks: BaseTaskNode[], context: Context) { + this.tasks = tasks; + this.context = context; + this.chatManager = new ChatManager(); + } + + async runTasks() { + const result: DSL = this.tasks.reduce(async (pre: any, task: BaseTaskNode) => { + const result = await task.executeTask(pre); + return result; + }, this.context); + return result; + } + + updateContext(context: Context) { + this.context = context; + } +} diff --git a/packages/vmind/src/base/application/types.ts b/packages/vmind/src/base/application/types.ts new file mode 100644 index 00000000..be4ec0a8 --- /dev/null +++ b/packages/vmind/src/base/application/types.ts @@ -0,0 +1,11 @@ +import { BaseTaskNode } from '../taskNode/baseTaskNode'; +import { ChatManager } from '../tools/chatManager'; + +export interface IApplication { + tasks: BaseTaskNode[]; + context: Context; + chatManager: ChatManager; + + runTasks: () => Promise; + updateContext: (context: Context) => void; +} diff --git a/packages/vmind/src/base/taskNode/base.ts b/packages/vmind/src/base/taskNode/baseTaskNode.ts similarity index 84% rename from packages/vmind/src/base/taskNode/base.ts rename to packages/vmind/src/base/taskNode/baseTaskNode.ts index c7111731..f93c3c4d 100644 --- a/packages/vmind/src/base/taskNode/base.ts +++ b/packages/vmind/src/base/taskNode/baseTaskNode.ts @@ -3,6 +3,7 @@ import { ITaskNode } from './types'; /** * A task node in VMind application, used to complete a specific task, such as requesting a large model for chart type, DSL parsing and conversion, etc. * There are 2 types: rule-based or LLM-based, the former completes a series of tasks based on rule algorithm, the latter calls LLM to complete the task. + * TaskNode can be seen as a collection of a series of tools, responsible for completing a specific task. * Each Node can also be called as a separate function */ export class BaseTaskNode implements ITaskNode { diff --git a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts index d878df99..86d2ab85 100644 --- a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts @@ -1,10 +1,14 @@ import { Prompt } from 'src/base/tools/prompt'; -import { BaseTaskNode } from './base'; +import { BaseTaskNode } from './baseTaskNode'; import { Parser } from 'src/base/tools/parser'; import { Patcher } from 'src/base/tools/patcher'; import { ChatManager } from 'src/base/tools/chatManager'; import { ILLMOptions, RequestFunc } from './types'; +/** + * LLMBasedTaskNode is a task node that needs to use LLM to complete tasks + * Subclasses must assign values to prompt, parser, patcher, and requester + */ export class LLMBasedTaskNode extends BaseTaskNode { prompt: Prompt; parser: Parser; @@ -28,14 +32,10 @@ export class LLMBasedTaskNode } parseLLMResponse(llmResponse: any): Partial { - //void function - //A parser must be initialized in the subclass. return this.parser.parse(llmResponse); } patchLLMResponse(input: Partial, context: Context): DSL { - //void function - //A patcher must be initialized in the subclass. return this.patcher.patch(input, context); } diff --git a/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts b/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts index cd85ca83..4ad30c5b 100644 --- a/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts @@ -1,22 +1,25 @@ import { Transformer } from 'src/base/tools/transformer'; -import { BaseTaskNode } from './base'; +import { BaseTaskNode } from './baseTaskNode'; -export class RuleBasedTaskNode extends BaseTaskNode { - input: Input; +/** + * rule-based taskNode, which consists of a series of Pipelines + * It completes the transformation from Input to a specific data structure (DSL) + */ +export class RuleBasedTaskNode extends BaseTaskNode { pipelines: Transformer[]; - constructor(input: Input, context: Context, pipelines: Transformer[]) { - super(context); - this.input = input; + constructor(pipelines: Transformer[]) { + super(); this.pipelines = pipelines; } - executeTask() { + executeTask(context: Context) { + const { input } = context; const result: DSL = this.pipelines.reduce( (pre: Partial | Input, transformer: Transformer | Input, Context, DSL>) => { - const result = transformer.transform(pre, this.context); + const result = transformer.transform(pre, context); return result; }, - this.input + input ) as DSL; return result; } diff --git a/packages/vmind/src/base/tools/parser/index.ts b/packages/vmind/src/base/tools/parser/index.ts index fd5d6604..ec0f4cb9 100644 --- a/packages/vmind/src/base/tools/parser/index.ts +++ b/packages/vmind/src/base/tools/parser/index.ts @@ -7,13 +7,13 @@ import { Transformer } from '../transformer'; * Pass in the transformer during initialization */ export class Parser implements IParser { - transformer: Transformer; + transformer: Transformer; - constructor(transformer: Transformer) { + constructor(transformer: Transformer) { this.transformer = transformer; } parse(input: string) { - return this.transformer.transform(input, {}); + return this.transformer.transform(input, null); } } From 278efd3deaf70e213396b2e12f2a9e07417ab836 Mon Sep 17 00:00:00 2001 From: da730 Date: Tue, 9 Apr 2024 16:16:48 +0800 Subject: [PATCH 12/62] feat: add data aggregation application --- .../src/applications/dataAggregation/index.ts | 5 + .../dataAggregation/prompts/GPT/index.ts | 11 +++ .../dataAggregation/prompts/GPT/template.ts | 97 ++++++++++++++++++ .../taskNodes/executeQuery}/dataQuery.ts | 3 +- .../taskNodes/executeQuery/index.ts | 4 + .../taskNodes/executeQuery/transformers.ts | 0 .../taskNodes/executeQuery}/utils.ts | 4 +- .../src/applications/dataAggregation/types.ts | 9 ++ packages/vmind/src/base/application/index.ts | 30 ++++-- packages/vmind/src/base/application/types.ts | 5 +- .../src/base/taskNode/llmBasedTaskNode.ts | 33 ++++--- packages/vmind/src/base/taskNode/types.ts | 42 -------- packages/vmind/src/base/taskNode/utils.ts | 2 +- packages/vmind/src/base/tools/parser/types.ts | 2 +- .../vmind/src/base/tools/transformer/index.ts | 6 +- .../vmind/src/base/tools/transformer/types.ts | 1 + .../vmind/src/common/dataProcess/constants.ts | 2 +- .../vmind/src/common/dataProcess/index.ts | 2 +- packages/vmind/src/gpt/dataProcess/index.ts | 2 +- packages/vmind/src/gpt/dataProcess/prompts.ts | 98 +------------------ .../src/gpt/dataProcess/query/astPipes.ts | 2 +- .../src/gpt/dataProcess/query/queryDataset.ts | 2 +- .../vmind/src/gpt/dataProcess/query/utils.ts | 2 +- .../src/skylark/dataProcess/query/prompts.ts | 2 +- .../skylark/dataProcess/query/queryDataset.ts | 2 +- .../src/skylark/dataProcess/query/utils.ts | 2 +- packages/vmind/src/typings/index.ts | 43 ++++++++ 27 files changed, 238 insertions(+), 175 deletions(-) create mode 100644 packages/vmind/src/applications/dataAggregation/index.ts create mode 100644 packages/vmind/src/applications/dataAggregation/prompts/GPT/index.ts create mode 100644 packages/vmind/src/applications/dataAggregation/prompts/GPT/template.ts rename packages/vmind/src/{common/dataProcess => applications/dataAggregation/taskNodes/executeQuery}/dataQuery.ts (96%) create mode 100644 packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts create mode 100644 packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts rename packages/vmind/src/{common/dataProcess => applications/dataAggregation/taskNodes/executeQuery}/utils.ts (99%) create mode 100644 packages/vmind/src/applications/dataAggregation/types.ts diff --git a/packages/vmind/src/applications/dataAggregation/index.ts b/packages/vmind/src/applications/dataAggregation/index.ts new file mode 100644 index 00000000..1c115108 --- /dev/null +++ b/packages/vmind/src/applications/dataAggregation/index.ts @@ -0,0 +1,5 @@ +import { BaseApplication } from 'src/base/application'; +import { SQL } from './types'; +import { DataItem } from 'src/typings'; + +export class DataAggregationApplication extends BaseApplication<{}, DataItem> {} diff --git a/packages/vmind/src/applications/dataAggregation/prompts/GPT/index.ts b/packages/vmind/src/applications/dataAggregation/prompts/GPT/index.ts new file mode 100644 index 00000000..f89d04f3 --- /dev/null +++ b/packages/vmind/src/applications/dataAggregation/prompts/GPT/index.ts @@ -0,0 +1,11 @@ +import { Prompt } from 'src/base/tools/prompt'; +import { getQueryDatasetPrompt } from './template'; +import { DataAggregationContext } from '../../types'; + +export class GPTDataAggregationPrompt extends Prompt { + getPrompt(context: DataAggregationContext) { + const { llmOptions } = context; + const QueryDatasetPrompt = getQueryDatasetPrompt(llmOptions.showThoughts ?? true); + return QueryDatasetPrompt; + } +} diff --git a/packages/vmind/src/applications/dataAggregation/prompts/GPT/template.ts b/packages/vmind/src/applications/dataAggregation/prompts/GPT/template.ts new file mode 100644 index 00000000..da74b32c --- /dev/null +++ b/packages/vmind/src/applications/dataAggregation/prompts/GPT/template.ts @@ -0,0 +1,97 @@ +export const VMIND_DATA_SOURCE = 'VMind_data_source'; + +export const getQueryDatasetPrompt = ( + showThoughts: boolean +) => `You are an expert in data analysis. Here is a raw dataset named ${VMIND_DATA_SOURCE}. User will tell you his command and column information of ${VMIND_DATA_SOURCE}. Your task is to generate a sql and fieldInfo according to Instruction. Response one JSON object only. + +# Instruction +- Supported sql keywords: ["SELECT", "FROM", "WHERE", "GROUP BY", "HAVING", "ORDER BY", "LIMIT", "DISTINCT"]. Supported aggregation methods: ["MAX()", "MIN()", "SUM()", "COUNT()", "AVG()"]. +- Generate a sql query like this: "SELECT \`columnA\`, SUM(\`columnB\`) as \`sum_b\` FROM ${VMIND_DATA_SOURCE} WHERE \`columnA\` = value1 GROUP BY \`columnA\` HAVING \`sum_b\`>0 ORDER BY \`sum_b\` LIMIT 10". +- Don't use unsupported keywords such as WITHIN, FIELD, RANK() OVER, OVER. Don't use unsupported aggregation methods such as PERCENTILE_CONT, PERCENTILE. Don't use unsupported operators. We will execute your sql using alasql. Unsupported keywords, methods and operators will cause system crash. If current keywords and methods can't meet your needs, just simply select the column without any process. +- Don't use aliases in HAVING. +- Make your sql as simple as possible. + +You need to follow the steps below. + +# Steps +1. Extract the part related to the data from the user's instruction. Ignore other parts that is not related to the data. +2. Select useful dimension and measure columns from ${VMIND_DATA_SOURCE}. Don't miss some important columns such as dimensions related to date or time. You can only use columns in Column Information and do not assume non-existent columns. If the existing columns can't meet user's command, just select the most related columns in Column Information. +3. Use the original dimension columns without any process. Aggregate the measure columns using aggregation methods no matter what chart type the user has specified. Don't use unsupported methods. If current keywords and methods can't meet your needs, just simply select the column without any process. +4. Group the data using dimension columns. +5. You can also use WHERE, HAVING, ORDER BY, LIMIT in your sql if necessary. Use the supported operators to finish the WHERE and HAVING. You can only use binary expression such as columnA = value1, sum_b > 0. You can only use dimension values appearing in the domain of dimension columns in your expression. + +Let's think step by step. + +User will parse the content of your response with JSON.parse() directly without further process. Response one JSON object without any additional words. Your JSON object must contain sql and fieldInfo. + +Response in the following format: +\`\`\` +{ + ${showThoughts ? 'thoughts: string //your thoughts' : ''} + sql: string; //your sql. Note that it's a string in a JSON object so it must be in one line without any \\n. + fieldInfo: { + fieldName: string; //name of the field. + description?: string; //description of the field. If it is an aggregated field, please describe how it is generated in detail. + }[]; //array of the information about the fields in your sql. Describing its aggregation method and other information of the fields. +} +\`\`\` + +#Examples: + +User's Command: Show me the change of the GDP rankings of each country. +Column Information: [{"fieldName":"country","type":"string","role":"dimension"},{"fieldName":"continent","type":"string","role":"dimension"},{"fieldName":"GDP","type":"float","role":"measure"},{"fieldName":"year","type":"int","role":"measure"}] + +Response: +\`\`\` +{ + ${showThoughts ? '"thoughts": string //your thoughts' : ''} + "sql": "SELECT \`country\`, \`year\`, SUM(\`GDP\`) AS \`total_GDP\` FROM ${VMIND_DATA_SOURCE} GROUP BY \`country\`, \`year\` ORDER BY \`year\`, \`total_GDP\` DESC", + "fieldInfo": [ + { + "fieldName": "country", + "description": "The name of the country." + }, + { + "fieldName": "year", + "description": "The year of the GDP data." + }, + { + "fieldName": "total_GDP", + "description": "An aggregated field representing the total GDP of each country in each year. It is generated by summing up the GDP values for each country in each year." + } + ] +} +\`\`\` +---------------------------------- + +User's Command: 请使用[柱状图]展示[2022年GDP排名前五的中国城市及其2022年的GDP]. +Column Information: [{"fieldName":"城市","type":"string","role":"dimension"},{"fieldName":"2022年GDP(亿元)","type":"int","role":"measure"}] + +Response: +\`\`\` +{ + ${showThoughts ? '"thoughts": string //your thoughts' : ''} + "sql": "SELECT 城市, SUM(\`2022年GDP(亿元)\`) as \`sum_2022_GDP\` FROM ${VMIND_DATA_SOURCE} ORDER BY \`sum_2022_GDP\` DESC LIMIT 5", + "fieldInfo": [ + { + "fieldName": "城市", + "description": "The name of the city." + }, + { + "fieldName": "sum_2022_GDP", + "description": "The GDP value of the city in 2022." + } + ] +} +\`\`\` +---------------------------------- + +You only need to return the JSON in your response directly to the user. +Finish your tasks in one-step. + +# Constraints: +1. Write your sql statement in one line without any \\n. Your sql must be executable by alasql. +2. Please don't change or translate the field names in your sql statement. Don't miss the GROUP BY in your sql. +3. Wrap all the columns with \`\` in your sql. +4. Response the JSON object directly without any other contents. Make sure it can be directly parsed by JSON.parse() in JavaScript. +`; diff --git a/packages/vmind/src/common/dataProcess/dataQuery.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/dataQuery.ts similarity index 96% rename from packages/vmind/src/common/dataProcess/dataQuery.ts rename to packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/dataQuery.ts index 091cfcc0..9e025189 100644 --- a/packages/vmind/src/common/dataProcess/dataQuery.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/dataQuery.ts @@ -8,8 +8,7 @@ import { sumAllMeasureFields } from './utils'; import alasql from 'alasql'; - -export const VMIND_DATA_SOURCE = 'VMind_data_source'; +import { VMIND_DATA_SOURCE } from '../../prompts/GPT/template'; /** * SQL query for SourceDatset diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts new file mode 100644 index 00000000..3006934d --- /dev/null +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts @@ -0,0 +1,4 @@ +import { RuleBasedTaskNode } from 'src/base/taskNode/ruleBasedTaskNode'; +import { Transformer } from 'src/base/tools/transformer'; + +export const ExecuteQueryTaskNode = new RuleBasedTaskNode(); diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts new file mode 100644 index 00000000..e69de29b diff --git a/packages/vmind/src/common/dataProcess/utils.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/utils.ts similarity index 99% rename from packages/vmind/src/common/dataProcess/utils.ts rename to packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/utils.ts index 596adc49..73b184cf 100644 --- a/packages/vmind/src/common/dataProcess/utils.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/utils.ts @@ -1,9 +1,9 @@ import { sampleSize, isNumber, isInteger, isString, isArray, capitalize, startCase } from 'lodash'; -import { DataItem, DataType, ROLE, SimpleFieldInfo } from '../../typings'; +import { DataItem, DataType, ROLE, SimpleFieldInfo } from '../../../../typings'; import dayjs from 'dayjs'; import { uniqArray } from '@visactor/vutils'; import alasql from 'alasql'; -import { RESERVE_REPLACE_MAP, operators } from './constants'; +import { RESERVE_REPLACE_MAP, operators } from '../../../../common/dataProcess/constants'; export const readTopNLine = (csvFile: string, n: number) => { // get top n lines of a csv file diff --git a/packages/vmind/src/applications/dataAggregation/types.ts b/packages/vmind/src/applications/dataAggregation/types.ts new file mode 100644 index 00000000..2a6bcbce --- /dev/null +++ b/packages/vmind/src/applications/dataAggregation/types.ts @@ -0,0 +1,9 @@ +import { DataItem, ILLMOptions, SimpleFieldInfo } from 'src/typings'; + +export type SQL = string; +export type DataAggregationContext = { + llmOptions: ILLMOptions; + userInput: string; + fieldInfo: SimpleFieldInfo[]; + sourceDataset: DataItem[]; +}; diff --git a/packages/vmind/src/base/application/index.ts b/packages/vmind/src/base/application/index.ts index aaa672eb..e33ee6d9 100644 --- a/packages/vmind/src/base/application/index.ts +++ b/packages/vmind/src/base/application/index.ts @@ -8,20 +8,38 @@ import { IApplication } from './types'; * Application can be seen as a collection of a series of TaskNodes. Applications can reference each other (equivalent to reusing TaskNodes to complete tasks) */ export class BaseApplication implements IApplication { - tasks: BaseTaskNode[]; + tasks: { + name: string; + task: BaseTaskNode; + }[]; context: Context; chatManager: ChatManager; - constructor(tasks: BaseTaskNode[], context: Context) { + constructor( + tasks: { + name: string; + task: BaseTaskNode; + }[], + context: Context + ) { this.tasks = tasks; this.context = context; this.chatManager = new ChatManager(); } async runTasks() { - const result: DSL = this.tasks.reduce(async (pre: any, task: BaseTaskNode) => { - const result = await task.executeTask(pre); - return result; - }, this.context); + const result: DSL = this.tasks.reduce( + async ( + pre: any, + curTask: { + name: string; + task: BaseTaskNode; + } + ) => { + const result = await curTask.task.executeTask(pre); + return result; + }, + this.context + ); return result; } diff --git a/packages/vmind/src/base/application/types.ts b/packages/vmind/src/base/application/types.ts index be4ec0a8..1a6687fc 100644 --- a/packages/vmind/src/base/application/types.ts +++ b/packages/vmind/src/base/application/types.ts @@ -2,7 +2,10 @@ import { BaseTaskNode } from '../taskNode/baseTaskNode'; import { ChatManager } from '../tools/chatManager'; export interface IApplication { - tasks: BaseTaskNode[]; + tasks: { + name: string; + task: BaseTaskNode; + }[]; context: Context; chatManager: ChatManager; diff --git a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts index 86d2ab85..40160888 100644 --- a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts @@ -3,32 +3,39 @@ import { BaseTaskNode } from './baseTaskNode'; import { Parser } from 'src/base/tools/parser'; import { Patcher } from 'src/base/tools/patcher'; import { ChatManager } from 'src/base/tools/chatManager'; -import { ILLMOptions, RequestFunc } from './types'; +import { ILLMOptions, RequestFunc } from 'src/typings'; +export interface ILLMTaskNode { + prompt: Prompt; + chatManager: ChatManager; + parser: Parser; + patcher: Patcher; + + requestLLM: (context: Context) => Promise; + //parseLLMResponse: (response: any) => Partial + //patchLLMResponse: (input: Partial, context: Context) => DSL + //executeTask: (context: Context) => Promise +} /** * LLMBasedTaskNode is a task node that needs to use LLM to complete tasks - * Subclasses must assign values to prompt, parser, patcher, and requester + * Subclasses must assign values to prompt, parser and patcher */ -export class LLMBasedTaskNode extends BaseTaskNode { +export class LLMBasedTaskNode + extends BaseTaskNode + implements ILLMTaskNode +{ prompt: Prompt; + chatManager: ChatManager; parser: Parser; patcher: Patcher; - requester: RequestFunc; - chatManager: ChatManager; constructor() { super(); this.chatManager = new ChatManager(); } - async requestLLM(context: Context) { - const { llmOptions } = context; - const llmResponse = await this.requester( - this.prompt.getPrompt(context), - this.chatManager.getLatestUserMessage().content, - llmOptions - ); - return llmResponse; + async requestLLM(context: Context): Promise { + return null; } parseLLMResponse(llmResponse: any): Partial { diff --git a/packages/vmind/src/base/taskNode/types.ts b/packages/vmind/src/base/taskNode/types.ts index fa39757a..0121ad63 100644 --- a/packages/vmind/src/base/taskNode/types.ts +++ b/packages/vmind/src/base/taskNode/types.ts @@ -1,45 +1,3 @@ export interface ITaskNode { executeTask: (context: Context) => Promise | DSL; } - -export type LLMResponse = { - choices: { - index: number; - message: any; - }[]; - usage: any; - [key: string]: any; -}; - -export type RequestFunc = ( - prompt: string, - userMessage: string, - options: ILLMOptions | undefined -) => Promise; - -//models that VMind support -//more models is under developing -export enum Model { - GPT3_5 = 'gpt-3.5-turbo', - GPT4 = 'gpt-4', - SKYLARK = 'skylark-pro', - SKYLARK2 = 'skylark2-pro-4k', - CHART_ADVISOR = 'chart-advisor' -} - -export interface ILLMOptions { - url?: string; //URL of your LLM service. For gpt, default is openAI API. - /** llm request header, which has higher priority */ - headers?: HeadersInit; // this will be used directly as the header of the LLM request. - method?: 'POST' | 'GET'; //post or get - model?: Model | string; - max_tokens?: number; - temperature?: number; - showThoughts?: boolean; - customRequestFunc?: { - chartAdvisor?: RequestFunc; - dataProcess?: RequestFunc; - dataQuery?: RequestFunc; - }; - [key: string]: any; -} diff --git a/packages/vmind/src/base/taskNode/utils.ts b/packages/vmind/src/base/taskNode/utils.ts index 5537d909..a297250c 100644 --- a/packages/vmind/src/base/taskNode/utils.ts +++ b/packages/vmind/src/base/taskNode/utils.ts @@ -2,7 +2,7 @@ import axios from 'axios'; import JSON5 from 'json5'; import { omit } from 'lodash'; import { matchJSONStr } from '../../common/utils'; -import { ILLMOptions, LLMResponse } from './types'; +import { ILLMOptions, LLMResponse } from 'src/typings'; export const requestGPT = async ( prompt: string, diff --git a/packages/vmind/src/base/tools/parser/types.ts b/packages/vmind/src/base/tools/parser/types.ts index bb02abc0..ffbc7ad8 100644 --- a/packages/vmind/src/base/tools/parser/types.ts +++ b/packages/vmind/src/base/tools/parser/types.ts @@ -1,6 +1,6 @@ import { Transformer } from '../transformer'; export interface IParser { - transformer: Transformer; + transformer: Transformer; parse: (input: string) => DSL; } diff --git a/packages/vmind/src/base/tools/transformer/index.ts b/packages/vmind/src/base/tools/transformer/index.ts index 7069509e..f014e528 100644 --- a/packages/vmind/src/base/tools/transformer/index.ts +++ b/packages/vmind/src/base/tools/transformer/index.ts @@ -6,7 +6,11 @@ import { ITransformer } from './types'; * The subclass needs to rewrite the transform method to complete the specific data conversion */ export class Transformer implements ITransformer { + transformFunc: (input: Input, context: Context) => DSL; + constructor(transFunc: (input: Input, context: Context) => DSL) { + this.transformFunc = transFunc; + } transform(input: Input, context: Context): DSL { - return input as unknown as DSL; + return this.transformFunc(input, context); } } diff --git a/packages/vmind/src/base/tools/transformer/types.ts b/packages/vmind/src/base/tools/transformer/types.ts index 859528f5..1b852006 100644 --- a/packages/vmind/src/base/tools/transformer/types.ts +++ b/packages/vmind/src/base/tools/transformer/types.ts @@ -1,3 +1,4 @@ export interface ITransformer { + transformFunc: (input: Input, context: Context) => DSL; transform: (input: Input, context: Context) => DSL; } diff --git a/packages/vmind/src/common/dataProcess/constants.ts b/packages/vmind/src/common/dataProcess/constants.ts index 869f8fb2..c14b0cc7 100644 --- a/packages/vmind/src/common/dataProcess/constants.ts +++ b/packages/vmind/src/common/dataProcess/constants.ts @@ -1,4 +1,4 @@ -import { generateRandomString } from './utils'; +import { generateRandomString } from '../../applications/dataAggregation/taskNodes/executeQuery/utils'; export const alasqlKeywordList = [ 'ABSOLUTE', diff --git a/packages/vmind/src/common/dataProcess/index.ts b/packages/vmind/src/common/dataProcess/index.ts index 2d31fea4..e402a4bd 100644 --- a/packages/vmind/src/common/dataProcess/index.ts +++ b/packages/vmind/src/common/dataProcess/index.ts @@ -1,6 +1,6 @@ import { DataSet, DataView, csvParser, fold } from '@visactor/vdataset'; import { DataItem, DataType, SimpleFieldInfo } from '../../typings'; -import { getFieldInfo } from './utils'; +import { getFieldInfo } from '../../applications/dataAggregation/taskNodes/executeQuery/utils'; import { isNil } from 'lodash'; export const parseCSVWithVChart = (csvString: string) => { diff --git a/packages/vmind/src/gpt/dataProcess/index.ts b/packages/vmind/src/gpt/dataProcess/index.ts index 2495f791..782bda57 100644 --- a/packages/vmind/src/gpt/dataProcess/index.ts +++ b/packages/vmind/src/gpt/dataProcess/index.ts @@ -1,5 +1,5 @@ import { convertNumberField, getDataset, parseCSVData } from '../../common/dataProcess'; -import { getFieldDomain, readTopNLine } from '../../common/dataProcess/utils'; +import { getFieldDomain, readTopNLine } from '../../applications/dataAggregation/taskNodes/executeQuery/utils'; import { ILLMOptions, SimpleFieldInfo } from '../../typings'; import { parseGPTResponse, requestGPT } from '../../base/taskNode/utils'; import { DataProcessPromptEnglish } from './prompts'; diff --git a/packages/vmind/src/gpt/dataProcess/prompts.ts b/packages/vmind/src/gpt/dataProcess/prompts.ts index c1f0b817..06165b5b 100644 --- a/packages/vmind/src/gpt/dataProcess/prompts.ts +++ b/packages/vmind/src/gpt/dataProcess/prompts.ts @@ -1,4 +1,4 @@ -import { VMIND_DATA_SOURCE } from '../..//common/dataProcess/dataQuery'; +import { VMIND_DATA_SOURCE } from '../../applications/dataAggregation/taskNodes/executeQuery/dataQuery'; export const DataProcessPromptEnglish = `You are an expert in data analysis. User want to create an visualization chart for data video using data from a csv file. Let's think step by step. Fill your thoughts in {THOUGHT}. @@ -194,99 +194,3 @@ Response: } \`\`\` `; - -export const getQueryDatasetPrompt = ( - showThoughts: boolean -) => `You are an expert in data analysis. Here is a raw dataset named ${VMIND_DATA_SOURCE}. User will tell you his command and column information of ${VMIND_DATA_SOURCE}. Your task is to generate a sql and fieldInfo according to Instruction. Response one JSON object only. - -# Instruction -- Supported sql keywords: ["SELECT", "FROM", "WHERE", "GROUP BY", "HAVING", "ORDER BY", "LIMIT", "DISTINCT"]. Supported aggregation methods: ["MAX()", "MIN()", "SUM()", "COUNT()", "AVG()"]. -- Generate a sql query like this: "SELECT \`columnA\`, SUM(\`columnB\`) as \`sum_b\` FROM ${VMIND_DATA_SOURCE} WHERE \`columnA\` = value1 GROUP BY \`columnA\` HAVING \`sum_b\`>0 ORDER BY \`sum_b\` LIMIT 10". -- Don't use unsupported keywords such as WITHIN, FIELD, RANK() OVER, OVER. Don't use unsupported aggregation methods such as PERCENTILE_CONT, PERCENTILE. Don't use unsupported operators. We will execute your sql using alasql. Unsupported keywords, methods and operators will cause system crash. If current keywords and methods can't meet your needs, just simply select the column without any process. -- Don't use aliases in HAVING. -- Make your sql as simple as possible. - -You need to follow the steps below. - -# Steps -1. Extract the part related to the data from the user's instruction. Ignore other parts that is not related to the data. -2. Select useful dimension and measure columns from ${VMIND_DATA_SOURCE}. Don't miss some important columns such as dimensions related to date or time. You can only use columns in Column Information and do not assume non-existent columns. If the existing columns can't meet user's command, just select the most related columns in Column Information. -3. Use the original dimension columns without any process. Aggregate the measure columns using aggregation methods no matter what chart type the user has specified. Don't use unsupported methods. If current keywords and methods can't meet your needs, just simply select the column without any process. -4. Group the data using dimension columns. -5. You can also use WHERE, HAVING, ORDER BY, LIMIT in your sql if necessary. Use the supported operators to finish the WHERE and HAVING. You can only use binary expression such as columnA = value1, sum_b > 0. You can only use dimension values appearing in the domain of dimension columns in your expression. - -Let's think step by step. - -User will parse the content of your response with JSON.parse() directly without further process. Response one JSON object without any additional words. Your JSON object must contain sql and fieldInfo. - -Response in the following format: -\`\`\` -{ - ${showThoughts ? 'thoughts: string //your thoughts' : ''} - sql: string; //your sql. Note that it's a string in a JSON object so it must be in one line without any \\n. - fieldInfo: { - fieldName: string; //name of the field. - description?: string; //description of the field. If it is an aggregated field, please describe how it is generated in detail. - }[]; //array of the information about the fields in your sql. Describing its aggregation method and other information of the fields. -} -\`\`\` - -#Examples: - -User's Command: Show me the change of the GDP rankings of each country. -Column Information: [{"fieldName":"country","type":"string","role":"dimension"},{"fieldName":"continent","type":"string","role":"dimension"},{"fieldName":"GDP","type":"float","role":"measure"},{"fieldName":"year","type":"int","role":"measure"}] - -Response: -\`\`\` -{ - ${showThoughts ? '"thoughts": string //your thoughts' : ''} - "sql": "SELECT \`country\`, \`year\`, SUM(\`GDP\`) AS \`total_GDP\` FROM ${VMIND_DATA_SOURCE} GROUP BY \`country\`, \`year\` ORDER BY \`year\`, \`total_GDP\` DESC", - "fieldInfo": [ - { - "fieldName": "country", - "description": "The name of the country." - }, - { - "fieldName": "year", - "description": "The year of the GDP data." - }, - { - "fieldName": "total_GDP", - "description": "An aggregated field representing the total GDP of each country in each year. It is generated by summing up the GDP values for each country in each year." - } - ] -} -\`\`\` ----------------------------------- - -User's Command: 请使用[柱状图]展示[2022年GDP排名前五的中国城市及其2022年的GDP]. -Column Information: [{"fieldName":"城市","type":"string","role":"dimension"},{"fieldName":"2022年GDP(亿元)","type":"int","role":"measure"}] - -Response: -\`\`\` -{ - ${showThoughts ? '"thoughts": string //your thoughts' : ''} - "sql": "SELECT 城市, SUM(\`2022年GDP(亿元)\`) as \`sum_2022_GDP\` FROM ${VMIND_DATA_SOURCE} ORDER BY \`sum_2022_GDP\` DESC LIMIT 5", - "fieldInfo": [ - { - "fieldName": "城市", - "description": "The name of the city." - }, - { - "fieldName": "sum_2022_GDP", - "description": "The GDP value of the city in 2022." - } - ] -} -\`\`\` ----------------------------------- - -You only need to return the JSON in your response directly to the user. -Finish your tasks in one-step. - -# Constraints: -1. Write your sql statement in one line without any \\n. Your sql must be executable by alasql. -2. Please don't change or translate the field names in your sql statement. Don't miss the GROUP BY in your sql. -3. Wrap all the columns with \`\` in your sql. -4. Response the JSON object directly without any other contents. Make sure it can be directly parsed by JSON.parse() in JavaScript. -`; diff --git a/packages/vmind/src/gpt/dataProcess/query/astPipes.ts b/packages/vmind/src/gpt/dataProcess/query/astPipes.ts index b3c5ce2e..e900b780 100644 --- a/packages/vmind/src/gpt/dataProcess/query/astPipes.ts +++ b/packages/vmind/src/gpt/dataProcess/query/astPipes.ts @@ -13,7 +13,7 @@ import { ASTParserContext, ASTParserPipe } from './type'; import { checkIsColumnNode, toFirstUpperCase } from './utils'; import { SimpleFieldInfo } from '../../../typings'; import { isArray } from 'lodash'; -import { replaceString } from '../../../common/dataProcess/utils'; +import { replaceString } from '../../../applications/dataAggregation/taskNodes/executeQuery/utils'; export const from: ASTParserPipe = (query: Partial, context: ASTParserContext) => { const { dataSource, fieldInfo } = context; diff --git a/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts b/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts index a8a297f7..40ee2480 100644 --- a/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts +++ b/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts @@ -3,7 +3,7 @@ import { parseGPTQueryResponse, parseRespondField, patchQueryInput } from './uti import { DataQueryResponse } from './type'; import { parseGPTResponse as parseGPTResponseAsJSON, requestGPT } from '../../../base/taskNode/utils'; import { getQueryDatasetPrompt } from '../prompts'; -import { queryDataset } from '../../../common/dataProcess/dataQuery'; +import { queryDataset } from '../../../applications/dataAggregation/taskNodes/executeQuery/dataQuery'; /** * query the source dataset according to user's input and fieldInfo to get aggregated dataset diff --git a/packages/vmind/src/gpt/dataProcess/query/utils.ts b/packages/vmind/src/gpt/dataProcess/query/utils.ts index 6be55806..17e7a637 100644 --- a/packages/vmind/src/gpt/dataProcess/query/utils.ts +++ b/packages/vmind/src/gpt/dataProcess/query/utils.ts @@ -7,7 +7,7 @@ import { generateRandomString, mergeMap, replaceNonASCIICharacters -} from '../../../common/dataProcess/utils'; +} from '../../../applications/dataAggregation/taskNodes/executeQuery/utils'; import { DataItem, SimpleFieldInfo } from '../../../typings'; import { ASTParserContext, ASTParserPipe } from './type'; diff --git a/packages/vmind/src/skylark/dataProcess/query/prompts.ts b/packages/vmind/src/skylark/dataProcess/query/prompts.ts index 973d32e5..80b8cbd1 100644 --- a/packages/vmind/src/skylark/dataProcess/query/prompts.ts +++ b/packages/vmind/src/skylark/dataProcess/query/prompts.ts @@ -1,4 +1,4 @@ -import { VMIND_DATA_SOURCE } from '../../../common/dataProcess/dataQuery'; +import { VMIND_DATA_SOURCE } from '../../../applications/dataAggregation/taskNodes/executeQuery/dataQuery'; export const getQueryDatasetPrompt = ( showThoughts: boolean diff --git a/packages/vmind/src/skylark/dataProcess/query/queryDataset.ts b/packages/vmind/src/skylark/dataProcess/query/queryDataset.ts index d8642de5..af7ff51e 100644 --- a/packages/vmind/src/skylark/dataProcess/query/queryDataset.ts +++ b/packages/vmind/src/skylark/dataProcess/query/queryDataset.ts @@ -4,7 +4,7 @@ import { getQueryDatasetPrompt } from './prompts'; import { requestSkyLark } from '../../chart-generation/utils'; import { parseRespondField } from '../../../gpt/dataProcess/query/utils'; import { parseSkylarkResponseAsJSON, patchDataQueryInput } from './utils'; -import { queryDataset } from '../../../common/dataProcess/dataQuery'; +import { queryDataset } from '../../../applications/dataAggregation/taskNodes/executeQuery/dataQuery'; /** * query the source dataset according to user's input and fieldInfo to get aggregated dataset diff --git a/packages/vmind/src/skylark/dataProcess/query/utils.ts b/packages/vmind/src/skylark/dataProcess/query/utils.ts index c81878d4..f179aa37 100644 --- a/packages/vmind/src/skylark/dataProcess/query/utils.ts +++ b/packages/vmind/src/skylark/dataProcess/query/utils.ts @@ -1,6 +1,6 @@ import { LLMResponse } from 'src/typings'; import JSON5 from 'json5'; -import { replaceAll } from '../../../common/dataProcess/utils'; +import { replaceAll } from '../../../applications/dataAggregation/taskNodes/executeQuery/utils'; import { matchJSONStr } from '../../../common/utils'; export const parseJson = (JsonStr: string, prefix?: string) => { diff --git a/packages/vmind/src/typings/index.ts b/packages/vmind/src/typings/index.ts index 34712cde..4d6bb897 100644 --- a/packages/vmind/src/typings/index.ts +++ b/packages/vmind/src/typings/index.ts @@ -1,5 +1,46 @@ import type { FFmpeg } from '@ffmpeg/ffmpeg'; import type { ManualTicker, DefaultTimeline } from '@visactor/vrender-core'; +//models that VMind support +//more models is under developing +export enum Model { + GPT3_5 = 'gpt-3.5-turbo', + GPT4 = 'gpt-4', + SKYLARK = 'skylark-pro', + SKYLARK2 = 'skylark2-pro-4k', + CHART_ADVISOR = 'chart-advisor' +} + +export type LLMResponse = { + choices: { + index: number; + message: any; + }[]; + usage: any; + [key: string]: any; +}; + +export type RequestFunc = ( + prompt: string, + userMessage: string, + options: ILLMOptions | undefined +) => Promise; + +export interface ILLMOptions { + url?: string; //URL of your LLM service. For gpt, default is openAI API. + /** llm request header, which has higher priority */ + headers?: HeadersInit; // this will be used directly as the header of the LLM request. + method?: 'POST' | 'GET'; //post or get + model?: Model | string; + max_tokens?: number; + temperature?: number; + showThoughts?: boolean; + customRequestFunc?: { + chartAdvisor?: RequestFunc; + dataProcess?: RequestFunc; + dataQuery?: RequestFunc; + }; + [key: string]: any; +} export type SimpleFieldInfo = { fieldName: string; @@ -120,6 +161,8 @@ export type ChartGenerationProps = { export type DataItem = Record; +export type VMindDataset = DataItem[]; + export type PatchContext = { chartType: string; cell: Cell; From 62824d2edbd327e69bb46a51d47631bed205dd7c Mon Sep 17 00:00:00 2001 From: da730 Date: Tue, 9 Apr 2024 21:45:01 +0800 Subject: [PATCH 13/62] feat: finish data aggregation nodes --- .../taskNodes/executeQuery/dataQuery.ts | 54 -------- .../taskNodes/executeQuery/index.ts | 10 +- .../taskNodes/executeQuery/transformers.ts | 126 ++++++++++++++++++ .../taskNodes/executeQuery/types.ts | 6 + .../taskNodes/executeQuery/utils.ts | 15 +++ .../taskNodes/getQuerySQL/GPT/index.ts | 32 +++++ .../getQuerySQL/GPT/prompt}/index.ts | 5 +- .../getQuerySQL/GPT/prompt}/template.ts | 0 .../taskNodes/getQuerySQL/GPT/types.ts | 0 .../taskNodes/getQuerySQL/GPT}/utils.ts | 49 ++++++- .../taskNodes/getQuerySQL/index.ts | 0 .../taskNodes/getQuerySQL/types.ts | 8 ++ .../src/applications/dataAggregation/types.ts | 9 +- .../vmind/src/base/taskNode/baseTaskNode.ts | 6 +- .../src/base/taskNode/llmBasedTaskNode.ts | 17 +-- .../src/base/taskNode/ruleBasedTaskNode.ts | 20 ++- packages/vmind/src/base/taskNode/types.ts | 4 +- packages/vmind/src/base/tools/parser/index.ts | 15 +-- packages/vmind/src/base/tools/parser/types.ts | 6 - .../vmind/src/base/tools/patcher/index.ts | 13 +- .../vmind/src/base/tools/transformer/index.ts | 12 +- .../vmind/src/base/tools/transformer/types.ts | 4 - .../src/gpt/chart-generation/NLToChart.ts | 2 +- packages/vmind/src/gpt/dataProcess/index.ts | 2 +- .../src/gpt/dataProcess/query/queryDataset.ts | 5 +- .../vmind/src/gpt/dataProcess/query/utils.ts | 21 --- packages/vmind/src/typings/index.ts | 8 -- 27 files changed, 284 insertions(+), 165 deletions(-) delete mode 100644 packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/dataQuery.ts create mode 100644 packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/types.ts create mode 100644 packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/index.ts rename packages/vmind/src/applications/dataAggregation/{prompts/GPT => taskNodes/getQuerySQL/GPT/prompt}/index.ts (79%) rename packages/vmind/src/applications/dataAggregation/{prompts/GPT => taskNodes/getQuerySQL/GPT/prompt}/template.ts (100%) create mode 100644 packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/types.ts rename packages/vmind/src/{base/taskNode => applications/dataAggregation/taskNodes/getQuerySQL/GPT}/utils.ts (61%) create mode 100644 packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/index.ts create mode 100644 packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/types.ts delete mode 100644 packages/vmind/src/base/tools/parser/types.ts delete mode 100644 packages/vmind/src/base/tools/transformer/types.ts diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/dataQuery.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/dataQuery.ts deleted file mode 100644 index 9e025189..00000000 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/dataQuery.ts +++ /dev/null @@ -1,54 +0,0 @@ -import { DataItem, SimpleFieldInfo } from 'src/typings'; -import { - replaceDataset, - replaceInvalidWords, - swapMap, - replaceBlankSpace, - replaceString, - sumAllMeasureFields -} from './utils'; -import alasql from 'alasql'; -import { VMIND_DATA_SOURCE } from '../../prompts/GPT/template'; - -/** - * SQL query for SourceDatset - * It has nothing to do with the model type model - * @param sql - * @param sourceDataset - * @param fieldInfo - * @returns dataset after query - */ -export const queryDataset = (sql: string, sourceDataset: DataItem[], fieldInfo: SimpleFieldInfo[]) => { - const fieldNames = fieldInfo.map(field => field.fieldName); - const { validStr, sqlReplaceMap, columnReplaceMap } = replaceInvalidWords(sql, fieldNames); - - //replace field names according to replaceMap - const validColumnDataset = replaceDataset(sourceDataset, columnReplaceMap, true); - - //replace field names and data values according to replaceMap - const validDataset = replaceDataset(validColumnDataset, sqlReplaceMap, false); - - //replace blank spaces in column name - const replacedFieldNames = fieldNames - .map(field => replaceString(field, columnReplaceMap)) - .map(field => replaceString(field, sqlReplaceMap)); - const validSql = replaceBlankSpace(validStr, replacedFieldNames as string[]); - - const finalSql = sumAllMeasureFields(validSql, fieldInfo, columnReplaceMap, sqlReplaceMap); - //convertGroupByToString(finalSql, validDataset) - - //replace VMIND_DATA_SOURCE with placeholder "?" - const sqlParts = (finalSql + ' ').split(VMIND_DATA_SOURCE); - const sqlCount = sqlParts.length - 1; - const alasqlQuery = sqlParts.join('?'); - //do the query - const alasqlDataset = alasql(alasqlQuery, new Array(sqlCount).fill(validDataset)); - - //restore the dataset - const columnReversedMap = swapMap(columnReplaceMap); - const columnRestoredDataset = replaceDataset(alasqlDataset, columnReversedMap, true); - const sqlReversedMap = swapMap(sqlReplaceMap); - const sqlRestoredDataset = replaceDataset(columnRestoredDataset, sqlReversedMap, false); - - return sqlRestoredDataset; -}; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts index 3006934d..9b04573c 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts @@ -1,4 +1,10 @@ import { RuleBasedTaskNode } from 'src/base/taskNode/ruleBasedTaskNode'; -import { Transformer } from 'src/base/tools/transformer'; +import { DataAggregationResult } from '../../types'; +import { executeDataQuery, getFinalQueryResult, patchSQLBeforeQuery, restoreDatasetAfterQuery } from './transformers'; +import { ExecuteQueryContext, ExecuteQueryInput } from './types'; -export const ExecuteQueryTaskNode = new RuleBasedTaskNode(); +export const ExecuteQueryTaskNode = new RuleBasedTaskNode< + ExecuteQueryInput, + ExecuteQueryContext, + DataAggregationResult +>([patchSQLBeforeQuery, executeDataQuery, restoreDatasetAfterQuery, getFinalQueryResult]); diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts index e69de29b..34e6e362 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts @@ -0,0 +1,126 @@ +import { Transformer } from 'src/base/tools/transformer'; +import { SimpleFieldInfo, VMindDataset } from 'src/typings'; +import { DataAggregationResult, SQL } from '../../types'; +import { + parseRespondField, + replaceBlankSpace, + replaceDataset, + replaceInvalidWords, + replaceString, + sumAllMeasureFields, + swapMap +} from './utils'; +import alasql from 'alasql'; +import { VMIND_DATA_SOURCE } from '../getQuerySQL/GPT/prompt/template'; +import { ExecuteQueryContext, ExecuteQueryInput } from './types'; + +/** + * patch the errors in sql according to the feature of alasql: + * 1. replace invalid characters such as operator, non-ascii characters and alasql keywords in sql and dataset before executing. + * 2. sum all the non-aggregation measure columns + * @param sourceDataset + * @param context + * @returns valid sql string and dataset, and the replace map + */ +type PatchSQLResult = { + finalSql: SQL; + validDataset: VMindDataset; + columnReplaceMap: Map; + sqlReplaceMap: Map; +}; +export const patchSQLBeforeQuery: Transformer = ( + input, + context: ExecuteQueryContext +) => { + const { sql } = input; + const { sourceDataset } = context; + const { fieldInfo } = context; + const fieldNames = fieldInfo.map(field => field.fieldName); + const { validStr, sqlReplaceMap, columnReplaceMap } = replaceInvalidWords(sql, fieldNames); + + //replace field names according to replaceMap + const validColumnDataset = replaceDataset(sourceDataset, columnReplaceMap, true); + + //replace field names and data values according to replaceMap + const validDataset = replaceDataset(validColumnDataset, sqlReplaceMap, false); + + //replace blank spaces in column name + const replacedFieldNames = fieldNames + .map(field => replaceString(field, columnReplaceMap)) + .map(field => replaceString(field, sqlReplaceMap)); + const validSql = replaceBlankSpace(validStr, replacedFieldNames as string[]); + + //sum all the non-aggregation measure columns + const finalSql = sumAllMeasureFields(validSql, fieldInfo, columnReplaceMap, sqlReplaceMap); + + return { + finalSql, + validDataset, + columnReplaceMap, + sqlReplaceMap + }; +}; + +type QueryResult = PatchSQLResult & { alasqlDataset: VMindDataset }; +/** + * execute sql after patching using alasql + * @param input + * @param context + * @returns dataset after executing sql query + */ +export const executeDataQuery: Transformer = ( + input: PatchSQLResult, + context: ExecuteQueryContext +) => { + const { finalSql, validDataset } = input; + //replace VMIND_DATA_SOURCE with placeholder "?" + const sqlParts = (finalSql + ' ').split(VMIND_DATA_SOURCE); + const sqlCount = sqlParts.length - 1; + const alasqlQuery = sqlParts.join('?'); + //do the query + const alasqlDataset = alasql(alasqlQuery, new Array(sqlCount).fill(validDataset)); + + return { + ...input, + alasqlDataset + }; +}; + +type RestoreResult = VMindDataset; +/** + * restore the dataset after query according to replace maps + * @param input + * @param context + * @returns restored dataset + */ +export const restoreDatasetAfterQuery: Transformer = ( + input: QueryResult, + context: ExecuteQueryContext +) => { + const { columnReplaceMap, sqlReplaceMap, alasqlDataset } = input; + //restore the dataset + const columnReversedMap = swapMap(columnReplaceMap); + const columnRestoredDataset = replaceDataset(alasqlDataset, columnReversedMap, true); + const sqlReversedMap = swapMap(sqlReplaceMap); + const sqlRestoredDataset = replaceDataset(columnRestoredDataset, sqlReversedMap, false); + + return sqlRestoredDataset; +}; + +export const getFinalQueryResult: Transformer = ( + input: RestoreResult, + context: ExecuteQueryContext +) => { + const { llmFieldInfo: responseFieldInfo, sourceDataset, fieldInfo, usage } = context; + const datasetAfterQuery = input; + const fieldInfoNew = parseRespondField(responseFieldInfo, datasetAfterQuery); + if (datasetAfterQuery.length === 0) { + console.warn('empty dataset after query!'); + } + + return { + dataset: datasetAfterQuery.length === 0 ? sourceDataset : datasetAfterQuery, + fieldInfo: datasetAfterQuery.length === 0 ? fieldInfo : fieldInfoNew, + usage + }; +}; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/types.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/types.ts new file mode 100644 index 00000000..cd732ce6 --- /dev/null +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/types.ts @@ -0,0 +1,6 @@ +import { SimpleFieldInfo } from 'src/typings'; +import { DataAggregationContext, SQL } from '../../types'; +import { GetQuerySQLResult } from '../getQuerySQL/types'; + +export type ExecuteQueryContext = DataAggregationContext & GetQuerySQLResult; +export type ExecuteQueryInput = GetQuerySQLResult; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/utils.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/utils.ts index 73b184cf..7b2576b0 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/utils.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/utils.ts @@ -427,3 +427,18 @@ export const convertGroupByToString = (sql: string, dataset: DataItem[]) => { }); }); }; + +/** + * parse the respond field in data query to get field type and role + * @param fieldInfo + * @param responseFieldInfo + * @param dataset + */ +export const parseRespondField = ( + responseFieldInfo: { fieldName: string; description?: string }[], + dataset: DataItem[] +) => + responseFieldInfo.map(field => ({ + ...field, + ...detectFieldType(dataset, field.fieldName) + })); diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/index.ts new file mode 100644 index 00000000..2164670c --- /dev/null +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/index.ts @@ -0,0 +1,32 @@ +import { ILLMTaskNode, LLMBasedTaskNode } from 'src/base/taskNode/llmBasedTaskNode'; +import { Parser } from 'src/base/tools/parser'; +import { Patcher } from 'src/base/tools/patcher'; +import { Transformer } from 'src/base/tools/transformer'; +import { GetQuerySQLResult } from '../types'; +import { parseDataQueryResponse, requestGPT } from './utils'; +import { GPTDataAggregationPrompt } from './prompt'; +import { DataAggregationContext } from 'src/applications/dataAggregation/types'; + +export class GetSQLGPTNode + extends LLMBasedTaskNode + implements ILLMTaskNode +{ + constructor() { + super(); + this.parser = parseDataQueryResponse; + this.patcher = new Patcher([ + (input: Partial, context: DataAggregationContext) => input as GetQuerySQLResult + ]); + this.prompt = new GPTDataAggregationPrompt(); + } + + async requestLLM(context: DataAggregationContext): Promise { + const { userInput, fieldInfo, llmOptions } = context; + const queryDatasetMessage = `User's Command: ${userInput}\nColumn Information: ${JSON.stringify(fieldInfo)}`; + + const requestFunc = llmOptions.customRequestFunc?.dataQuery ?? requestGPT; + const QueryDatasetPrompt = this.prompt.getPrompt(context); + const dataProcessRes = await requestFunc(QueryDatasetPrompt, queryDatasetMessage, llmOptions); + return dataProcessRes; + } +} diff --git a/packages/vmind/src/applications/dataAggregation/prompts/GPT/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/prompt/index.ts similarity index 79% rename from packages/vmind/src/applications/dataAggregation/prompts/GPT/index.ts rename to packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/prompt/index.ts index f89d04f3..411b51fc 100644 --- a/packages/vmind/src/applications/dataAggregation/prompts/GPT/index.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/prompt/index.ts @@ -1,8 +1,11 @@ import { Prompt } from 'src/base/tools/prompt'; import { getQueryDatasetPrompt } from './template'; -import { DataAggregationContext } from '../../types'; +import { DataAggregationContext } from '../../../../types'; export class GPTDataAggregationPrompt extends Prompt { + constructor() { + super(''); + } getPrompt(context: DataAggregationContext) { const { llmOptions } = context; const QueryDatasetPrompt = getQueryDatasetPrompt(llmOptions.showThoughts ?? true); diff --git a/packages/vmind/src/applications/dataAggregation/prompts/GPT/template.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/prompt/template.ts similarity index 100% rename from packages/vmind/src/applications/dataAggregation/prompts/GPT/template.ts rename to packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/prompt/template.ts diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/types.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/types.ts new file mode 100644 index 00000000..e69de29b diff --git a/packages/vmind/src/base/taskNode/utils.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts similarity index 61% rename from packages/vmind/src/base/taskNode/utils.ts rename to packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts index a297250c..fa08dba7 100644 --- a/packages/vmind/src/base/taskNode/utils.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts @@ -1,8 +1,10 @@ import axios from 'axios'; import JSON5 from 'json5'; -import { omit } from 'lodash'; -import { matchJSONStr } from '../../common/utils'; +import { isArray, omit } from 'lodash'; +import { matchJSONStr } from 'src/common/utils'; import { ILLMOptions, LLMResponse } from 'src/typings'; +import { GetQuerySQLResult } from '../types'; +import { Parser } from 'src/base/tools/parser'; export const requestGPT = async ( prompt: string, @@ -42,7 +44,6 @@ export const requestGPT = async ( return err.response.data; } }; - export const parseGPTJson = (JsonStr: string, prefix?: string) => { const parseNoPrefixStr = (str: string) => { //尝试不带前缀的解析 @@ -69,7 +70,7 @@ export const parseGPTJson = (JsonStr: string, prefix?: string) => { return res2; }; -export const parseGPTResponse = (GPTRes: LLMResponse) => { +const parseGPTResponse = (GPTRes: LLMResponse) => { try { if (GPTRes.error) { return { @@ -81,7 +82,7 @@ export const parseGPTResponse = (GPTRes: LLMResponse) => { const content = choices[0].message.content; const jsonStr = matchJSONStr(content); - const resJson: GPTDataProcessResult = parseGPTJson(jsonStr, '```'); + const resJson = parseGPTJson(jsonStr, '```'); return resJson; } catch (err: any) { return { @@ -90,3 +91,41 @@ export const parseGPTResponse = (GPTRes: LLMResponse) => { }; } }; + +type DataQueryResponse = GetQuerySQLResult & { THOUGHT: string }; + +const parseGPTQueryResponse = (response: string) => { + const sql = response.match(/sql:\n?```(.*?)```/s)[1]; + const fieldInfoStr = response.match(/fieldInfo:\n?```(.*?)```/s)[1]; + let fieldInfo = []; + try { + const tempFieldInfo = JSON5.parse(fieldInfoStr); + if (isArray(tempFieldInfo)) { + fieldInfo = tempFieldInfo; + } else { + fieldInfo = tempFieldInfo.fieldInfo; + } + } catch (e) { + //fieldInfoStr is not a json string; try to wrap it with [] + fieldInfo = JSON5.parse(`[${fieldInfoStr}]`); + } + return { + sql, + llmFieldInfo: fieldInfo + }; +}; + +export const parseDataQueryResponse: Parser = (gptResponse: LLMResponse) => { + const dataQueryResponse: DataQueryResponse = parseGPTResponse(gptResponse); + const { sql, llmFieldInfo: responseFiledInfo } = dataQueryResponse; + if (!sql || !responseFiledInfo) { + //try to parse the response with another format + const choices = gptResponse.choices; + const content = choices[0].message.content; + return { + ...parseGPTQueryResponse(content), + usage: gptResponse.usage + }; + } + return { ...dataQueryResponse, usage: gptResponse.usage }; +}; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/index.ts new file mode 100644 index 00000000..e69de29b diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/types.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/types.ts new file mode 100644 index 00000000..81f92189 --- /dev/null +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/types.ts @@ -0,0 +1,8 @@ +import { SimpleFieldInfo } from 'src/typings'; +import { SQL } from '../../types'; + +export type GetQuerySQLResult = { + sql: SQL; + llmFieldInfo: SimpleFieldInfo[]; + usage: any; +}; diff --git a/packages/vmind/src/applications/dataAggregation/types.ts b/packages/vmind/src/applications/dataAggregation/types.ts index 2a6bcbce..18939f51 100644 --- a/packages/vmind/src/applications/dataAggregation/types.ts +++ b/packages/vmind/src/applications/dataAggregation/types.ts @@ -1,9 +1,16 @@ -import { DataItem, ILLMOptions, SimpleFieldInfo } from 'src/typings'; +import { DataItem, ILLMOptions, SimpleFieldInfo, VMindDataset } from 'src/typings'; export type SQL = string; + export type DataAggregationContext = { llmOptions: ILLMOptions; userInput: string; fieldInfo: SimpleFieldInfo[]; sourceDataset: DataItem[]; }; + +export type DataAggregationResult = { + dataset: VMindDataset; + fieldInfo: SimpleFieldInfo[]; + usage: any; +}; diff --git a/packages/vmind/src/base/taskNode/baseTaskNode.ts b/packages/vmind/src/base/taskNode/baseTaskNode.ts index f93c3c4d..e0b9881e 100644 --- a/packages/vmind/src/base/taskNode/baseTaskNode.ts +++ b/packages/vmind/src/base/taskNode/baseTaskNode.ts @@ -6,8 +6,8 @@ import { ITaskNode } from './types'; * TaskNode can be seen as a collection of a series of tools, responsible for completing a specific task. * Each Node can also be called as a separate function */ -export class BaseTaskNode implements ITaskNode { - executeTask(context: Context): Promise | DSL { - return null as DSL; +export class BaseTaskNode implements ITaskNode { + executeTask(input: Input, context: Context): Promise | Result { + return null as Result; } } diff --git a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts index 40160888..a38174e7 100644 --- a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts @@ -8,25 +8,22 @@ import { ILLMOptions, RequestFunc } from 'src/typings'; export interface ILLMTaskNode { prompt: Prompt; chatManager: ChatManager; - parser: Parser; + parser: Parser; patcher: Patcher; requestLLM: (context: Context) => Promise; - //parseLLMResponse: (response: any) => Partial - //patchLLMResponse: (input: Partial, context: Context) => DSL - //executeTask: (context: Context) => Promise } /** * LLMBasedTaskNode is a task node that needs to use LLM to complete tasks - * Subclasses must assign values to prompt, parser and patcher + * Subclasses must assign values to prompt, parser and patcher, and rewrite requestLLM function */ -export class LLMBasedTaskNode - extends BaseTaskNode +export class LLMBasedTaskNode + extends BaseTaskNode implements ILLMTaskNode { prompt: Prompt; chatManager: ChatManager; - parser: Parser; + parser: Parser; patcher: Patcher; constructor() { @@ -39,14 +36,14 @@ export class LLMBasedTaskNode } parseLLMResponse(llmResponse: any): Partial { - return this.parser.parse(llmResponse); + return this.parser(llmResponse); } patchLLMResponse(input: Partial, context: Context): DSL { return this.patcher.patch(input, context); } - async executeTask(context: Context) { + async executeTask(_input: Input, context: Context) { const llmResponse = await this.requestLLM(context); const parsedResponse = this.parseLLMResponse(llmResponse); const patchedResponse = this.patchLLMResponse(parsedResponse, context); diff --git a/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts b/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts index 4ad30c5b..c44218f7 100644 --- a/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts @@ -5,22 +5,18 @@ import { BaseTaskNode } from './baseTaskNode'; * rule-based taskNode, which consists of a series of Pipelines * It completes the transformation from Input to a specific data structure (DSL) */ -export class RuleBasedTaskNode extends BaseTaskNode { - pipelines: Transformer[]; - constructor(pipelines: Transformer[]) { +export class RuleBasedTaskNode extends BaseTaskNode { + pipelines: Transformer[]; + constructor(pipelines: Transformer[]) { super(); this.pipelines = pipelines; } - executeTask(context: Context) { - const { input } = context; - const result: DSL = this.pipelines.reduce( - (pre: Partial | Input, transformer: Transformer | Input, Context, DSL>) => { - const result = transformer.transform(pre, context); - return result; - }, - input - ) as DSL; + executeTask(input: Input, context: Context): Result { + const result: Result = this.pipelines.reduce((pre: any, transformer: Transformer) => { + const res = transformer(pre, context); + return res; + }, input); return result; } } diff --git a/packages/vmind/src/base/taskNode/types.ts b/packages/vmind/src/base/taskNode/types.ts index 0121ad63..fff74566 100644 --- a/packages/vmind/src/base/taskNode/types.ts +++ b/packages/vmind/src/base/taskNode/types.ts @@ -1,3 +1,3 @@ -export interface ITaskNode { - executeTask: (context: Context) => Promise | DSL; +export interface ITaskNode { + executeTask: (input: Input, context: Context) => Promise | DSL; } diff --git a/packages/vmind/src/base/tools/parser/index.ts b/packages/vmind/src/base/tools/parser/index.ts index ec0f4cb9..c5ed5c46 100644 --- a/packages/vmind/src/base/tools/parser/index.ts +++ b/packages/vmind/src/base/tools/parser/index.ts @@ -1,19 +1,6 @@ -import { IParser } from './types'; -import { Transformer } from '../transformer'; - /** * Parser is responsible for parsing the string content generated by LLM into DSL in a specific format (JSON or YAML) * Use a transformer to complete the conversion from string to DSL * Pass in the transformer during initialization */ -export class Parser implements IParser { - transformer: Transformer; - - constructor(transformer: Transformer) { - this.transformer = transformer; - } - - parse(input: string) { - return this.transformer.transform(input, null); - } -} +export type Parser = (input: Input) => DSL; diff --git a/packages/vmind/src/base/tools/parser/types.ts b/packages/vmind/src/base/tools/parser/types.ts deleted file mode 100644 index ffbc7ad8..00000000 --- a/packages/vmind/src/base/tools/parser/types.ts +++ /dev/null @@ -1,6 +0,0 @@ -import { Transformer } from '../transformer'; - -export interface IParser { - transformer: Transformer; - parse: (input: string) => DSL; -} diff --git a/packages/vmind/src/base/tools/patcher/index.ts b/packages/vmind/src/base/tools/patcher/index.ts index f9a6d68f..bff11797 100644 --- a/packages/vmind/src/base/tools/patcher/index.ts +++ b/packages/vmind/src/base/tools/patcher/index.ts @@ -14,14 +14,11 @@ export class Patcher implements IPatcher { this.pipelines = transformers; } - patch(input: Partial, context: Context) { - const result: DSL = this.pipelines.reduce( - (pre: Partial, transformer: Transformer, Context, DSL>) => { - const result = transformer.transform(pre, context); - return result; - }, - input - ) as DSL; + patch(input: any, context: Context) { + const result = this.pipelines.reduce((pre, pipeline) => { + const res = pipeline(pre, context); + return res; + }, input); return result; } } diff --git a/packages/vmind/src/base/tools/transformer/index.ts b/packages/vmind/src/base/tools/transformer/index.ts index f014e528..ba462bb9 100644 --- a/packages/vmind/src/base/tools/transformer/index.ts +++ b/packages/vmind/src/base/tools/transformer/index.ts @@ -1,16 +1,6 @@ -import { ITransformer } from './types'; - /** * Transformer is to finish the data conversion work * * Convert the INPUT type to DSL type based on the Context * The subclass needs to rewrite the transform method to complete the specific data conversion */ -export class Transformer implements ITransformer { - transformFunc: (input: Input, context: Context) => DSL; - constructor(transFunc: (input: Input, context: Context) => DSL) { - this.transformFunc = transFunc; - } - transform(input: Input, context: Context): DSL { - return this.transformFunc(input, context); - } -} +export type Transformer = (input: Input, context: Context) => Result; diff --git a/packages/vmind/src/base/tools/transformer/types.ts b/packages/vmind/src/base/tools/transformer/types.ts deleted file mode 100644 index 1b852006..00000000 --- a/packages/vmind/src/base/tools/transformer/types.ts +++ /dev/null @@ -1,4 +0,0 @@ -export interface ITransformer { - transformFunc: (input: Input, context: Context) => DSL; - transform: (input: Input, context: Context) => DSL; -} diff --git a/packages/vmind/src/gpt/chart-generation/NLToChart.ts b/packages/vmind/src/gpt/chart-generation/NLToChart.ts index c273de14..101789a7 100644 --- a/packages/vmind/src/gpt/chart-generation/NLToChart.ts +++ b/packages/vmind/src/gpt/chart-generation/NLToChart.ts @@ -1,7 +1,7 @@ import { SUPPORTED_CHART_LIST } from '../../common/vizDataToSpec/constants'; import { DataItem, GPTChartAdvisorResult, ILLMOptions, LOCATION, SimpleFieldInfo, VizSchema } from '../../typings'; import { checkChartTypeAndCell, vizDataToSpec } from '../../common/vizDataToSpec'; -import { parseGPTResponse, requestGPT } from '../../base/taskNode/utils'; +import { parseGPTResponse, requestGPT } from '../../applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils'; import { patchUserInput } from './utils'; import { ChartAdvisorPromptEnglish } from './prompts'; import { chartAdvisorHandler } from '../../common/chartAdvisor'; diff --git a/packages/vmind/src/gpt/dataProcess/index.ts b/packages/vmind/src/gpt/dataProcess/index.ts index 782bda57..34c13bd0 100644 --- a/packages/vmind/src/gpt/dataProcess/index.ts +++ b/packages/vmind/src/gpt/dataProcess/index.ts @@ -1,7 +1,7 @@ import { convertNumberField, getDataset, parseCSVData } from '../../common/dataProcess'; import { getFieldDomain, readTopNLine } from '../../applications/dataAggregation/taskNodes/executeQuery/utils'; import { ILLMOptions, SimpleFieldInfo } from '../../typings'; -import { parseGPTResponse, requestGPT } from '../../base/taskNode/utils'; +import { parseGPTResponse, requestGPT } from '../../applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils'; import { DataProcessPromptEnglish } from './prompts'; /* diff --git a/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts b/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts index 40ee2480..2aba0dcf 100644 --- a/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts +++ b/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts @@ -1,7 +1,10 @@ import { DataItem, ILLMOptions, SimpleFieldInfo } from '../../../typings'; import { parseGPTQueryResponse, parseRespondField, patchQueryInput } from './utils'; import { DataQueryResponse } from './type'; -import { parseGPTResponse as parseGPTResponseAsJSON, requestGPT } from '../../../base/taskNode/utils'; +import { + parseGPTResponse as parseGPTResponseAsJSON, + requestGPT +} from '../../../applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils'; import { getQueryDatasetPrompt } from '../prompts'; import { queryDataset } from '../../../applications/dataAggregation/taskNodes/executeQuery/dataQuery'; diff --git a/packages/vmind/src/gpt/dataProcess/query/utils.ts b/packages/vmind/src/gpt/dataProcess/query/utils.ts index 17e7a637..e7900ce2 100644 --- a/packages/vmind/src/gpt/dataProcess/query/utils.ts +++ b/packages/vmind/src/gpt/dataProcess/query/utils.ts @@ -123,24 +123,3 @@ export const parseRespondField = ( export const patchQueryInput = (userInput: string) => { return userInput; }; - -export const parseGPTQueryResponse = (response: string) => { - const sql = response.match(/sql:\n?```(.*?)```/s)[1]; - const fieldInfoStr = response.match(/fieldInfo:\n?```(.*?)```/s)[1]; - let fieldInfo = []; - try { - const tempFieldInfo = JSON5.parse(fieldInfoStr); - if (isArray(tempFieldInfo)) { - fieldInfo = tempFieldInfo; - } else { - fieldInfo = tempFieldInfo.fieldInfo; - } - } catch (e) { - //fieldInfoStr is not a json string; try to wrap it with [] - fieldInfo = JSON5.parse(`[${fieldInfoStr}]`); - } - return { - sql, - fieldInfo - }; -}; diff --git a/packages/vmind/src/typings/index.ts b/packages/vmind/src/typings/index.ts index 4d6bb897..84884943 100644 --- a/packages/vmind/src/typings/index.ts +++ b/packages/vmind/src/typings/index.ts @@ -49,14 +49,6 @@ export type SimpleFieldInfo = { role: ROLE; domain?: (string | number)[]; }; -export type GPTDataProcessResult = { - fieldInfo: SimpleFieldInfo[]; - videoDuration?: number; - colorPalette?: string[]; - thought: string; - usefulFields: string[]; - error?: boolean; //解析JSON出错 -}; export type Cell = { //字段映射,可用的视觉通道:["x","y","color","size","angle","time"] From a22be77199a2164e6bb1284c5a7aa139fe1ce352 Mon Sep 17 00:00:00 2001 From: da730 Date: Wed, 10 Apr 2024 11:51:17 +0800 Subject: [PATCH 14/62] feat: refactor data aggregation task nodes --- .../taskNodes/executeQuery/index.ts | 14 ++++++------ .../taskNodes/executeQuery/transformers.ts | 13 +++++------ .../taskNodes/executeQuery/types.ts | 6 ----- .../taskNodes/getQuerySQL/GPT/index.ts | 19 +++++----------- .../taskNodes/getQuerySQL/types.ts | 8 ------- .../src/applications/dataAggregation/types.ts | 18 +++++++-------- packages/vmind/src/applications/types.ts | 22 +++++++++++++++++++ packages/vmind/src/base/application/index.ts | 16 ++++++-------- .../vmind/src/base/taskNode/baseTaskNode.ts | 10 +++++++-- .../src/base/taskNode/llmBasedTaskNode.ts | 9 +++++--- .../src/base/taskNode/ruleBasedTaskNode.ts | 7 +++--- packages/vmind/src/base/taskNode/types.ts | 6 +++-- 12 files changed, 79 insertions(+), 69 deletions(-) create mode 100644 packages/vmind/src/applications/types.ts diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts index 9b04573c..fab94081 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts @@ -1,10 +1,10 @@ import { RuleBasedTaskNode } from 'src/base/taskNode/ruleBasedTaskNode'; -import { DataAggregationResult } from '../../types'; import { executeDataQuery, getFinalQueryResult, patchSQLBeforeQuery, restoreDatasetAfterQuery } from './transformers'; -import { ExecuteQueryContext, ExecuteQueryInput } from './types'; +import { ExecuteQueryContext, ExecuteQueryOutput } from '../../types'; -export const ExecuteQueryTaskNode = new RuleBasedTaskNode< - ExecuteQueryInput, - ExecuteQueryContext, - DataAggregationResult ->([patchSQLBeforeQuery, executeDataQuery, restoreDatasetAfterQuery, getFinalQueryResult]); +export const ExecuteQueryTaskNode = new RuleBasedTaskNode([ + patchSQLBeforeQuery, + executeDataQuery, + restoreDatasetAfterQuery, + getFinalQueryResult +]); diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts index 34e6e362..3a1b1455 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts @@ -1,6 +1,6 @@ import { Transformer } from 'src/base/tools/transformer'; import { SimpleFieldInfo, VMindDataset } from 'src/typings'; -import { DataAggregationResult, SQL } from '../../types'; +import { ExecuteQueryContext, ExecuteQueryOutput, SQL } from '../../types'; import { parseRespondField, replaceBlankSpace, @@ -12,7 +12,6 @@ import { } from './utils'; import alasql from 'alasql'; import { VMIND_DATA_SOURCE } from '../getQuerySQL/GPT/prompt/template'; -import { ExecuteQueryContext, ExecuteQueryInput } from './types'; /** * patch the errors in sql according to the feature of alasql: @@ -28,14 +27,14 @@ type PatchSQLResult = { columnReplaceMap: Map; sqlReplaceMap: Map; }; -export const patchSQLBeforeQuery: Transformer = ( +export const patchSQLBeforeQuery: Transformer = ( input, context: ExecuteQueryContext ) => { const { sql } = input; const { sourceDataset } = context; const { fieldInfo } = context; - const fieldNames = fieldInfo.map(field => field.fieldName); + const fieldNames = fieldInfo.map((field: SimpleFieldInfo) => field.fieldName); const { validStr, sqlReplaceMap, columnReplaceMap } = replaceInvalidWords(sql, fieldNames); //replace field names according to replaceMap @@ -46,8 +45,8 @@ export const patchSQLBeforeQuery: Transformer replaceString(field, columnReplaceMap)) - .map(field => replaceString(field, sqlReplaceMap)); + .map((field: string | number) => replaceString(field, columnReplaceMap)) + .map((field: string | number) => replaceString(field, sqlReplaceMap)); const validSql = replaceBlankSpace(validStr, replacedFieldNames as string[]); //sum all the non-aggregation measure columns @@ -107,7 +106,7 @@ export const restoreDatasetAfterQuery: Transformer = ( +export const getFinalQueryResult: Transformer = ( input: RestoreResult, context: ExecuteQueryContext ) => { diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/types.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/types.ts index cd732ce6..e69de29b 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/types.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/types.ts @@ -1,6 +0,0 @@ -import { SimpleFieldInfo } from 'src/typings'; -import { DataAggregationContext, SQL } from '../../types'; -import { GetQuerySQLResult } from '../getQuerySQL/types'; - -export type ExecuteQueryContext = DataAggregationContext & GetQuerySQLResult; -export type ExecuteQueryInput = GetQuerySQLResult; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/index.ts index 2164670c..ded22e6d 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/index.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/index.ts @@ -1,26 +1,19 @@ import { ILLMTaskNode, LLMBasedTaskNode } from 'src/base/taskNode/llmBasedTaskNode'; -import { Parser } from 'src/base/tools/parser'; -import { Patcher } from 'src/base/tools/patcher'; -import { Transformer } from 'src/base/tools/transformer'; -import { GetQuerySQLResult } from '../types'; import { parseDataQueryResponse, requestGPT } from './utils'; import { GPTDataAggregationPrompt } from './prompt'; -import { DataAggregationContext } from 'src/applications/dataAggregation/types'; +import { GetQuerySQLContext, GetQuerySQLOutput } from 'src/applications/dataAggregation/types'; -export class GetSQLGPTNode - extends LLMBasedTaskNode - implements ILLMTaskNode +export class GetSQLNode + extends LLMBasedTaskNode + implements ILLMTaskNode { constructor() { super(); - this.parser = parseDataQueryResponse; - this.patcher = new Patcher([ - (input: Partial, context: DataAggregationContext) => input as GetQuerySQLResult - ]); this.prompt = new GPTDataAggregationPrompt(); + this.parser = parseDataQueryResponse; } - async requestLLM(context: DataAggregationContext): Promise { + async requestLLM(context: GetQuerySQLContext): Promise { const { userInput, fieldInfo, llmOptions } = context; const queryDatasetMessage = `User's Command: ${userInput}\nColumn Information: ${JSON.stringify(fieldInfo)}`; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/types.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/types.ts index 81f92189..e69de29b 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/types.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/types.ts @@ -1,8 +0,0 @@ -import { SimpleFieldInfo } from 'src/typings'; -import { SQL } from '../../types'; - -export type GetQuerySQLResult = { - sql: SQL; - llmFieldInfo: SimpleFieldInfo[]; - usage: any; -}; diff --git a/packages/vmind/src/applications/dataAggregation/types.ts b/packages/vmind/src/applications/dataAggregation/types.ts index 18939f51..9ca47667 100644 --- a/packages/vmind/src/applications/dataAggregation/types.ts +++ b/packages/vmind/src/applications/dataAggregation/types.ts @@ -1,16 +1,16 @@ import { DataItem, ILLMOptions, SimpleFieldInfo, VMindDataset } from 'src/typings'; +import { DataAggregationContext, DataAggregationOutput } from '../types'; export type SQL = string; -export type DataAggregationContext = { - llmOptions: ILLMOptions; - userInput: string; - fieldInfo: SimpleFieldInfo[]; - sourceDataset: DataItem[]; -}; +export type GetQuerySQLContext = DataAggregationContext; -export type DataAggregationResult = { - dataset: VMindDataset; - fieldInfo: SimpleFieldInfo[]; +export type GetQuerySQLOutput = { + sql: SQL; + llmFieldInfo: SimpleFieldInfo[]; //fieldInfo generated by LLM; It may has some new fields after sql query. usage: any; }; + +export type ExecuteQueryContext = DataAggregationContext & GetQuerySQLOutput; + +export type ExecuteQueryOutput = DataAggregationOutput; diff --git a/packages/vmind/src/applications/types.ts b/packages/vmind/src/applications/types.ts new file mode 100644 index 00000000..afb6df2b --- /dev/null +++ b/packages/vmind/src/applications/types.ts @@ -0,0 +1,22 @@ +import { ILLMOptions, SimpleFieldInfo, VMindDataset } from 'src/typings'; + +//context of the DataAggregation Application +export type DataAggregationContext = { + llmOptions: ILLMOptions; + userInput: string; + fieldInfo: SimpleFieldInfo[]; + sourceDataset: VMindDataset; //original dataset +}; + +//output of the DataAggregation Application +export type DataAggregationOutput = { + dataset: VMindDataset; //dataset after aggregation + fieldInfo: SimpleFieldInfo[]; //fieldInfo after aggregation + usage: any; //token usage of the LLM +}; + +export type ChartGenerationInput = { + userInput: string; + fieldInfo: SimpleFieldInfo[]; + dataset: VMindDataset; +}; diff --git a/packages/vmind/src/base/application/index.ts b/packages/vmind/src/base/application/index.ts index e33ee6d9..e0f23e75 100644 --- a/packages/vmind/src/base/application/index.ts +++ b/packages/vmind/src/base/application/index.ts @@ -28,15 +28,13 @@ export class BaseApplication implements IApplication async runTasks() { const result: DSL = this.tasks.reduce( - async ( - pre: any, - curTask: { - name: string; - task: BaseTaskNode; - } - ) => { - const result = await curTask.task.executeTask(pre); - return result; + async (pre: any, curTask: { name: string; task: BaseTaskNode }) => { + const result = await curTask.task.executeTask(this.context); + this.updateContext({ + ...this.context, + ...result + }); + return this.context; }, this.context ); diff --git a/packages/vmind/src/base/taskNode/baseTaskNode.ts b/packages/vmind/src/base/taskNode/baseTaskNode.ts index e0b9881e..02b57d64 100644 --- a/packages/vmind/src/base/taskNode/baseTaskNode.ts +++ b/packages/vmind/src/base/taskNode/baseTaskNode.ts @@ -6,8 +6,14 @@ import { ITaskNode } from './types'; * TaskNode can be seen as a collection of a series of tools, responsible for completing a specific task. * Each Node can also be called as a separate function */ -export class BaseTaskNode implements ITaskNode { - executeTask(input: Input, context: Context): Promise | Result { +export class BaseTaskNode implements ITaskNode { + context: Context; + + executeTask(context: Context): Promise | Result { + this.updateContext(context); return null as Result; } + updateContext(context: Context) { + this.context = context; + } } diff --git a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts index a38174e7..0232c405 100644 --- a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts @@ -17,8 +17,8 @@ export interface ILLMTaskNode { * LLMBasedTaskNode is a task node that needs to use LLM to complete tasks * Subclasses must assign values to prompt, parser and patcher, and rewrite requestLLM function */ -export class LLMBasedTaskNode - extends BaseTaskNode +export class LLMBasedTaskNode + extends BaseTaskNode implements ILLMTaskNode { prompt: Prompt; @@ -29,9 +29,11 @@ export class LLMBasedTaskNode([(input: Partial, context: Context) => input as DSL]); } async requestLLM(context: Context): Promise { + this.updateContext(context); return null; } @@ -43,7 +45,8 @@ export class LLMBasedTaskNode extends BaseTaskNode { +export class RuleBasedTaskNode extends BaseTaskNode { pipelines: Transformer[]; constructor(pipelines: Transformer[]) { super(); this.pipelines = pipelines; } - executeTask(input: Input, context: Context): Result { + executeTask(context: Context): Result { + this.updateContext(context); const result: Result = this.pipelines.reduce((pre: any, transformer: Transformer) => { const res = transformer(pre, context); return res; - }, input); + }, context); return result; } } diff --git a/packages/vmind/src/base/taskNode/types.ts b/packages/vmind/src/base/taskNode/types.ts index fff74566..a60816ec 100644 --- a/packages/vmind/src/base/taskNode/types.ts +++ b/packages/vmind/src/base/taskNode/types.ts @@ -1,3 +1,5 @@ -export interface ITaskNode { - executeTask: (input: Input, context: Context) => Promise | DSL; +export interface ITaskNode { + context: Context; + executeTask: (context: Context) => Promise | DSL; + updateContext: (context: Context) => void; } From a91555853a17854e519f1a6276855001cee0e60d Mon Sep 17 00:00:00 2001 From: da730 Date: Wed, 10 Apr 2024 18:19:04 +0800 Subject: [PATCH 15/62] feat: describe applications by meta info --- .../src/applications/dataAggregation/index.ts | 18 ++++++-- .../taskNodes/executeQuery/index.ts | 14 +++--- .../taskNodes/executeQuery/types.ts | 0 .../taskNodes/getQuerySQL/GPT/index.ts | 34 ++++++-------- .../taskNodes/getQuerySQL/GPT/prompt/index.ts | 6 +-- .../taskNodes/getQuerySQL/GPT/utils.ts | 15 +++++++ packages/vmind/src/base/application/index.ts | 45 +++++++++++++------ packages/vmind/src/base/application/types.ts | 10 ++--- packages/vmind/src/base/metaTypes.ts | 31 +++++++++++++ .../vmind/src/base/taskNode/baseTaskNode.ts | 4 +- .../src/base/taskNode/llmBasedTaskNode.ts | 43 +++++++++++++----- .../src/base/taskNode/ruleBasedTaskNode.ts | 7 +++ packages/vmind/src/base/taskNode/types.ts | 5 +++ .../vmind/src/base/tools/patcher/index.ts | 15 +------ .../vmind/src/base/tools/patcher/types.ts | 6 --- .../vmind/src/base/tools/requester/index.ts | 1 + 16 files changed, 169 insertions(+), 85 deletions(-) delete mode 100644 packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/types.ts create mode 100644 packages/vmind/src/base/metaTypes.ts create mode 100644 packages/vmind/src/base/tools/requester/index.ts diff --git a/packages/vmind/src/applications/dataAggregation/index.ts b/packages/vmind/src/applications/dataAggregation/index.ts index 1c115108..969b3004 100644 --- a/packages/vmind/src/applications/dataAggregation/index.ts +++ b/packages/vmind/src/applications/dataAggregation/index.ts @@ -1,5 +1,15 @@ -import { BaseApplication } from 'src/base/application'; -import { SQL } from './types'; -import { DataItem } from 'src/typings'; +import { ApplicationMeta } from 'src/base/metaTypes'; +import ExecuteQueryTaskNodeMeta from './taskNodes/executeQuery'; +import GetSQLTaskNodeGPTMeta from './taskNodes/getQuerySQL/GPT'; +import { ModelType } from 'src/typings'; -export class DataAggregationApplication extends BaseApplication<{}, DataItem> {} +const dataAggregationGPTMeta: ApplicationMeta = [ + { taskNode: GetSQLTaskNodeGPTMeta, name: 'getQuerySQL' }, + { taskNode: ExecuteQueryTaskNodeMeta, name: 'executeQuery' } +]; + +const dataAggregationMetaByModel = { + [ModelType.GPT]: dataAggregationGPTMeta +}; + +export default dataAggregationMetaByModel; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts index fab94081..83ef759c 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts @@ -1,10 +1,12 @@ import { RuleBasedTaskNode } from 'src/base/taskNode/ruleBasedTaskNode'; import { executeDataQuery, getFinalQueryResult, patchSQLBeforeQuery, restoreDatasetAfterQuery } from './transformers'; import { ExecuteQueryContext, ExecuteQueryOutput } from '../../types'; +import { RuleBasedTaskNodeMeta } from 'src/base/metaTypes'; +import { TaskNodeType } from 'src/base/taskNode/types'; -export const ExecuteQueryTaskNode = new RuleBasedTaskNode([ - patchSQLBeforeQuery, - executeDataQuery, - restoreDatasetAfterQuery, - getFinalQueryResult -]); +const ExecuteQueryTaskNodeMeta: RuleBasedTaskNodeMeta = { + type: TaskNodeType.RULE_BASED, + pipelines: [patchSQLBeforeQuery, executeDataQuery, restoreDatasetAfterQuery, getFinalQueryResult] +}; + +export default ExecuteQueryTaskNodeMeta; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/types.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/types.ts deleted file mode 100644 index e69de29b..00000000 diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/index.ts index ded22e6d..b7ef4eba 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/index.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/index.ts @@ -1,25 +1,17 @@ -import { ILLMTaskNode, LLMBasedTaskNode } from 'src/base/taskNode/llmBasedTaskNode'; -import { parseDataQueryResponse, requestGPT } from './utils'; +import { dataQueryRequestLLM, parseDataQueryResponse } from './utils'; import { GPTDataAggregationPrompt } from './prompt'; import { GetQuerySQLContext, GetQuerySQLOutput } from 'src/applications/dataAggregation/types'; +import { LLMBasedTaskNodeMeta } from 'src/base/metaTypes'; +import { TaskNodeType } from 'src/base/taskNode/types'; +import { ModelType } from 'src/typings'; -export class GetSQLNode - extends LLMBasedTaskNode - implements ILLMTaskNode -{ - constructor() { - super(); - this.prompt = new GPTDataAggregationPrompt(); - this.parser = parseDataQueryResponse; - } +const GetSQLTaskNodeGPTMeta: LLMBasedTaskNodeMeta = { + type: TaskNodeType.LLM_BASED, + modelType: ModelType.GPT, + parser: parseDataQueryResponse, + patcher: [(input: Partial, context: GetQuerySQLContext) => input as GetQuerySQLOutput], + requester: dataQueryRequestLLM, + prompt: new GPTDataAggregationPrompt() +}; - async requestLLM(context: GetQuerySQLContext): Promise { - const { userInput, fieldInfo, llmOptions } = context; - const queryDatasetMessage = `User's Command: ${userInput}\nColumn Information: ${JSON.stringify(fieldInfo)}`; - - const requestFunc = llmOptions.customRequestFunc?.dataQuery ?? requestGPT; - const QueryDatasetPrompt = this.prompt.getPrompt(context); - const dataProcessRes = await requestFunc(QueryDatasetPrompt, queryDatasetMessage, llmOptions); - return dataProcessRes; - } -} +export default GetSQLTaskNodeGPTMeta; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/prompt/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/prompt/index.ts index 411b51fc..66c40b9f 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/prompt/index.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/prompt/index.ts @@ -1,12 +1,12 @@ import { Prompt } from 'src/base/tools/prompt'; import { getQueryDatasetPrompt } from './template'; -import { DataAggregationContext } from '../../../../types'; +import { GetQuerySQLContext } from 'src/applications/dataAggregation/types'; -export class GPTDataAggregationPrompt extends Prompt { +export class GPTDataAggregationPrompt extends Prompt { constructor() { super(''); } - getPrompt(context: DataAggregationContext) { + getPrompt(context: GetQuerySQLContext) { const { llmOptions } = context; const QueryDatasetPrompt = getQueryDatasetPrompt(llmOptions.showThoughts ?? true); return QueryDatasetPrompt; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts index fa08dba7..40ad36af 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts @@ -5,6 +5,8 @@ import { matchJSONStr } from 'src/common/utils'; import { ILLMOptions, LLMResponse } from 'src/typings'; import { GetQuerySQLResult } from '../types'; import { Parser } from 'src/base/tools/parser'; +import { GetQuerySQLContext } from 'src/applications/dataAggregation/types'; +import { Requester } from 'src/base/tools/requester'; export const requestGPT = async ( prompt: string, @@ -129,3 +131,16 @@ export const parseDataQueryResponse: Parser = (g } return { ...dataQueryResponse, usage: gptResponse.usage }; }; + +export const dataQueryRequestLLM: Requester = async ( + prompt: string, + context: GetQuerySQLContext +) => { + const { userInput, fieldInfo, llmOptions } = context; + const queryDatasetMessage = `User's Command: ${userInput}\nColumn Information: ${JSON.stringify(fieldInfo)}`; + + const requestFunc = llmOptions.customRequestFunc?.dataQuery ?? requestGPT; + const QueryDatasetPrompt = prompt; + const dataProcessRes = await requestFunc(QueryDatasetPrompt, queryDatasetMessage, llmOptions); + return dataProcessRes; +}; diff --git a/packages/vmind/src/base/application/index.ts b/packages/vmind/src/base/application/index.ts index e0f23e75..b29b00de 100644 --- a/packages/vmind/src/base/application/index.ts +++ b/packages/vmind/src/base/application/index.ts @@ -1,6 +1,11 @@ +import { ILLMOptions } from 'src/typings'; +import { ApplicationMeta, LLMBasedTaskNodeMeta, RuleBasedTaskNodeMeta, TaskNode } from '../metaTypes'; import { BaseTaskNode } from '../taskNode/baseTaskNode'; +import LLMBasedTaskNode from '../taskNode/llmBasedTaskNode'; +import { TaskNodeType } from '../taskNode/types'; import { ChatManager } from '../tools/chatManager'; import { IApplication } from './types'; +import { RuleBasedTaskNode } from '../taskNode/ruleBasedTaskNode'; /** * VMind application class, representing a specific function, such as chart generation, data aggregation, or chart editing, etc. @@ -8,25 +13,37 @@ import { IApplication } from './types'; * Application can be seen as a collection of a series of TaskNodes. Applications can reference each other (equivalent to reusing TaskNodes to complete tasks) */ export class BaseApplication implements IApplication { - tasks: { - name: string; - task: BaseTaskNode; - }[]; + tasks: { task: BaseTaskNode; name: string }[]; context: Context; chatManager: ChatManager; - constructor( - tasks: { - name: string; - task: BaseTaskNode; - }[], - context: Context - ) { - this.tasks = tasks; - this.context = context; + constructor(taskNodeList: ApplicationMeta) { this.chatManager = new ChatManager(); + this.registerTaskNodes(taskNodeList); + } + + registerTaskNodes(taskNodeList: ApplicationMeta) { + const taskNodeInstanceList = taskNodeList.map((taskInfo: TaskNode) => { + const { taskNode, name } = taskInfo; + const { type } = taskNode; + if (type === TaskNodeType.LLM_BASED) { + const { modelType, parser, patcher, prompt, requester } = taskNode as LLMBasedTaskNodeMeta; + return { + name, + task: new LLMBasedTaskNode({ modelType, parser, patcher, prompt, requester }) + }; + } else if (type === TaskNodeType.RULE_BASED) { + const { pipelines } = taskNode as RuleBasedTaskNodeMeta; + return { + name, + task: new RuleBasedTaskNode(pipelines) + }; + } + }); + this.tasks = taskNodeInstanceList; } - async runTasks() { + async runTasks(context: Context) { + this.updateContext(context); const result: DSL = this.tasks.reduce( async (pre: any, curTask: { name: string; task: BaseTaskNode }) => { const result = await curTask.task.executeTask(this.context); diff --git a/packages/vmind/src/base/application/types.ts b/packages/vmind/src/base/application/types.ts index 1a6687fc..0554050b 100644 --- a/packages/vmind/src/base/application/types.ts +++ b/packages/vmind/src/base/application/types.ts @@ -1,14 +1,14 @@ +import { ApplicationMeta } from '../metaTypes'; import { BaseTaskNode } from '../taskNode/baseTaskNode'; import { ChatManager } from '../tools/chatManager'; export interface IApplication { - tasks: { - name: string; - task: BaseTaskNode; - }[]; + tasks: { task: BaseTaskNode; name: string }[]; context: Context; chatManager: ChatManager; - runTasks: () => Promise; + registerTaskNodes: (tasks: ApplicationMeta) => void; + + runTasks: (context: Context) => Promise; updateContext: (context: Context) => void; } diff --git a/packages/vmind/src/base/metaTypes.ts b/packages/vmind/src/base/metaTypes.ts new file mode 100644 index 00000000..cb7d3716 --- /dev/null +++ b/packages/vmind/src/base/metaTypes.ts @@ -0,0 +1,31 @@ +import { ModelType } from 'src/typings'; +import { TaskNodeType } from './taskNode/types'; +import { Parser } from './tools/parser'; +import { Patcher } from './tools/patcher'; +import { Prompt } from './tools/prompt'; +import { Requester } from './tools/requester'; +import { Transformer } from './tools/transformer'; +export type LLMBasedTaskNodeMeta = { + type: TaskNodeType.LLM_BASED; + modelType: ModelType; + parser: Parser; + patcher: Patcher; + prompt: Prompt; + requester: Requester; +}; + +export type RuleBasedTaskNodeMeta = { + type: TaskNodeType.RULE_BASED; + pipelines: Transformer[]; +}; + +export type TaskNodeMeta = + | LLMBasedTaskNodeMeta + | RuleBasedTaskNodeMeta; + +export type TaskNode = { + name: string; + taskNode: TaskNodeMeta; +}; + +export type ApplicationMeta = TaskNode[]; diff --git a/packages/vmind/src/base/taskNode/baseTaskNode.ts b/packages/vmind/src/base/taskNode/baseTaskNode.ts index 02b57d64..a74a2bbc 100644 --- a/packages/vmind/src/base/taskNode/baseTaskNode.ts +++ b/packages/vmind/src/base/taskNode/baseTaskNode.ts @@ -1,4 +1,4 @@ -import { ITaskNode } from './types'; +import { ITaskNode, TaskNodeType } from './types'; /** * A task node in VMind application, used to complete a specific task, such as requesting a large model for chart type, DSL parsing and conversion, etc. @@ -8,7 +8,7 @@ import { ITaskNode } from './types'; */ export class BaseTaskNode implements ITaskNode { context: Context; - + type: TaskNodeType; executeTask(context: Context): Promise | Result { this.updateContext(context); return null as Result; diff --git a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts index 0232c405..e597c31c 100644 --- a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts @@ -3,38 +3,57 @@ import { BaseTaskNode } from './baseTaskNode'; import { Parser } from 'src/base/tools/parser'; import { Patcher } from 'src/base/tools/patcher'; import { ChatManager } from 'src/base/tools/chatManager'; -import { ILLMOptions, RequestFunc } from 'src/typings'; +import { ILLMOptions, ModelType, RequestFunc } from 'src/typings'; +import { TaskNodeType } from './types'; +import { Requester } from '../tools/requester'; export interface ILLMTaskNode { + modelType: ModelType; prompt: Prompt; chatManager: ChatManager; parser: Parser; - patcher: Patcher; + patcher: Patcher; - requestLLM: (context: Context) => Promise; + requester: Requester; } + +export type LLMTaskNodeOptions = { + parser: Parser; + patcher: Patcher; + prompt: Prompt; + requester: Requester; + modelType: ModelType; +}; /** * LLMBasedTaskNode is a task node that needs to use LLM to complete tasks * Subclasses must assign values to prompt, parser and patcher, and rewrite requestLLM function */ -export class LLMBasedTaskNode +export default class LLMBasedTaskNode extends BaseTaskNode implements ILLMTaskNode { prompt: Prompt; chatManager: ChatManager; parser: Parser; - patcher: Patcher; + patcher: Patcher; + requester: Requester; + modelType: ModelType; - constructor() { + constructor(options: LLMTaskNodeOptions) { super(); + this.type = TaskNodeType.LLM_BASED; this.chatManager = new ChatManager(); - this.patcher = new Patcher([(input: Partial, context: Context) => input as DSL]); + const { parser, patcher, requester, prompt, modelType } = options; + this.parser = parser; + this.patcher = patcher; + this.requester = requester; + this.prompt = prompt; + this.modelType = modelType; } async requestLLM(context: Context): Promise { - this.updateContext(context); - return null; + const prompt = this.prompt.getPrompt(context); + return this.requester(prompt, context); } parseLLMResponse(llmResponse: any): Partial { @@ -42,7 +61,11 @@ export class LLMBasedTaskNode } patchLLMResponse(input: Partial, context: Context): DSL { - return this.patcher.patch(input, context); + const result = this.patcher.reduce((pre, pipeline) => { + const res = pipeline(pre, context); + return res; + }, input) as DSL; + return result; } async executeTask(context: Context) { diff --git a/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts b/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts index 87252f77..12d35945 100644 --- a/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts @@ -1,14 +1,21 @@ import { Transformer } from 'src/base/tools/transformer'; import { BaseTaskNode } from './baseTaskNode'; +import { TaskNodeType } from './types'; /** * rule-based taskNode, which consists of a series of Pipelines * It completes the transformation from Input to a specific data structure (DSL) + * subclasses must call registerPipelines in their constructor */ export class RuleBasedTaskNode extends BaseTaskNode { pipelines: Transformer[]; constructor(pipelines: Transformer[]) { super(); + this.type = TaskNodeType.RULE_BASED; + this.registerPipelines(pipelines); + } + + registerPipelines(pipelines: Transformer[]) { this.pipelines = pipelines; } diff --git a/packages/vmind/src/base/taskNode/types.ts b/packages/vmind/src/base/taskNode/types.ts index a60816ec..096e0067 100644 --- a/packages/vmind/src/base/taskNode/types.ts +++ b/packages/vmind/src/base/taskNode/types.ts @@ -1,5 +1,10 @@ export interface ITaskNode { + type: TaskNodeType; context: Context; executeTask: (context: Context) => Promise | DSL; updateContext: (context: Context) => void; } +export enum TaskNodeType { + RULE_BASED = 'rule-based', + LLM_BASED = 'llm-based' +} diff --git a/packages/vmind/src/base/tools/patcher/index.ts b/packages/vmind/src/base/tools/patcher/index.ts index bff11797..aefffe6f 100644 --- a/packages/vmind/src/base/tools/patcher/index.ts +++ b/packages/vmind/src/base/tools/patcher/index.ts @@ -1,4 +1,3 @@ -import { IPatcher } from './types'; import { Transformer } from '../transformer'; /** @@ -8,17 +7,5 @@ import { Transformer } from '../transformer'; * patch method is responsible for executing pipelines, patch the input based on Context, and return the final DSL * pass the specific pipelines during initialization */ -export class Patcher implements IPatcher { - pipelines: Transformer, Context, DSL>[]; - constructor(transformers: Transformer, Context, DSL>[]) { - this.pipelines = transformers; - } - patch(input: any, context: Context) { - const result = this.pipelines.reduce((pre, pipeline) => { - const res = pipeline(pre, context); - return res; - }, input); - return result; - } -} +export type Patcher = Transformer, Context, DSL>[]; diff --git a/packages/vmind/src/base/tools/patcher/types.ts b/packages/vmind/src/base/tools/patcher/types.ts index 833057c4..e69de29b 100644 --- a/packages/vmind/src/base/tools/patcher/types.ts +++ b/packages/vmind/src/base/tools/patcher/types.ts @@ -1,6 +0,0 @@ -import { Transformer } from '../transformer'; - -export interface IPatcher { - pipelines: Transformer, Context, DSL>[]; - patch: (input: Partial, context: Context) => DSL; -} diff --git a/packages/vmind/src/base/tools/requester/index.ts b/packages/vmind/src/base/tools/requester/index.ts new file mode 100644 index 00000000..7bfadc28 --- /dev/null +++ b/packages/vmind/src/base/tools/requester/index.ts @@ -0,0 +1 @@ +export type Requester = (prompt: string, context: Context) => Promise; From a45aeefc3c6eec32a248b6cc50dc6aa1faba1279 Mon Sep 17 00:00:00 2001 From: da730 Date: Wed, 10 Apr 2024 20:07:55 +0800 Subject: [PATCH 16/62] feat: finish data aggregation application --- .../src/applications/dataAggregation/index.ts | 20 ++++++++--- .../taskNodes/getQuerySQL/GPT/types.ts | 0 .../taskNodes/getQuerySQL/index.ts | 0 .../taskNodes/getQuerySQL/types.ts | 0 .../src/applications/dataAggregation/types.ts | 2 +- packages/vmind/src/base/application/index.ts | 17 ++++++--- packages/vmind/src/base/application/types.ts | 3 +- packages/vmind/src/base/metaTypes.ts | 5 ++- packages/vmind/src/core/VMind.ts | 35 +++++++++++++++++-- packages/vmind/src/core/applications.ts | 12 +++++++ packages/vmind/src/core/types.ts | 8 +++++ 11 files changed, 89 insertions(+), 13 deletions(-) delete mode 100644 packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/types.ts delete mode 100644 packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/index.ts delete mode 100644 packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/types.ts create mode 100644 packages/vmind/src/core/applications.ts create mode 100644 packages/vmind/src/core/types.ts diff --git a/packages/vmind/src/applications/dataAggregation/index.ts b/packages/vmind/src/applications/dataAggregation/index.ts index 969b3004..8d5757cf 100644 --- a/packages/vmind/src/applications/dataAggregation/index.ts +++ b/packages/vmind/src/applications/dataAggregation/index.ts @@ -2,11 +2,23 @@ import { ApplicationMeta } from 'src/base/metaTypes'; import ExecuteQueryTaskNodeMeta from './taskNodes/executeQuery'; import GetSQLTaskNodeGPTMeta from './taskNodes/getQuerySQL/GPT'; import { ModelType } from 'src/typings'; +import { DataAggregationContext, DataAggregationOutput } from '../types'; +import { ApplicationType } from 'src/core/applications'; -const dataAggregationGPTMeta: ApplicationMeta = [ - { taskNode: GetSQLTaskNodeGPTMeta, name: 'getQuerySQL' }, - { taskNode: ExecuteQueryTaskNodeMeta, name: 'executeQuery' } -]; +/** + * data aggregation application in vmind + * pipeline: getQuerySQL=>executeQuery + * first it gets userInput, fieldInfo, sourceDataset as input (DataAggregationContext) + * then it run getQuerySQL to get sql and llmFieldInfo + * finally it runs the sql using alasql and return the final dataset and fieldInfo (DataAggregationOutput) + */ +const dataAggregationGPTMeta: ApplicationMeta = { + name: ApplicationType.DataAggregation, + taskNodes: [ + { taskNode: GetSQLTaskNodeGPTMeta, name: 'getQuerySQL' }, + { taskNode: ExecuteQueryTaskNodeMeta, name: 'executeQuery' } + ] +}; const dataAggregationMetaByModel = { [ModelType.GPT]: dataAggregationGPTMeta diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/types.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/types.ts deleted file mode 100644 index e69de29b..00000000 diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/index.ts deleted file mode 100644 index e69de29b..00000000 diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/types.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/types.ts deleted file mode 100644 index e69de29b..00000000 diff --git a/packages/vmind/src/applications/dataAggregation/types.ts b/packages/vmind/src/applications/dataAggregation/types.ts index 9ca47667..c84ebbd9 100644 --- a/packages/vmind/src/applications/dataAggregation/types.ts +++ b/packages/vmind/src/applications/dataAggregation/types.ts @@ -1,4 +1,4 @@ -import { DataItem, ILLMOptions, SimpleFieldInfo, VMindDataset } from 'src/typings'; +import { SimpleFieldInfo } from 'src/typings'; import { DataAggregationContext, DataAggregationOutput } from '../types'; export type SQL = string; diff --git a/packages/vmind/src/base/application/index.ts b/packages/vmind/src/base/application/index.ts index b29b00de..4e517ced 100644 --- a/packages/vmind/src/base/application/index.ts +++ b/packages/vmind/src/base/application/index.ts @@ -13,16 +13,18 @@ import { RuleBasedTaskNode } from '../taskNode/ruleBasedTaskNode'; * Application can be seen as a collection of a series of TaskNodes. Applications can reference each other (equivalent to reusing TaskNodes to complete tasks) */ export class BaseApplication implements IApplication { + name: string; tasks: { task: BaseTaskNode; name: string }[]; context: Context; chatManager: ChatManager; - constructor(taskNodeList: ApplicationMeta) { + constructor(meta: ApplicationMeta) { this.chatManager = new ChatManager(); - this.registerTaskNodes(taskNodeList); + this.name = meta.name; + this.registerTaskNodes(meta); } - registerTaskNodes(taskNodeList: ApplicationMeta) { - const taskNodeInstanceList = taskNodeList.map((taskInfo: TaskNode) => { + registerTaskNodes(meta: ApplicationMeta) { + const taskNodeInstanceList = meta.taskNodes.map((taskInfo: TaskNode) => { const { taskNode, name } = taskInfo; const { type } = taskNode; if (type === TaskNodeType.LLM_BASED) { @@ -42,11 +44,18 @@ export class BaseApplication implements IApplication this.tasks = taskNodeInstanceList; } + /** + * run the task nodes of this application + * The output results of the preceding nodes are treated as the context of all subsequent nodes. + * @param context initial context of this application + * @returns DSL + */ async runTasks(context: Context) { this.updateContext(context); const result: DSL = this.tasks.reduce( async (pre: any, curTask: { name: string; task: BaseTaskNode }) => { const result = await curTask.task.executeTask(this.context); + //Put the running result of the current node into the context. this.updateContext({ ...this.context, ...result diff --git a/packages/vmind/src/base/application/types.ts b/packages/vmind/src/base/application/types.ts index 0554050b..292aab70 100644 --- a/packages/vmind/src/base/application/types.ts +++ b/packages/vmind/src/base/application/types.ts @@ -3,11 +3,12 @@ import { BaseTaskNode } from '../taskNode/baseTaskNode'; import { ChatManager } from '../tools/chatManager'; export interface IApplication { + name: string; tasks: { task: BaseTaskNode; name: string }[]; context: Context; chatManager: ChatManager; - registerTaskNodes: (tasks: ApplicationMeta) => void; + registerTaskNodes: (meta: ApplicationMeta) => void; runTasks: (context: Context) => Promise; updateContext: (context: Context) => void; diff --git a/packages/vmind/src/base/metaTypes.ts b/packages/vmind/src/base/metaTypes.ts index cb7d3716..9e3c45ca 100644 --- a/packages/vmind/src/base/metaTypes.ts +++ b/packages/vmind/src/base/metaTypes.ts @@ -28,4 +28,7 @@ export type TaskNode = { taskNode: TaskNodeMeta; }; -export type ApplicationMeta = TaskNode[]; +export type ApplicationMeta = { + name: string; + taskNodes: TaskNode[]; +}; diff --git a/packages/vmind/src/core/VMind.ts b/packages/vmind/src/core/VMind.ts index 8e78e82d..4f7e7b83 100644 --- a/packages/vmind/src/core/VMind.ts +++ b/packages/vmind/src/core/VMind.ts @@ -7,15 +7,39 @@ import { generateChartWithSkylark } from '../skylark/chart-generation'; import { queryDatasetWithGPT } from '../gpt/dataProcess/query/queryDataset'; import { generateChartWithAdvisor } from '../common/chartAdvisor'; import { queryDatasetWithSkylark } from '../skylark/dataProcess/query/queryDataset'; +import applicationMetaList, { ApplicationType } from './applications'; +import { BaseApplication } from 'src/base/application'; +import { VMindApplicationMap } from './types'; class VMind { private _FPS = 30; private _options: ILLMOptions | undefined; private _model: Model | string; + private _applicationMap: VMindApplicationMap; constructor(options?: ILLMOptions) { this._options = { ...(options ?? {}) }; this._model = options.model ?? Model.GPT3_5; + this.registerApplications(); + } + + private registerApplications() { + const applicationList = {}; + Object.keys(applicationMetaList).forEach(applicationName => { + applicationList[applicationName] = {}; + const applicationMetaByModel = applicationMetaList[applicationName]; + Object.keys(applicationMetaByModel).forEach(modelType => { + const applicationMeta = applicationMetaByModel[modelType]; + applicationList[applicationName][modelType] = new BaseApplication(applicationMeta); + }); + }); + this._applicationMap = applicationList; + } + + public addApplication() {} + + private getApplication(name: ApplicationType, modelType: ModelType) { + return this._applicationMap[name][modelType]; } /** @@ -113,10 +137,17 @@ class VMind { dataset: DataItem[] ) { if (this.getModelType() === ModelType.GPT) { - return queryDatasetWithGPT(userPrompt, fieldInfo, dataset, this._options); + const context = { + userInput: userPrompt, + fieldInfo, + dataset, + llmOptions: this._options + }; + const application = this.getApplication(ApplicationType.DataAggregation, ModelType.GPT); + return application.runTasks(context); } if (this.getModelType() === ModelType.SKYLARK) { - return queryDatasetWithSkylark(userPrompt, fieldInfo, dataset, this._options); + //return queryDatasetWithSkylark(userPrompt, fieldInfo, dataset, this._options); } console.error('unsupported model in data query!'); diff --git a/packages/vmind/src/core/applications.ts b/packages/vmind/src/core/applications.ts new file mode 100644 index 00000000..44972b4f --- /dev/null +++ b/packages/vmind/src/core/applications.ts @@ -0,0 +1,12 @@ +import dataAggregationMetaByModel from 'src/applications/dataAggregation'; +import { ModelType } from 'src/typings'; + +export enum ApplicationType { + DataAggregation = 'dataAggregation' +} + +const applicationMetaList = { + [ApplicationType.DataAggregation]: dataAggregationMetaByModel +}; + +export default applicationMetaList; diff --git a/packages/vmind/src/core/types.ts b/packages/vmind/src/core/types.ts new file mode 100644 index 00000000..21d1bab4 --- /dev/null +++ b/packages/vmind/src/core/types.ts @@ -0,0 +1,8 @@ +import { BaseApplication } from 'src/base/application'; +import { ApplicationType } from './applications'; + +export type VMindApplicationMap = { + [name: string]: { + [modelType: string]: BaseApplication; + }; +}; From 7268d8ef15a1c5dee448df2966df6287a4cde061 Mon Sep 17 00:00:00 2001 From: da730 Date: Wed, 10 Apr 2024 20:38:31 +0800 Subject: [PATCH 17/62] feat: remove unused files --- .../vmind/__tests__/browser/vite.config.ts | 1 + packages/vmind/src/base/application/index.ts | 1 + packages/vmind/src/core/VMind.ts | 67 +--- .../src/gpt/chart-generation/NLToChart.ts | 128 ------- .../vmind/src/gpt/chart-generation/index.ts | 1 - .../vmind/src/gpt/chart-generation/patch.ts | 359 ------------------ .../vmind/src/gpt/chart-generation/prompts.ts | 185 --------- .../vmind/src/gpt/chart-generation/utils.ts | 30 -- packages/vmind/src/gpt/dataProcess/index.ts | 48 --- packages/vmind/src/gpt/dataProcess/prompts.ts | 196 ---------- .../src/gpt/dataProcess/query/astPipes.ts | 243 ------------ .../vmind/src/gpt/dataProcess/query/index.ts | 0 .../src/gpt/dataProcess/query/parseSqlAST.ts | 20 - .../src/gpt/dataProcess/query/queryDataset.ts | 64 ---- .../vmind/src/gpt/dataProcess/query/type.ts | 19 - .../vmind/src/gpt/dataProcess/query/utils.ts | 125 ------ .../src/skylark/chart-generation/NLToChart.ts | 143 ------- .../src/skylark/chart-generation/constants.ts | 229 ----------- .../src/skylark/chart-generation/index.ts | 1 - .../src/skylark/chart-generation/patch.ts | 237 ------------ .../src/skylark/chart-generation/prompts.ts | 62 --- .../src/skylark/chart-generation/utils.ts | 51 --- .../src/skylark/dataProcess/query/prompts.ts | 76 ---- .../skylark/dataProcess/query/queryDataset.ts | 60 --- .../src/skylark/dataProcess/query/type.ts | 5 - .../src/skylark/dataProcess/query/utils.ts | 54 --- packages/vmind/src/skylark/typings/index.ts | 7 - packages/vmind/src/skylark/utils.ts | 58 --- 28 files changed, 3 insertions(+), 2467 deletions(-) delete mode 100644 packages/vmind/src/gpt/chart-generation/NLToChart.ts delete mode 100644 packages/vmind/src/gpt/chart-generation/index.ts delete mode 100644 packages/vmind/src/gpt/chart-generation/patch.ts delete mode 100644 packages/vmind/src/gpt/chart-generation/prompts.ts delete mode 100644 packages/vmind/src/gpt/chart-generation/utils.ts delete mode 100644 packages/vmind/src/gpt/dataProcess/index.ts delete mode 100644 packages/vmind/src/gpt/dataProcess/prompts.ts delete mode 100644 packages/vmind/src/gpt/dataProcess/query/astPipes.ts delete mode 100644 packages/vmind/src/gpt/dataProcess/query/index.ts delete mode 100644 packages/vmind/src/gpt/dataProcess/query/parseSqlAST.ts delete mode 100644 packages/vmind/src/gpt/dataProcess/query/queryDataset.ts delete mode 100644 packages/vmind/src/gpt/dataProcess/query/type.ts delete mode 100644 packages/vmind/src/gpt/dataProcess/query/utils.ts delete mode 100644 packages/vmind/src/skylark/chart-generation/NLToChart.ts delete mode 100644 packages/vmind/src/skylark/chart-generation/constants.ts delete mode 100644 packages/vmind/src/skylark/chart-generation/index.ts delete mode 100644 packages/vmind/src/skylark/chart-generation/patch.ts delete mode 100644 packages/vmind/src/skylark/chart-generation/prompts.ts delete mode 100644 packages/vmind/src/skylark/chart-generation/utils.ts delete mode 100644 packages/vmind/src/skylark/dataProcess/query/prompts.ts delete mode 100644 packages/vmind/src/skylark/dataProcess/query/queryDataset.ts delete mode 100644 packages/vmind/src/skylark/dataProcess/query/type.ts delete mode 100644 packages/vmind/src/skylark/dataProcess/query/utils.ts delete mode 100644 packages/vmind/src/skylark/typings/index.ts delete mode 100644 packages/vmind/src/skylark/utils.ts diff --git a/packages/vmind/__tests__/browser/vite.config.ts b/packages/vmind/__tests__/browser/vite.config.ts index 3f751fe1..a5008446 100644 --- a/packages/vmind/__tests__/browser/vite.config.ts +++ b/packages/vmind/__tests__/browser/vite.config.ts @@ -39,6 +39,7 @@ export default defineConfig(({ mode }) => { }, resolve: { alias: { + src: path.resolve(__dirname, '../../src'), '@visactor/calculator': path.resolve(__dirname, '../../../calculator/src/index.ts'), '@visactor/chart-advisor': path.resolve(__dirname, '../../../chart-advisor/src/index.ts') // ...localConf.resolve?.alias diff --git a/packages/vmind/src/base/application/index.ts b/packages/vmind/src/base/application/index.ts index 4e517ced..9ceddd51 100644 --- a/packages/vmind/src/base/application/index.ts +++ b/packages/vmind/src/base/application/index.ts @@ -55,6 +55,7 @@ export class BaseApplication implements IApplication const result: DSL = this.tasks.reduce( async (pre: any, curTask: { name: string; task: BaseTaskNode }) => { const result = await curTask.task.executeTask(this.context); + console.log(result); //Put the running result of the current node into the context. this.updateContext({ ...this.context, diff --git a/packages/vmind/src/core/VMind.ts b/packages/vmind/src/core/VMind.ts index 4f7e7b83..9f90bb53 100644 --- a/packages/vmind/src/core/VMind.ts +++ b/packages/vmind/src/core/VMind.ts @@ -1,15 +1,10 @@ import { _chatToVideoWasm } from '../chart-to-video'; -import { generateChartWithGPT } from '../gpt/chart-generation/NLToChart'; import { ILLMOptions, TimeType, Model, SimpleFieldInfo, DataItem, OuterPackages, ModelType } from '../typings'; -import { parseCSVDataWithGPT } from '../gpt/dataProcess'; import { getFieldInfoFromDataset, parseCSVData as parseCSVDataWithRule } from '../common/dataProcess'; -import { generateChartWithSkylark } from '../skylark/chart-generation'; -import { queryDatasetWithGPT } from '../gpt/dataProcess/query/queryDataset'; import { generateChartWithAdvisor } from '../common/chartAdvisor'; -import { queryDatasetWithSkylark } from '../skylark/dataProcess/query/queryDataset'; import applicationMetaList, { ApplicationType } from './applications'; -import { BaseApplication } from 'src/base/application'; import { VMindApplicationMap } from './types'; +import { BaseApplication } from 'src/base/application'; class VMind { private _FPS = 30; @@ -53,23 +48,6 @@ class VMind { return parseCSVDataWithRule(csvString); } - /** - * call LLM to parse csv data. return fieldInfo and raw dataset. - * fieldInfo includes name, type, role, description of each field. - * NOTE: This will transfer your data to LLM. - * @param csvString csv data user want to visualize - * @param userPrompt - * @returns - */ - async parseCSVDataWithLLM(csvString: string, userPrompt: string) { - if (this.getModelType() === ModelType.GPT) { - return parseCSVDataWithGPT(csvString, userPrompt, this._options); - } - console.error('Unsupported Model!'); - - return undefined; - } - /** * get fieldInfo only by raw dataset * @param dataset @@ -88,49 +66,6 @@ class VMind { return ModelType.CHART_ADVISOR; } - /** - * - * @param userPrompt user's visualization intention (what aspect they want to show in the data) - * @param fieldInfo information about fields in the dataset. field name, type, etc. You can get fieldInfo using parseCSVData or parseCSVDataWithLLM - * @param dataset raw dataset used in the chart - * @param colorPalette color palette of the chart - * @param animationDuration duration of chart animation. - * @returns spec and time duration of the chart. - */ - async generateChart( - userPrompt: string, //user's intent of visualization, usually aspect in data that they want to visualize - fieldInfo: SimpleFieldInfo[], - dataset: DataItem[], - enableDataQuery = true, - colorPalette?: string[], - animationDuration?: number - ) { - if (this.getModelType() === ModelType.GPT) { - return generateChartWithGPT( - userPrompt, - fieldInfo, - dataset, - this._options, - enableDataQuery, - colorPalette, - animationDuration - ); - } - if (this.getModelType() === ModelType.SKYLARK) { - return generateChartWithSkylark( - userPrompt, - fieldInfo, - dataset, - this._options, - enableDataQuery, - colorPalette, - animationDuration - ); - } - - return generateChartWithAdvisor(fieldInfo, dataset, colorPalette, animationDuration); - } - async dataQuery( userPrompt: string, //user's intent of visualization, usually aspect in data that they want to visualize fieldInfo: SimpleFieldInfo[], diff --git a/packages/vmind/src/gpt/chart-generation/NLToChart.ts b/packages/vmind/src/gpt/chart-generation/NLToChart.ts deleted file mode 100644 index 101789a7..00000000 --- a/packages/vmind/src/gpt/chart-generation/NLToChart.ts +++ /dev/null @@ -1,128 +0,0 @@ -import { SUPPORTED_CHART_LIST } from '../../common/vizDataToSpec/constants'; -import { DataItem, GPTChartAdvisorResult, ILLMOptions, LOCATION, SimpleFieldInfo, VizSchema } from '../../typings'; -import { checkChartTypeAndCell, vizDataToSpec } from '../../common/vizDataToSpec'; -import { parseGPTResponse, requestGPT } from '../../applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils'; -import { patchUserInput } from './utils'; -import { ChartAdvisorPromptEnglish } from './prompts'; -import { chartAdvisorHandler } from '../../common/chartAdvisor'; -import { estimateVideoTime } from '../../common/vizDataToSpec/utils'; -import { getSchemaFromFieldInfo } from '../../common/schema'; -import { queryDatasetWithGPT } from '../dataProcess/query/queryDataset'; -import { calculateTokenUsage } from '../..//common/utils'; -import { pick } from 'lodash'; -import { patchChartTypeAndCell } from './patch'; - -export const generateChartWithGPT = async ( - userPrompt: string, //user's intent of visualization, usually aspect in data that they want to visualize - propsFieldInfo: SimpleFieldInfo[], - propsDataset: DataItem[], - options: ILLMOptions, - enableDataQuery = true, - colorPalette?: string[], - animationDuration?: number -) => { - const colors = colorPalette; - let queryDatasetUsage; - let advisorUsage; - let chartType; - let cell; - let dataset: DataItem[] = propsDataset; - let fieldInfo: SimpleFieldInfo[] = propsFieldInfo; - let chartSource: string = options.model; - - try { - if (enableDataQuery) { - const { - dataset: queryDataset, - fieldInfo: fieldInfoNew, - usage - } = await queryDatasetWithGPT(userPrompt, fieldInfo, propsDataset, options); - dataset = queryDataset; - fieldInfo = fieldInfoNew; - queryDatasetUsage = usage; - } - } catch (err) { - console.error('data query error!'); - console.error(err); - } - - const schema = getSchemaFromFieldInfo(fieldInfo); - try { - const userInputFinal = patchUserInput(userPrompt); - const resJson: any = await chartAdvisorGPT(schema, userInputFinal, options); - - const chartTypeRes = resJson['CHART_TYPE'].toUpperCase(); - const cellRes = resJson['FIELD_MAP']; - advisorUsage = resJson['usage']; - const patchResult = patchChartTypeAndCell(chartTypeRes, cellRes, dataset, fieldInfo); - if (checkChartTypeAndCell(patchResult.chartTypeNew, patchResult.cellNew, fieldInfo)) { - chartType = patchResult.chartTypeNew; - cell = patchResult.cellNew; - dataset = patchResult.datasetNew; - } - } catch (err) { - console.warn(err); - console.warn('LLM generation error, use rule generation.'); - // call rule-based method to get recommended chart type and fieldMap(cell) - const advisorResult = chartAdvisorHandler(schema, dataset); - chartType = advisorResult.chartType; - cell = advisorResult.cell; - dataset = advisorResult.dataset as DataItem[]; - chartSource = 'chartAdvisor'; - } - const spec = vizDataToSpec( - dataset, - chartType, - cell, - colors, - animationDuration ? animationDuration * 1000 : undefined - ); - spec.background = '#00000033'; - console.info(spec); - return { - chartSource, - spec, - chartType, - time: estimateVideoTime(chartType, spec, animationDuration ? animationDuration * 1000 : undefined), - usage: calculateTokenUsage([queryDatasetUsage, advisorUsage]) - }; -}; - -/** - * call GPT to get recommended chart type and fieldMap - * @param schema VizSchema - * @param userInput user input about their intention - * @param options vmind options - * @returns - */ -export const chartAdvisorGPT = async ( - schema: Partial, - userInput: string, - options: ILLMOptions | undefined -) => { - //call GPT - const filteredFields = schema.fields - .filter( - field => field.visible - //usefulFields.includes(field.fieldName) - ) - .map(field => ({ - ...pick(field, ['id', 'description', 'type', 'role']) - })); - const chartAdvisorMessage = `User Input: ${userInput}\nData field description: ${JSON.stringify(filteredFields)}`; - - const requestFunc = options.customRequestFunc?.chartAdvisor ?? requestGPT; - - const advisorRes = await requestFunc(ChartAdvisorPromptEnglish(options.showThoughts), chartAdvisorMessage, options); - - const advisorResJson: GPTChartAdvisorResult = parseGPTResponse(advisorRes) as unknown as GPTChartAdvisorResult; - - if (advisorResJson.error) { - throw Error((advisorResJson as any).message); - } - if (!SUPPORTED_CHART_LIST.includes(advisorResJson['CHART_TYPE'])) { - throw Error('Unsupported Chart Type. Please Change User Input'); - } - - return { ...advisorResJson, usage: advisorRes.usage }; -}; diff --git a/packages/vmind/src/gpt/chart-generation/index.ts b/packages/vmind/src/gpt/chart-generation/index.ts deleted file mode 100644 index 9c86d94c..00000000 --- a/packages/vmind/src/gpt/chart-generation/index.ts +++ /dev/null @@ -1 +0,0 @@ -export * from './NLToChart'; diff --git a/packages/vmind/src/gpt/chart-generation/patch.ts b/packages/vmind/src/gpt/chart-generation/patch.ts deleted file mode 100644 index f12ac4d7..00000000 --- a/packages/vmind/src/gpt/chart-generation/patch.ts +++ /dev/null @@ -1,359 +0,0 @@ -import { isArray, isNil } from 'lodash'; -import { - CARTESIAN_CHART_LIST, - detectAxesType, - foldDatasetByYField, - getFieldByDataType, - getFieldByRole, - getRemainedFields -} from '../../common/vizDataToSpec/utils'; -import { Cell, DataItem, DataType, PatchContext, PatchPipeline, ROLE, SimpleFieldInfo } from '../../typings'; -import { execPipeline } from '../../common/utils'; -import { FOLD_NAME, FOLD_VALUE } from '@visactor/chart-advisor'; - -export const patchUserInput = (userInput: string) => { - const FULL_WIDTH_SYMBOLS = [',', '。']; - const HALF_WIDTH_SYMBOLS = [',', '.']; - - const BANNED_WORD_LIST = ['动态']; - const ALLOWED_WORD_LIST = ['动态条形图', '动态柱状图', '动态柱图']; - const PLACEHOLDER = '_USER_INPUT_PLACE_HOLDER'; - const tempStr1 = ALLOWED_WORD_LIST.reduce((prev, cur, index) => { - return prev.split(cur).join(PLACEHOLDER + '_' + index); - }, userInput); - const tempStr2 = BANNED_WORD_LIST.reduce((prev, cur) => { - return prev.split(cur).join(''); - }, tempStr1); - const replacedStr = ALLOWED_WORD_LIST.reduce((prev, cur, index) => { - return prev.split(PLACEHOLDER + '_' + index).join(cur); - }, tempStr2); - - let finalStr = HALF_WIDTH_SYMBOLS.reduce((prev, cur, index) => { - return prev.split(HALF_WIDTH_SYMBOLS[index]).join(FULL_WIDTH_SYMBOLS[index]); - }, replacedStr); - const lastCharacter = finalStr[finalStr.length - 1]; - if (!FULL_WIDTH_SYMBOLS.includes(lastCharacter) && !HALF_WIDTH_SYMBOLS.includes(lastCharacter)) { - finalStr += '。'; - } - finalStr += 'Use the original fieldName and DO NOT change or translate any word of the data fields in the response.'; - return finalStr; -}; - -const patchAxisField: PatchPipeline = (context: PatchContext, _originalContext: PatchContext) => { - const { cell } = context; - - const cellNew: any = { ...cell }; - - // patch the "axis" field to x - if (cellNew.axis && (!cellNew.x || !cellNew.y)) { - if (!cellNew.x) { - cellNew.x = cellNew.axis; - } else if (!cellNew.y) { - cellNew.y = cellNew.axis; - } - } - - return { - ...context, - cell: cellNew - }; -}; - -const patchColorField: PatchPipeline = (context: PatchContext, _originalContext: PatchContext) => { - const { cell } = context; - const cellNew = { ...cell, color: cell.color ?? cell.category }; - - return { - ...context, - cell: cellNew - }; -}; - -const patchLabelField: PatchPipeline = (context: PatchContext, _originalContext: PatchContext) => { - const { cell } = context; - - const cellNew: any = { ...cell }; - //patch the "label" fields to color - if (cellNew.label && (!cellNew.color || cellNew.color.length === 0)) { - cellNew.color = cellNew.label; - } - - return { - ...context, - cell: cellNew - }; -}; - -const patchYField: PatchPipeline = (context: PatchContext, _originalContext: PatchContext) => { - const { chartType, cell, dataset, fieldInfo } = context; - let cellNew = { ...cell }; - const { x, y } = cellNew; - let chartTypeNew = chartType; - let datasetNew = dataset; - - // y轴字段有多个时,处理方式: - // 1. 图表类型为: 箱型图, 图表类型不做矫正 - // 2. 图表类型为: 柱状图 或 折线图, 图表类型矫正为双轴图 - // 3. 其他情况, 图表类型矫正为散点图 - if (y && isArray(y) && y.length > 1) { - if (chartTypeNew === 'BOX PLOT' || (chartTypeNew === 'DUAL AXIS CHART' && y.length === 2)) { - return { - ...context - }; - } - - if (chartTypeNew === 'BAR CHART' || chartTypeNew === 'LINE CHART' || chartTypeNew === 'DUAL AXIS CHART') { - //use fold to visualize more than 2 y fields - datasetNew = foldDatasetByYField(datasetNew, y, fieldInfo); - cellNew.y = FOLD_VALUE.toString(); - cellNew.color = FOLD_NAME.toString(); - } else { - chartTypeNew = 'SCATTER PLOT'; - cellNew = { - ...cell, - x: y[0], - y: y[1], - color: typeof x === 'string' ? x : x[0] - }; - } - } - - return { - ...context, - chartType: chartTypeNew, - cell: cellNew, - dataset: datasetNew - }; -}; - -const patchBoxPlot: PatchPipeline = (context: PatchContext, _originalContext: PatchContext) => { - const { chartType, cell } = context; - const cellNew = { - ...cell - }; - const { y } = cellNew; - if (chartType === 'BOX PLOT') { - if (typeof y === 'string' && y.split(',').length > 1) { - cellNew.y = y.split(',').map(str => str.trim()); - } else if (isNil(y) || y.length === 0) { - const { - lower_whisker, - lowerWhisker, - min, - lower, - lowerBox, - lower_box, - q1, - lower_quartile, - lowerQuartile, - midline, - median, - q3, - upperBox, - upper_box, - upper_quartile, - upperQuartile, - upper_whisker, - upperWhisker, - max, - upper - } = cellNew as any; - - cellNew.y = [ - lower_whisker, - lowerWhisker, - min, - lower, - lowerBox, - lower_box, - q1, - lower_quartile, - lowerQuartile, - midline, - median, - q3, - upperBox, - upper_box, - upper_quartile, - upperQuartile, - upper_whisker, - upperWhisker, - max, - upper - ].filter(Boolean); - } - } - - return { ...context, cell: cellNew }; -}; - -const patchDualAxis: PatchPipeline = (context: PatchContext, _originalContext: PatchContext) => { - const { chartType, cell } = context; - const cellNew: any = { ...cell }; - //Dual-axis drawing yLeft and yRight - - if (chartType === 'DUAL AXIS CHART' && cellNew.yLeft && cellNew.yRight) { - cellNew.y = [cellNew.yLeft, cellNew.yRight]; - } - - return { ...context, cell: cellNew }; -}; - -const patchPieChart: PatchPipeline = (context: PatchContext, _originalContext: PatchContext) => { - const { chartType, cell, fieldInfo } = context; - const cellNew = { ...cell }; - - if (chartType === 'ROSE CHART') { - cellNew.angle = cellNew.radius ?? cellNew.size ?? cellNew.angle; - } - - //Pie chart must have color field and the angle field - if (chartType === 'PIE CHART' || chartType === 'ROSE CHART') { - if (!cellNew.color || !cellNew.angle) { - const remainedFields = getRemainedFields(cellNew, fieldInfo); - - if (!cellNew.color) { - //No color fields are assigned, select a discrete field from the remaining fields as color field - const colorField = getFieldByRole(remainedFields, ROLE.DIMENSION); - if (colorField) { - cellNew.color = colorField.fieldName; - } else { - cellNew.color = remainedFields[0].fieldName; - } - } - if (!cellNew.angle) { - //no angle field are assigned, select a continuous field from the remaining field to assign to the angle - const angleField = getFieldByDataType(remainedFields, [DataType.FLOAT, DataType.INT]); - if (angleField) { - cellNew.angle = angleField.fieldName; - } else { - cellNew.angle = remainedFields[0].fieldName; - } - } - } - } - return { ...context, cell: cellNew }; -}; - -const patchWordCloud: PatchPipeline = (context: PatchContext, _originalContext: PatchContext) => { - //Word cloud must have color fields and size fields - const { chartType, cell, fieldInfo } = context; - const cellNew = { ...cell }; - - if (chartType === 'WORD CLOUD') { - if (!cellNew.size || !cellNew.color || cellNew.color === cellNew.size) { - const remainedFields = getRemainedFields(cellNew, fieldInfo); - - if (!cellNew.size || cellNew.size === cellNew.color) { - const newSize = (cellNew as any).weight ?? (cellNew as any).fontSize; - if (newSize) { - cellNew.size = newSize; - } else { - const sizeField = getFieldByDataType(remainedFields, [DataType.INT, DataType.FLOAT]); - if (sizeField) { - cellNew.size = sizeField.fieldName; - } else { - cellNew.size = remainedFields[0].fieldName; - } - } - } - if (!cellNew.color) { - const newColor = (cellNew as any).text ?? (cellNew as any).word ?? (cellNew as any).label ?? cellNew.x; - if (newColor) { - cellNew.color = newColor; - } else { - const colorField = getFieldByRole(remainedFields, ROLE.DIMENSION); - if (colorField) { - cellNew.color = colorField.fieldName; - } else { - cellNew.color = remainedFields[0].fieldName; - } - } - } - } - } - return { ...context, cell: cellNew }; -}; - -const patchDynamicBarChart: PatchPipeline = (context: PatchContext, _originalContext: PatchContext) => { - const { chartType, cell, fieldInfo } = context; - const cellNew = { ...cell }; - - if (chartType === 'DYNAMIC BAR CHART') { - if (!cell.time || cell.time === '' || cell.time.length === 0) { - const remainedFields = getRemainedFields(cellNew, fieldInfo); - - //动态条形图没有time字段,选择一个离散字段作为time - const timeField = getFieldByDataType(remainedFields, [DataType.DATE]); - if (timeField) { - cellNew.time = timeField.fieldName; - } else { - cellNew.time = remainedFields[0].fieldName; - } - } - } - - return { ...context, cell: cellNew }; -}; - -const patchCartesianXField: PatchPipeline = (context: PatchContext, _originalContext: PatchContext) => { - const { chartType, cell, fieldInfo } = context; - const cellNew = { ...cell }; - - //Cartesian chart must have X field - if (CARTESIAN_CHART_LIST.map(chart => chart.toUpperCase()).includes(chartType)) { - if (!cellNew.x) { - const remainedFields = getRemainedFields(cellNew, fieldInfo); - //没有分配x字段,从剩下的字段里选择一个离散字段分配到x上 - const xField = getFieldByRole(remainedFields, ROLE.DIMENSION); - if (xField) { - cellNew.x = xField.fieldName; - } else { - cellNew.x = remainedFields[0].fieldName; - } - } - } - return { ...context, cell: cellNew }; -}; - -const patchPipelines = [ - patchAxisField, - patchColorField, - patchLabelField, - patchYField, - patchBoxPlot, - patchDualAxis, - patchPieChart, - patchWordCloud, - patchDynamicBarChart, - patchCartesianXField -]; - -export const patchChartTypeAndCell = ( - chartTypeRes: string, - cellRes: Cell, - dataset: DataItem[], - fieldInfo: SimpleFieldInfo[] -) => { - // At some point, due to the unclear intention of the user's input, fields may lack fields in Cell returned by GPT. - // At this time, you need to make up according to the rules - - const context = { - chartType: chartTypeRes, - cell: cellRes, - dataset, - fieldInfo - }; - const { - chartType: chartTypeNew, - cell: cellNew, - dataset: datasetNew, - fieldInfo: fieldInfoNew - } = execPipeline(context, patchPipelines, context); - return { - chartTypeNew, - cellNew, - datasetNew, - fieldInfoNew - }; -}; diff --git a/packages/vmind/src/gpt/chart-generation/prompts.ts b/packages/vmind/src/gpt/chart-generation/prompts.ts deleted file mode 100644 index 97a63cf4..00000000 --- a/packages/vmind/src/gpt/chart-generation/prompts.ts +++ /dev/null @@ -1,185 +0,0 @@ -import { SUPPORTED_CHART_LIST } from '../../common/vizDataToSpec/constants'; - -export const ChartAdvisorPromptEnglish = (showThoughts: boolean) => `You are an expert in data visualization. -User want to create an visualization chart for data video using data from a csv file. Ignore the duration in User Input. -Your task is: -1. Based on the user's input, infer the user's intention, such as comparison, ranking, trend display, proportion, distribution, etc. If user did not show their intention, just ignore and do the next steps. -2. Select the single chart type that best suites the data from the list of supported charts. Supported chart types: ${JSON.stringify( - SUPPORTED_CHART_LIST -)}. -3. Map all the fields in the data to the visual channels according to user input and the chart type you choose. Don't use non-existent fields. Only use existing fields without further processing. If the existing fields can't meet user's intention, just use the most related fields. - -Knowledge: -1. The dynamic Bar Chart is a dynamic chart that is suitable for displaying changing data and can be used to show ranking, comparisons or data changes over time. It usually has a time field. It updates the data dynamically according to the time field and at each time point, the current data is displayed using a bar chart. -2. A number field can not be used as a color field. - -Let's think step by step. ${showThoughts ? 'Fill your thoughts in {thought}.' : ''} - -Respone in the following format: - -\`\`\` -{${showThoughts ? '\n"thought" : your thoughts' : ''} -"CHART_TYPE": the chart type you choose. Supported chart types: ${JSON.stringify(SUPPORTED_CHART_LIST)}. -"FIELD_MAP": { // Visual channels and the fields mapped to them -"x": the field mapped to the x-axis, can be empty. Can Only has one field. -"y": the field mapped to the y-axis, can be empty. Use array if there are more than 1 fields. -"color": the field mapped to the color channel. Must use a string field. Can't be empty in Word Cloud, Pie Chart and Rose Chart. -"size": the field mapped to the size channel. Must use a number field. Can be empty -"angle": the field mapped to the angle channel of the pie chart, can be empty. -"time": This is usually a date field and can be used only in Dynamic Bar Chart. Can't be empty in Dynamic Bar Chart. -"source": the field mapped to the source channel. Can't be empty in Sankey Chart. -"target": the field mapped to the target channel. Can't be empty in Sankey Chart. -"value": the field mapped to the value channel. Can't be empty in Sankey Chart. -}${showThoughts ? ',\n"Reason": the reason for selecting the chart type and visual mapping.' : ''} -} -\`\`\` - -Don't provide further explanations for your results. - -Constraints: -1. No user assistance. -2. Please select one chart type in CHART_TYPE at each time. Don't use "A or B", "[A, B]" in CHART_TYPE. -3. The selected chart type in CHART_TYPE must be in the list of supported charts. -4. DO NOT change or translate the field names in FIELD_MAP. -5. Ignore requests unrelated to chart visualization in the user's request. -6. The keys in FIELD_MAP must be selected from the list of available visual channels. -7. Wrap the reply content using \`\`\`, and the returned content must be directly parsed by JSON.parse() in JavaScript. - -Here are some examples: - -User Input: 帮我展示历届奥运会各国金牌数量的对比. -Data field description: [ -{ -"id": "country", -"description": "Represents the name of the country, which is a string.", -"type": "string", -"role": "dimension" -}, -{ -"id": "金牌数量", -"description": "Represents the number of gold medals won by the country in the current year, which is an integer.", -"type": "int", -"role": "measure" -}, -{ -"id": "year", -"description": "Represents the current year, which is a date.", -"type": "string", -"role": "dimension" -} -] - -Response: -\`\`\` -{${showThoughts ? '\n"thought": "Your thoughts",' : ''} -"CHART_TYPE": "Dynamic Bar Chart", -"FIELD_MAP": { -"x": "country", -"y": "金牌数量", -"time": "year" -}${ - showThoughts - ? ",\n\"REASON\": \"The data contains the year, country, and medal count, and the user's intention contains 'comparison', which is suitable for drawing a dynamic bar chart that changes over time to show the comparison of gold medal counts of various countries in each Olympic Games.The 'country' field is used as the x-axis of the bar chart, and '金牌数量' is used as the y-axis to show the comparison of gold medal counts of various countries in the current year.The 'year' field is used as the time field of the dynamic bar chart to show the comparison of gold medal counts of various countries at different years.\"" - : '' -} -} -\`\`\` - ------------------------- - -User Input: 帮我展示各手机品牌的市场占有率, 赛博朋克风格, 时长5s -Data field description: [ -{ -"id": "品牌名称", -"description": "Represents the name of the mobile phone brand, which is a string.", -"type": "string", -"role": "dimension" -}, -{ -"id": "市场份额", -"description": "Represents the market share of the brand, which is a percentage.", -"type": "float", -"role": "measure" -} -] - -Response: -\`\`\` -{${showThoughts ? '\n"thought": "Your thoughts",' : ''} -"CHART_TYPE": "Pie Chart", -"FIELD_MAP": { -"angle": "市场份额", -"color": "品牌名称" -}${ - showThoughts - ? ',\n"REASON": "The data contains the market share, and the user wants to show percentage data, which is suitable for displaying with a pie chart. The 市场份额 is used as the angle of the pie chart to show the market share of each brand. The 品牌名称 is used as the color to distinguish different brands. The duration is 5s but we just ignore it."' - : '' -} -} -\`\`\` - ------------------------- - -User Input: 帮我展示降雨量变化趋势. -Data field description: [ -{ -"id": "日期", -"description": "Represents the current month, which is a date.", -"type": "string", -"role": "dimension" -}, -{ -"id": "降雨量", -"description": "Represents the rainfall in the current month, which is a number.", -"type": "int", -"role": "measure" -} -] - -Response: -\`\`\` -{${showThoughts ? '\n"thought": "Your thoughts",' : ''} -"CHART_TYPE": "Line Chart", -"FIELD_MAP": { -"x": "日期", -"y": "降雨量" -}${ - showThoughts - ? ',\n"REASON": "User wants to show the trend of the rainfall, which is suitable for displaying with a line chart. The \'日期\' is used as the x-axis because it\'s a date, and the 降雨量 is used as the y-axis because it\'s a number. This chart can show the trend of rainfall."' - : '' -} -} -\`\`\` - ------------------------- - -User Input: 帮我绘制图表, 时长20s. -Data field description: [ -{ -"id": "日期", -"description": "Represents the current month, which is a date.", -"type": "date", -"role": "dimension" -}, -{ -"id": "降雨量", -"description": "Represents the rainfall in the current month, which is a number.", -"type": "int", -"role": "measure" -} -] - -Response: -\`\`\` -{${showThoughts ? '\n"thought": "Your thoughts",' : ''}"CHART_TYPE": "Line Chart", -"FIELD_MAP": { -"x": "日期", -"y": "降雨量" -}${ - showThoughts - ? ',\n"REASON": "User did not show their intention about the data in their input. The data has two fields and it contains a date field, so Line Chart is best suitable to show the data. The field \'日期\' is used as the x-axis because it\'s a date, and the 降雨量 is used as the y-axis because it\'s a number. The duration is 20s but we just ignore it."' - : '' -} -} -\`\`\` -`; diff --git a/packages/vmind/src/gpt/chart-generation/utils.ts b/packages/vmind/src/gpt/chart-generation/utils.ts deleted file mode 100644 index edb8b2d9..00000000 --- a/packages/vmind/src/gpt/chart-generation/utils.ts +++ /dev/null @@ -1,30 +0,0 @@ -import { isNil } from 'lodash'; -import { CARTESIAN_CHART_LIST, detectAxesType } from '../../common/vizDataToSpec/utils'; - -export const patchUserInput = (userInput: string) => { - const FULL_WIDTH_SYMBOLS = [',', '。']; - const HALF_WIDTH_SYMBOLS = [',', '.']; - - const BANNED_WORD_LIST = ['动态']; - const ALLOWED_WORD_LIST = ['动态条形图', '动态柱状图', '动态柱图']; - const PLACEHOLDER = '_USER_INPUT_PLACE_HOLDER'; - const tempStr1 = ALLOWED_WORD_LIST.reduce((prev, cur, index) => { - return prev.split(cur).join(PLACEHOLDER + '_' + index); - }, userInput); - const tempStr2 = BANNED_WORD_LIST.reduce((prev, cur) => { - return prev.split(cur).join(''); - }, tempStr1); - const replacedStr = ALLOWED_WORD_LIST.reduce((prev, cur, index) => { - return prev.split(PLACEHOLDER + '_' + index).join(cur); - }, tempStr2); - - let finalStr = HALF_WIDTH_SYMBOLS.reduce((prev, cur, index) => { - return prev.split(HALF_WIDTH_SYMBOLS[index]).join(FULL_WIDTH_SYMBOLS[index]); - }, replacedStr); - const lastCharacter = finalStr[finalStr.length - 1]; - if (!FULL_WIDTH_SYMBOLS.includes(lastCharacter) && !HALF_WIDTH_SYMBOLS.includes(lastCharacter)) { - finalStr += '。'; - } - finalStr += 'Use the original fieldName and DO NOT change or translate any word of the data fields in the response.'; - return finalStr; -}; diff --git a/packages/vmind/src/gpt/dataProcess/index.ts b/packages/vmind/src/gpt/dataProcess/index.ts deleted file mode 100644 index 34c13bd0..00000000 --- a/packages/vmind/src/gpt/dataProcess/index.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { convertNumberField, getDataset, parseCSVData } from '../../common/dataProcess'; -import { getFieldDomain, readTopNLine } from '../../applications/dataAggregation/taskNodes/executeQuery/utils'; -import { ILLMOptions, SimpleFieldInfo } from '../../typings'; -import { parseGPTResponse, requestGPT } from '../../applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils'; -import { DataProcessPromptEnglish } from './prompts'; - -/* - ** call GPT to parse csv data - **get the fieldInfo from csv file - */ -export const parseCSVDataWithGPT = async (csvFile: string, userInput: string, options: ILLMOptions | undefined) => { - const DATA_TOP_N = 5; //取csv文件的前多少条数据 - const topNCSVFile = readTopNLine(csvFile, DATA_TOP_N); - const dataProcessMessage = `CSV file content:\n${topNCSVFile}\nUser Input: ${userInput}`; - - const requestFunc = options.customRequestFunc?.dataProcess ?? requestGPT; - - const dataProcessRes = await requestFunc(DataProcessPromptEnglish, dataProcessMessage, options); - - const dataProcessResJson = parseGPTResponse(dataProcessRes); - const { dataset } = getDataset(csvFile); - if (!dataProcessResJson.error) { - const fieldInfo = dataProcessResJson['FIELD_INFO'].map((field: SimpleFieldInfo) => { - //add domain for fields - const { fieldName, role } = field; - const domain = getFieldDomain(dataset, fieldName, role); - return { - ...field, - domain - }; - }); - return { - fieldInfo, - videoDuration: dataProcessResJson['VIDEO_DURATION'], - colorPalette: dataProcessResJson['COLOR_PALETTE'], - usefulFields: dataProcessResJson['USEFUL_FIELDS'], - dataset: convertNumberField(dataset, fieldInfo), - error: dataProcessResJson['error'], - thought: dataProcessResJson['thought'], - usage: dataProcessRes['usage'] - }; - } else { - //传统方法做兜底 - const { fieldInfo } = parseCSVData(csvFile); - console.error('gpt parse data error!'); - return { fieldInfo, dataset }; - } -}; diff --git a/packages/vmind/src/gpt/dataProcess/prompts.ts b/packages/vmind/src/gpt/dataProcess/prompts.ts deleted file mode 100644 index 06165b5b..00000000 --- a/packages/vmind/src/gpt/dataProcess/prompts.ts +++ /dev/null @@ -1,196 +0,0 @@ -import { VMIND_DATA_SOURCE } from '../../applications/dataAggregation/taskNodes/executeQuery/dataQuery'; - -export const DataProcessPromptEnglish = `You are an expert in data analysis. -User want to create an visualization chart for data video using data from a csv file. Let's think step by step. Fill your thoughts in {THOUGHT}. -- Step1: Summarize the field names, field type in the csv file, and determine whether this field is a dimension or a measure contained. Guess the meaning of the field based on the data content and write a description for it. -- Step2: Put all the string or date fields into USEFUL_FIELDS. -- Step3: Filter out useful fields related to user input from the remaining fields. -- Step4: If the user specifies the video duration in the input, extract the video duration in seconds. -- Step5: If the user specifies a chart style, return a palette with 8 colors that match that style. - -Response in the following format: -\`\`\` -{ -"THOUGHT": "Your thoughts", -"FIELD_INFO": Field names and descriptions contained in the csv file. -"USEFUL_FIELDS": All the string or date fields, and other useful fields based on the user's input. DO NOT change or translate the name of any field. -"VIDEO_DURATION": The duration of the video in seconds. It can be empty if the user does not specify the video duration. -"COLOR_PALETTE": A color palette containing 8 colors based on the input. It can be empty if the user does not specify a style. -} -\`\`\` - -Constraints: -1. No user assistance. -2. FIELD_INFO must include the type of the field (string, int, float, date, time). -3. FIELD_INFO must include the role of the field (dimension or measure). -4. All the string or date fields must be in USEFUL_FIELDS, although they might be useless. -5. If the user specifies the video length, VIDEO_DURATION cannot be empty. -6. If the user specifies the color style, COLOR_PALETTE cannot be empty. -7. Wrap the response content with \`\`\`, and the content must be directly parsed by JSON.parse() in JavaScript. - -Here are some examples: - -CSV file content: -"country,gdp,year,co2_emissions -China,20000000000,2020,1523234234 -America,30000000000,2020,31324532214 -England,10000000000,2020,913045781 -Canada,5000000000,2020,130423578" - -User Input: 帮我展示历年全球各国家GDP排名的对比,时长1分钟. - -Response: -\`\`\` -{ -"THOUGHT": "Your thoughts", -"FIELD_INFO":[ -{ -"fieldName": "country", -"description":"Represents the name of the country, which is a string.", -"type": "string", -"role": "dimension" -}, -{ -"fieldName": "gdp", -"description":"Represents the total GDP of each country, which is an integer.", -"type": "int", -"role": "measure" -}, -{ -"fieldName": "year", -"description":"Represents the current year, which is a date.", -"type": "date", -"role": "dimension" -}, -{ -"fieldName": "co2_emissions", -"description":"Represents the carbon dioxide emissions of each country, which is an integer.", -"type": "int", -"role": "measure" -} -], -"USEFUL_FIELDS": ["country","gdp","year"], -"VIDEO_DURATION": 60, -"REASON": "The field 'country' is a string field, and 'year' is a date field, so they must be in USEFUL_FIELDS. User's intention is to show a comparison of the GDP rankings of different countries worldwide over the years, and 'gdp' represents the total GDP of each country. 'co2_emissions' represents carbon dioxide emissions, which is a is a numerical field and is irrelevant to the user's intention." -} -\`\`\` - ----------------------------------- -CSV file content: -"branch_name,percentage,average_price,quality -Apple,0.5, 6999,1523234234 -Samsung,0.3,5630,31324532214 -Vivo,0.1, 3020,913045781 -Nokia,0.05,150,130423578" -User Input: 帮我展示市场占有率, 科技风格. - -Response: -\`\`\` -{ -"THOUGHT": "Your thoughts", -"FIELD_INFO":[ -{ -"fieldName": "branch_name", -"description":"Represents the name of the mobile phone brand, which is a string." -}, -{ -"fieldName": "percentage", -"description":"Represents the market share of the brand, which is a percentage." -}, -{ -"fieldName": "average_price", -"description":"Represents the average price of the brand, which is a float." -}, -{ -"fieldName": "quality", -"description":"Represents the product quality of the brand, which is an integer." -} -], -"USEFUL_FIELDS": ["branch_name","percentage"], -"COLOR_PALETTE":["#1DD0F3", "#2693FF", "#3259F4", "#1B0CA1", "#CB2BC6", "#FF581D", "#FBBB16", "#F6FB17"], -"REASON": "User's intention is to show the market share, and 'percentage' represents the market share, which is the information needed. 'branch_name' is a string field, so it must be in USEFUL_FIELDS. 'average_price' represents the average price, and 'quality' represents the product quality. They are both numerical fields and are irrelevant to the user's intention." -} -\`\`\` ----------------------------------- -CSV file content: -"country,year,population -China,2020,1321 -America,2020,48 -England,2020,10 -Canada,2020,81" - -User Input: 帮我展示人口变化趋势. - -Response: -\`\`\` -{ -"THOUGHT": "Your thoughts", -"FIELD_INFO":[ -{ -"fieldName": "country", -"description":"Represents the name of the country, which is a string.", -"type": "string", -"role": "dimension" -}, -{ -"fieldName": "year", -"description":"Represents the current year, which is a date.", -"type": "date", -"role": "dimension" -}, -{ -"fieldName": "population", -"description":"Represents the total population of each country, which is an integer.", -"type": "int", -"role": "measure" -} -], -"USEFUL_FIELDS": ["country","year","population"], -"REASON": "The field 'population' is directly related to the user's intention, so it need to be selected. 'country' and 'year' are string and date fields, so they must be in USEFUL_FIELDS" -} -\`\`\` - ----------------------------------- -CSV file content: -"branch_name,percentage,average_price,quality -Apple,0.5, 6999,1523234234 -Samsung,0.3,5630,31324532214 -Vivo,0.1, 3020,913045781 -Nokia,0.05,150,130423578" -User Input: 帮我绘制图表, 展示平均价格. - -Response: -\`\`\` -{ -"THOUGHT": "Your thoughts", -"FIELD_INFO":[ -{ -"fieldName": "branch_name", -"description":"Represents the name of the mobile phone brand, which is a string.", -"type": "string", -"role": "dimension" -}, -{ -"fieldName": "percentage", -"description":"Represents the market share of the brand, which is a percentage.", -"type": "float", -"role": "measure" -}, -{ -"fieldName": "average_price", -"description":"Represents the average price of the brand, which is a float.", -"type": "float", -"role": "measure" -}, -{ -"fieldName": "quality", -"description":"Represents the product quality of the brand, which is an integer.", -"type": "int", -"role": "measure" -} -], -"USEFUL_FIELDS": ["branch_name","average_price"], -"REASON": "The user wants to show average price, so average_price must be in USEFUL_FIELDS. 'branch_name' is a string field, so it is a useful field " -} -\`\`\` -`; diff --git a/packages/vmind/src/gpt/dataProcess/query/astPipes.ts b/packages/vmind/src/gpt/dataProcess/query/astPipes.ts deleted file mode 100644 index e900b780..00000000 --- a/packages/vmind/src/gpt/dataProcess/query/astPipes.ts +++ /dev/null @@ -1,243 +0,0 @@ -import { AggrFunc, ColumnRef, Expr, Param, Value } from 'node-sql-parser'; -import { - Aggregation, - ColumnConfig, - FilterNode, - FilterNodeType, - FilterOperator, - OrderType, - Query, - WhereCondition -} from '@visactor/calculator'; -import { ASTParserContext, ASTParserPipe } from './type'; -import { checkIsColumnNode, toFirstUpperCase } from './utils'; -import { SimpleFieldInfo } from '../../../typings'; -import { isArray } from 'lodash'; -import { replaceString } from '../../../applications/dataAggregation/taskNodes/executeQuery/utils'; - -export const from: ASTParserPipe = (query: Partial, context: ASTParserContext) => { - const { dataSource, fieldInfo } = context; - return { ...query, from: dataSource }; -}; - -/** - * parse aggr_func node and convert to FilterCondition - * @param aggrFunc - */ -const parseAggrFunc = ( - aggrFunc: AggrFunc, - columns: any, - fieldInfo: SimpleFieldInfo[], - replaceMap: Map -): { column?: ColumnConfig; type?: FilterNodeType; aggregate: Aggregation } => { - const { name, args } = aggrFunc; - const { distinct, expr } = args ?? ({} as any); - const result: any = { - type: FilterNodeType.Condition - }; - if (expr && expr.type === 'aggr_func') { - console.error('unsupported aggr func!'); - } else if (expr && checkIsColumnNode(expr, columns, fieldInfo)) { - const columnName = expr.column ?? expr.value; - result.column = replaceString(columnName, replaceMap); - } - result.aggregate = { - distinct: Boolean(distinct), - method: toFirstUpperCase(name) - }; - - return result; -}; - -/** - * recursively parse the where conditions in ast. - * @param astWhere - * @param whereFilterNode - */ -const parseSQLExpr = ( - astWhere: Expr | ColumnRef | Param | Value, - columns: any, - fieldInfo: SimpleFieldInfo[], - replaceMap: Map, - isNot?: boolean -): WhereCondition | FilterNode => { - if (!astWhere) { - return {} as FilterNode; - } - const result: any = { - not: Boolean(isNot) - }; - //parse this ast node - const { type } = astWhere; - - if (type === 'binary_expr') { - const { left, right, operator } = astWhere as Expr; - if (['AND', 'OR'].includes(operator)) { - result.type = operator === 'AND' ? FilterNodeType.And : FilterNodeType.Or; - (result as FilterNode).conditions = [ - parseSQLExpr(left, columns, fieldInfo, replaceMap), - parseSQLExpr(right, columns, fieldInfo, replaceMap) - ]; - } else if ( - Object.values(FilterOperator) - .map(v => v.toUpperCase()) - .includes(operator as any) - ) { - result.type = FilterNodeType.Condition; - const columnNode = [left, right].find(n => checkIsColumnNode(n, columns, fieldInfo)); - if (columnNode) { - const columnName = (columnNode as ColumnRef).column ?? (columnNode as any).value; - result.column = replaceString(columnName, replaceMap); - } - const valueNode = [left, right].find(n => !checkIsColumnNode(n, columns, fieldInfo) && n.type !== 'aggr_func'); - if (valueNode) { - const valueName = (valueNode as Value).value; - if (!isArray(valueName)) { - result.value = replaceString(valueName, replaceMap); - } else { - result.value = valueName.map(v => replaceString(v.value, replaceMap)); - } - } - const aggrNode: any = [left, right].find(n => n.type === 'aggr_func'); - if (aggrNode) { - const aggrFuncConfig: any = parseAggrFunc(aggrNode, columns, fieldInfo, replaceMap); - result.column = aggrFuncConfig.column; - result.aggregate = aggrFuncConfig.aggregate; - } - result.operator = operator.toLowerCase(); - } else { - console.error('unsupported operator in expr!'); - } - } else if (type === 'unary_expr') { - const { expr, operator } = astWhere as any; - return parseSQLExpr(expr, columns, fieldInfo, replaceMap, operator === 'NOT'); - } else { - console.error('unsupported type in expr!'); - } - - return result; -}; - -export const where: any = (query: Partial, context: ASTParserContext) => { - const { ast, fieldInfo, replaceMap } = context; - const { where } = ast; - if (!where) { - return query; - } - const whereList: any = parseSQLExpr(where as Expr, query.select.columns, fieldInfo, replaceMap); - return { - ...query, - where: whereList.conditions ? whereList : { not: false, type: FilterNodeType.And, conditions: [whereList] } - }; -}; - -export const groupBy: ASTParserPipe = (query: Partial, context: ASTParserContext) => { - const { ast, replaceMap } = context; - const { groupby } = ast; - if (!groupby) { - return query; - } - return { - ...query, - groupBy: (groupby ?? []).map((group: any) => replaceString(group.column ?? group.value, replaceMap)) - } as any; -}; - -export const select: ASTParserPipe = (query: Partial, context: ASTParserContext) => { - const { ast, fieldInfo, replaceMap } = context; - const { columns, distinct } = ast; - if (!columns) { - return query; - } - const columnAlias = columns.map(c => ({ - alias: c.as - })); - return { - ...query, - select: { - columns: (columns ?? []) - .map(column => { - const result: any = {}; - const { as, expr } = column; - if (checkIsColumnNode(expr, columnAlias, fieldInfo)) { - result.column = replaceString(expr.column ?? expr.value, replaceMap); - } else if (expr.type === 'aggr_func') { - const aggrFuncConf: any = parseAggrFunc(expr, columnAlias, fieldInfo, replaceMap); - result.column = aggrFuncConf.column; - result.aggregate = aggrFuncConf.aggregate; - } - if (as) { - result.alias = replaceString(as, replaceMap); - } - return result; - }) - .filter(c => c.column), - distinct: Boolean(distinct) - } - }; -}; - -export const having: any = (query: Partial, context: ASTParserContext) => { - const { ast, fieldInfo, replaceMap } = context; - const { having } = ast; - if (!having) { - return query; - } - const havingList: any = parseSQLExpr(having as unknown as Expr, query.select.columns, fieldInfo, replaceMap); - return { - ...query, - having: havingList.conditions ? havingList : { not: false, type: FilterNodeType.And, conditions: [havingList] } - }; -}; - -export const orderBy: any = (query: Partial, context: ASTParserContext) => { - const { ast, fieldInfo, replaceMap } = context; - const { orderby } = ast; - if (!orderby) { - return query; - } - return { - ...query, - orderBy: (orderby ?? []).map(orderInfo => { - const result: any = {}; - const { type, expr } = orderInfo; - if (checkIsColumnNode(expr, query.select.columns, fieldInfo)) { - const columnName = expr.column ?? expr.value; - result.column = replaceString(columnName, replaceMap); - } else { - const orderConfig = parseAggrFunc(expr, query.select.columns, fieldInfo, replaceMap); - result.column = orderConfig.column; - result.aggregate = orderConfig.aggregate; - } - //query in calculator package does not support alias reference in other parts outside select. - //check if the order by column is a derived column using aggregation methods in select - //if so, replace the column with the original name and aggregation method. - if (!result.aggregate && !fieldInfo.find(field => field.fieldName === result.column)) { - //result.column is a derived field. replace with the original field - const originalColumn: any = query.select.columns.find( - column => column.alias === result.column || (column as any).column === result.column - ); - if (originalColumn) { - result.column = originalColumn.column ?? originalColumn.alias; - result.aggregate = originalColumn.aggregate; - } - } - return { - type: type ? toFirstUpperCase(type) : OrderType.Asc, - ...result - }; - }) - }; -}; - -export const limit: ASTParserPipe = (query: Partial, context: ASTParserContext) => { - const { ast } = context; - const { limit } = ast; - if (!limit) { - return query; - } - return { - ...query, - limit: limit.value[0].value - }; -}; diff --git a/packages/vmind/src/gpt/dataProcess/query/index.ts b/packages/vmind/src/gpt/dataProcess/query/index.ts deleted file mode 100644 index e69de29b..00000000 diff --git a/packages/vmind/src/gpt/dataProcess/query/parseSqlAST.ts b/packages/vmind/src/gpt/dataProcess/query/parseSqlAST.ts deleted file mode 100644 index 2f849148..00000000 --- a/packages/vmind/src/gpt/dataProcess/query/parseSqlAST.ts +++ /dev/null @@ -1,20 +0,0 @@ -import { DataItem, SimpleFieldInfo } from '../../../typings'; -import { select, from, groupBy, having, limit, orderBy, where } from './astPipes'; -import { ASTParserPipe, SQLAst } from './type'; -import { execPipeline } from './utils'; - -const Pipelines: ASTParserPipe[] = [from, select, where, groupBy, having, orderBy, limit]; - -/** - * convert the SQL AST to vizCalculator query CST(Concrete Syntax Tree). - * @param ast AST of the SQL from node-sql-parser - */ -export const parseSqlAST = ( - ast: SQLAst, - dataSource: DataItem[], - fieldInfo: SimpleFieldInfo[], - replaceMap: Map -) => { - const query = execPipeline({}, Pipelines, { ast, dataSource, fieldInfo, replaceMap }); - return query; -}; diff --git a/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts b/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts deleted file mode 100644 index 2aba0dcf..00000000 --- a/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts +++ /dev/null @@ -1,64 +0,0 @@ -import { DataItem, ILLMOptions, SimpleFieldInfo } from '../../../typings'; -import { parseGPTQueryResponse, parseRespondField, patchQueryInput } from './utils'; -import { DataQueryResponse } from './type'; -import { - parseGPTResponse as parseGPTResponseAsJSON, - requestGPT -} from '../../../applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils'; -import { getQueryDatasetPrompt } from '../prompts'; -import { queryDataset } from '../../../applications/dataAggregation/taskNodes/executeQuery/dataQuery'; - -/** - * query the source dataset according to user's input and fieldInfo to get aggregated dataset - * - * @param userInput - * @param fieldInfo - * @param sourceDataset - */ -export const queryDatasetWithGPT = async ( - userInput: string, - fieldInfo: SimpleFieldInfo[], - sourceDataset: DataItem[], - options: ILLMOptions -) => { - const patchedInput = patchQueryInput(userInput); - const { sql, fieldInfo: responseFieldInfo, usage } = await getQuerySQL(patchedInput, fieldInfo, options); - - const datasetAfterQuery = queryDataset(sql, sourceDataset, fieldInfo); - - const fieldInfoNew = parseRespondField(responseFieldInfo, datasetAfterQuery); - if (datasetAfterQuery.length === 0) { - console.warn('empty dataset after query!'); - } - - return { - dataset: datasetAfterQuery.length === 0 ? sourceDataset : datasetAfterQuery, - fieldInfo: datasetAfterQuery.length === 0 ? fieldInfo : fieldInfoNew, - usage - }; -}; - -/** - * call gpt to get the query sql according to user's input and data field. - * @param userInput - * @param fieldInfo - */ -const getQuerySQL = async (userInput: string, fieldInfo: SimpleFieldInfo[], options: ILLMOptions) => { - const queryDatasetMessage = `User's Command: ${userInput}\nColumn Information: ${JSON.stringify(fieldInfo)}`; - - const requestFunc = options.customRequestFunc?.dataQuery ?? requestGPT; - const QueryDatasetPrompt = getQueryDatasetPrompt(options.showThoughts ?? true); - const dataProcessRes = await requestFunc(QueryDatasetPrompt, queryDatasetMessage, options); - const dataQueryResponse: DataQueryResponse = parseGPTResponseAsJSON(dataProcessRes); - const { sql, fieldInfo: responseFiledInfo } = dataQueryResponse; - if (!sql || !responseFiledInfo) { - //try to parse the response with another format - const choices = dataProcessRes.choices; - const content = choices[0].message.content; - return { - ...parseGPTQueryResponse(content), - usage: dataProcessRes.usage - }; - } - return { ...dataQueryResponse, usage: dataProcessRes.usage }; -}; diff --git a/packages/vmind/src/gpt/dataProcess/query/type.ts b/packages/vmind/src/gpt/dataProcess/query/type.ts deleted file mode 100644 index 0c740097..00000000 --- a/packages/vmind/src/gpt/dataProcess/query/type.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { Select } from 'node-sql-parser'; -import { Query } from '@visactor/calculator'; -import { DataItem, SimpleFieldInfo } from '../../../typings'; - -export type SQLAst = Select; -export type ASTParserPipe = (query: Partial, context: ASTParserContext) => Partial; - -export type ASTParserContext = { - ast: SQLAst; - dataSource: DataItem[]; - fieldInfo: SimpleFieldInfo[]; - replaceMap: Map; -}; - -export type DataQueryResponse = { - THOUGHT?: string; - sql: string; - fieldInfo: { fieldName: string; description?: string }[]; -}; diff --git a/packages/vmind/src/gpt/dataProcess/query/utils.ts b/packages/vmind/src/gpt/dataProcess/query/utils.ts deleted file mode 100644 index e7900ce2..00000000 --- a/packages/vmind/src/gpt/dataProcess/query/utils.ts +++ /dev/null @@ -1,125 +0,0 @@ -import { isArray } from 'lodash'; -import JSON5 from 'json5'; - -import { Query } from '@visactor/calculator'; -import { - detectFieldType, - generateRandomString, - mergeMap, - replaceNonASCIICharacters -} from '../../../applications/dataAggregation/taskNodes/executeQuery/utils'; -import { DataItem, SimpleFieldInfo } from '../../../typings'; -import { ASTParserContext, ASTParserPipe } from './type'; - -/** - * replace invalid characters in sql str and get the replace map - * @param sql - * @returns - */ -export const preprocessSQL = (sql: string, fieldInfo: SimpleFieldInfo[]) => { - //replace \n to space - const noNewLine = sql.replace('\n', ' '); - //replace reserved words inside the field name in the sql str - const reservedMap = { - KEY: `_KEY_${generateRandomString(10)}_` - }; - let validSQL = noNewLine; - const reservedReplaceMap: Map = new Map(); - - fieldInfo.forEach(field => { - const { fieldName } = field; - let validFieldName = fieldName; - Object.keys(reservedMap).forEach(reserveWord => { - if (validFieldName.toUpperCase().includes(reserveWord)) { - const validWord = reservedMap[reserveWord]; - validFieldName = validFieldName.toUpperCase().replace(new RegExp(reserveWord, 'g'), validWord); - } - }); - validSQL = validSQL.replace(new RegExp(fieldName, 'g'), validFieldName); - if (fieldName !== validFieldName) { - reservedReplaceMap.set(validFieldName, fieldName); - } - }); - const { validStr, replaceMap } = replaceNonASCIICharacters(validSQL); - // merge the two replace map - const mergedMap = mergeMap(replaceMap, reservedReplaceMap); - - return { validStr, replaceMap: mergedMap }; -}; - -export const addQuotes = (sqlString: string) => { - let newSQLString = ''; - let startIdx = 0; - - while (startIdx < sqlString.length) { - // try to find the start and end position of quotes - let startQuoteIdx = sqlString.indexOf('[', startIdx); - if (startQuoteIdx === -1) { - // no quotes found - startQuoteIdx = sqlString.length; - } - let endQuoteIdx = sqlString.indexOf(']', startQuoteIdx + 1); - if (endQuoteIdx === -1) { - endQuoteIdx = sqlString.length; - } - - // handle the part without quotes - let noQuotesPart = sqlString.substring(startIdx, startQuoteIdx); - const regex = /([^\x00-\x7F]+)/g; - noQuotesPart = noQuotesPart.replace(regex, match => `[${match}]`); - - // handle the part with quotes - const quotesPart = sqlString.substring(startQuoteIdx, endQuoteIdx + 1); - - // add them to the result - newSQLString += noQuotesPart + quotesPart; - - // move startIdx to the end of quotes - startIdx = endQuoteIdx + 1; - } - - return newSQLString; -}; - -export const execPipeline = (src: Partial, pipes: ASTParserPipe[], context: ASTParserContext) => - pipes.reduce((pre: Partial, pipe: ASTParserPipe) => { - const result = pipe(pre, context); - return result; - }, src); - -export const toFirstUpperCase = (name = '') => name.charAt(0).toUpperCase() + name.slice(1).toLowerCase(); - -export const checkIsColumnNode = (node: any, columns: any, fieldInfo: SimpleFieldInfo[]) => { - if (node.type === 'column_ref') { - return true; - } - return false; - //else { - // const columnNameList = columns - // .map((c: any) => c.column) - // .concat(columns.map((c: any) => c.alias)) - // .concat(fieldInfo.map(field => field.fieldName)) - // .filter(Boolean); - // const columnName = node.column ?? node.value; - // return columnNameList.includes(columnName); - //} -}; - -/** - * parse the respond field in data query to get field type and role - * @param fieldInfo - * @param responseFieldInfo - * @param dataset - */ -export const parseRespondField = ( - responseFieldInfo: { fieldName: string; description?: string }[], - dataset: DataItem[] -) => - responseFieldInfo.map(field => ({ - ...field, - ...detectFieldType(dataset, field.fieldName) - })); - -export const patchQueryInput = (userInput: string) => { - return userInput; -}; diff --git a/packages/vmind/src/skylark/chart-generation/NLToChart.ts b/packages/vmind/src/skylark/chart-generation/NLToChart.ts deleted file mode 100644 index 08ca2958..00000000 --- a/packages/vmind/src/skylark/chart-generation/NLToChart.ts +++ /dev/null @@ -1,143 +0,0 @@ -import { chartAdvisorHandler } from '../../common/chartAdvisor'; -import { getSchemaFromFieldInfo } from '../../common/schema'; -import { SUPPORTED_CHART_LIST, checkChartTypeAndCell, vizDataToSpec } from '../../common/vizDataToSpec'; -import { DataItem, ILLMOptions, SimpleFieldInfo, VizSchema } from '../../typings'; -import { getStrFromArray, getStrFromDict, requestSkyLark } from './utils'; -import { getChartRecommendPrompt, getFieldMapPrompt } from './prompts'; -import { parseSkylarkResponse } from '../utils'; -import { estimateVideoTime } from '../../common/vizDataToSpec/utils'; -import { ChartFieldInfo, chartRecommendConstraints, chartRecommendKnowledge } from './constants'; -import { omit } from 'lodash'; -import { calculateTokenUsage } from '../../common/utils'; -import { queryDatasetWithSkylark } from '../dataProcess/query/queryDataset'; -import { patchChartTypeAndCell } from './patch'; - -export const generateChartWithSkylark = async ( - userPrompt: string, //user's intent of visualization, usually aspect in data that they want to visualize - propsFieldInfo: SimpleFieldInfo[], - propsDataset: DataItem[], - options: ILLMOptions, - enableDataQuery = true, - colorPalette?: string[], - animationDuration?: number -) => { - let queryDatasetUsage; - let advisorUsage; - let chartType; - let cell; - let dataset: DataItem[] = propsDataset; - let fieldInfo: SimpleFieldInfo[] = propsFieldInfo; - let chartSource: string = options.model; - - try { - if (enableDataQuery) { - const { - dataset: queryDataset, - fieldInfo: fieldInfoNew, - usage - } = await queryDatasetWithSkylark(userPrompt, fieldInfo, propsDataset, options); - dataset = queryDataset; - fieldInfo = fieldInfoNew; - queryDatasetUsage = usage; - } - } catch (err) { - console.error('data query error!'); - console.error(err); - } - - const schema = getSchemaFromFieldInfo(fieldInfo); - const colors = colorPalette; - - try { - // throw 'test chartAdvisorHandler'; - const resJson: any = await chartAdvisorSkylark(schema, fieldInfo, userPrompt, options); - advisorUsage = resJson.usage; - const chartTypeRes = resJson.chartType.toUpperCase(); - const cellRes = resJson['cell']; - const patchResult = patchChartTypeAndCell(chartTypeRes, cellRes, dataset, fieldInfo); - if (checkChartTypeAndCell(patchResult.chartTypeNew, patchResult.cellNew, patchResult.fieldInfoNew)) { - chartType = patchResult.chartTypeNew; - cell = patchResult.cellNew; - dataset = patchResult.datasetNew; - } - } catch (err) { - console.warn(err); - console.warn('LLM generation error, use rule generation.'); - const advisorResult = chartAdvisorHandler(schema, dataset); - chartType = advisorResult.chartType; - cell = advisorResult.cell; - dataset = advisorResult.dataset as DataItem[]; - chartSource = 'chartAdvisor'; - } - const spec = vizDataToSpec( - dataset, - chartType, - cell, - colors, - animationDuration ? animationDuration * 1000 : undefined - ); - spec.background = '#00000033'; - return { - chartSource, - chartType, - spec, - usage: calculateTokenUsage([queryDatasetUsage, advisorUsage]), - time: estimateVideoTime(chartType, spec, animationDuration ? animationDuration * 1000 : undefined) - }; -}; - -export const chartAdvisorSkylark = async ( - schema: Partial, - fieldInfo: SimpleFieldInfo[], - userInput: string, - options: ILLMOptions | undefined -) => { - const userMessage = `User's Command: ${userInput}\nData field description: ${JSON.stringify(fieldInfo)}`; - - //call skylark to get recommended chart - const chartRecommendKnowledgeStr = getStrFromArray(chartRecommendKnowledge); - const chartRecommendConstraintsStr = getStrFromArray(chartRecommendConstraints); - const chartRecommendPrompt = getChartRecommendPrompt( - chartRecommendKnowledgeStr, - chartRecommendConstraintsStr, - options.showThoughts ?? true - ); - - const requestFunc = options.customRequestFunc?.chartAdvisor ?? requestSkyLark; - - const chartRecommendRes = await requestFunc(chartRecommendPrompt, userMessage, options); - const chartRecommendResJSON = parseSkylarkResponse(chartRecommendRes); - if (chartRecommendResJSON.error) { - throw Error(chartRecommendResJSON.message); - } - if (!SUPPORTED_CHART_LIST.includes(chartRecommendResJSON['charttype'])) { - throw Error('Unsupported Chart Type. Please Change User Input'); - } - - const { charttype: chartType } = chartRecommendResJSON; - - //call skylark to get field map result. - const { visualChannels, responseDescription, knowledge } = ChartFieldInfo[chartType.toUpperCase()]; - const visualChannelInfoStr = getStrFromDict(visualChannels); - const channelResponseStr = getStrFromDict(responseDescription); - const fieldMapKnowledgeStr = getStrFromArray(knowledge); - const fieldMapPrompt = getFieldMapPrompt( - chartType, - visualChannelInfoStr, - channelResponseStr, - fieldMapKnowledgeStr, - options.showThoughts ?? true - ); - - const fieldMapRes = await requestFunc(fieldMapPrompt, userMessage, options); - const fieldMapResJson = parseSkylarkResponse(fieldMapRes); - if (fieldMapResJson.error) { - throw Error('Network Error!'); - } - - return { - chartType, - cell: omit(fieldMapResJson, ['thoughts', 'usage']), - usage: calculateTokenUsage([chartRecommendRes.usage, fieldMapRes.usage]) - }; -}; diff --git a/packages/vmind/src/skylark/chart-generation/constants.ts b/packages/vmind/src/skylark/chart-generation/constants.ts deleted file mode 100644 index 51ff3c7b..00000000 --- a/packages/vmind/src/skylark/chart-generation/constants.ts +++ /dev/null @@ -1,229 +0,0 @@ -import { ChannelInfo } from '../typings'; - -export const ChartFieldInfo: ChannelInfo = { - 'BAR CHART': { - visualChannels: { - x: "x-axis of bar chart. Can't be empty. Only string fields", - y: "y-axis of bar chart. Can't be empty. Only number fields. Use array if there are more than one number fields need to show.", - color: - 'color channel of bar chart. Used to distinguish different bars. Only string fields. Can be empty if no suitable field.' - }, - responseDescription: { - x: 'field assigned to x channel', - y: 'field assigned to y channel', - color: 'field assigned to color channel' - }, - knowledge: [ - 'Only string fields can be used in color channel.', - 'You can use color channel to distinguish different categories.', - 'Use an array in y-axis if you want to assign more than one fields in y-axis.' - ] - }, - 'PIE CHART': { - visualChannels: { - value: "angle of sectors in the pie chart. Only number fields. Can't be empty.", - color: - "color of sectors in the pie chart. Used to distinguish different sectors. Only string fields. Can't be empty." - }, - responseDescription: { - value: 'field assigned to angle channel', - color: 'field assigned to color channel' - }, - knowledge: ['Only string fields can be used in color channel.'] - }, - 'LINE CHART': { - visualChannels: { - x: "x-axis of line chart. Can't be empty. Only string fields", - y: "y-axis of line chart. Can't be empty. Only number fields. Use array if there are more than one number fields need to show.", - color: - 'color channel of line chart. Used to distinguish different lines. Only string fields. Can be empty if no suitable field.' - }, - responseDescription: { - x: 'field assigned to x channel', - y: 'field assigned to y channel', - color: 'field assigned to color channel. Can be empty if no suitable field.' - }, - knowledge: [ - 'Only string fields can be used in color channel.', - 'Use an array in y-axis if you want to assign more than one fields in y-axis.' - ] - }, - 'SCATTER PLOT': { - visualChannels: { - x: "x-axis of scatter plot. Can't be empty.", - y: "y-axis of scatter plot. Can't be empty.", - color: 'color channel of scatter plot. Used to distinguish different points. Can be empty if no suitable field.', - size: 'size channel of scatter plot. Mapped to the size of each point. Only number fields. Can be empty if no suitable field.' - }, - responseDescription: { - x: 'field assigned to x channel', - y: 'field assigned to y channel', - color: 'field assigned to color channel', - size: 'field assigned to size channel' - }, - knowledge: ['Only number fields can be used in size channel.'] - }, - 'WORD CLOUD': { - visualChannels: { - size: "size channel of wordcloud. Mapped to the size of each word. Only number fields. Can't be empty", - color: - "color channel of wordcloud. Mapped to the color of each word. Used to distinguish different words. Only string fields. Can't be empty." - }, - responseDescription: { - size: 'field assigned to x channel', - color: 'field assigned to color channel' - }, - knowledge: ['Only string fields can be used in color channel.', 'Only number fields can be used in size channel.'] - }, - 'RADAR CHART': { - visualChannels: { - angle: "angle channel of radar chart. Used to distinguish different variables. Can't be empty.", - value: "Used to show the value of each variable in radar chart. Can't be empty.", - color: 'color channel of radar chart. Used to distinguish different variables. Can be empty if no suitable field.' - }, - responseDescription: { - angle: 'field assigned to angle channel', - value: 'field assigned to value channel', - color: 'field assigned to color channel' - }, - knowledge: [ - 'Only string fields can be used in angle channel.', - 'Only number fields can be used in value channel.', - 'Only string fields can be used in color channel.' - ] - }, - 'SANKEY CHART': { - visualChannels: { - source: "mapped to the source node of flow in sankey chart. Can't be empty.", - target: "mapped to the target node of flow in sankey chart. Can't be empty.", - value: "mapped to the amount of the flow in sankey chart. Can't be empty." - }, - responseDescription: { - source: 'field assigned to source channel', - target: 'field assigned to target channel', - value: 'field assigned to value channel' - }, - knowledge: [ - 'Only string fields can be used in source channel.', - 'Only number fields can be used in value channel.', - 'Only string fields can be used in target channel.' - ] - }, - 'ROSE CHART': { - visualChannels: { - radius: 'radius of sectors in the rose chart. Only number fields', - color: - "color of sectors in rose chart. Used to distinguish different sectors. Only string fields. Can't be empty." - }, - responseDescription: { - radius: 'field assigned to radius channel', - color: 'field assigned to color channel' - }, - knowledge: ['Only string fields can be used in color channel.'] - }, - 'FUNNEL CHART': { - visualChannels: { - color: - "color of each category or stage. Used to distinguish different category or stages in funnel chart. Only string fields. Can't be empty", - value: - "values of each category or stage. Mapped to the width of the bar representing each category or stage. Only number fields. Can't be empty." - }, - responseDescription: { - color: 'field assigned to color channel', - value: 'field assigned to value channel' - }, - knowledge: ['Only string fields can be used in color channel.', 'Only number fields can be used in value channel.'] - }, - 'WATERFALL CHART': { - visualChannels: { - x: "x-axis of waterfall chart. Can't be empty. Only string fields", - y: "y-axis of waterfall chart. Can't be empty. Only number fields", - color: - 'color channel of waterfall chart. Used to distinguish different categories. Only string fields. Can be empty if no suitable field.' - }, - responseDescription: { - x: 'field assigned to x channel', - y: 'field assigned to y channel', - color: 'field assigned to color channel' - }, - knowledge: ['Only string fields can be used in color channel.'] - }, - 'BOX PLOT': { - visualChannels: { - x: "x-axis of box plot. Can't be empty. Only string fields", - min: 'field representing min value of box plot. Can be empty. Only number fields', - q1: 'field representing lower quartile of box plot. Can be empty. Only number fields', - median: 'field representing median of box plot. Can be empty. Only number fields', - q3: 'field representing upper quartile of box plot. Can be empty. Only number fields', - max: 'field representing max value of box plot. Can be empty. Only number fields' - }, - responseDescription: { - x: 'field assigned to x channel', - min: 'field assigned to min channel', - q1: 'field assigned to q1 channel', - median: 'field assigned to median channel', - q3: 'field assigned to q3 channel', - max: 'field assigned to max channel' - }, - knowledge: ['Only string fields can be used in color channel.'] - }, - 'DUAL AXIS CHART': { - visualChannels: { - x: "x-axis of dual axis chart. Can't be empty. Only string fields", - leftAxis: "left y-axis of dual axis chart. Can't be empty. Only number fields", - rightAxis: "right y-axis of dual axis chart. Can't be empty. Only number fields" - }, - responseDescription: { - x: 'field assigned to x channel', - leftAxis: 'field assigned to leftAxis channel', - rightAxis: 'field assigned to rightAxis channel' - }, - knowledge: [ - 'left y-axis is used as main axis and usually used to show main field', - 'right y-axis is used as sub-axis' - ] - }, - 'DYNAMIC BAR CHART': { - visualChannels: { - time: "date field used to divide time frames. Used to divide the data into time frames. Can't be empty. Only date fields. Can't be the same as x-axis.", - x: "x-axis of bar chart. Can't be empty. Can only use categorical field. Can't use time field", - y: "y-axis of bar chart. Can't be empty. Only number fields", - color: - 'color channel of bar chart. Used to distinguish different bars. Only categorical fields. Can be empty if no suitable field.' - }, - responseDescription: { - time: 'field assigned to time channel', - x: 'field assigned to x channel', - y: 'field assigned to y channel', - color: 'field assigned to color channel' - }, - knowledge: [ - "x-axis in dynamic bar chart can only be a categorical field. Don't use time field", - 'Only use categorical field can be used in x channel', - "time channel can't be empty", - 'Only date fields can be used in time channel.' - ] - } -}; - -export const chartRecommendKnowledge = [ - 'Bar chart shows the changes or comparisons of various categories of data.', - 'Line Chart shows the trend of data over time.', - 'Pie chart shows the proportion of each part in the total.', - 'Scatter plot shows the relationship between two variables', - 'Word cloud shows word frequency of text data, usually used to show trends, comparison or popularity of keywords.', - 'Dual-axis chart is used when there are two y-fields need to visualize.', - 'Sankey chart shows the transfer of flow or energy, reflecting the relationship between various parts.', - 'Radar chart shows data of multiple variables, allowing comparisons between various variables.', - 'Rose chart shows the distribution of periodic data.', - 'Waterfall chart shows the cumulative effect of data, particularly suitable for showing the total change between the beginning and the end, and how this total change is composed of increases and decreases from individual sub-items.', - 'Funnel chart shows the process or stages of data, or conversion rates.', - 'Box Plot is suitable for displaying data that contains maximum values, lower quartiles, medians, upper quartiles, and maximum values.', - 'Dynamic Bar Chart shows changes in rankings over time.', - 'Dynamic Bar Chart can only be used when data has a field that is date type.' -]; - -export const chartRecommendConstraints = [ - 'Use Box Plot if data includes fields related to the minimum value, lower quartile, median, upper quartile, and maximum value.', - 'Use Dynamic Bar Chart if user want to show changes of rankings in data.' -]; diff --git a/packages/vmind/src/skylark/chart-generation/index.ts b/packages/vmind/src/skylark/chart-generation/index.ts deleted file mode 100644 index 9c86d94c..00000000 --- a/packages/vmind/src/skylark/chart-generation/index.ts +++ /dev/null @@ -1 +0,0 @@ -export * from './NLToChart'; diff --git a/packages/vmind/src/skylark/chart-generation/patch.ts b/packages/vmind/src/skylark/chart-generation/patch.ts deleted file mode 100644 index 1f60772e..00000000 --- a/packages/vmind/src/skylark/chart-generation/patch.ts +++ /dev/null @@ -1,237 +0,0 @@ -import { isArray, isString } from 'lodash'; -import { Cell, DataItem, DataType, PatchContext, PatchPipeline, ROLE, SimpleFieldInfo } from '../../typings'; -import { execPipeline } from '../../common/utils'; -import { foldDatasetByYField } from '../../common/vizDataToSpec/utils'; -import { FOLD_NAME, FOLD_VALUE } from '@visactor/chart-advisor'; - -const matchFieldWithoutPunctuation = (field: string, fieldList: string[]): string | undefined => { - //try to match the field without punctuation - //return undefined if no field is match - if (!field) { - return field; - } - const punctuationRegex = /[.,\/#!$%\^&\*;:{}=\-_`~()\s]/g; - const pureFieldStr = field.replace(punctuationRegex, ''); - let matchedField = undefined; - fieldList.some((f: string) => { - const pureStr = f.replace(punctuationRegex, ''); - if (pureStr === pureFieldStr) { - matchedField = f; - return true; - } - return false; - }); - return matchedField; -}; - -const patchNullField: PatchPipeline = (context: PatchContext, _originalContext: PatchContext) => { - const { fieldInfo, cell } = context; - const cellNew = { ...cell }; - - const columns = fieldInfo.map(field => field.fieldName); - - //set null field to undefined - Object.keys(cellNew).forEach(key => { - const value = cellNew[key]; - if (isArray(value)) { - cellNew[key] = value - .map(v => (columns.includes(v) ? v : matchFieldWithoutPunctuation(v, columns))) - .filter(Boolean); - } else if (!columns.includes(value) || value === '') { - cellNew[key] = matchFieldWithoutPunctuation(cellNew[key], columns); - } - }); - - return { - ...context, - cell: cellNew - }; -}; - -const patchField: PatchPipeline = (context: PatchContext, _originalContext: PatchContext) => { - const { fieldInfo, cell } = context; - const fieldNames = fieldInfo.map(field => field.fieldName); - const cellNew = { ...cell }; - Object.keys(cellNew).forEach(key => { - const value = cellNew[key]; - if (isString(value) && (value ?? '').includes(',')) { - const newValue = (value as string).split(',').map(f => f.trim()); - if (newValue.every(f => fieldNames.includes(f))) { - cellNew[key] = newValue; - } - } - }); - return { - ...context, - cell: cellNew - }; -}; - -const patchColorField: PatchPipeline = (context: PatchContext, _originalContext: PatchContext) => { - const { chartType, fieldInfo, cell } = context; - const cellNew = { ...cell }; - const { color } = cellNew; - let chartTypeNew = chartType; - if (color) { - const colorField = fieldInfo.find(f => f.fieldName === color); - if (colorField && colorField.role === ROLE.MEASURE) { - cellNew.color = undefined; - if (['BAR CHART', 'LINE CHART', 'DUAL AXIS CHART'].includes(chartTypeNew)) { - cellNew.y = [cellNew.y, color].flat(); - if (chartTypeNew === 'DUAL AXIS CHART' && cellNew.y.length > 2) { - chartTypeNew = 'BAR CHART'; - } - } - } - } - - return { - ...context, - cell: cellNew - }; -}; - -const patchRadarChart: PatchPipeline = (context: PatchContext, _originalContext: PatchContext) => { - const { chartType, cell } = context; - - if (chartType === 'RADAR CHART') { - const cellNew = { - x: cell.angle, - y: cell.value, - color: cell.color - }; - - return { - ...context, - cell: cellNew - }; - } - return context; -}; - -const patchBoxPlot: PatchPipeline = (context: PatchContext, _originalContext: PatchContext) => { - const { chartType, cell } = context; - - if (chartType === 'BOX PLOT') { - const { x, min, q1, median, q3, max } = cell as any; - const cellNew = { - x, - y: [min, q1, median, q3, max].filter(Boolean) - }; - return { - ...context, - cell: cellNew - }; - } - return context; -}; - -const patchBarChart: PatchPipeline = (context: PatchContext, _originalContext: PatchContext) => { - const { chartType, cell, fieldInfo, dataset } = context; - const chartTypeNew = chartType; - const cellNew = { ...cell }; - let datasetNew = dataset; - if (chartTypeNew === 'BAR CHART' || chartTypeNew === 'LINE CHART') { - if (isArray(cellNew.y) && cellNew.y.length > 1) { - datasetNew = foldDatasetByYField(datasetNew, cellNew.y, fieldInfo); - cellNew.y = FOLD_VALUE.toString(); - cellNew.color = FOLD_NAME.toString(); - } - } - return { - ...context, - chartType: chartTypeNew, - cell: cellNew, - dataset: datasetNew - }; -}; - -const patchDynamicBarChart: PatchPipeline = (context: PatchContext, _originalContext: PatchContext) => { - const { chartType, cell, fieldInfo } = context; - const cellNew = { - ...cell - }; - - if (chartType === 'DYNAMIC BAR CHART') { - if (!cellNew.time || cellNew.time === '' || cellNew.time.length === 0) { - const flattenedXField = Array.isArray(cellNew.x) ? cellNew.x : [cellNew.x]; - const usedFields = Object.values(cellNew).filter(f => !Array.isArray(f)); - usedFields.push(...flattenedXField); - const remainedFields = fieldInfo.filter(f => !usedFields.includes(f.fieldName)); - - //动态条形图没有time字段,选择一个离散字段作为time - const timeField = remainedFields.find(f => { - return f.type === DataType.DATE; - }); - if (timeField) { - cellNew.time = timeField.fieldName; - } else { - const stringField = remainedFields.find(f => { - return f.type === DataType.STRING; - }); - if (stringField) { - cellNew.time = stringField.fieldName; - } - } - } - } - return { - ...context, - cell: cellNew - }; -}; - -const patchArrayField: PatchPipeline = (context: PatchContext, _originalContext: PatchContext) => { - const { cell } = context; - const cellNew = { - ...cell - }; - //only x and y field can be array - Object.keys(cellNew).forEach(key => { - if (key !== 'x' && key !== 'y' && isArray(cellNew[key])) { - cellNew[key] = cellNew[key][0]; - } - }); - - return { - ...context, - cell: cellNew - }; -}; - -const patchPipelines: PatchPipeline[] = [ - patchNullField, - patchField, - patchColorField, - patchRadarChart, - patchBoxPlot, - patchBarChart, - patchDynamicBarChart, - patchArrayField -]; - -export const patchChartTypeAndCell = ( - chartTypeRes: string, - cellRes: Cell, - dataset: DataItem[], - fieldInfo: SimpleFieldInfo[] -) => { - const context = { - chartType: chartTypeRes, - cell: cellRes, - dataset, - fieldInfo - }; - const { - chartType: chartTypeNew, - cell: cellNew, - dataset: datasetNew, - fieldInfo: fieldInfoNew - } = execPipeline(context, patchPipelines, context); - return { - chartTypeNew, - cellNew, - datasetNew, - fieldInfoNew - }; -}; diff --git a/packages/vmind/src/skylark/chart-generation/prompts.ts b/packages/vmind/src/skylark/chart-generation/prompts.ts deleted file mode 100644 index 7707ed8a..00000000 --- a/packages/vmind/src/skylark/chart-generation/prompts.ts +++ /dev/null @@ -1,62 +0,0 @@ -import { SUPPORTED_CHART_LIST } from '../../common/vizDataToSpec/constants'; -import { FieldInfo } from '../../typings'; - -export const getChartRecommendPrompt = ( - knowledgeStr: string, - constraintsStr: string, - showThoughts: boolean -) => `You are an export in data visualization. -Your task is: -1. Based on the user's command, infer the user's intention and data field description, such as comparison, trend, proportion, distribution, etc. Don't consider intentions that the current data field cannot show. -2. Select a single chart type that best suites the data and user's intention from the list of supported charts: ${JSON.stringify( - SUPPORTED_CHART_LIST -)}. -3. Response in YAML format without any additional descriptions - -Here is some knowledge you can refer to when selecting chart type: -${knowledgeStr} - -Must follow these constraints: -${constraintsStr} - -Let's think step by step. ${showThoughts ? 'Fill your thoughts in {thoughts}.' : ''} - -Response in the following format: -${ - showThoughts ? 'thoughts: //Your thoughts\n' : '' -}chartType: //chart type you choose based on data and user's command. Only one chart type can be used. -`; - -export const getFieldMapPrompt = ( - chartType: string, - availableChannels: string, - channelsInResponse: string, - channelKnowledge: string, - showThoughts: boolean -) => `You are an export in data visualization. User wants to generate a ${chartType.toLocaleLowerCase()} using the fields provided. -Your task is: -1. Filter out useful fields related to user's command. -2. Assign the useful fields to the available visual channels according to field name and type. -3. Response in YAML format without any additional descriptions - -Available visual channels: -${availableChannels} - -Knowledge: -1. Visual channels are described with grammar of graphics and can be used to generate a chart. -${channelKnowledge} - -Must follow these constraints: -1. Only use available visual channels. Don't fabricate non-existent visual channels. -2. Only use fields in data field description. Don't make up non-existent fields. -3. Keep the field names unchanged and don't translate them, even though they are in different languages. -4. All the data are indivisible. Don't use their initials as fields in chart. Use the original field instead. -5. Must follow the field type restrictions in each visual channel. -6. Don't use operator symbols or expressions in any visual channel even though there are no direct fields corresponding to user's command, choose the most related field from data instead. -7. Please assign appropriate fields to each channel so that the chart can best visualize the data user wants to show. - -Let's think step by step. ${showThoughts ? 'Fill your thoughts in thoughts in one line.' : ''} - -Response in the following format: -${showThoughts ? 'thoughts: //Your thoughts in one line. Must show your thought process.\n' : ''}${channelsInResponse}. -`; diff --git a/packages/vmind/src/skylark/chart-generation/utils.ts b/packages/vmind/src/skylark/chart-generation/utils.ts deleted file mode 100644 index 98d86d63..00000000 --- a/packages/vmind/src/skylark/chart-generation/utils.ts +++ /dev/null @@ -1,51 +0,0 @@ -import axios from 'axios'; -import { ILLMOptions, LLMResponse } from '../../typings'; -import { omit } from 'lodash'; - -/** - * - * @param prompt - * @param message - * @param options - */ -export const requestSkyLark = async (prompt: string, message: string, options: ILLMOptions): Promise => { - const url: string = options?.url; - const headers: any = { ...(options.headers ?? {}), 'Content-Type': 'application/json' }; - - try { - const res = await axios(url, { - method: options?.method ?? 'POST', - headers, //must has Authorization: `Bearer ${openAIKey}` if use openai api - data: { - ...omit(options, ['headers', 'url', 'method', 'showThoughts', 'customRequestFunc']), - model: options?.model ?? 'gpt-3.5-turbo', - messages: [ - { - role: 'system', - content: prompt - }, - { - role: 'user', - content: message - } - ], - max_tokens: options?.max_tokens ?? 500, - temperature: options?.temperature ?? 0, - stream: false - } - }).then(response => response.data); - - return res; - } catch (err: any) { - return err.response.data; - } -}; - -export const getStrFromDict = (dict: Record) => - Object.keys(dict) - .map(key => `${key}: ${dict[key]}`) - .join('\n'); - -const KNOWLEDGE_START_INDEX = 1; -export const getStrFromArray = (array: string[]) => - array.map((item, index) => `${index + KNOWLEDGE_START_INDEX}. ${item}`).join('\n'); diff --git a/packages/vmind/src/skylark/dataProcess/query/prompts.ts b/packages/vmind/src/skylark/dataProcess/query/prompts.ts deleted file mode 100644 index 80b8cbd1..00000000 --- a/packages/vmind/src/skylark/dataProcess/query/prompts.ts +++ /dev/null @@ -1,76 +0,0 @@ -import { VMIND_DATA_SOURCE } from '../../../applications/dataAggregation/taskNodes/executeQuery/dataQuery'; - -export const getQueryDatasetPrompt = ( - showThoughts: boolean -) => `您是一位数据分析的专家。这是一个名为${VMIND_DATA_SOURCE}的原始数据集。用户会告诉您他的命令和${VMIND_DATA_SOURCE}的列信息。您的任务是根据指令生成一个sql和fieldInfo。只返回一个JSON对象。 - -# SQL语句编写要求 -- 您需要编写一个标准的sql语句。 -- 所有的度量列必须被聚合,即使用户没有要求你这样做。支持的聚合函数:["MAX()", "MIN()", "SUM()", "COUNT()", "AVG()"] -- 支持的sql关键字:["SELECT", "FROM", "WHERE", "GROUP BY", "HAVING", "ORDER BY", "LIMIT", "DISTINCT"]. -- 不要使用不支持的关键词,如:WITHIN, FIELD。不要使用不支持的聚合函数,如:PERCENTILE_CONT, PERCENTILE。不要使用不支持的操作符。我们将使用alasql执行您的sql。不支持的关键词、函数和操作符会导致系统崩溃。 -- 使用\` \`包裹sql中的所有列名 -- 让你的sql尽可能简单。 - -您需要按照以下步骤编写sql语句。 - -# 步骤 -1. 从用户的指令中提取与数据相关的部分。忽略其他与数据无关的部分。 -2. 根据列的名称和类型,推断${VMIND_DATA_SOURCE}中与用户指令有关的列,并将其添加到SELECT中。尽可能多地选择相关列,不要遗漏一些关键的列,比如与日期相关的维度等。你只能使用Column Information中提到的列,不要假设不存在的列。如果现有的列不能满足用户的命令,选择Column Information中最相关的列。 -3. 不论用户指定了哪种图表类型,将所选择的度量列使用聚合函数聚合,即使你推断它们不适合被聚合,即使用户没有要求你这样做。如果你不确定使用哪个聚合函数,使用SUM()。不要使用不支持的聚合函数。 -4. 使用维度列对数据进行分组。 -5. 在您的sql中,如有必要,您也可以使用WHERE, HAVING, ORDER BY, LIMIT。使用支持的操作符完成WHERE和HAVING。只能使用如columnA = value1,sum_b > 0的二元表达式。在您的表达式中,只能使用在维度列的domain中出现的维度值。 - -让我们一步一步思考。不要忘了将所有度量列聚合。 - -用户将会直接使用JSON.parse()解析您返回的内容,只返回一个不带任何额外内容的JSON对象。您的JSON对象必须包含sql和fieldInfo。 - -请按以下格式回复: -\`\`\` -{ -${showThoughts ? 'thoughts: string //你的想法' : ''} -sql: string; //你的sql。注意,这是一个JSON对象中的字符串,所以必须是一行,不含任何\\n。 -fieldInfo: { -fieldName: string; //字段名。 -type: string; //字段类型,string,int,date或float。 -}[]; //您的sql中字段信息的数组。描述其名称和类型。 -} -\`\`\` - -#Examples: - -User's Command: Show me the change of the GDP rankings of each country. -Column Information: [{"fieldName":"country","type":"string","role":"dimension","domain":["USA", "China", "England"]},{"fieldName":"continent","type":"string","role":"dimension","domain":["North America","Asia","Europe"]},{"fieldName":"GDP","type":"float","role":"measure","domain":[2780,617030]},{"fieldName":"year","type":"int","role":"measure","domain":[1973,2018]}] - -Response: -\`\`\` -{ - ${showThoughts ? '"thoughts": string //your thoughts' : ''} - "sql": "SELECT \`country\`, \`year\`, SUM(\`GDP\`) AS \`total_GDP\` FROM ${VMIND_DATA_SOURCE} GROUP BY \`country\`, \`year\` ORDER BY \`year\`, \`total_GDP\` DESC", - "fieldInfo": [ - { - "fieldName": "country", - "type": "string" - }, - { - "fieldName": "year", - "type": "date" - }, - { - "fieldName": "total_GDP", - "type": "int" - } - ] -} -\`\`\` - -在上面这个例子中,用户想要展示不同国家GDP排名的变化,相关列有country和GDP。用户需要一个年份列才能展示“变化”,因此我们还需要选择year。GDP是一个指标列,因此我们要将它聚合。从用户输入中无法推断聚合方式,因此使用SUM()。您只需要将生成的JSON返回给用户。 - -一步完成您的任务。 - -# 约束: -- 在一行内写出您的sql语句,不要有任何\\n。您的sql必须能够由alasql执行。 -- 请不要在您的sql语句中改变或翻译列名,请保持原有的列名不变,即使他们含有空格或-。 -- 在你的sql中不要遗漏GROUP BY。 -- 直接返回JSON对象,不要有任何其他内容。确保它能够被JavaScript中的JSON.parse()直接解析。 -`; diff --git a/packages/vmind/src/skylark/dataProcess/query/queryDataset.ts b/packages/vmind/src/skylark/dataProcess/query/queryDataset.ts deleted file mode 100644 index af7ff51e..00000000 --- a/packages/vmind/src/skylark/dataProcess/query/queryDataset.ts +++ /dev/null @@ -1,60 +0,0 @@ -import { DataItem, ILLMOptions, SimpleFieldInfo } from '../../../typings'; -import { DataQueryResponse } from './type'; -import { getQueryDatasetPrompt } from './prompts'; -import { requestSkyLark } from '../../chart-generation/utils'; -import { parseRespondField } from '../../../gpt/dataProcess/query/utils'; -import { parseSkylarkResponseAsJSON, patchDataQueryInput } from './utils'; -import { queryDataset } from '../../../applications/dataAggregation/taskNodes/executeQuery/dataQuery'; - -/** - * query the source dataset according to user's input and fieldInfo to get aggregated dataset - * - * @param userInput - * @param fieldInfo - * @param sourceDataset - */ -export const queryDatasetWithSkylark = async ( - userInput: string, - fieldInfo: SimpleFieldInfo[], - sourceDataset: DataItem[], - options: ILLMOptions -) => { - const patchedInput = patchDataQueryInput(userInput); - const { sql, fieldInfo: responseFieldInfo, usage } = await getQuerySQL(patchedInput, fieldInfo, options); - const datasetAfterQuery = queryDataset(sql, sourceDataset, fieldInfo); - - const fieldInfoNew = parseRespondField(responseFieldInfo, datasetAfterQuery); - if (datasetAfterQuery.length === 0) { - console.warn('empty dataset after query!'); - } - return { - dataset: datasetAfterQuery.length === 0 ? sourceDataset : datasetAfterQuery, - fieldInfo: datasetAfterQuery.length === 0 ? fieldInfo : fieldInfoNew, - usage - }; -}; - -/** - * call gpt to get the query sql according to user's input and data field. - * @param userInput - * @param fieldInfo - */ -const getQuerySQL = async (userInput: string, fieldInfo: SimpleFieldInfo[], options: ILLMOptions) => { - const queryDatasetMessage = `User's Command: ${userInput}\nColumn Information: ${JSON.stringify(fieldInfo)}`; - - const requestFunc = options.customRequestFunc?.dataQuery ?? requestSkyLark; - const QueryDatasetPrompt = getQueryDatasetPrompt(options.showThoughts ?? true); - const dataProcessRes = await requestFunc(QueryDatasetPrompt, queryDatasetMessage, options); - const dataQueryResponse: DataQueryResponse = parseSkylarkResponseAsJSON(dataProcessRes); - //const { sql, fieldInfo: responseFiledInfo } = dataQueryResponse; - //if (!sql || !responseFiledInfo) { - // //try to parse the response with another format - // const choices = dataProcessRes.choices; - // const content = choices[0].message.content; - // return { - // ...parseGPTQueryResponse(content), - // usage: dataProcessRes.usage - // }; - //} - return { ...dataQueryResponse, usage: dataProcessRes.usage }; -}; diff --git a/packages/vmind/src/skylark/dataProcess/query/type.ts b/packages/vmind/src/skylark/dataProcess/query/type.ts deleted file mode 100644 index 486b6e50..00000000 --- a/packages/vmind/src/skylark/dataProcess/query/type.ts +++ /dev/null @@ -1,5 +0,0 @@ -export type DataQueryResponse = { - THOUGHT?: string; - sql: string; - fieldInfo: { fieldName: string; description?: string }[]; -}; diff --git a/packages/vmind/src/skylark/dataProcess/query/utils.ts b/packages/vmind/src/skylark/dataProcess/query/utils.ts deleted file mode 100644 index f179aa37..00000000 --- a/packages/vmind/src/skylark/dataProcess/query/utils.ts +++ /dev/null @@ -1,54 +0,0 @@ -import { LLMResponse } from 'src/typings'; -import JSON5 from 'json5'; -import { replaceAll } from '../../../applications/dataAggregation/taskNodes/executeQuery/utils'; -import { matchJSONStr } from '../../../common/utils'; - -export const parseJson = (JsonStr: string, prefix?: string) => { - const parseNoPrefixStr = (str: string) => { - //尝试不带前缀的解析 - try { - return JSON5.parse(str); - } catch (err) { - return { - error: true - }; - } - }; - //解析GPT返回的JSON格式 - if (prefix) { - //被某些字符包裹 - const splitArr = JsonStr.split(prefix); - const splittedStr = splitArr[splitArr.length - 2]; - const res = parseNoPrefixStr(splittedStr); - if (!res.error) { - return res; - } - } - //没有被前缀包裹,或者解析被前缀包裹的json失败,尝试直接解析返回结果 - const res2 = parseNoPrefixStr(JsonStr); - return res2; -}; - -export const parseSkylarkResponseAsJSON = (skylarkRes: LLMResponse) => { - try { - if (skylarkRes.error) { - return { - error: true, - ...skylarkRes.error - }; - } - const choices = skylarkRes.choices; - const content = replaceAll(choices[0].message.content, '\n', ' '); - const jsonStr = matchJSONStr(content); - const resJson = parseJson(jsonStr, '```'); - return resJson; - } catch (err: any) { - return { - error: true, - message: err.message - }; - } -}; - -export const patchDataQueryInput = (userInput: string) => - userInput + ' 使用` `包裹sql中的所有列名。使用支持的聚合函数将所有的度量列聚合。'; diff --git a/packages/vmind/src/skylark/typings/index.ts b/packages/vmind/src/skylark/typings/index.ts deleted file mode 100644 index d6e32558..00000000 --- a/packages/vmind/src/skylark/typings/index.ts +++ /dev/null @@ -1,7 +0,0 @@ -export type ChannelInfo = { - [chartType: string]: { - visualChannels: Record; //Visual channel available in this chart type - responseDescription: Record; //description of the visual channel used in the llm response - knowledge: string[]; //list of additional knowledge about the visual channels model need to know when applying field map - }; -}; diff --git a/packages/vmind/src/skylark/utils.ts b/packages/vmind/src/skylark/utils.ts deleted file mode 100644 index 22207e67..00000000 --- a/packages/vmind/src/skylark/utils.ts +++ /dev/null @@ -1,58 +0,0 @@ -import yaml from 'js-yaml'; -import { LLMResponse } from '../typings'; - -const startsWithTextAndColon = (str: string) => { - const regex = /^.+\:/; - return regex.test(str); -}; - -const isStringArray = (str: string) => { - const regex = /^(.*)\: ".+"(, ".+")+$/; - return regex.test(str); -}; -export const parseSkylarkResponse = (larkResponse: LLMResponse): Record => { - try { - if (larkResponse.error) { - console.error(larkResponse.error); - return { error: true, ...larkResponse.error }; - } - const responseStr = larkResponse.choices[0].message.content; - const usage = larkResponse.usage; - //replace all the {key} into key: - const replacedStr = responseStr.replace( - /{(.*?)}/g, - (matchedStr: string, matchedGroup: string) => matchedGroup + ':' - ); - const patchedStr = replacedStr - .split('\n') - //remove lines that is not start with text and colon - .filter((str: string) => startsWithTextAndColon(str)) - //remove blank space at the start of each line - .map((str: string) => str.replace(/^\s+/, '')) - //wrap string list with [] - .map((str: string) => { - if (isStringArray(str)) { - return str.replace(/(.*): (.*)/, '$1: [$2]'); - } - return str; - }) - //check if there are other : after the first : in YAML; If so, wrap the str with "" - .map((str: string) => { - const parts = str.split(':'); - return parts.length > 2 ? `${parts[0]}: "${parts.slice(1).join(':').trim()}"` : str; - }) - //replace ": -" with ": null" - .map((str: string) => { - return str.replace(/: -/g, ': null'); - }) - .join('\n'); - - const resJson = yaml.load(patchedStr) as Record; - resJson.usage = usage; - //replace all the keys to lower case. - return Object.keys(resJson).reduce((prev, cur) => ({ ...prev, [cur.toLocaleLowerCase()]: resJson[cur] }), {}); - } catch (err: any) { - console.error(err); - return { error: true, message: err.message }; - } -}; From 2e1d0448d8722bfed6f69c8264e0c6a7aa63d0f2 Mon Sep 17 00:00:00 2001 From: da730 Date: Wed, 10 Apr 2024 21:36:01 +0800 Subject: [PATCH 18/62] feat: fix async reduce --- .../__tests__/browser/src/pages/DataInput.tsx | 22 ++++++------ .../src/applications/dataAggregation/index.ts | 3 +- .../taskNodes/executeQuery/transformers.ts | 15 +++++--- .../taskNodes/getQuerySQL/GPT/utils.ts | 18 ++++++---- packages/vmind/src/base/application/index.ts | 36 ++++++++++++------- .../src/base/taskNode/llmBasedTaskNode.ts | 2 +- packages/vmind/src/core/VMind.ts | 6 ++-- packages/vmind/src/core/applications.ts | 1 - packages/vmind/src/core/types.ts | 1 - 9 files changed, 60 insertions(+), 44 deletions(-) diff --git a/packages/vmind/__tests__/browser/src/pages/DataInput.tsx b/packages/vmind/__tests__/browser/src/pages/DataInput.tsx index b8f6e2b7..e8f1d012 100644 --- a/packages/vmind/__tests__/browser/src/pages/DataInput.tsx +++ b/packages/vmind/__tests__/browser/src/pages/DataInput.tsx @@ -125,21 +125,21 @@ export function DataInput(props: IPropsType) { const askGPT = useCallback(async () => { //setLoading(true); const { fieldInfo, dataset } = vmind.parseCSVData(csv); - //const { fieldInfo: fieldInfoQuery, dataset: datasetQuery } = await vmind?.dataQuery(describe, fieldInfo, dataset); + const { fieldInfo: fieldInfoQuery, dataset: datasetQuery } = await vmind?.dataQuery(describe, fieldInfo, dataset); //const { fieldInfo, dataset, usage } = await vmind.parseCSVDataWithLLM(csv, describe); //const dataset = mockData4; //const fieldInfo = vmind?.getFieldInfo(dataset); - const startTime = new Date().getTime(); - const chartGenerationRes = await vmind.generateChart(describe, fieldInfo, dataset, true); - const endTime = new Date().getTime(); - if (isArray(chartGenerationRes)) { - props.onSpecListGenerate(chartGenerationRes.map(res => res.spec)); - } else { - const { spec, time } = chartGenerationRes; - const costTime = endTime - startTime; - props.onSpecGenerate(spec, time as any, costTime); - } + //const startTime = new Date().getTime(); + ////const chartGenerationRes = await vmind.generateChart(describe, fieldInfo, dataset, true); + //const endTime = new Date().getTime(); + //if (isArray(chartGenerationRes)) { + // props.onSpecListGenerate(chartGenerationRes.map(res => res.spec)); + //} else { + // const { spec, time } = chartGenerationRes; + // const costTime = endTime - startTime; + // props.onSpecGenerate(spec, time as any, costTime); + //} setLoading(false); }, [vmind, csv, describe, props]); diff --git a/packages/vmind/src/applications/dataAggregation/index.ts b/packages/vmind/src/applications/dataAggregation/index.ts index 8d5757cf..ac3052ff 100644 --- a/packages/vmind/src/applications/dataAggregation/index.ts +++ b/packages/vmind/src/applications/dataAggregation/index.ts @@ -3,7 +3,6 @@ import ExecuteQueryTaskNodeMeta from './taskNodes/executeQuery'; import GetSQLTaskNodeGPTMeta from './taskNodes/getQuerySQL/GPT'; import { ModelType } from 'src/typings'; import { DataAggregationContext, DataAggregationOutput } from '../types'; -import { ApplicationType } from 'src/core/applications'; /** * data aggregation application in vmind @@ -13,7 +12,7 @@ import { ApplicationType } from 'src/core/applications'; * finally it runs the sql using alasql and return the final dataset and fieldInfo (DataAggregationOutput) */ const dataAggregationGPTMeta: ApplicationMeta = { - name: ApplicationType.DataAggregation, + name: 'dataAggregation', taskNodes: [ { taskNode: GetSQLTaskNodeGPTMeta, name: 'getQuerySQL' }, { taskNode: ExecuteQueryTaskNodeMeta, name: 'executeQuery' } diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts index 3a1b1455..d76c4939 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts @@ -85,7 +85,9 @@ export const executeDataQuery: Transformer = ( +export const getFinalQueryResult: Transformer = ( input: RestoreResult, context: ExecuteQueryContext ) => { - const { llmFieldInfo: responseFieldInfo, sourceDataset, fieldInfo, usage } = context; - const datasetAfterQuery = input; + const { sourceDataset, fieldInfo, usage, llmFieldInfo: responseFieldInfo } = context; + const { datasetAfterQuery } = input; const fieldInfoNew = parseRespondField(responseFieldInfo, datasetAfterQuery); if (datasetAfterQuery.length === 0) { console.warn('empty dataset after query!'); diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts index 40ad36af..d14cae33 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts @@ -2,10 +2,9 @@ import axios from 'axios'; import JSON5 from 'json5'; import { isArray, omit } from 'lodash'; import { matchJSONStr } from 'src/common/utils'; -import { ILLMOptions, LLMResponse } from 'src/typings'; -import { GetQuerySQLResult } from '../types'; +import { ILLMOptions, LLMResponse, SimpleFieldInfo } from 'src/typings'; import { Parser } from 'src/base/tools/parser'; -import { GetQuerySQLContext } from 'src/applications/dataAggregation/types'; +import { GetQuerySQLContext, GetQuerySQLOutput, SQL } from 'src/applications/dataAggregation/types'; import { Requester } from 'src/base/tools/requester'; export const requestGPT = async ( @@ -94,7 +93,12 @@ const parseGPTResponse = (GPTRes: LLMResponse) => { } }; -type DataQueryResponse = GetQuerySQLResult & { THOUGHT: string }; +type DataQueryResponse = { + sql: SQL; + fieldInfo: SimpleFieldInfo[]; //fieldInfo generated by LLM; It may has some new fields after sql query. + usage: any; + THOUGHT: string; +}; const parseGPTQueryResponse = (response: string) => { const sql = response.match(/sql:\n?```(.*?)```/s)[1]; @@ -117,9 +121,9 @@ const parseGPTQueryResponse = (response: string) => { }; }; -export const parseDataQueryResponse: Parser = (gptResponse: LLMResponse) => { +export const parseDataQueryResponse: Parser = (gptResponse: LLMResponse) => { const dataQueryResponse: DataQueryResponse = parseGPTResponse(gptResponse); - const { sql, llmFieldInfo: responseFiledInfo } = dataQueryResponse; + const { sql, fieldInfo: responseFiledInfo } = dataQueryResponse; if (!sql || !responseFiledInfo) { //try to parse the response with another format const choices = gptResponse.choices; @@ -129,7 +133,7 @@ export const parseDataQueryResponse: Parser = (g usage: gptResponse.usage }; } - return { ...dataQueryResponse, usage: gptResponse.usage }; + return { sql, llmFieldInfo: responseFiledInfo, usage: gptResponse.usage }; }; export const dataQueryRequestLLM: Requester = async ( diff --git a/packages/vmind/src/base/application/index.ts b/packages/vmind/src/base/application/index.ts index 9ceddd51..086456e1 100644 --- a/packages/vmind/src/base/application/index.ts +++ b/packages/vmind/src/base/application/index.ts @@ -52,19 +52,20 @@ export class BaseApplication implements IApplication */ async runTasks(context: Context) { this.updateContext(context); - const result: DSL = this.tasks.reduce( - async (pre: any, curTask: { name: string; task: BaseTaskNode }) => { - const result = await curTask.task.executeTask(this.context); - console.log(result); - //Put the running result of the current node into the context. - this.updateContext({ - ...this.context, - ...result - }); - return this.context; - }, - this.context - ); + + const handler = async (pre: any, curTask: { name: string; task: BaseTaskNode }) => { + console.log(curTask.name); + const result = await curTask.task.executeTask(this.context); + console.log(result); + //Put the running result of the current node into the context. + this.updateContext({ + ...this.context, + ...result + }); + return this.context; + }; + + const result: DSL = await asyncReduce(this.tasks, handler, this.context); return result; } @@ -72,3 +73,12 @@ export class BaseApplication implements IApplication this.context = context; } } +async function asyncReduce(array: any[], handler: Function, initialValue: any) { + let result = initialValue; + + for (const item of array) { + result = await handler(result, item); + } + + return result; +} diff --git a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts index e597c31c..214573b9 100644 --- a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts @@ -53,7 +53,7 @@ export default class LLMBasedTaskNode { const prompt = this.prompt.getPrompt(context); - return this.requester(prompt, context); + return await this.requester(prompt, context); } parseLLMResponse(llmResponse: any): Partial { diff --git a/packages/vmind/src/core/VMind.ts b/packages/vmind/src/core/VMind.ts index 9f90bb53..9cd22021 100644 --- a/packages/vmind/src/core/VMind.ts +++ b/packages/vmind/src/core/VMind.ts @@ -1,10 +1,10 @@ import { _chatToVideoWasm } from '../chart-to-video'; import { ILLMOptions, TimeType, Model, SimpleFieldInfo, DataItem, OuterPackages, ModelType } from '../typings'; import { getFieldInfoFromDataset, parseCSVData as parseCSVDataWithRule } from '../common/dataProcess'; -import { generateChartWithAdvisor } from '../common/chartAdvisor'; import applicationMetaList, { ApplicationType } from './applications'; import { VMindApplicationMap } from './types'; import { BaseApplication } from 'src/base/application'; +import { DataAggregationContext } from 'src/applications/types'; class VMind { private _FPS = 30; @@ -72,10 +72,10 @@ class VMind { dataset: DataItem[] ) { if (this.getModelType() === ModelType.GPT) { - const context = { + const context: DataAggregationContext = { userInput: userPrompt, fieldInfo, - dataset, + sourceDataset: dataset, llmOptions: this._options }; const application = this.getApplication(ApplicationType.DataAggregation, ModelType.GPT); diff --git a/packages/vmind/src/core/applications.ts b/packages/vmind/src/core/applications.ts index 44972b4f..63783456 100644 --- a/packages/vmind/src/core/applications.ts +++ b/packages/vmind/src/core/applications.ts @@ -1,5 +1,4 @@ import dataAggregationMetaByModel from 'src/applications/dataAggregation'; -import { ModelType } from 'src/typings'; export enum ApplicationType { DataAggregation = 'dataAggregation' diff --git a/packages/vmind/src/core/types.ts b/packages/vmind/src/core/types.ts index 21d1bab4..50b078cf 100644 --- a/packages/vmind/src/core/types.ts +++ b/packages/vmind/src/core/types.ts @@ -1,5 +1,4 @@ import { BaseApplication } from 'src/base/application'; -import { ApplicationType } from './applications'; export type VMindApplicationMap = { [name: string]: { From 7984f1d6c437fa72533dc7cfd8db70dc18cd5b57 Mon Sep 17 00:00:00 2001 From: da730 Date: Wed, 10 Apr 2024 21:38:52 +0800 Subject: [PATCH 19/62] fix: build error --- packages/vmind/src/base/application/index.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/vmind/src/base/application/index.ts b/packages/vmind/src/base/application/index.ts index 086456e1..d38b3b86 100644 --- a/packages/vmind/src/base/application/index.ts +++ b/packages/vmind/src/base/application/index.ts @@ -40,6 +40,7 @@ export class BaseApplication implements IApplication task: new RuleBasedTaskNode(pipelines) }; } + return {} as { task: BaseTaskNode; name: string }; }); this.tasks = taskNodeInstanceList; } From a7881e06ce8b381ca655cc18bcbf22fe685e970f Mon Sep 17 00:00:00 2001 From: da730 Date: Thu, 11 Apr 2024 11:48:15 +0800 Subject: [PATCH 20/62] feat: add comments --- packages/vmind/src/base/application/index.ts | 5 +++++ packages/vmind/src/base/metaTypes.ts | 11 +++++++++++ packages/vmind/src/base/taskNode/baseTaskNode.ts | 3 ++- packages/vmind/src/base/taskNode/llmBasedTaskNode.ts | 1 - packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts | 1 - 5 files changed, 18 insertions(+), 3 deletions(-) diff --git a/packages/vmind/src/base/application/index.ts b/packages/vmind/src/base/application/index.ts index d38b3b86..f31a2fe7 100644 --- a/packages/vmind/src/base/application/index.ts +++ b/packages/vmind/src/base/application/index.ts @@ -23,6 +23,11 @@ export class BaseApplication implements IApplication this.registerTaskNodes(meta); } + /** + * register the task nodes of this application + * + * @param meta meta information of this application + */ registerTaskNodes(meta: ApplicationMeta) { const taskNodeInstanceList = meta.taskNodes.map((taskInfo: TaskNode) => { const { taskNode, name } = taskInfo; diff --git a/packages/vmind/src/base/metaTypes.ts b/packages/vmind/src/base/metaTypes.ts index 9e3c45ca..145784c6 100644 --- a/packages/vmind/src/base/metaTypes.ts +++ b/packages/vmind/src/base/metaTypes.ts @@ -5,6 +5,10 @@ import { Patcher } from './tools/patcher'; import { Prompt } from './tools/prompt'; import { Requester } from './tools/requester'; import { Transformer } from './tools/transformer'; + +/** + * meta used to describe LLM-based task node (see LLMBasedTaskNode class) + */ export type LLMBasedTaskNodeMeta = { type: TaskNodeType.LLM_BASED; modelType: ModelType; @@ -14,6 +18,9 @@ export type LLMBasedTaskNodeMeta = { requester: Requester; }; +/** + * meta used to describe rule-based task node (see RuleBasedTaskNode) + */ export type RuleBasedTaskNodeMeta = { type: TaskNodeType.RULE_BASED; pipelines: Transformer[]; @@ -28,6 +35,10 @@ export type TaskNode = { taskNode: TaskNodeMeta; }; +/** + * meta used to describe an application (see BaseApplication class) + * metas will be registered in the constructor of BaseApplication + */ export type ApplicationMeta = { name: string; taskNodes: TaskNode[]; diff --git a/packages/vmind/src/base/taskNode/baseTaskNode.ts b/packages/vmind/src/base/taskNode/baseTaskNode.ts index a74a2bbc..02680286 100644 --- a/packages/vmind/src/base/taskNode/baseTaskNode.ts +++ b/packages/vmind/src/base/taskNode/baseTaskNode.ts @@ -4,7 +4,8 @@ import { ITaskNode, TaskNodeType } from './types'; * A task node in VMind application, used to complete a specific task, such as requesting a large model for chart type, DSL parsing and conversion, etc. * There are 2 types: rule-based or LLM-based, the former completes a series of tasks based on rule algorithm, the latter calls LLM to complete the task. * TaskNode can be seen as a collection of a series of tools, responsible for completing a specific task. - * Each Node can also be called as a separate function + * Each Node can also be called as a separate function in VMind core + * It can be described using TaskNodeMeta (see packages/vmind/src/base/metaTypes.ts) */ export class BaseTaskNode implements ITaskNode { context: Context; diff --git a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts index 214573b9..ad62ba3f 100644 --- a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts @@ -26,7 +26,6 @@ export type LLMTaskNodeOptions = { }; /** * LLMBasedTaskNode is a task node that needs to use LLM to complete tasks - * Subclasses must assign values to prompt, parser and patcher, and rewrite requestLLM function */ export default class LLMBasedTaskNode extends BaseTaskNode diff --git a/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts b/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts index 12d35945..5aa5393a 100644 --- a/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts @@ -5,7 +5,6 @@ import { TaskNodeType } from './types'; /** * rule-based taskNode, which consists of a series of Pipelines * It completes the transformation from Input to a specific data structure (DSL) - * subclasses must call registerPipelines in their constructor */ export class RuleBasedTaskNode extends BaseTaskNode { pipelines: Transformer[]; From 758cc7e920306d37d193b3a7a611feae62adebc2 Mon Sep 17 00:00:00 2001 From: da730 Date: Thu, 11 Apr 2024 14:50:23 +0800 Subject: [PATCH 21/62] feat: data aggregation with skylark --- .../src/applications/dataAggregation/index.ts | 12 +- .../taskNodes/executeQuery/utils.ts | 5 +- .../taskNodes/getQuerySQL/GPT/utils.ts | 8 +- .../taskNodes/getQuerySQL/skylark/index.ts | 17 +++ .../getQuerySQL/skylark/prompt/index.ts | 14 +++ .../getQuerySQL/skylark/prompt/template.ts | 76 ++++++++++++ .../taskNodes/getQuerySQL/skylark/utils.ts | 112 ++++++++++++++++++ packages/vmind/src/common/utils.ts | 4 + packages/vmind/src/core/VMind.ts | 29 ++--- 9 files changed, 255 insertions(+), 22 deletions(-) create mode 100644 packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/index.ts create mode 100644 packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/prompt/index.ts create mode 100644 packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/prompt/template.ts create mode 100644 packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/utils.ts diff --git a/packages/vmind/src/applications/dataAggregation/index.ts b/packages/vmind/src/applications/dataAggregation/index.ts index ac3052ff..8fe62162 100644 --- a/packages/vmind/src/applications/dataAggregation/index.ts +++ b/packages/vmind/src/applications/dataAggregation/index.ts @@ -3,6 +3,7 @@ import ExecuteQueryTaskNodeMeta from './taskNodes/executeQuery'; import GetSQLTaskNodeGPTMeta from './taskNodes/getQuerySQL/GPT'; import { ModelType } from 'src/typings'; import { DataAggregationContext, DataAggregationOutput } from '../types'; +import GetSQLTaskNodeSkylarkMeta from './taskNodes/getQuerySQL/skylark'; /** * data aggregation application in vmind @@ -19,8 +20,17 @@ const dataAggregationGPTMeta: ApplicationMeta = { + name: 'dataAggregation', + taskNodes: [ + { taskNode: GetSQLTaskNodeSkylarkMeta, name: 'getQuerySQL' }, + { taskNode: ExecuteQueryTaskNodeMeta, name: 'executeQuery' } + ] +}; + const dataAggregationMetaByModel = { - [ModelType.GPT]: dataAggregationGPTMeta + [ModelType.GPT]: dataAggregationGPTMeta, + [ModelType.SKYLARK]: dataAggregationSkylarkMeta }; export default dataAggregationMetaByModel; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/utils.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/utils.ts index 7b2576b0..b2d7421a 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/utils.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/utils.ts @@ -4,6 +4,7 @@ import dayjs from 'dayjs'; import { uniqArray } from '@visactor/vutils'; import alasql from 'alasql'; import { RESERVE_REPLACE_MAP, operators } from '../../../../common/dataProcess/constants'; +import { replaceAll } from 'src/common/utils'; export const readTopNLine = (csvFile: string, n: number) => { // get top n lines of a csv file @@ -297,10 +298,6 @@ export const replaceInvalidWords = (sql: string, columns: string[]) => { return { validStr: sqlWithoutAscii, columnReplaceMap: operatorReplaceMap, sqlReplaceMap: asciiReplaceMap }; }; -export const replaceAll = (originStr: string, replaceStr: string, newStr: string) => { - return originStr.split(replaceStr).join(newStr); -}; - /** * merge two maps * @param map1 diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts index d14cae33..f3a118c4 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts @@ -136,12 +136,18 @@ export const parseDataQueryResponse: Parser = (g return { sql, llmFieldInfo: responseFiledInfo, usage: gptResponse.usage }; }; +const patchQueryInput = (userInput: string) => { + return userInput; +}; + export const dataQueryRequestLLM: Requester = async ( prompt: string, context: GetQuerySQLContext ) => { const { userInput, fieldInfo, llmOptions } = context; - const queryDatasetMessage = `User's Command: ${userInput}\nColumn Information: ${JSON.stringify(fieldInfo)}`; + const patchedInput = patchQueryInput(userInput); + + const queryDatasetMessage = `User's Command: ${patchedInput}\nColumn Information: ${JSON.stringify(fieldInfo)}`; const requestFunc = llmOptions.customRequestFunc?.dataQuery ?? requestGPT; const QueryDatasetPrompt = prompt; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/index.ts new file mode 100644 index 00000000..97e145cd --- /dev/null +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/index.ts @@ -0,0 +1,17 @@ +import { GetQuerySQLContext, GetQuerySQLOutput } from 'src/applications/dataAggregation/types'; +import { LLMBasedTaskNodeMeta } from 'src/base/metaTypes'; +import { TaskNodeType } from 'src/base/taskNode/types'; +import { ModelType } from 'src/typings'; +import { dataQueryRequestLLM, parseSkylarkResponseAsJSON } from './utils'; +import { SkylarkDataAggregationPrompt } from './prompt'; + +const GetSQLTaskNodeSkylarkMeta: LLMBasedTaskNodeMeta = { + type: TaskNodeType.LLM_BASED, + modelType: ModelType.SKYLARK, + parser: parseSkylarkResponseAsJSON, + patcher: [(input: Partial, context: GetQuerySQLContext) => input as GetQuerySQLOutput], + requester: dataQueryRequestLLM, + prompt: new SkylarkDataAggregationPrompt() +}; + +export default GetSQLTaskNodeSkylarkMeta; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/prompt/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/prompt/index.ts new file mode 100644 index 00000000..146c8fce --- /dev/null +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/prompt/index.ts @@ -0,0 +1,14 @@ +import { Prompt } from 'src/base/tools/prompt'; +import { GetQuerySQLContext } from 'src/applications/dataAggregation/types'; +import { getQueryDatasetPrompt } from './template'; + +export class SkylarkDataAggregationPrompt extends Prompt { + constructor() { + super(''); + } + getPrompt(context: GetQuerySQLContext) { + const { llmOptions } = context; + const QueryDatasetPrompt = getQueryDatasetPrompt(llmOptions.showThoughts ?? true); + return QueryDatasetPrompt; + } +} diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/prompt/template.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/prompt/template.ts new file mode 100644 index 00000000..6c80f795 --- /dev/null +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/prompt/template.ts @@ -0,0 +1,76 @@ +export const VMIND_DATA_SOURCE = 'VMind_data_source'; + +export const getQueryDatasetPrompt = ( + showThoughts: boolean +) => `您是一位数据分析的专家。这是一个名为${VMIND_DATA_SOURCE}的原始数据集。用户会告诉您他的命令和${VMIND_DATA_SOURCE}的列信息。您的任务是根据指令生成一个sql和fieldInfo。只返回一个JSON对象。 + +# SQL语句编写要求 +- 您需要编写一个标准的sql语句。 +- 所有的度量列必须被聚合,即使用户没有要求你这样做。支持的聚合函数:["MAX()", "MIN()", "SUM()", "COUNT()", "AVG()"] +- 支持的sql关键字:["SELECT", "FROM", "WHERE", "GROUP BY", "HAVING", "ORDER BY", "LIMIT", "DISTINCT"]. +- 不要使用不支持的关键词,如:WITHIN, FIELD。不要使用不支持的聚合函数,如:PERCENTILE_CONT, PERCENTILE。不要使用不支持的操作符。我们将使用alasql执行您的sql。不支持的关键词、函数和操作符会导致系统崩溃。 +- 使用\` \`包裹sql中的所有列名 +- 让你的sql尽可能简单。 + +您需要按照以下步骤编写sql语句。 + +# 步骤 +1. 从用户的指令中提取与数据相关的部分。忽略其他与数据无关的部分。 +2. 根据列的名称和类型,推断${VMIND_DATA_SOURCE}中与用户指令有关的列,并将其添加到SELECT中。尽可能多地选择相关列,不要遗漏一些关键的列,比如与日期相关的维度等。你只能使用Column Information中提到的列,不要假设不存在的列。如果现有的列不能满足用户的命令,选择Column Information中最相关的列。 +3. 不论用户指定了哪种图表类型,将所选择的度量列使用聚合函数聚合,即使你推断它们不适合被聚合,即使用户没有要求你这样做。如果你不确定使用哪个聚合函数,使用SUM()。不要使用不支持的聚合函数。 +4. 使用维度列对数据进行分组。 +5. 在您的sql中,如有必要,您也可以使用WHERE, HAVING, ORDER BY, LIMIT。使用支持的操作符完成WHERE和HAVING。只能使用如columnA = value1,sum_b > 0的二元表达式。在您的表达式中,只能使用在维度列的domain中出现的维度值。 + +让我们一步一步思考。不要忘了将所有度量列聚合。 + +用户将会直接使用JSON.parse()解析您返回的内容,只返回一个不带任何额外内容的JSON对象。您的JSON对象必须包含sql和fieldInfo。 + +请按以下格式回复: +\`\`\` +{ +${showThoughts ? 'thoughts: string //你的想法' : ''} +sql: string; //你的sql。注意,这是一个JSON对象中的字符串,所以必须是一行,不含任何\\n。 +fieldInfo: { +fieldName: string; //字段名。 +type: string; //字段类型,string,int,date或float。 +}[]; //您的sql中字段信息的数组。描述其名称和类型。 +} +\`\`\` + +#Examples: + +User's Command: Show me the change of the GDP rankings of each country. +Column Information: [{"fieldName":"country","type":"string","role":"dimension","domain":["USA", "China", "England"]},{"fieldName":"continent","type":"string","role":"dimension","domain":["North America","Asia","Europe"]},{"fieldName":"GDP","type":"float","role":"measure","domain":[2780,617030]},{"fieldName":"year","type":"int","role":"measure","domain":[1973,2018]}] + +Response: +\`\`\` +{ + ${showThoughts ? '"thoughts": string //your thoughts' : ''} + "sql": "SELECT \`country\`, \`year\`, SUM(\`GDP\`) AS \`total_GDP\` FROM ${VMIND_DATA_SOURCE} GROUP BY \`country\`, \`year\` ORDER BY \`year\`, \`total_GDP\` DESC", + "fieldInfo": [ + { + "fieldName": "country", + "type": "string" + }, + { + "fieldName": "year", + "type": "date" + }, + { + "fieldName": "total_GDP", + "type": "int" + } + ] +} +\`\`\` + +在上面这个例子中,用户想要展示不同国家GDP排名的变化,相关列有country和GDP。用户需要一个年份列才能展示“变化”,因此我们还需要选择year。GDP是一个指标列,因此我们要将它聚合。从用户输入中无法推断聚合方式,因此使用SUM()。您只需要将生成的JSON返回给用户。 + +一步完成您的任务。 + +# 约束: +- 在一行内写出您的sql语句,不要有任何\\n。您的sql必须能够由alasql执行。 +- 请不要在您的sql语句中改变或翻译列名,请保持原有的列名不变,即使他们含有空格或-。 +- 在你的sql中不要遗漏GROUP BY。 +- 直接返回JSON对象,不要有任何其他内容。确保它能够被JavaScript中的JSON.parse()直接解析。 +`; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/utils.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/utils.ts new file mode 100644 index 00000000..dd91a2b9 --- /dev/null +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/utils.ts @@ -0,0 +1,112 @@ +import { ILLMOptions, LLMResponse } from 'src/typings'; +import { matchJSONStr, replaceAll } from 'src/common/utils'; +import { GetQuerySQLContext } from 'src/applications/dataAggregation/types'; +import { Requester } from 'src/base/tools/requester'; +import axios from 'axios'; +import { omit } from 'lodash'; +import JSON5 from 'json5'; + +const patchDataQueryInput = (userInput: string) => + userInput + ' 使用` `包裹sql中的所有列名。使用支持的聚合函数将所有的度量列聚合。'; + +export const parseJson = (JsonStr: string, prefix?: string) => { + const parseNoPrefixStr = (str: string) => { + //尝试不带前缀的解析 + try { + return JSON5.parse(str); + } catch (err) { + return { + error: true + }; + } + }; + //解析GPT返回的JSON格式 + if (prefix) { + //被某些字符包裹 + const splitArr = JsonStr.split(prefix); + const splittedStr = splitArr[splitArr.length - 2]; + const res = parseNoPrefixStr(splittedStr); + if (!res.error) { + return res; + } + } + //没有被前缀包裹,或者解析被前缀包裹的json失败,尝试直接解析返回结果 + const res2 = parseNoPrefixStr(JsonStr); + return res2; +}; + +export const parseSkylarkResponseAsJSON = (skylarkRes: LLMResponse) => { + try { + if (skylarkRes.error) { + return { + error: true, + ...skylarkRes.error + }; + } + const choices = skylarkRes.choices; + const content = replaceAll(choices[0].message.content, '\n', ' '); + const jsonStr = matchJSONStr(content); + const resJson = parseJson(jsonStr, '```'); + const { sql, fieldInfo: responseFiledInfo } = resJson; + return { sql, llmFieldInfo: responseFiledInfo, usage: skylarkRes.usage }; + } catch (err: any) { + return { + error: true, + message: err.message + }; + } +}; + +/** + * + * @param prompt + * @param message + * @param options + */ +export const requestSkyLark = async (prompt: string, message: string, options: ILLMOptions): Promise => { + const url: string = options?.url; + const headers: any = { ...(options.headers ?? {}), 'Content-Type': 'application/json' }; + + try { + const res = await axios(url, { + method: options?.method ?? 'POST', + headers, //must has Authorization: `Bearer ${openAIKey}` if use openai api + data: { + ...omit(options, ['headers', 'url', 'method', 'showThoughts', 'customRequestFunc']), + model: options?.model ?? 'gpt-3.5-turbo', + messages: [ + { + role: 'system', + content: prompt + }, + { + role: 'user', + content: message + } + ], + max_tokens: options?.max_tokens ?? 500, + temperature: options?.temperature ?? 0, + stream: false + } + }).then(response => response.data); + + return res; + } catch (err: any) { + return err.response.data; + } +}; + +export const dataQueryRequestLLM: Requester = async ( + prompt: string, + context: GetQuerySQLContext +) => { + const { userInput, fieldInfo, llmOptions } = context; + const patchedInput = patchDataQueryInput(userInput); + + const queryDatasetMessage = `User's Command: ${patchedInput}\nColumn Information: ${JSON.stringify(fieldInfo)}`; + + const requestFunc = llmOptions.customRequestFunc?.dataQuery ?? requestSkyLark; + const QueryDatasetPrompt = prompt; + const dataProcessRes = await requestFunc(QueryDatasetPrompt, queryDatasetMessage, llmOptions); + return dataProcessRes; +}; diff --git a/packages/vmind/src/common/utils.ts b/packages/vmind/src/common/utils.ts index 7e5712f0..1610c157 100644 --- a/packages/vmind/src/common/utils.ts +++ b/packages/vmind/src/common/utils.ts @@ -12,6 +12,10 @@ export const calculateTokenUsage = (usageList: any[]) => { return totalUsage; }; +export const replaceAll = (originStr: string, replaceStr: string, newStr: string) => { + return originStr.split(replaceStr).join(newStr); +}; + export const execPipeline = ( src: any, pipes: ((src: any, context: PipelineContext) => any)[], diff --git a/packages/vmind/src/core/VMind.ts b/packages/vmind/src/core/VMind.ts index 9cd22021..432c4095 100644 --- a/packages/vmind/src/core/VMind.ts +++ b/packages/vmind/src/core/VMind.ts @@ -37,6 +37,11 @@ class VMind { return this._applicationMap[name][modelType]; } + private async runApplication(applicationName: ApplicationType, modelType: ModelType, context: any) { + const application = this.getApplication(applicationName, modelType); + return application.runTasks(context); + } + /** * parse csv string and get the name, type of each field using rule-based method. * @param csvString csv data user want to visualize @@ -71,22 +76,14 @@ class VMind { fieldInfo: SimpleFieldInfo[], dataset: DataItem[] ) { - if (this.getModelType() === ModelType.GPT) { - const context: DataAggregationContext = { - userInput: userPrompt, - fieldInfo, - sourceDataset: dataset, - llmOptions: this._options - }; - const application = this.getApplication(ApplicationType.DataAggregation, ModelType.GPT); - return application.runTasks(context); - } - if (this.getModelType() === ModelType.SKYLARK) { - //return queryDatasetWithSkylark(userPrompt, fieldInfo, dataset, this._options); - } - console.error('unsupported model in data query!'); - - return { fieldInfo: [], dataset } as any; + const modelType = this.getModelType(); + const context: DataAggregationContext = { + userInput: userPrompt, + fieldInfo, + sourceDataset: dataset, + llmOptions: this._options + }; + return await this.runApplication(ApplicationType.DataAggregation, modelType, context); } async exportVideo(spec: any, time: TimeType, outerPackages: OuterPackages, mode?: 'node' | 'desktop-browser') { From 773950fd93ce5280ba5ce929815990ad9cd1b8c8 Mon Sep 17 00:00:00 2001 From: da730 Date: Thu, 11 Apr 2024 20:40:25 +0800 Subject: [PATCH 22/62] feat: chart generation application with gpt --- .vscode/settings.json | 2 + .../applications/chartGeneration/constants.ts | 15 + .../src/applications/chartGeneration/index.ts | 0 .../generateTypeAndFieldMap/GPT/index.ts | 45 +++ .../GPT/patcher/index.ts | 338 ++++++++++++++++++ .../GPT/prompt/index.ts | 15 + .../GPT/prompt/template.ts | 188 ++++++++++ .../generateTypeAndFieldMap/GPT/utils.ts | 89 +++++ .../taskNodes/getVizSchema/index.ts | 11 + .../taskNodes/getVizSchema/utils.ts | 35 ++ .../src/applications/chartGeneration/types.ts | 39 ++ .../src/applications/dataAggregation/index.ts | 4 +- .../taskNodes/executeQuery/index.ts | 1 - .../taskNodes/executeQuery/transformers.ts | 2 +- .../taskNodes/executeQuery/utils.ts | 2 +- .../GPT/index.ts | 0 .../GPT/prompt/index.ts | 0 .../GPT/prompt/template.ts | 0 .../taskNodes/generateQuerySQL/GPT/utils.ts | 70 ++++ .../skylark/index.ts | 0 .../skylark/prompt/index.ts | 0 .../skylark/prompt/template.ts | 0 .../skylark/utils.ts | 2 +- .../taskNodes/getQuerySQL/GPT/utils.ts | 156 -------- packages/vmind/src/applications/types.ts | 13 +- .../vmind/src/base/tools/patcher/index.ts | 2 +- packages/vmind/src/common/utils.ts | 34 -- packages/vmind/src/common/utils/gpt.ts | 92 +++++ packages/vmind/src/common/utils/utils.ts | 103 ++++++ .../vmind/src/common/vizDataToSpec/utils.ts | 67 ---- .../src/common/vizDataToSpec/vizDataToSpec.ts | 2 +- packages/vmind/src/typings/index.ts | 14 - 32 files changed, 1060 insertions(+), 281 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 packages/vmind/src/applications/chartGeneration/constants.ts create mode 100644 packages/vmind/src/applications/chartGeneration/index.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/index.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/patcher/index.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/prompt/index.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/prompt/template.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/utils.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/index.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/utils.ts create mode 100644 packages/vmind/src/applications/chartGeneration/types.ts rename packages/vmind/src/applications/dataAggregation/taskNodes/{getQuerySQL => generateQuerySQL}/GPT/index.ts (100%) rename packages/vmind/src/applications/dataAggregation/taskNodes/{getQuerySQL => generateQuerySQL}/GPT/prompt/index.ts (100%) rename packages/vmind/src/applications/dataAggregation/taskNodes/{getQuerySQL => generateQuerySQL}/GPT/prompt/template.ts (100%) create mode 100644 packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/GPT/utils.ts rename packages/vmind/src/applications/dataAggregation/taskNodes/{getQuerySQL => generateQuerySQL}/skylark/index.ts (100%) rename packages/vmind/src/applications/dataAggregation/taskNodes/{getQuerySQL => generateQuerySQL}/skylark/prompt/index.ts (100%) rename packages/vmind/src/applications/dataAggregation/taskNodes/{getQuerySQL => generateQuerySQL}/skylark/prompt/template.ts (100%) rename packages/vmind/src/applications/dataAggregation/taskNodes/{getQuerySQL => generateQuerySQL}/skylark/utils.ts (98%) delete mode 100644 packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts delete mode 100644 packages/vmind/src/common/utils.ts create mode 100644 packages/vmind/src/common/utils/gpt.ts create mode 100644 packages/vmind/src/common/utils/utils.ts diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..2c63c085 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,2 @@ +{ +} diff --git a/packages/vmind/src/applications/chartGeneration/constants.ts b/packages/vmind/src/applications/chartGeneration/constants.ts new file mode 100644 index 00000000..c82f0b49 --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/constants.ts @@ -0,0 +1,15 @@ +export const SUPPORTED_CHART_LIST = [ + 'Bar Chart', + 'Line Chart', + 'Pie Chart', + 'Scatter Plot', + 'Word Cloud', + 'Rose Chart', + 'Radar Chart', + 'Sankey Chart', + 'Funnel Chart', + 'Dual Axis Chart', + 'Waterfall Chart', + 'Box Plot', + 'Dynamic Bar Chart' +]; diff --git a/packages/vmind/src/applications/chartGeneration/index.ts b/packages/vmind/src/applications/chartGeneration/index.ts new file mode 100644 index 00000000..e69de29b diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/index.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/index.ts new file mode 100644 index 00000000..b4c713c5 --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/index.ts @@ -0,0 +1,45 @@ +import { + GenerateChartAndFieldMapContext, + GenerateChartAndFieldMapOutput +} from 'src/applications/chartGeneration/types'; +import { LLMBasedTaskNodeMeta } from 'src/base/metaTypes'; +import { TaskNodeType } from 'src/base/taskNode/types'; +import { ModelType } from 'src/typings'; +import { chartGenerationRequestLLM, parseChartGenerationResponse } from './utils'; +import { GPTChartGenerationPrompt } from './prompt'; +import { + patchAxisField, + patchBoxPlot, + patchCartesianXField, + patchColorField, + patchDualAxis, + patchDynamicBarChart, + patchLabelField, + patchPieChart, + patchWordCloud, + patchYField +} from './patcher'; + +const GetSQLTaskNodeGPTMeta: LLMBasedTaskNodeMeta = { + type: TaskNodeType.LLM_BASED, + modelType: ModelType.GPT, + parser: parseChartGenerationResponse, + // At some point, due to the unclear intention of the user's input, fields may lack fields in Cell returned by GPT. + // At this time, you need to make up according to the rules + patcher: [ + patchAxisField, + patchColorField, + patchLabelField, + patchYField, + patchBoxPlot, + patchDualAxis, + patchPieChart, + patchWordCloud, + patchDynamicBarChart, + patchCartesianXField + ], + requester: chartGenerationRequestLLM, + prompt: new GPTChartGenerationPrompt() +}; + +export default GetSQLTaskNodeGPTMeta; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/patcher/index.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/patcher/index.ts new file mode 100644 index 00000000..e30a36b3 --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/patcher/index.ts @@ -0,0 +1,338 @@ +import { FOLD_NAME, FOLD_VALUE } from '@visactor/chart-advisor'; +import { isArray, isNil } from 'lodash'; +import { + GenerateChartAndFieldMapContext, + GenerateChartAndFieldMapOutput +} from 'src/applications/chartGeneration/types'; +import { Transformer } from 'src/base/tools/transformer'; +import { + CARTESIAN_CHART_LIST, + foldDatasetByYField, + getFieldByDataType, + getFieldByRole, + getRemainedFields +} from 'src/common/utils/utils'; +import { DataType, ROLE } from 'src/typings'; + +export const patchAxisField: Transformer< + GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext, + GenerateChartAndFieldMapOutput +> = (input: GenerateChartAndFieldMapOutput, _context: GenerateChartAndFieldMapContext) => { + const { cell } = input; + + const cellNew: any = { ...cell }; + + // patch the "axis" field to x + if (cellNew.axis && (!cellNew.x || !cellNew.y)) { + if (!cellNew.x) { + cellNew.x = cellNew.axis; + } else if (!cellNew.y) { + cellNew.y = cellNew.axis; + } + } + + return { + ...input, + cell: cellNew + }; +}; + +export const patchColorField: Transformer< + GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext, + GenerateChartAndFieldMapOutput +> = (input: GenerateChartAndFieldMapOutput, _context: GenerateChartAndFieldMapContext) => { + const { cell } = input; + const cellNew = { ...cell, color: cell.color ?? cell.category }; + + return { + ...input, + cell: cellNew + }; +}; + +export const patchLabelField: Transformer< + GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext, + GenerateChartAndFieldMapOutput +> = (input: GenerateChartAndFieldMapOutput, _context: GenerateChartAndFieldMapContext) => { + const { cell } = input; + + const cellNew: any = { ...cell }; + //patch the "label" fields to color + if (cellNew.label && (!cellNew.color || cellNew.color.length === 0)) { + cellNew.color = cellNew.label; + } + + return { + ...input, + cell: cellNew + }; +}; + +export const patchYField: Transformer< + GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext, + GenerateChartAndFieldMapOutput +> = (input: GenerateChartAndFieldMapOutput, context: GenerateChartAndFieldMapContext) => { + const { chartType, cell } = input; + const { dataset, fieldInfo } = context; + let cellNew = { ...cell }; + const { x, y } = cellNew; + let chartTypeNew = chartType; + let datasetNew = dataset; + + // When there are multiple y-axis fields, the processing methods are: + // 1. For box plot, the chart type is not corrected. + // 2. For bar chart or line chart, the chart type is corrected to double axis chart. + // 3. In other cases, the chart type is corrected to scatter plot. + + if (y && isArray(y) && y.length > 1) { + if (chartTypeNew === 'BOX PLOT' || (chartTypeNew === 'DUAL AXIS CHART' && y.length === 2)) { + return { + ...input + }; + } + + if (chartTypeNew === 'BAR CHART' || chartTypeNew === 'LINE CHART' || chartTypeNew === 'DUAL AXIS CHART') { + //use fold to visualize more than 2 y fields + datasetNew = foldDatasetByYField(datasetNew, y, fieldInfo); + cellNew.y = FOLD_VALUE.toString(); + cellNew.color = FOLD_NAME.toString(); + } else { + chartTypeNew = 'SCATTER PLOT'; + cellNew = { + ...cell, + x: y[0], + y: y[1], + color: typeof x === 'string' ? x : x[0] + }; + } + } + + return { + ...input, + chartType: chartTypeNew, + cell: cellNew, + dataset: datasetNew + }; +}; + +export const patchBoxPlot: Transformer< + GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext, + GenerateChartAndFieldMapOutput +> = (input: GenerateChartAndFieldMapOutput, _context: GenerateChartAndFieldMapContext) => { + const { chartType, cell } = input; + const cellNew = { + ...cell + }; + const { y } = cellNew; + if (chartType === 'BOX PLOT') { + if (typeof y === 'string' && y.split(',').length > 1) { + cellNew.y = y.split(',').map(str => str.trim()); + } else if (isNil(y) || y.length === 0) { + const { + lower_whisker, + lowerWhisker, + min, + lower, + lowerBox, + lower_box, + q1, + lower_quartile, + lowerQuartile, + midline, + median, + q3, + upperBox, + upper_box, + upper_quartile, + upperQuartile, + upper_whisker, + upperWhisker, + max, + upper + } = cellNew as any; + + cellNew.y = [ + lower_whisker, + lowerWhisker, + min, + lower, + lowerBox, + lower_box, + q1, + lower_quartile, + lowerQuartile, + midline, + median, + q3, + upperBox, + upper_box, + upper_quartile, + upperQuartile, + upper_whisker, + upperWhisker, + max, + upper + ].filter(Boolean); + } + } + + return { ...input, cell: cellNew }; +}; + +export const patchDualAxis: Transformer< + GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext, + GenerateChartAndFieldMapOutput +> = (input: GenerateChartAndFieldMapOutput, _context: GenerateChartAndFieldMapContext) => { + const { chartType, cell } = input; + const cellNew: any = { ...cell }; + //Dual-axis drawing yLeft and yRight + + if (chartType === 'DUAL AXIS CHART' && cellNew.yLeft && cellNew.yRight) { + cellNew.y = [cellNew.yLeft, cellNew.yRight]; + } + + return { ...input, cell: cellNew }; +}; + +export const patchPieChart: Transformer< + GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext, + GenerateChartAndFieldMapOutput +> = (input: GenerateChartAndFieldMapOutput, context: GenerateChartAndFieldMapContext) => { + const { chartType, cell } = input; + const { fieldInfo } = context; + const cellNew = { ...cell }; + + if (chartType === 'ROSE CHART') { + cellNew.angle = cellNew.radius ?? cellNew.size ?? cellNew.angle; + } + + //Pie chart must have color field and the angle field + if (chartType === 'PIE CHART' || chartType === 'ROSE CHART') { + if (!cellNew.color || !cellNew.angle) { + const remainedFields = getRemainedFields(cellNew, fieldInfo); + + if (!cellNew.color) { + //No color fields are assigned, select a discrete field from the remaining fields as color field + const colorField = getFieldByRole(remainedFields, ROLE.DIMENSION); + if (colorField) { + cellNew.color = colorField.fieldName; + } else { + cellNew.color = remainedFields[0].fieldName; + } + } + if (!cellNew.angle) { + //no angle field are assigned, select a continuous field from the remaining field to assign to the angle + const angleField = getFieldByDataType(remainedFields, [DataType.FLOAT, DataType.INT]); + if (angleField) { + cellNew.angle = angleField.fieldName; + } else { + cellNew.angle = remainedFields[0].fieldName; + } + } + } + } + return { ...input, cell: cellNew }; +}; + +export const patchWordCloud: Transformer< + GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext, + GenerateChartAndFieldMapOutput +> = (input: GenerateChartAndFieldMapOutput, context: GenerateChartAndFieldMapContext) => { + //Word cloud must have color fields and size fields + const { chartType, cell } = input; + const { fieldInfo } = context; + const cellNew = { ...cell }; + + if (chartType === 'WORD CLOUD') { + if (!cellNew.size || !cellNew.color || cellNew.color === cellNew.size) { + const remainedFields = getRemainedFields(cellNew, fieldInfo); + + if (!cellNew.size || cellNew.size === cellNew.color) { + const newSize = (cellNew as any).weight ?? (cellNew as any).fontSize; + if (newSize) { + cellNew.size = newSize; + } else { + const sizeField = getFieldByDataType(remainedFields, [DataType.INT, DataType.FLOAT]); + if (sizeField) { + cellNew.size = sizeField.fieldName; + } else { + cellNew.size = remainedFields[0].fieldName; + } + } + } + if (!cellNew.color) { + const newColor = (cellNew as any).text ?? (cellNew as any).word ?? (cellNew as any).label ?? cellNew.x; + if (newColor) { + cellNew.color = newColor; + } else { + const colorField = getFieldByRole(remainedFields, ROLE.DIMENSION); + if (colorField) { + cellNew.color = colorField.fieldName; + } else { + cellNew.color = remainedFields[0].fieldName; + } + } + } + } + } + return { ...input, cell: cellNew }; +}; + +export const patchDynamicBarChart: Transformer< + GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext, + GenerateChartAndFieldMapOutput +> = (input: GenerateChartAndFieldMapOutput, context: GenerateChartAndFieldMapContext) => { + const { chartType, cell } = input; + const { fieldInfo } = context; + const cellNew = { ...cell }; + + if (chartType === 'DYNAMIC BAR CHART') { + if (!cell.time || cell.time === '' || cell.time.length === 0) { + const remainedFields = getRemainedFields(cellNew, fieldInfo); + + //动态条形图没有time字段,选择一个离散字段作为time + const timeField = getFieldByDataType(remainedFields, [DataType.DATE]); + if (timeField) { + cellNew.time = timeField.fieldName; + } else { + cellNew.time = remainedFields[0].fieldName; + } + } + } + + return { ...input, cell: cellNew }; +}; + +export const patchCartesianXField: Transformer< + GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext, + GenerateChartAndFieldMapOutput +> = (input: GenerateChartAndFieldMapOutput, context: GenerateChartAndFieldMapContext) => { + const { chartType, cell } = input; + const { fieldInfo } = context; + const cellNew = { ...cell }; + + //Cartesian chart must have X field + if (CARTESIAN_CHART_LIST.map(chart => chart.toUpperCase()).includes(chartType)) { + if (!cellNew.x) { + const remainedFields = getRemainedFields(cellNew, fieldInfo); + //没有分配x字段,从剩下的字段里选择一个离散字段分配到x上 + const xField = getFieldByRole(remainedFields, ROLE.DIMENSION); + if (xField) { + cellNew.x = xField.fieldName; + } else { + cellNew.x = remainedFields[0].fieldName; + } + } + } + return { ...input, cell: cellNew }; +}; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/prompt/index.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/prompt/index.ts new file mode 100644 index 00000000..432fcfc6 --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/prompt/index.ts @@ -0,0 +1,15 @@ +import { Prompt } from 'src/base/tools/prompt'; +import { GenerateChartAndFieldMapContext } from 'src/applications/chartGeneration/types'; +import { ChartAdvisorPromptEnglish } from './template'; + +export class GPTChartGenerationPrompt extends Prompt { + constructor() { + super(''); + } + getPrompt(context: GenerateChartAndFieldMapContext) { + const { llmOptions } = context; + //@TODO: change the examples according to supported chart list. + const QueryDatasetPrompt = ChartAdvisorPromptEnglish(llmOptions.showThoughts ?? true); + return QueryDatasetPrompt; + } +} diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/prompt/template.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/prompt/template.ts new file mode 100644 index 00000000..7d523dd5 --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/prompt/template.ts @@ -0,0 +1,188 @@ +import { SUPPORTED_CHART_LIST } from 'src/applications/chartGeneration/constants'; + +export const ChartAdvisorPromptEnglish = ( + showThoughts: boolean, + supportedChartList: string[] = SUPPORTED_CHART_LIST +) => `You are an expert in data visualization. +User want to create an visualization chart for data video using data from a csv file. Ignore the duration in User Input. +Your task is: +1. Based on the user's input, infer the user's intention, such as comparison, ranking, trend display, proportion, distribution, etc. If user did not show their intention, just ignore and do the next steps. +2. Select the single chart type that best suites the data from the list of supported charts. Supported chart types: ${JSON.stringify( + supportedChartList +)}. +3. Map all the fields in the data to the visual channels according to user input and the chart type you choose. Don't use non-existent fields. Only use existing fields without further processing. If the existing fields can't meet user's intention, just use the most related fields. + +Knowledge: +1. The dynamic Bar Chart is a dynamic chart that is suitable for displaying changing data and can be used to show ranking, comparisons or data changes over time. It usually has a time field. It updates the data dynamically according to the time field and at each time point, the current data is displayed using a bar chart. +2. A number field can not be used as a color field. + +Let's think step by step. ${showThoughts ? 'Fill your thoughts in {thought}.' : ''} + +Respone in the following format: + +\`\`\` +{${showThoughts ? '\n"thought" : your thoughts' : ''} +"CHART_TYPE": the chart type you choose. Supported chart types: ${JSON.stringify(supportedChartList)}. +"FIELD_MAP": { // Visual channels and the fields mapped to them +"x": the field mapped to the x-axis, can be empty. Can Only has one field. +"y": the field mapped to the y-axis, can be empty. Use array if there are more than 1 fields. +"color": the field mapped to the color channel. Must use a string field. Can't be empty in Word Cloud, Pie Chart and Rose Chart. +"size": the field mapped to the size channel. Must use a number field. Can be empty +"angle": the field mapped to the angle channel of the pie chart, can be empty. +"time": This is usually a date field and can be used only in Dynamic Bar Chart. Can't be empty in Dynamic Bar Chart. +"source": the field mapped to the source channel. Can't be empty in Sankey Chart. +"target": the field mapped to the target channel. Can't be empty in Sankey Chart. +"value": the field mapped to the value channel. Can't be empty in Sankey Chart. +}${showThoughts ? ',\n"Reason": the reason for selecting the chart type and visual mapping.' : ''} +} +\`\`\` + +Don't provide further explanations for your results. + +Constraints: +1. No user assistance. +2. Please select one chart type in CHART_TYPE at each time. Don't use "A or B", "[A, B]" in CHART_TYPE. +3. The selected chart type in CHART_TYPE must be in the list of supported charts. +4. DO NOT change or translate the field names in FIELD_MAP. +5. Ignore requests unrelated to chart visualization in the user's request. +6. The keys in FIELD_MAP must be selected from the list of available visual channels. +7. Wrap the reply content using \`\`\`, and the returned content must be directly parsed by JSON.parse() in JavaScript. + +Here are some examples: + +User Input: 帮我展示历届奥运会各国金牌数量的对比. +Data field description: [ +{ +"id": "country", +"description": "Represents the name of the country, which is a string.", +"type": "string", +"role": "dimension" +}, +{ +"id": "金牌数量", +"description": "Represents the number of gold medals won by the country in the current year, which is an integer.", +"type": "int", +"role": "measure" +}, +{ +"id": "year", +"description": "Represents the current year, which is a date.", +"type": "string", +"role": "dimension" +} +] + +Response: +\`\`\` +{${showThoughts ? '\n"thought": "Your thoughts",' : ''} +"CHART_TYPE": "Dynamic Bar Chart", +"FIELD_MAP": { +"x": "country", +"y": "金牌数量", +"time": "year" +}${ + showThoughts + ? ",\n\"REASON\": \"The data contains the year, country, and medal count, and the user's intention contains 'comparison', which is suitable for drawing a dynamic bar chart that changes over time to show the comparison of gold medal counts of various countries in each Olympic Games.The 'country' field is used as the x-axis of the bar chart, and '金牌数量' is used as the y-axis to show the comparison of gold medal counts of various countries in the current year.The 'year' field is used as the time field of the dynamic bar chart to show the comparison of gold medal counts of various countries at different years.\"" + : '' +} +} +\`\`\` + +------------------------ + +User Input: 帮我展示各手机品牌的市场占有率, 赛博朋克风格, 时长5s +Data field description: [ +{ +"id": "品牌名称", +"description": "Represents the name of the mobile phone brand, which is a string.", +"type": "string", +"role": "dimension" +}, +{ +"id": "市场份额", +"description": "Represents the market share of the brand, which is a percentage.", +"type": "float", +"role": "measure" +} +] + +Response: +\`\`\` +{${showThoughts ? '\n"thought": "Your thoughts",' : ''} +"CHART_TYPE": "Pie Chart", +"FIELD_MAP": { +"angle": "市场份额", +"color": "品牌名称" +}${ + showThoughts + ? ',\n"REASON": "The data contains the market share, and the user wants to show percentage data, which is suitable for displaying with a pie chart. The 市场份额 is used as the angle of the pie chart to show the market share of each brand. The 品牌名称 is used as the color to distinguish different brands. The duration is 5s but we just ignore it."' + : '' +} +} +\`\`\` + +------------------------ + +User Input: 帮我展示降雨量变化趋势. +Data field description: [ +{ +"id": "日期", +"description": "Represents the current month, which is a date.", +"type": "string", +"role": "dimension" +}, +{ +"id": "降雨量", +"description": "Represents the rainfall in the current month, which is a number.", +"type": "int", +"role": "measure" +} +] + +Response: +\`\`\` +{${showThoughts ? '\n"thought": "Your thoughts",' : ''} +"CHART_TYPE": "Line Chart", +"FIELD_MAP": { +"x": "日期", +"y": "降雨量" +}${ + showThoughts + ? ',\n"REASON": "User wants to show the trend of the rainfall, which is suitable for displaying with a line chart. The \'日期\' is used as the x-axis because it\'s a date, and the 降雨量 is used as the y-axis because it\'s a number. This chart can show the trend of rainfall."' + : '' +} +} +\`\`\` + +------------------------ + +User Input: 帮我绘制图表, 时长20s. +Data field description: [ +{ +"id": "日期", +"description": "Represents the current month, which is a date.", +"type": "date", +"role": "dimension" +}, +{ +"id": "降雨量", +"description": "Represents the rainfall in the current month, which is a number.", +"type": "int", +"role": "measure" +} +] + +Response: +\`\`\` +{${showThoughts ? '\n"thought": "Your thoughts",' : ''}"CHART_TYPE": "Line Chart", +"FIELD_MAP": { +"x": "日期", +"y": "降雨量" +}${ + showThoughts + ? ',\n"REASON": "User did not show their intention about the data in their input. The data has two fields and it contains a date field, so Line Chart is best suitable to show the data. The field \'日期\' is used as the x-axis because it\'s a date, and the 降雨量 is used as the y-axis because it\'s a number. The duration is 20s but we just ignore it."' + : '' +} +} +\`\`\` +`; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/utils.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/utils.ts new file mode 100644 index 00000000..df591f28 --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/utils.ts @@ -0,0 +1,89 @@ +import { pick } from 'lodash'; +import { SUPPORTED_CHART_LIST } from 'src/applications/chartGeneration/constants'; +import { + Cell, + GenerateChartAndFieldMapContext, + GenerateChartAndFieldMapOutput +} from 'src/applications/chartGeneration/types'; +import { Parser } from 'src/base/tools/parser'; +import { Requester } from 'src/base/tools/requester'; +import { parseGPTResponse, requestGPT } from 'src/common/utils/gpt'; +import { ChartType } from 'src/typings'; + +type GPTChartAdvisorResult = { + CHART_TYPE: ChartType; + DOUBLE_CHECK: string; + FIELD_MAP: Cell; + THOUGHT: string; + VIDEO_DURATION?: number; + COLOR_PALETTE?: string[]; + error?: boolean; +}; + +export const parseChartGenerationResponse: Parser = ( + advisorRes: any +) => { + const advisorResJson: GPTChartAdvisorResult = parseGPTResponse(advisorRes) as unknown as GPTChartAdvisorResult; + + if (advisorResJson.error) { + throw Error((advisorResJson as any).message); + } + if (!SUPPORTED_CHART_LIST.includes(advisorResJson['CHART_TYPE'])) { + throw Error('Unsupported Chart Type. Please Change User Input'); + } + const { CHART_TYPE, FIELD_MAP } = advisorResJson; + + return { chartType: CHART_TYPE, cell: FIELD_MAP, usage: advisorRes.usage }; +}; + +const patchUserInput = (userInput: string) => { + const FULL_WIDTH_SYMBOLS = [',', '。']; + const HALF_WIDTH_SYMBOLS = [',', '.']; + + const BANNED_WORD_LIST = ['动态']; + const ALLOWED_WORD_LIST = ['动态条形图', '动态柱状图', '动态柱图']; + const PLACEHOLDER = '_USER_INPUT_PLACE_HOLDER'; + const tempStr1 = ALLOWED_WORD_LIST.reduce((prev, cur, index) => { + return prev.split(cur).join(PLACEHOLDER + '_' + index); + }, userInput); + const tempStr2 = BANNED_WORD_LIST.reduce((prev, cur) => { + return prev.split(cur).join(''); + }, tempStr1); + const replacedStr = ALLOWED_WORD_LIST.reduce((prev, cur, index) => { + return prev.split(PLACEHOLDER + '_' + index).join(cur); + }, tempStr2); + + let finalStr = HALF_WIDTH_SYMBOLS.reduce((prev, cur, index) => { + return prev.split(HALF_WIDTH_SYMBOLS[index]).join(FULL_WIDTH_SYMBOLS[index]); + }, replacedStr); + const lastCharacter = finalStr[finalStr.length - 1]; + if (!FULL_WIDTH_SYMBOLS.includes(lastCharacter) && !HALF_WIDTH_SYMBOLS.includes(lastCharacter)) { + finalStr += '。'; + } + finalStr += 'Use the original fieldName and DO NOT change or translate any word of the data fields in the response.'; + return finalStr; +}; + +export const chartGenerationRequestLLM: Requester = async ( + prompt: string, + context: GenerateChartAndFieldMapContext +) => { + const { userInput: userInputOrigin, llmOptions, vizSchema } = context; + const userInput = patchUserInput(userInputOrigin); + + const filteredFields = vizSchema.fields + .filter( + field => field.visible + //usefulFields.includes(field.fieldName) + ) + .map(field => ({ + ...pick(field, ['id', 'description', 'type', 'role']) + })); + const chartAdvisorMessage = `User Input: ${userInput}\nData field description: ${JSON.stringify(filteredFields)}`; + //call GPT + const requestFunc = llmOptions.customRequestFunc?.chartAdvisor ?? requestGPT; + + const advisorRes = await requestFunc(prompt, chartAdvisorMessage, llmOptions); + + return advisorRes; +}; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/index.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/index.ts new file mode 100644 index 00000000..4ffbc3ef --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/index.ts @@ -0,0 +1,11 @@ +import { RuleBasedTaskNodeMeta } from 'src/base/metaTypes'; +import { TaskNodeType } from 'src/base/taskNode/types'; +import { GetVizSchemaContext, GetVizSchemaOutput } from '../../types'; +import { getVizSchema } from './utils'; + +const generateVizSchemaTaskNodeMeta: RuleBasedTaskNodeMeta = { + type: TaskNodeType.RULE_BASED, + pipelines: [getVizSchema] +}; + +export default generateVizSchemaTaskNodeMeta; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/utils.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/utils.ts new file mode 100644 index 00000000..5e537427 --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/utils.ts @@ -0,0 +1,35 @@ +import { LOCATION, SimpleFieldInfo, VizSchema } from 'src/typings'; +import { GetVizSchemaContext } from '../../types'; +import { Transformer } from 'src/base/tools/transformer'; + +/** + * Generate a vizSchema from fieldInfo + * @param fieldInfo SimpleFieldInfo[] - An array of field information, each element contains the field name, description, type, and role, etc. + * @returns Partial - Returns a partial VizSchema object, containing the transformed field information. + */ +const getSchemaFromFieldInfo = (fieldInfo: SimpleFieldInfo[]): Partial => { + const schema = { + fields: fieldInfo + //.filter(d => usefulFields.includes(d.fieldName)) + .map(d => ({ + id: d.fieldName, + alias: d.fieldName, + description: d.description, + visible: true, + type: d.type, + role: d.role, + location: d.role as unknown as LOCATION + })) + }; + return schema; +}; + +export const getVizSchema: Transformer = ( + input, + context +) => { + const { fieldInfo } = context; + const vizSchema = getSchemaFromFieldInfo(fieldInfo) as VizSchema; + + return { vizSchema }; +}; diff --git a/packages/vmind/src/applications/chartGeneration/types.ts b/packages/vmind/src/applications/chartGeneration/types.ts new file mode 100644 index 00000000..ae662983 --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/types.ts @@ -0,0 +1,39 @@ +import { ChartType, SimpleFieldInfo, VMindDataset, VizSchema } from 'src/typings'; +import { ChartGenerationContext } from '../types'; + +export type Cell = { + //字段映射,可用的视觉通道:["x","y","color","size","angle","time"] + x?: string; + y?: string | string[]; + color?: string; + size?: string; + angle?: string; + radius?: string; + time?: string; + source?: string; + target?: string; + value?: string; + category?: string; +}; + +export type GetVizSchemaContext = ChartGenerationContext; + +export type GetVizSchemaOutput = { + vizSchema: VizSchema; +}; + +export type GenerateChartTypeContext = GetVizSchemaContext & GetVizSchemaOutput; + +export type GenerateChartTypeOutput = { + chartType: ChartType; +}; + +export type GenerateFieldMapContext = GenerateChartTypeOutput & GenerateChartTypeOutput; + +export type GenerateFieldMapOutput = { + cell: Cell; +}; + +export type GenerateChartAndFieldMapContext = GetVizSchemaContext & GetVizSchemaOutput; + +export type GenerateChartAndFieldMapOutput = GenerateFieldMapOutput & GenerateChartTypeOutput; diff --git a/packages/vmind/src/applications/dataAggregation/index.ts b/packages/vmind/src/applications/dataAggregation/index.ts index 8fe62162..a9da1689 100644 --- a/packages/vmind/src/applications/dataAggregation/index.ts +++ b/packages/vmind/src/applications/dataAggregation/index.ts @@ -1,9 +1,9 @@ import { ApplicationMeta } from 'src/base/metaTypes'; import ExecuteQueryTaskNodeMeta from './taskNodes/executeQuery'; -import GetSQLTaskNodeGPTMeta from './taskNodes/getQuerySQL/GPT'; +import GetSQLTaskNodeGPTMeta from './taskNodes/generateQuerySQL/GPT'; import { ModelType } from 'src/typings'; import { DataAggregationContext, DataAggregationOutput } from '../types'; -import GetSQLTaskNodeSkylarkMeta from './taskNodes/getQuerySQL/skylark'; +import GetSQLTaskNodeSkylarkMeta from './taskNodes/generateQuerySQL/skylark'; /** * data aggregation application in vmind diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts index 83ef759c..b6f8637e 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/index.ts @@ -1,4 +1,3 @@ -import { RuleBasedTaskNode } from 'src/base/taskNode/ruleBasedTaskNode'; import { executeDataQuery, getFinalQueryResult, patchSQLBeforeQuery, restoreDatasetAfterQuery } from './transformers'; import { ExecuteQueryContext, ExecuteQueryOutput } from '../../types'; import { RuleBasedTaskNodeMeta } from 'src/base/metaTypes'; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts index d76c4939..c3546fcc 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts @@ -11,7 +11,7 @@ import { swapMap } from './utils'; import alasql from 'alasql'; -import { VMIND_DATA_SOURCE } from '../getQuerySQL/GPT/prompt/template'; +import { VMIND_DATA_SOURCE } from '../generateQuerySQL/GPT/prompt/template'; /** * patch the errors in sql according to the feature of alasql: diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/utils.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/utils.ts index b2d7421a..77f23569 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/utils.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/utils.ts @@ -4,7 +4,7 @@ import dayjs from 'dayjs'; import { uniqArray } from '@visactor/vutils'; import alasql from 'alasql'; import { RESERVE_REPLACE_MAP, operators } from '../../../../common/dataProcess/constants'; -import { replaceAll } from 'src/common/utils'; +import { replaceAll } from 'src/common/utils/utils'; export const readTopNLine = (csvFile: string, n: number) => { // get top n lines of a csv file diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/GPT/index.ts similarity index 100% rename from packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/index.ts rename to packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/GPT/index.ts diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/prompt/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/GPT/prompt/index.ts similarity index 100% rename from packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/prompt/index.ts rename to packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/GPT/prompt/index.ts diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/prompt/template.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/GPT/prompt/template.ts similarity index 100% rename from packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/prompt/template.ts rename to packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/GPT/prompt/template.ts diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/GPT/utils.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/GPT/utils.ts new file mode 100644 index 00000000..b592c881 --- /dev/null +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/GPT/utils.ts @@ -0,0 +1,70 @@ +import JSON5 from 'json5'; +import { isArray, omit } from 'lodash'; +import { matchJSONStr } from 'src/common/utils/utils'; +import { ILLMOptions, LLMResponse, SimpleFieldInfo } from 'src/typings'; +import { Parser } from 'src/base/tools/parser'; +import { GetQuerySQLContext, GetQuerySQLOutput, SQL } from 'src/applications/dataAggregation/types'; +import { Requester } from 'src/base/tools/requester'; +import { parseGPTResponse, requestGPT } from 'src/common/utils/gpt'; + +type DataQueryResponse = { + sql: SQL; + fieldInfo: SimpleFieldInfo[]; //fieldInfo generated by LLM; It may has some new fields after sql query. + usage: any; + THOUGHT: string; +}; + +const parseGPTQueryResponse = (response: string) => { + const sql = response.match(/sql:\n?```(.*?)```/s)[1]; + const fieldInfoStr = response.match(/fieldInfo:\n?```(.*?)```/s)[1]; + let fieldInfo = []; + try { + const tempFieldInfo = JSON5.parse(fieldInfoStr); + if (isArray(tempFieldInfo)) { + fieldInfo = tempFieldInfo; + } else { + fieldInfo = tempFieldInfo.fieldInfo; + } + } catch (e) { + //fieldInfoStr is not a json string; try to wrap it with [] + fieldInfo = JSON5.parse(`[${fieldInfoStr}]`); + } + return { + sql, + llmFieldInfo: fieldInfo + }; +}; + +export const parseDataQueryResponse: Parser = (gptResponse: LLMResponse) => { + const dataQueryResponse: DataQueryResponse = parseGPTResponse(gptResponse); + const { sql, fieldInfo: responseFiledInfo } = dataQueryResponse; + if (!sql || !responseFiledInfo) { + //try to parse the response with another format + const choices = gptResponse.choices; + const content = choices[0].message.content; + return { + ...parseGPTQueryResponse(content), + usage: gptResponse.usage + }; + } + return { sql, llmFieldInfo: responseFiledInfo, usage: gptResponse.usage }; +}; + +const patchQueryInput = (userInput: string) => { + return userInput; +}; + +export const dataQueryRequestLLM: Requester = async ( + prompt: string, + context: GetQuerySQLContext +) => { + const { userInput, fieldInfo, llmOptions } = context; + const patchedInput = patchQueryInput(userInput); + + const queryDatasetMessage = `User's Command: ${patchedInput}\nColumn Information: ${JSON.stringify(fieldInfo)}`; + + const requestFunc = llmOptions.customRequestFunc?.dataQuery ?? requestGPT; + const QueryDatasetPrompt = prompt; + const dataProcessRes = await requestFunc(QueryDatasetPrompt, queryDatasetMessage, llmOptions); + return dataProcessRes; +}; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/skylark/index.ts similarity index 100% rename from packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/index.ts rename to packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/skylark/index.ts diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/prompt/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/skylark/prompt/index.ts similarity index 100% rename from packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/prompt/index.ts rename to packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/skylark/prompt/index.ts diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/prompt/template.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/skylark/prompt/template.ts similarity index 100% rename from packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/prompt/template.ts rename to packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/skylark/prompt/template.ts diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/utils.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/skylark/utils.ts similarity index 98% rename from packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/utils.ts rename to packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/skylark/utils.ts index dd91a2b9..f755c646 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/skylark/utils.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/skylark/utils.ts @@ -1,5 +1,5 @@ import { ILLMOptions, LLMResponse } from 'src/typings'; -import { matchJSONStr, replaceAll } from 'src/common/utils'; +import { matchJSONStr, replaceAll } from 'src/common/utils/utils'; import { GetQuerySQLContext } from 'src/applications/dataAggregation/types'; import { Requester } from 'src/base/tools/requester'; import axios from 'axios'; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts deleted file mode 100644 index f3a118c4..00000000 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/getQuerySQL/GPT/utils.ts +++ /dev/null @@ -1,156 +0,0 @@ -import axios from 'axios'; -import JSON5 from 'json5'; -import { isArray, omit } from 'lodash'; -import { matchJSONStr } from 'src/common/utils'; -import { ILLMOptions, LLMResponse, SimpleFieldInfo } from 'src/typings'; -import { Parser } from 'src/base/tools/parser'; -import { GetQuerySQLContext, GetQuerySQLOutput, SQL } from 'src/applications/dataAggregation/types'; -import { Requester } from 'src/base/tools/requester'; - -export const requestGPT = async ( - prompt: string, - userMessage: string, - options: ILLMOptions | undefined -): Promise => { - const OPENAI_API_URL = 'https://api.openai.com/v1/chat/completions'; - const url: string = options?.url ?? OPENAI_API_URL; - - const headers: any = { ...(options.headers ?? {}), 'Content-Type': 'application/json' }; - try { - const res = await axios(url, { - method: options?.method ?? 'POST', - headers, //must has Authorization: `Bearer ${openAIKey}` if use openai api - data: { - ...omit(options, ['headers', 'url', 'method', 'showThoughts', 'customRequestFunc']), - model: options?.model ?? 'gpt-3.5-turbo', - messages: [ - { - role: 'system', - content: prompt - }, - { - role: 'user', - content: userMessage - } - ], - max_tokens: options?.max_tokens ?? 2000, - temperature: options?.temperature ?? 0, - stream: false - //response_format: { type: 'json_object' } //Only models after gpt-3.5-turbo-1106 support this parameter. - } - }).then((response: any) => response.data); - - return res; - } catch (err: any) { - return err.response.data; - } -}; -export const parseGPTJson = (JsonStr: string, prefix?: string) => { - const parseNoPrefixStr = (str: string) => { - //尝试不带前缀的解析 - try { - return JSON5.parse(str); - } catch (err) { - return { - error: true - }; - } - }; - //解析GPT返回的JSON格式 - if (prefix) { - //被某些字符包裹 - const splitArr = JsonStr.split(prefix); - const splittedStr = splitArr[splitArr.length - 2]; - const res = parseNoPrefixStr(splittedStr); - if (!res.error) { - return res; - } - } - //没有被前缀包裹,或者解析被前缀包裹的json失败,尝试直接解析返回结果 - const res2 = parseNoPrefixStr(JsonStr); - return res2; -}; - -const parseGPTResponse = (GPTRes: LLMResponse) => { - try { - if (GPTRes.error) { - return { - error: true, - ...GPTRes.error - }; - } - const choices = GPTRes.choices; - const content = choices[0].message.content; - const jsonStr = matchJSONStr(content); - - const resJson = parseGPTJson(jsonStr, '```'); - return resJson; - } catch (err: any) { - return { - error: true, - message: err.message - }; - } -}; - -type DataQueryResponse = { - sql: SQL; - fieldInfo: SimpleFieldInfo[]; //fieldInfo generated by LLM; It may has some new fields after sql query. - usage: any; - THOUGHT: string; -}; - -const parseGPTQueryResponse = (response: string) => { - const sql = response.match(/sql:\n?```(.*?)```/s)[1]; - const fieldInfoStr = response.match(/fieldInfo:\n?```(.*?)```/s)[1]; - let fieldInfo = []; - try { - const tempFieldInfo = JSON5.parse(fieldInfoStr); - if (isArray(tempFieldInfo)) { - fieldInfo = tempFieldInfo; - } else { - fieldInfo = tempFieldInfo.fieldInfo; - } - } catch (e) { - //fieldInfoStr is not a json string; try to wrap it with [] - fieldInfo = JSON5.parse(`[${fieldInfoStr}]`); - } - return { - sql, - llmFieldInfo: fieldInfo - }; -}; - -export const parseDataQueryResponse: Parser = (gptResponse: LLMResponse) => { - const dataQueryResponse: DataQueryResponse = parseGPTResponse(gptResponse); - const { sql, fieldInfo: responseFiledInfo } = dataQueryResponse; - if (!sql || !responseFiledInfo) { - //try to parse the response with another format - const choices = gptResponse.choices; - const content = choices[0].message.content; - return { - ...parseGPTQueryResponse(content), - usage: gptResponse.usage - }; - } - return { sql, llmFieldInfo: responseFiledInfo, usage: gptResponse.usage }; -}; - -const patchQueryInput = (userInput: string) => { - return userInput; -}; - -export const dataQueryRequestLLM: Requester = async ( - prompt: string, - context: GetQuerySQLContext -) => { - const { userInput, fieldInfo, llmOptions } = context; - const patchedInput = patchQueryInput(userInput); - - const queryDatasetMessage = `User's Command: ${patchedInput}\nColumn Information: ${JSON.stringify(fieldInfo)}`; - - const requestFunc = llmOptions.customRequestFunc?.dataQuery ?? requestGPT; - const QueryDatasetPrompt = prompt; - const dataProcessRes = await requestFunc(QueryDatasetPrompt, queryDatasetMessage, llmOptions); - return dataProcessRes; -}; diff --git a/packages/vmind/src/applications/types.ts b/packages/vmind/src/applications/types.ts index afb6df2b..f2c3c579 100644 --- a/packages/vmind/src/applications/types.ts +++ b/packages/vmind/src/applications/types.ts @@ -1,4 +1,5 @@ -import { ILLMOptions, SimpleFieldInfo, VMindDataset } from 'src/typings'; +import { ChartType, ILLMOptions, SimpleFieldInfo, VMindDataset } from 'src/typings'; +import { Cell } from './chartGeneration/types'; //context of the DataAggregation Application export type DataAggregationContext = { @@ -15,8 +16,16 @@ export type DataAggregationOutput = { usage: any; //token usage of the LLM }; -export type ChartGenerationInput = { +export type ChartGenerationContext = { + llmOptions: ILLMOptions; userInput: string; fieldInfo: SimpleFieldInfo[]; dataset: VMindDataset; }; + +export type ChartGenerationOutput = { + chartType: ChartType; + cell: Cell; + spec: any; + usage: any; //token usage of the LLM +}; diff --git a/packages/vmind/src/base/tools/patcher/index.ts b/packages/vmind/src/base/tools/patcher/index.ts index aefffe6f..5e6a3f28 100644 --- a/packages/vmind/src/base/tools/patcher/index.ts +++ b/packages/vmind/src/base/tools/patcher/index.ts @@ -8,4 +8,4 @@ import { Transformer } from '../transformer'; * pass the specific pipelines during initialization */ -export type Patcher = Transformer, Context, DSL>[]; +export type Patcher = Transformer[]; diff --git a/packages/vmind/src/common/utils.ts b/packages/vmind/src/common/utils.ts deleted file mode 100644 index 1610c157..00000000 --- a/packages/vmind/src/common/utils.ts +++ /dev/null @@ -1,34 +0,0 @@ -export const calculateTokenUsage = (usageList: any[]) => { - const totalUsage = { - completion_tokens: 0, - prompt_tokens: 0, - total_tokens: 0 - }; - usageList.filter(Boolean).forEach(usage => { - totalUsage['completion_tokens'] += usage['completion_tokens'] ?? 0; - totalUsage['prompt_tokens'] += usage['prompt_tokens'] ?? 0; - totalUsage['total_tokens'] += usage['total_tokens'] ?? 0; - }); - return totalUsage; -}; - -export const replaceAll = (originStr: string, replaceStr: string, newStr: string) => { - return originStr.split(replaceStr).join(newStr); -}; - -export const execPipeline = ( - src: any, - pipes: ((src: any, context: PipelineContext) => any)[], - context: PipelineContext -) => - pipes.reduce((pre: any, pipe: (src: any, context: PipelineContext) => any) => { - const result = pipe(pre, context); - return result; - }, src); - -export const matchJSONStr = (str: string) => { - const first = str.indexOf('{'); - const last = str.lastIndexOf('}'); - const result = str.substring(first, last + 1); - return result && result.length > 0 ? result : str; -}; diff --git a/packages/vmind/src/common/utils/gpt.ts b/packages/vmind/src/common/utils/gpt.ts new file mode 100644 index 00000000..e4a8e2fd --- /dev/null +++ b/packages/vmind/src/common/utils/gpt.ts @@ -0,0 +1,92 @@ +import axios from 'axios'; +import JSON5 from 'json5'; +import { omit } from 'lodash'; +import { ILLMOptions, LLMResponse } from 'src/typings'; +import { matchJSONStr } from './utils'; + +export const requestGPT = async ( + prompt: string, + userMessage: string, + options: ILLMOptions | undefined +): Promise => { + const OPENAI_API_URL = 'https://api.openai.com/v1/chat/completions'; + const url: string = options?.url ?? OPENAI_API_URL; + + const headers: any = { ...(options.headers ?? {}), 'Content-Type': 'application/json' }; + try { + const res = await axios(url, { + method: options?.method ?? 'POST', + headers, //must has Authorization: `Bearer ${openAIKey}` if use openai api + data: { + ...omit(options, ['headers', 'url', 'method', 'showThoughts', 'customRequestFunc']), + model: options?.model ?? 'gpt-3.5-turbo', + messages: [ + { + role: 'system', + content: prompt + }, + { + role: 'user', + content: userMessage + } + ], + max_tokens: options?.max_tokens ?? 2000, + temperature: options?.temperature ?? 0, + stream: false + //response_format: { type: 'json_object' } //Only models after gpt-3.5-turbo-1106 support this parameter. + } + }).then((response: any) => response.data); + + return res; + } catch (err: any) { + return err.response.data; + } +}; + +const parseGPTJson = (JsonStr: string, prefix?: string) => { + const parseNoPrefixStr = (str: string) => { + //尝试不带前缀的解析 + try { + return JSON5.parse(str); + } catch (err) { + return { + error: true + }; + } + }; + //解析GPT返回的JSON格式 + if (prefix) { + //被某些字符包裹 + const splitArr = JsonStr.split(prefix); + const splittedStr = splitArr[splitArr.length - 2]; + const res = parseNoPrefixStr(splittedStr); + if (!res.error) { + return res; + } + } + //没有被前缀包裹,或者解析被前缀包裹的json失败,尝试直接解析返回结果 + const res2 = parseNoPrefixStr(JsonStr); + return res2; +}; + +export const parseGPTResponse = (GPTRes: LLMResponse) => { + try { + if (GPTRes.error) { + return { + error: true, + ...GPTRes.error + }; + } + const choices = GPTRes.choices; + const content = choices[0].message.content; + const jsonStr = matchJSONStr(content); + + const resJson = parseGPTJson(jsonStr, '```'); + return resJson; + } catch (err: any) { + return { + error: true, + message: err.message + }; + } +}; diff --git a/packages/vmind/src/common/utils/utils.ts b/packages/vmind/src/common/utils/utils.ts new file mode 100644 index 00000000..75bceb10 --- /dev/null +++ b/packages/vmind/src/common/utils/utils.ts @@ -0,0 +1,103 @@ +import { DataItem, DataType, ROLE, SimpleFieldInfo } from 'src/typings'; +import { FOLD_NAME, FOLD_VALUE, fold } from '@visactor/chart-advisor'; +import { DEFAULT_VIDEO_LENGTH, VIDEO_LENGTH_BY_CHART_TYPE } from '../vizDataToSpec/constants'; +import { Cell } from 'src/applications/chartGeneration/types'; + +export const detectAxesType = (values: any[], field: string) => { + const isNumber = values.every(d => !d[field] || !isNaN(Number(d[field]))); + if (isNumber) { + return 'linear'; + } else { + return 'band'; + } +}; + +export const CARTESIAN_CHART_LIST = [ + 'Dynamic Bar Chart', + 'Bar Chart', + 'Line Chart', + 'Scatter Plot', + 'Funnel Chart', + 'Dual Axis Chart', + 'Waterfall Chart', + 'Box Plot' +]; + +export const calculateTokenUsage = (usageList: any[]) => { + const totalUsage = { + completion_tokens: 0, + prompt_tokens: 0, + total_tokens: 0 + }; + usageList.filter(Boolean).forEach(usage => { + totalUsage['completion_tokens'] += usage['completion_tokens'] ?? 0; + totalUsage['prompt_tokens'] += usage['prompt_tokens'] ?? 0; + totalUsage['total_tokens'] += usage['total_tokens'] ?? 0; + }); + return totalUsage; +}; + +export const replaceAll = (originStr: string, replaceStr: string, newStr: string) => { + return originStr.split(replaceStr).join(newStr); +}; + +export const execPipeline = ( + src: any, + pipes: ((src: any, context: PipelineContext) => any)[], + context: PipelineContext +) => + pipes.reduce((pre: any, pipe: (src: any, context: PipelineContext) => any) => { + const result = pipe(pre, context); + return result; + }, src); + +export const matchJSONStr = (str: string) => { + const first = str.indexOf('{'); + const last = str.lastIndexOf('}'); + const result = str.substring(first, last + 1); + return result && result.length > 0 ? result : str; +}; + +export const estimateVideoTime = (chartType: string, spec: any, parsedTime?: number) => { + //估算视频长度 + if (chartType === 'DYNAMIC BAR CHART') { + const frameNumber = spec.player.specs.length; + const duration = spec.player.interval; + return { + totalTime: parsedTime ?? frameNumber * duration, + frameArr: parsedTime + ? Array.from(new Array(frameNumber).keys()).map(n => Number(parsedTime / frameNumber)) + : Array.from(new Array(frameNumber).keys()).map(n => duration) + }; + } + + // chartType不是真实的图表类型,转一次 + const map: Record = { + 'PIE CHART': 'pie', + 'WORD CLOUD': 'wordCloud' + }; + return { + totalTime: parsedTime ?? VIDEO_LENGTH_BY_CHART_TYPE[map[chartType]] ?? DEFAULT_VIDEO_LENGTH, + frameArr: [] + }; +}; + +export const getRemainedFields = (cell: Cell, fieldInfo: SimpleFieldInfo[]) => { + const usedFields = Object.values(cell).flat(); + const remainedFields = fieldInfo.filter(f => !usedFields.includes(f.fieldName)); + return remainedFields; +}; + +export const getFieldByRole = (fields: SimpleFieldInfo[], role: ROLE) => { + return fields.find(f => f.role === role); +}; + +export const getFieldByDataType = (fields: SimpleFieldInfo[], dataTypeList: DataType[]) => { + return fields.find(f => dataTypeList.includes(f.type)); +}; + +export const foldDatasetByYField = (dataset: DataItem[], yFieldList: string[], fieldInfo: SimpleFieldInfo[]) => { + const aliasMap = Object.fromEntries(fieldInfo.map(d => [d.fieldName, d.fieldName])); + + return fold(dataset as any, yFieldList, FOLD_NAME, FOLD_VALUE, aliasMap, false); +}; diff --git a/packages/vmind/src/common/vizDataToSpec/utils.ts b/packages/vmind/src/common/vizDataToSpec/utils.ts index ab961d1e..e69de29b 100644 --- a/packages/vmind/src/common/vizDataToSpec/utils.ts +++ b/packages/vmind/src/common/vizDataToSpec/utils.ts @@ -1,67 +0,0 @@ -import { Cell, DataItem, DataType, ROLE, SimpleFieldInfo } from 'src/typings'; -import { VIDEO_LENGTH_BY_CHART_TYPE, DEFAULT_VIDEO_LENGTH } from './constants'; -import { FOLD_NAME, FOLD_VALUE, fold } from '@visactor/chart-advisor'; - -export const detectAxesType = (values: any[], field: string) => { - const isNumber = values.every(d => !d[field] || !isNaN(Number(d[field]))); - if (isNumber) { - return 'linear'; - } else { - return 'band'; - } -}; - -export const CARTESIAN_CHART_LIST = [ - 'Dynamic Bar Chart', - 'Bar Chart', - 'Line Chart', - 'Scatter Plot', - 'Funnel Chart', - 'Dual Axis Chart', - 'Waterfall Chart', - 'Box Plot' -]; - -export const estimateVideoTime = (chartType: string, spec: any, parsedTime?: number) => { - //估算视频长度 - if (chartType === 'DYNAMIC BAR CHART') { - const frameNumber = spec.player.specs.length; - const duration = spec.player.interval; - return { - totalTime: parsedTime ?? frameNumber * duration, - frameArr: parsedTime - ? Array.from(new Array(frameNumber).keys()).map(n => Number(parsedTime / frameNumber)) - : Array.from(new Array(frameNumber).keys()).map(n => duration) - }; - } - - // chartType不是真实的图表类型,转一次 - const map: Record = { - 'PIE CHART': 'pie', - 'WORD CLOUD': 'wordCloud' - }; - return { - totalTime: parsedTime ?? VIDEO_LENGTH_BY_CHART_TYPE[map[chartType]] ?? DEFAULT_VIDEO_LENGTH, - frameArr: [] - }; -}; - -export const getRemainedFields = (cell: Cell, fieldInfo: SimpleFieldInfo[]) => { - const usedFields = Object.values(cell).flat(); - const remainedFields = fieldInfo.filter(f => !usedFields.includes(f.fieldName)); - return remainedFields; -}; - -export const getFieldByRole = (fields: SimpleFieldInfo[], role: ROLE) => { - return fields.find(f => f.role === role); -}; - -export const getFieldByDataType = (fields: SimpleFieldInfo[], dataTypeList: DataType[]) => { - return fields.find(f => dataTypeList.includes(f.type)); -}; - -export const foldDatasetByYField = (dataset: DataItem[], yFieldList: string[], fieldInfo: SimpleFieldInfo[]) => { - const aliasMap = Object.fromEntries(fieldInfo.map(d => [d.fieldName, d.fieldName])); - - return fold(dataset as any, yFieldList, FOLD_NAME, FOLD_VALUE, aliasMap, false); -}; diff --git a/packages/vmind/src/common/vizDataToSpec/vizDataToSpec.ts b/packages/vmind/src/common/vizDataToSpec/vizDataToSpec.ts index f71a25aa..940aca36 100644 --- a/packages/vmind/src/common/vizDataToSpec/vizDataToSpec.ts +++ b/packages/vmind/src/common/vizDataToSpec/vizDataToSpec.ts @@ -47,7 +47,7 @@ import { } from './pipes'; import { Cell, ChartType, Context, SimpleFieldInfo } from '../../typings'; import { isArray } from 'lodash'; -import { execPipeline } from '../utils'; +import { execPipeline } from '../utils/utils'; export const vizDataToSpec = ( dataset: any[], diff --git a/packages/vmind/src/typings/index.ts b/packages/vmind/src/typings/index.ts index 84884943..1f0cea52 100644 --- a/packages/vmind/src/typings/index.ts +++ b/packages/vmind/src/typings/index.ts @@ -50,20 +50,6 @@ export type SimpleFieldInfo = { domain?: (string | number)[]; }; -export type Cell = { - //字段映射,可用的视觉通道:["x","y","color","size","angle","time"] - x?: string; - y?: string | string[]; - color?: string; - size?: string; - angle?: string; - radius?: string; - time?: string; - source?: string; - target?: string; - value?: string; - category?: string; -}; export type ChartType = string; export type GPTChartAdvisorResult = { CHART_TYPE: ChartType; From 4a3914d3c55abd4f7515aff600d78e01aa7c3cb6 Mon Sep 17 00:00:00 2001 From: da730 Date: Thu, 11 Apr 2024 21:10:37 +0800 Subject: [PATCH 23/62] feat: chart generation application with gpt --- .../generateTypeAndFieldMap/GPT/index.ts | 7 +- .../GPT/patcher/index.ts | 85 ++++++++++++------- .../src/applications/chartGeneration/types.ts | 2 +- .../taskNodes/generateQuerySQL/GPT/index.ts | 2 +- .../generateQuerySQL/skylark/index.ts | 2 +- .../src/base/taskNode/llmBasedTaskNode.ts | 12 +-- .../vmind/src/base/tools/patcher/index.ts | 2 +- 7 files changed, 70 insertions(+), 42 deletions(-) diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/index.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/index.ts index b4c713c5..f3cf9596 100644 --- a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/index.ts +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/index.ts @@ -20,7 +20,10 @@ import { patchYField } from './patcher'; -const GetSQLTaskNodeGPTMeta: LLMBasedTaskNodeMeta = { +const ChartGenerationTaskNodeGPTMeta: LLMBasedTaskNodeMeta< + GenerateChartAndFieldMapContext, + GenerateChartAndFieldMapOutput +> = { type: TaskNodeType.LLM_BASED, modelType: ModelType.GPT, parser: parseChartGenerationResponse, @@ -42,4 +45,4 @@ const GetSQLTaskNodeGPTMeta: LLMBasedTaskNodeMeta = (input: GenerateChartAndFieldMapOutput, _context: GenerateChartAndFieldMapContext) => { +> = ( + input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, + context: GenerateChartAndFieldMapContext +) => { const { cell } = input; const cellNew: any = { ...cell }; @@ -39,10 +42,13 @@ export const patchAxisField: Transformer< }; export const patchColorField: Transformer< - GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = (input: GenerateChartAndFieldMapOutput, _context: GenerateChartAndFieldMapContext) => { +> = ( + input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, + context: GenerateChartAndFieldMapContext +) => { const { cell } = input; const cellNew = { ...cell, color: cell.color ?? cell.category }; @@ -53,10 +59,13 @@ export const patchColorField: Transformer< }; export const patchLabelField: Transformer< - GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = (input: GenerateChartAndFieldMapOutput, _context: GenerateChartAndFieldMapContext) => { +> = ( + input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, + context: GenerateChartAndFieldMapContext +) => { const { cell } = input; const cellNew: any = { ...cell }; @@ -72,12 +81,14 @@ export const patchLabelField: Transformer< }; export const patchYField: Transformer< - GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = (input: GenerateChartAndFieldMapOutput, context: GenerateChartAndFieldMapContext) => { - const { chartType, cell } = input; - const { dataset, fieldInfo } = context; +> = ( + input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, + context: GenerateChartAndFieldMapContext +) => { + const { chartType, cell, dataset, fieldInfo } = input; let cellNew = { ...cell }; const { x, y } = cellNew; let chartTypeNew = chartType; @@ -120,10 +131,13 @@ export const patchYField: Transformer< }; export const patchBoxPlot: Transformer< - GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = (input: GenerateChartAndFieldMapOutput, _context: GenerateChartAndFieldMapContext) => { +> = ( + input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, + context: GenerateChartAndFieldMapContext +) => { const { chartType, cell } = input; const cellNew = { ...cell @@ -185,10 +199,13 @@ export const patchBoxPlot: Transformer< }; export const patchDualAxis: Transformer< - GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = (input: GenerateChartAndFieldMapOutput, _context: GenerateChartAndFieldMapContext) => { +> = ( + input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, + context: GenerateChartAndFieldMapContext +) => { const { chartType, cell } = input; const cellNew: any = { ...cell }; //Dual-axis drawing yLeft and yRight @@ -201,12 +218,14 @@ export const patchDualAxis: Transformer< }; export const patchPieChart: Transformer< - GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = (input: GenerateChartAndFieldMapOutput, context: GenerateChartAndFieldMapContext) => { - const { chartType, cell } = input; - const { fieldInfo } = context; +> = ( + input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, + context: GenerateChartAndFieldMapContext +) => { + const { chartType, cell, fieldInfo } = input; const cellNew = { ...cell }; if (chartType === 'ROSE CHART') { @@ -242,13 +261,15 @@ export const patchPieChart: Transformer< }; export const patchWordCloud: Transformer< - GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = (input: GenerateChartAndFieldMapOutput, context: GenerateChartAndFieldMapContext) => { +> = ( + input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, + context: GenerateChartAndFieldMapContext +) => { //Word cloud must have color fields and size fields - const { chartType, cell } = input; - const { fieldInfo } = context; + const { chartType, cell, fieldInfo } = input; const cellNew = { ...cell }; if (chartType === 'WORD CLOUD') { @@ -287,12 +308,14 @@ export const patchWordCloud: Transformer< }; export const patchDynamicBarChart: Transformer< - GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = (input: GenerateChartAndFieldMapOutput, context: GenerateChartAndFieldMapContext) => { - const { chartType, cell } = input; - const { fieldInfo } = context; +> = ( + input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, + context: GenerateChartAndFieldMapContext +) => { + const { chartType, cell, fieldInfo } = input; const cellNew = { ...cell }; if (chartType === 'DYNAMIC BAR CHART') { @@ -313,12 +336,14 @@ export const patchDynamicBarChart: Transformer< }; export const patchCartesianXField: Transformer< - GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = (input: GenerateChartAndFieldMapOutput, context: GenerateChartAndFieldMapContext) => { - const { chartType, cell } = input; - const { fieldInfo } = context; +> = ( + input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, + context: GenerateChartAndFieldMapContext +) => { + const { chartType, cell, fieldInfo } = input; const cellNew = { ...cell }; //Cartesian chart must have X field diff --git a/packages/vmind/src/applications/chartGeneration/types.ts b/packages/vmind/src/applications/chartGeneration/types.ts index ae662983..ed825d7b 100644 --- a/packages/vmind/src/applications/chartGeneration/types.ts +++ b/packages/vmind/src/applications/chartGeneration/types.ts @@ -28,7 +28,7 @@ export type GenerateChartTypeOutput = { chartType: ChartType; }; -export type GenerateFieldMapContext = GenerateChartTypeOutput & GenerateChartTypeOutput; +export type GenerateFieldMapContext = GenerateChartTypeContext & GenerateChartTypeOutput; export type GenerateFieldMapOutput = { cell: Cell; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/GPT/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/GPT/index.ts index b7ef4eba..857dbd98 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/GPT/index.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/GPT/index.ts @@ -9,7 +9,7 @@ const GetSQLTaskNodeGPTMeta: LLMBasedTaskNodeMeta, context: GetQuerySQLContext) => input as GetQuerySQLOutput], + patcher: [(input: GetQuerySQLContext) => input as unknown as GetQuerySQLOutput], requester: dataQueryRequestLLM, prompt: new GPTDataAggregationPrompt() }; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/skylark/index.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/skylark/index.ts index 97e145cd..4d885bf1 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/skylark/index.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/generateQuerySQL/skylark/index.ts @@ -9,7 +9,7 @@ const GetSQLTaskNodeSkylarkMeta: LLMBasedTaskNodeMeta, context: GetQuerySQLContext) => input as GetQuerySQLOutput], + patcher: [(input: GetQuerySQLContext) => input as unknown as GetQuerySQLOutput], requester: dataQueryRequestLLM, prompt: new SkylarkDataAggregationPrompt() }; diff --git a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts index ad62ba3f..118d8bd5 100644 --- a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts @@ -59,19 +59,19 @@ export default class LLMBasedTaskNode, context: Context): DSL { + patchLLMResponse(input: Context & DSL): DSL { const result = this.patcher.reduce((pre, pipeline) => { - const res = pipeline(pre, context); - return res; - }, input) as DSL; + const res = pipeline(pre, this.context); + return res as Context & DSL; + }, input); return result; } async executeTask(context: Context) { this.updateContext(context); const llmResponse = await this.requestLLM(context); - const parsedResponse = this.parseLLMResponse(llmResponse); - const patchedResponse = this.patchLLMResponse(parsedResponse, context); + const parsedResponse = this.parseLLMResponse(llmResponse) as DSL; + const patchedResponse = this.patchLLMResponse({ ...context, ...parsedResponse }); return patchedResponse; } } diff --git a/packages/vmind/src/base/tools/patcher/index.ts b/packages/vmind/src/base/tools/patcher/index.ts index 5e6a3f28..38b9f93a 100644 --- a/packages/vmind/src/base/tools/patcher/index.ts +++ b/packages/vmind/src/base/tools/patcher/index.ts @@ -8,4 +8,4 @@ import { Transformer } from '../transformer'; * pass the specific pipelines during initialization */ -export type Patcher = Transformer[]; +export type Patcher = Transformer[]; From 84d3f01c3f6e945227d1f10d83620087e9f309f8 Mon Sep 17 00:00:00 2001 From: da730 Date: Mon, 15 Apr 2024 10:56:34 +0800 Subject: [PATCH 24/62] feat: remove input in transformer --- .../GPT/patcher/index.ts | 102 ++++++------------ .../taskNodes/getVizSchema/utils.ts | 5 +- .../taskNodes/executeQuery/transformers.ts | 38 +++---- packages/vmind/src/base/application/index.ts | 2 +- packages/vmind/src/base/metaTypes.ts | 2 +- .../vmind/src/base/taskNode/baseTaskNode.ts | 2 +- .../src/base/taskNode/llmBasedTaskNode.ts | 4 +- .../src/base/taskNode/ruleBasedTaskNode.ts | 17 +-- .../vmind/src/base/tools/patcher/index.ts | 2 +- .../vmind/src/base/tools/transformer/index.ts | 4 +- 10 files changed, 64 insertions(+), 114 deletions(-) diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/patcher/index.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/patcher/index.ts index a535728e..3ec5ccfd 100644 --- a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/patcher/index.ts +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/patcher/index.ts @@ -16,13 +16,9 @@ import { DataType, ROLE } from 'src/typings'; export const patchAxisField: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = ( - input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - context: GenerateChartAndFieldMapContext -) => { - const { cell } = input; +> = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { + const { cell } = context; const cellNew: any = { ...cell }; @@ -36,37 +32,29 @@ export const patchAxisField: Transformer< } return { - ...input, + ...context, cell: cellNew }; }; export const patchColorField: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = ( - input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - context: GenerateChartAndFieldMapContext -) => { - const { cell } = input; +> = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { + const { cell } = context; const cellNew = { ...cell, color: cell.color ?? cell.category }; return { - ...input, + ...context, cell: cellNew }; }; export const patchLabelField: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = ( - input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - context: GenerateChartAndFieldMapContext -) => { - const { cell } = input; +> = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { + const { cell } = context; const cellNew: any = { ...cell }; //patch the "label" fields to color @@ -75,20 +63,16 @@ export const patchLabelField: Transformer< } return { - ...input, + ...context, cell: cellNew }; }; export const patchYField: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = ( - input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - context: GenerateChartAndFieldMapContext -) => { - const { chartType, cell, dataset, fieldInfo } = input; +> = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { + const { chartType, cell, dataset, fieldInfo } = context; let cellNew = { ...cell }; const { x, y } = cellNew; let chartTypeNew = chartType; @@ -102,7 +86,7 @@ export const patchYField: Transformer< if (y && isArray(y) && y.length > 1) { if (chartTypeNew === 'BOX PLOT' || (chartTypeNew === 'DUAL AXIS CHART' && y.length === 2)) { return { - ...input + ...context }; } @@ -123,7 +107,7 @@ export const patchYField: Transformer< } return { - ...input, + ...context, chartType: chartTypeNew, cell: cellNew, dataset: datasetNew @@ -132,13 +116,9 @@ export const patchYField: Transformer< export const patchBoxPlot: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = ( - input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - context: GenerateChartAndFieldMapContext -) => { - const { chartType, cell } = input; +> = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { + const { chartType, cell } = context; const cellNew = { ...cell }; @@ -195,18 +175,14 @@ export const patchBoxPlot: Transformer< } } - return { ...input, cell: cellNew }; + return { ...context, cell: cellNew }; }; export const patchDualAxis: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = ( - input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - context: GenerateChartAndFieldMapContext -) => { - const { chartType, cell } = input; +> = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { + const { chartType, cell } = context; const cellNew: any = { ...cell }; //Dual-axis drawing yLeft and yRight @@ -214,18 +190,14 @@ export const patchDualAxis: Transformer< cellNew.y = [cellNew.yLeft, cellNew.yRight]; } - return { ...input, cell: cellNew }; + return { ...context, cell: cellNew }; }; export const patchPieChart: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = ( - input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - context: GenerateChartAndFieldMapContext -) => { - const { chartType, cell, fieldInfo } = input; +> = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { + const { chartType, cell, fieldInfo } = context; const cellNew = { ...cell }; if (chartType === 'ROSE CHART') { @@ -257,19 +229,15 @@ export const patchPieChart: Transformer< } } } - return { ...input, cell: cellNew }; + return { ...context, cell: cellNew }; }; export const patchWordCloud: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = ( - input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - context: GenerateChartAndFieldMapContext -) => { +> = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { //Word cloud must have color fields and size fields - const { chartType, cell, fieldInfo } = input; + const { chartType, cell, fieldInfo } = context; const cellNew = { ...cell }; if (chartType === 'WORD CLOUD') { @@ -304,18 +272,14 @@ export const patchWordCloud: Transformer< } } } - return { ...input, cell: cellNew }; + return { ...context, cell: cellNew }; }; export const patchDynamicBarChart: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = ( - input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - context: GenerateChartAndFieldMapContext -) => { - const { chartType, cell, fieldInfo } = input; +> = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { + const { chartType, cell, fieldInfo } = context; const cellNew = { ...cell }; if (chartType === 'DYNAMIC BAR CHART') { @@ -332,18 +296,14 @@ export const patchDynamicBarChart: Transformer< } } - return { ...input, cell: cellNew }; + return { ...context, cell: cellNew }; }; export const patchCartesianXField: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput -> = ( - input: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - context: GenerateChartAndFieldMapContext -) => { - const { chartType, cell, fieldInfo } = input; +> = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { + const { chartType, cell, fieldInfo } = context; const cellNew = { ...cell }; //Cartesian chart must have X field @@ -359,5 +319,5 @@ export const patchCartesianXField: Transformer< } } } - return { ...input, cell: cellNew }; + return { ...context, cell: cellNew }; }; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/utils.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/utils.ts index 5e537427..88d000b0 100644 --- a/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/utils.ts +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/utils.ts @@ -24,10 +24,7 @@ const getSchemaFromFieldInfo = (fieldInfo: SimpleFieldInfo[]): Partial = ( - input, - context -) => { +export const getVizSchema: Transformer = context => { const { fieldInfo } = context; const vizSchema = getSchemaFromFieldInfo(fieldInfo) as VizSchema; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts index c3546fcc..51e6b953 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts @@ -26,13 +26,9 @@ type PatchSQLResult = { validDataset: VMindDataset; columnReplaceMap: Map; sqlReplaceMap: Map; -}; -export const patchSQLBeforeQuery: Transformer = ( - input, - context: ExecuteQueryContext -) => { - const { sql } = input; - const { sourceDataset } = context; +} & ExecuteQueryContext; +export const patchSQLBeforeQuery: Transformer = (context: ExecuteQueryContext) => { + const { sql, sourceDataset } = context; const { fieldInfo } = context; const fieldNames = fieldInfo.map((field: SimpleFieldInfo) => field.fieldName); const { validStr, sqlReplaceMap, columnReplaceMap } = replaceInvalidWords(sql, fieldNames); @@ -53,6 +49,7 @@ export const patchSQLBeforeQuery: Transformer = ( - input: PatchSQLResult, - context: ExecuteQueryContext -) => { - const { finalSql, validDataset } = input; +export const executeDataQuery: Transformer = (context: PatchSQLResult) => { + const { finalSql, validDataset } = context; //replace VMIND_DATA_SOURCE with placeholder "?" const sqlParts = (finalSql + ' ').split(VMIND_DATA_SOURCE); const sqlCount = sqlParts.length - 1; @@ -80,7 +74,7 @@ export const executeDataQuery: Transformer = ( - input: QueryResult, - context: ExecuteQueryContext -) => { - const { columnReplaceMap, sqlReplaceMap, alasqlDataset } = input; +export const restoreDatasetAfterQuery: Transformer = (context: QueryResult) => { + const { columnReplaceMap, sqlReplaceMap, alasqlDataset } = context; //restore the dataset const columnReversedMap = swapMap(columnReplaceMap); const columnRestoredDataset = replaceDataset(alasqlDataset, columnReversedMap, true); @@ -106,23 +97,20 @@ export const restoreDatasetAfterQuery: Transformer = ( - input: RestoreResult, - context: ExecuteQueryContext -) => { - const { sourceDataset, fieldInfo, usage, llmFieldInfo: responseFieldInfo } = context; - const { datasetAfterQuery } = input; +export const getFinalQueryResult: Transformer = (context: RestoreResult) => { + const { sourceDataset, fieldInfo, usage, llmFieldInfo: responseFieldInfo, datasetAfterQuery } = context; const fieldInfoNew = parseRespondField(responseFieldInfo, datasetAfterQuery); if (datasetAfterQuery.length === 0) { console.warn('empty dataset after query!'); } return { + ...context, dataset: datasetAfterQuery.length === 0 ? sourceDataset : datasetAfterQuery, fieldInfo: datasetAfterQuery.length === 0 ? fieldInfo : fieldInfoNew, usage diff --git a/packages/vmind/src/base/application/index.ts b/packages/vmind/src/base/application/index.ts index f31a2fe7..8dd00312 100644 --- a/packages/vmind/src/base/application/index.ts +++ b/packages/vmind/src/base/application/index.ts @@ -57,7 +57,7 @@ export class BaseApplication implements IApplication * @returns DSL */ async runTasks(context: Context) { - this.updateContext(context); + this.updateContext({ ...this.context, ...context }); const handler = async (pre: any, curTask: { name: string; task: BaseTaskNode }) => { console.log(curTask.name); diff --git a/packages/vmind/src/base/metaTypes.ts b/packages/vmind/src/base/metaTypes.ts index 145784c6..60f74b17 100644 --- a/packages/vmind/src/base/metaTypes.ts +++ b/packages/vmind/src/base/metaTypes.ts @@ -23,7 +23,7 @@ export type LLMBasedTaskNodeMeta = { */ export type RuleBasedTaskNodeMeta = { type: TaskNodeType.RULE_BASED; - pipelines: Transformer[]; + pipelines: Transformer[]; }; export type TaskNodeMeta = diff --git a/packages/vmind/src/base/taskNode/baseTaskNode.ts b/packages/vmind/src/base/taskNode/baseTaskNode.ts index 02680286..d303dec4 100644 --- a/packages/vmind/src/base/taskNode/baseTaskNode.ts +++ b/packages/vmind/src/base/taskNode/baseTaskNode.ts @@ -11,7 +11,7 @@ export class BaseTaskNode implements ITaskNode context: Context; type: TaskNodeType; executeTask(context: Context): Promise | Result { - this.updateContext(context); + this.updateContext({ ...this.context, ...context }); return null as Result; } updateContext(context: Context) { diff --git a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts index 118d8bd5..ab263dda 100644 --- a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts @@ -61,14 +61,14 @@ export default class LLMBasedTaskNode { - const res = pipeline(pre, this.context); + const res = pipeline(pre); return res as Context & DSL; }, input); return result; } async executeTask(context: Context) { - this.updateContext(context); + this.updateContext({ ...this.context, ...context }); const llmResponse = await this.requestLLM(context); const parsedResponse = this.parseLLMResponse(llmResponse) as DSL; const patchedResponse = this.patchLLMResponse({ ...context, ...parsedResponse }); diff --git a/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts b/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts index 5aa5393a..9abe29d9 100644 --- a/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts @@ -7,21 +7,26 @@ import { TaskNodeType } from './types'; * It completes the transformation from Input to a specific data structure (DSL) */ export class RuleBasedTaskNode extends BaseTaskNode { - pipelines: Transformer[]; - constructor(pipelines: Transformer[]) { + pipelines: Transformer[]; + constructor(pipelines: Transformer[]) { super(); this.type = TaskNodeType.RULE_BASED; this.registerPipelines(pipelines); } - registerPipelines(pipelines: Transformer[]) { + registerPipelines(pipelines: Transformer[]) { this.pipelines = pipelines; } + /** + * run the tasks using current context + * @param context initial context + * @returns + */ executeTask(context: Context): Result { - this.updateContext(context); - const result: Result = this.pipelines.reduce((pre: any, transformer: Transformer) => { - const res = transformer(pre, context); + this.updateContext({ ...this.context, ...context }); + const result: Result = this.pipelines.reduce((pre: any, transformer: Transformer) => { + const res = transformer(pre); return res; }, context); return result; diff --git a/packages/vmind/src/base/tools/patcher/index.ts b/packages/vmind/src/base/tools/patcher/index.ts index 38b9f93a..e218c0d9 100644 --- a/packages/vmind/src/base/tools/patcher/index.ts +++ b/packages/vmind/src/base/tools/patcher/index.ts @@ -8,4 +8,4 @@ import { Transformer } from '../transformer'; * pass the specific pipelines during initialization */ -export type Patcher = Transformer[]; +export type Patcher = Transformer[]; diff --git a/packages/vmind/src/base/tools/transformer/index.ts b/packages/vmind/src/base/tools/transformer/index.ts index ba462bb9..cd152402 100644 --- a/packages/vmind/src/base/tools/transformer/index.ts +++ b/packages/vmind/src/base/tools/transformer/index.ts @@ -1,6 +1,6 @@ /** * Transformer is to finish the data conversion work - * * Convert the INPUT type to DSL type based on the Context + * * Generate a Result based on the Context * The subclass needs to rewrite the transform method to complete the specific data conversion */ -export type Transformer = (input: Input, context: Context) => Result; +export type Transformer = (context: Context) => Result; From 04fadc4a8d503f9dfbdb81a04de76fd82d553e9e Mon Sep 17 00:00:00 2001 From: da730 Date: Mon, 15 Apr 2024 14:23:28 +0800 Subject: [PATCH 25/62] feat: add error message --- .../GPT/patcher/index.ts | 19 +++--- .../taskNodes/executeQuery/transformers.ts | 2 +- packages/vmind/src/base/application/index.ts | 4 +- packages/vmind/src/base/metaTypes.ts | 2 +- .../vmind/src/base/taskNode/baseTaskNode.ts | 7 ++- .../src/base/taskNode/llmBasedTaskNode.ts | 58 ++++++++++++++----- .../src/base/taskNode/ruleBasedTaskNode.ts | 26 ++++++--- packages/vmind/src/base/taskNode/types.ts | 5 +- packages/vmind/src/common/constants.ts | 0 packages/vmind/src/common/utils/utils.ts | 21 ++++--- packages/vmind/src/typings/index.ts | 3 + 11 files changed, 100 insertions(+), 47 deletions(-) create mode 100644 packages/vmind/src/common/constants.ts diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/patcher/index.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/patcher/index.ts index 3ec5ccfd..530f00d8 100644 --- a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/patcher/index.ts +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/patcher/index.ts @@ -5,15 +5,20 @@ import { GenerateChartAndFieldMapOutput } from 'src/applications/chartGeneration/types'; import { Transformer } from 'src/base/tools/transformer'; -import { - CARTESIAN_CHART_LIST, - foldDatasetByYField, - getFieldByDataType, - getFieldByRole, - getRemainedFields -} from 'src/common/utils/utils'; +import { foldDatasetByYField, getFieldByDataType, getFieldByRole, getRemainedFields } from 'src/common/utils/utils'; import { DataType, ROLE } from 'src/typings'; +const CARTESIAN_CHART_LIST = [ + 'Dynamic Bar Chart', + 'Bar Chart', + 'Line Chart', + 'Scatter Plot', + 'Funnel Chart', + 'Dual Axis Chart', + 'Waterfall Chart', + 'Box Plot' +]; + export const patchAxisField: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, GenerateChartAndFieldMapOutput diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts index 51e6b953..64e26f00 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts @@ -110,7 +110,7 @@ export const getFinalQueryResult: Transformer } return { - ...context, + //...context, dataset: datasetAfterQuery.length === 0 ? sourceDataset : datasetAfterQuery, fieldInfo: datasetAfterQuery.length === 0 ? fieldInfo : fieldInfoNew, usage diff --git a/packages/vmind/src/base/application/index.ts b/packages/vmind/src/base/application/index.ts index 8dd00312..94b32bfc 100644 --- a/packages/vmind/src/base/application/index.ts +++ b/packages/vmind/src/base/application/index.ts @@ -36,13 +36,13 @@ export class BaseApplication implements IApplication const { modelType, parser, patcher, prompt, requester } = taskNode as LLMBasedTaskNodeMeta; return { name, - task: new LLMBasedTaskNode({ modelType, parser, patcher, prompt, requester }) + task: new LLMBasedTaskNode(name, { modelType, parser, patcher, prompt, requester }) }; } else if (type === TaskNodeType.RULE_BASED) { const { pipelines } = taskNode as RuleBasedTaskNodeMeta; return { name, - task: new RuleBasedTaskNode(pipelines) + task: new RuleBasedTaskNode(name, pipelines) }; } return {} as { task: BaseTaskNode; name: string }; diff --git a/packages/vmind/src/base/metaTypes.ts b/packages/vmind/src/base/metaTypes.ts index 60f74b17..3d8979c3 100644 --- a/packages/vmind/src/base/metaTypes.ts +++ b/packages/vmind/src/base/metaTypes.ts @@ -23,7 +23,7 @@ export type LLMBasedTaskNodeMeta = { */ export type RuleBasedTaskNodeMeta = { type: TaskNodeType.RULE_BASED; - pipelines: Transformer[]; + pipelines: Transformer[]; }; export type TaskNodeMeta = diff --git a/packages/vmind/src/base/taskNode/baseTaskNode.ts b/packages/vmind/src/base/taskNode/baseTaskNode.ts index d303dec4..de077d36 100644 --- a/packages/vmind/src/base/taskNode/baseTaskNode.ts +++ b/packages/vmind/src/base/taskNode/baseTaskNode.ts @@ -1,3 +1,4 @@ +import { TaskError } from 'src/typings'; import { ITaskNode, TaskNodeType } from './types'; /** @@ -8,9 +9,13 @@ import { ITaskNode, TaskNodeType } from './types'; * It can be described using TaskNodeMeta (see packages/vmind/src/base/metaTypes.ts) */ export class BaseTaskNode implements ITaskNode { + name: string; context: Context; type: TaskNodeType; - executeTask(context: Context): Promise | Result { + constructor(name: string) { + this.name = name; + } + executeTask(context: Context): Promise | TaskError | Result { this.updateContext({ ...this.context, ...context }); return null as Result; } diff --git a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts index ab263dda..c3692504 100644 --- a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts @@ -3,9 +3,10 @@ import { BaseTaskNode } from './baseTaskNode'; import { Parser } from 'src/base/tools/parser'; import { Patcher } from 'src/base/tools/patcher'; import { ChatManager } from 'src/base/tools/chatManager'; -import { ILLMOptions, ModelType, RequestFunc } from 'src/typings'; +import { ILLMOptions, ModelType, TaskError } from 'src/typings'; import { TaskNodeType } from './types'; import { Requester } from '../tools/requester'; +import { getObjectProperties } from 'src/common/utils/utils'; export interface ILLMTaskNode { modelType: ModelType; @@ -38,8 +39,8 @@ export default class LLMBasedTaskNode; modelType: ModelType; - constructor(options: LLMTaskNodeOptions) { - super(); + constructor(name: string, options: LLMTaskNodeOptions) { + super(name); this.type = TaskNodeType.LLM_BASED; this.chatManager = new ChatManager(); const { parser, patcher, requester, prompt, modelType } = options; @@ -50,24 +51,51 @@ export default class LLMBasedTaskNode { - const prompt = this.prompt.getPrompt(context); - return await this.requester(prompt, context); + async requestLLM(context: Context): Promise { + try { + const prompt = this.prompt.getPrompt(context); + return await this.requester(prompt, context); + } catch (e: any) { + console.error(`${this.name} error!`); + console.error(e); + return { + ...getObjectProperties(e), + error: true + }; + } } - parseLLMResponse(llmResponse: any): Partial { - return this.parser(llmResponse); + parseLLMResponse(llmResponse: any): Partial | TaskError { + try { + return this.parser(llmResponse); + } catch (e: any) { + console.error(`${this.name} error!`); + console.error(e); + return { + ...getObjectProperties(e), + error: true + }; + } } - patchLLMResponse(input: Context & DSL): DSL { - const result = this.patcher.reduce((pre, pipeline) => { - const res = pipeline(pre); - return res as Context & DSL; - }, input); - return result; + patchLLMResponse(input: Context & DSL): DSL | TaskError { + try { + const result = this.patcher.reduce((pre, pipeline) => { + const res = pipeline(pre); + return res as Context & DSL; + }, input); + return result; + } catch (e: any) { + console.error(`${this.name} error!`); + console.error(e); + return { + ...getObjectProperties(e), + error: true + }; + } } - async executeTask(context: Context) { + async executeTask(context: Context): Promise { this.updateContext({ ...this.context, ...context }); const llmResponse = await this.requestLLM(context); const parsedResponse = this.parseLLMResponse(llmResponse) as DSL; diff --git a/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts b/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts index 9abe29d9..01aacd6b 100644 --- a/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts @@ -1,6 +1,8 @@ import { Transformer } from 'src/base/tools/transformer'; import { BaseTaskNode } from './baseTaskNode'; import { TaskNodeType } from './types'; +import { TaskError } from 'src/typings'; +import { getObjectProperties } from 'src/common/utils/utils'; /** * rule-based taskNode, which consists of a series of Pipelines @@ -8,8 +10,8 @@ import { TaskNodeType } from './types'; */ export class RuleBasedTaskNode extends BaseTaskNode { pipelines: Transformer[]; - constructor(pipelines: Transformer[]) { - super(); + constructor(name: string, pipelines: Transformer[]) { + super(name); this.type = TaskNodeType.RULE_BASED; this.registerPipelines(pipelines); } @@ -23,12 +25,20 @@ export class RuleBasedTaskNode extends BaseTaskNode) => { - const res = transformer(pre); - return res; - }, context); - return result; + try { + const result: Result = this.pipelines.reduce((pre: any, transformer: Transformer) => { + const res = transformer(pre); + return res; + }, context); + return result; + } catch (e: any) { + console.error(`${this.name} error!`); + return { + ...getObjectProperties(e), + error: true + }; + } } } diff --git a/packages/vmind/src/base/taskNode/types.ts b/packages/vmind/src/base/taskNode/types.ts index 096e0067..dfeda06b 100644 --- a/packages/vmind/src/base/taskNode/types.ts +++ b/packages/vmind/src/base/taskNode/types.ts @@ -1,7 +1,10 @@ +import { TaskError } from 'src/typings'; + export interface ITaskNode { + name: string; type: TaskNodeType; context: Context; - executeTask: (context: Context) => Promise | DSL; + executeTask: (context: Context) => Promise | TaskError | DSL; updateContext: (context: Context) => void; } export enum TaskNodeType { diff --git a/packages/vmind/src/common/constants.ts b/packages/vmind/src/common/constants.ts new file mode 100644 index 00000000..e69de29b diff --git a/packages/vmind/src/common/utils/utils.ts b/packages/vmind/src/common/utils/utils.ts index 75bceb10..867818ca 100644 --- a/packages/vmind/src/common/utils/utils.ts +++ b/packages/vmind/src/common/utils/utils.ts @@ -12,17 +12,6 @@ export const detectAxesType = (values: any[], field: string) => { } }; -export const CARTESIAN_CHART_LIST = [ - 'Dynamic Bar Chart', - 'Bar Chart', - 'Line Chart', - 'Scatter Plot', - 'Funnel Chart', - 'Dual Axis Chart', - 'Waterfall Chart', - 'Box Plot' -]; - export const calculateTokenUsage = (usageList: any[]) => { const totalUsage = { completion_tokens: 0, @@ -101,3 +90,13 @@ export const foldDatasetByYField = (dataset: DataItem[], yFieldList: string[], f return fold(dataset as any, yFieldList, FOLD_NAME, FOLD_VALUE, aliasMap, false); }; + +export function getObjectProperties(e: Error): {} { + const properties: any = {}; + + for (const prop of Object.getOwnPropertyNames(e)) { + properties[prop] = (e as any)[prop]; + } + + return properties; +} diff --git a/packages/vmind/src/typings/index.ts b/packages/vmind/src/typings/index.ts index 1f0cea52..a942e45c 100644 --- a/packages/vmind/src/typings/index.ts +++ b/packages/vmind/src/typings/index.ts @@ -1,5 +1,6 @@ import type { FFmpeg } from '@ffmpeg/ffmpeg'; import type { ManualTicker, DefaultTimeline } from '@visactor/vrender-core'; +import { Cell } from 'src/applications/chartGeneration/types'; //models that VMind support //more models is under developing export enum Model { @@ -152,3 +153,5 @@ export type PatchPipeline = ( context: PatchContext, _originalContext: PatchContext ) => { chartType: string; cell: Cell; dataset: DataItem[]; fieldInfo: SimpleFieldInfo[] }; + +export type TaskError = { error: boolean }; From 2712a099cef61a9bfa43f9914180266a65443f79 Mon Sep 17 00:00:00 2001 From: da730 Date: Mon, 15 Apr 2024 16:27:11 +0800 Subject: [PATCH 26/62] feat: refactor chart advisor handler --- .../taskNodes/chartAdvisor/errorWrapper.ts | 11 +++ .../taskNodes/chartAdvisor/index.ts | 11 +++ .../taskNodes/chartAdvisor/transformers.ts | 91 +++++++++++++++++++ .../taskNodes/chartAdvisor/types.ts | 7 ++ .../taskNodes/chartAdvisor/utils.ts | 61 +++++++++++++ .../taskNodes/generateChartType/types.ts | 9 ++ .../taskNodes/generateFieldMap/types.ts | 8 ++ .../generateTypeAndFieldMap/GPT/index.ts | 7 +- .../GPT/patcher/index.ts | 64 ++++++++----- .../GPT/prompt/index.ts | 2 +- .../generateTypeAndFieldMap/types.ts | 7 ++ .../taskNodes/getVizSchema/index.ts | 2 +- .../taskNodes/getVizSchema/types.ts | 8 ++ .../chartGeneration/taskNodes/utils.ts | 10 ++ .../src/applications/chartGeneration/types.ts | 25 ----- .../taskNodes/executeQuery/transformers.ts | 20 ++-- .../src/base/taskNode/llmBasedTaskNode.ts | 2 +- .../src/base/taskNode/ruleBasedTaskNode.ts | 2 +- 18 files changed, 282 insertions(+), 65 deletions(-) create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/errorWrapper.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/index.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/transformers.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/types.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/utils.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/generateChartType/types.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/generateFieldMap/types.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/types.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/types.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/utils.ts diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/errorWrapper.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/errorWrapper.ts new file mode 100644 index 00000000..673feada --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/errorWrapper.ts @@ -0,0 +1,11 @@ +import { TaskNodeMeta } from 'src/base/metaTypes'; +import { TaskNodeType } from 'src/base/taskNode/types'; +import { ChartAdvisorContext, ChartAdvisorOutput } from './types'; +import { chartGenerationErrorWrapper } from './transformers'; + +const ChartAdvisorErrorWrapper: TaskNodeMeta = { + type: TaskNodeType.RULE_BASED, + pipelines: [chartGenerationErrorWrapper] +}; + +export default ChartAdvisorErrorWrapper; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/index.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/index.ts new file mode 100644 index 00000000..535c73d5 --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/index.ts @@ -0,0 +1,11 @@ +import { TaskNodeMeta } from 'src/base/metaTypes'; +import { TaskNodeType } from 'src/base/taskNode/types'; +import { ChartAdvisorContext, ChartAdvisorOutput } from './types'; +import { chartAdvisorTransformer } from './transformers'; + +const ChartAdvisorTaskNode: TaskNodeMeta = { + type: TaskNodeType.RULE_BASED, + pipelines: [chartAdvisorTransformer] +}; + +export default ChartAdvisorTaskNode; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/transformers.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/transformers.ts new file mode 100644 index 00000000..216f2a57 --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/transformers.ts @@ -0,0 +1,91 @@ +import { TaskError, VMindDataset, VizSchema } from 'src/typings'; +import { chartTypeMap, getCell, typeMap } from './utils'; +import { ChartType, chartAdvisor } from '@visactor/chart-advisor'; +import { Transformer } from 'src/base/tools/transformer'; +import { ChartAdvisorContext, ChartAdvisorOutput } from './types'; + +const availableChartTypeList = [ + ChartType.COLUMN, + ChartType.COLUMN_PERCENT, + ChartType.COLUMN_PARALLEL, + ChartType.BAR, + ChartType.BAR_PERCENT, + ChartType.BAR_PARALLEL, + ChartType.LINE, + ChartType.AREA, + ChartType.AREA_PERCENT, + ChartType.PIE, + ChartType.ANNULAR, + ChartType.ROSE, + ChartType.SCATTER, + ChartType.DUAL_AXIS, + ChartType.WORD_CLOUD, + ChartType.FUNNEL, + ChartType.SANKEY, + ChartType.RADAR +]; + +/** + * call @visactor/chart-advisor to get the list of advised charts + * sorted by scores of each chart type + * @param schema + * @param dataset + * @returns + */ +const getAdvisedChartList = (schema: Partial, dataset: any[]) => { + const dimensionList: any = schema.fields + .filter(d => d.role === 'dimension') + .map(d => ({ + uniqueId: d.id, + type: typeMap(d.type) + })); + const measureList: any = schema.fields + .filter(d => d.role === 'measure') + .map(d => ({ + uniqueId: d.id, + type: typeMap(d.type) + })); + const aliasMap = Object.fromEntries(schema.fields.map(d => [d.id, d.alias])); + const advisorResult = chartAdvisor({ originDataset: dataset, dimensionList, measureList, aliasMap }); + return advisorResult; +}; + +/** + * get one recommended chart type using @visactor/chart-advisor + * @param schema + * @param dataset + * @returns + */ +const chartAdvisorHandler = (schema: Partial, dataset: any[]) => { + const advisorResult = getAdvisedChartList(schema, dataset); + const result = advisorResult.scores.find((d: any) => availableChartTypeList.includes(d.chartType)); + return { + chartType: chartTypeMap(result.chartType).toUpperCase(), + cell: getCell(result.cell), + dataset: result.dataset + }; +}; + +export const chartAdvisorTransformer: Transformer = ( + context: ChartAdvisorContext +) => { + const { vizSchema, dataset } = context; + // call rule-based method to get recommended chart type and fieldMap(cell) + const advisorResult = chartAdvisorHandler(vizSchema, dataset); + const chartType = advisorResult.chartType; + const cell = advisorResult.cell; + const datasetAdvisor = advisorResult.dataset as VMindDataset; + const chartSource = 'chartAdvisor'; + + return { chartType, cell, dataset: datasetAdvisor, chartSource }; +}; + +export const chartGenerationErrorWrapper: Transformer = ( + context: ChartAdvisorContext & ChartAdvisorOutput +) => { + const { error } = context as unknown as TaskError; + if (error) { + return chartAdvisorTransformer(context); + } + return context as ChartAdvisorOutput; +}; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/types.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/types.ts new file mode 100644 index 00000000..a92b9ec5 --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/types.ts @@ -0,0 +1,7 @@ +import { GetVizSchemaOutput } from '../getVizSchema/types'; +import { GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput } from '../generateTypeAndFieldMap/types'; +import { TaskError } from 'src/typings'; + +export type ChartAdvisorContext = GenerateChartAndFieldMapContext & GetVizSchemaOutput; + +export type ChartAdvisorOutput = GenerateChartAndFieldMapOutput; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/utils.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/utils.ts new file mode 100644 index 00000000..fed68654 --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/utils.ts @@ -0,0 +1,61 @@ +import { ChartType, DataTypeName } from '@visactor/chart-advisor'; +import { Cell } from '../../types'; + +export const typeMap = (type: string): DataTypeName => { + if (['string'].includes(type)) { + return 'string'; + } else if (['date', 'datetime', 'time'].includes(type)) { + return 'date'; + } else if (['int', 'float'].includes(type)) { + return 'number'; + } + return 'string'; +}; + +export const chartTypeMap = (advisorChartType: ChartType) => { + if ( + [ + ChartType.COLUMN, + ChartType.COLUMN_PERCENT, + ChartType.COLUMN_PARALLEL, + ChartType.BAR, + ChartType.BAR_PERCENT, + ChartType.BAR_PARALLEL + ].includes(advisorChartType) + ) { + return 'Bar Chart'; + } else if ([ChartType.LINE, ChartType.AREA, ChartType.AREA_PERCENT].includes(advisorChartType)) { + return 'Line Chart'; + } else if ([ChartType.PIE, ChartType.ANNULAR].includes(advisorChartType)) { + return 'Pie Chart'; + } else if (ChartType.ROSE === advisorChartType) { + return 'Rose Chart'; + } else if (ChartType.SCATTER === advisorChartType) { + return 'Scatter Plot'; + } else if (ChartType.DUAL_AXIS === advisorChartType) { + return 'Dual Axis Chart'; + } else if (ChartType.WORD_CLOUD === advisorChartType) { + return 'Word Cloud'; + } else if (ChartType.FUNNEL === advisorChartType) { + return 'Funnel Chart'; + } else if (ChartType.SANKEY === advisorChartType) { + return 'Sankey Chart'; + } else if (ChartType.RADAR === advisorChartType) { + return 'Radar Chart'; + } + throw 'no matched chart type'; +}; + +export const getCell = (cell: any): Cell => { + const keys = Object.keys(cell); + const result: Cell = {}; + keys.forEach((key: string) => { + const channel = cell[key]; + if (Array.isArray(channel) && channel.length === 1) { + result[key] = String(channel[0]); + } else { + result[key] = Array.isArray(channel) ? channel.map(c => String(c)) : channel; + } + }); + return result; +}; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/generateChartType/types.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/generateChartType/types.ts new file mode 100644 index 00000000..562493ba --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/generateChartType/types.ts @@ -0,0 +1,9 @@ +import { ChartType } from 'src/typings'; +import { GetVizSchemaContext, GetVizSchemaOutput } from '../getVizSchema/types'; + +export type GenerateChartTypeContext = GetVizSchemaContext & GetVizSchemaOutput; + +export type GenerateChartTypeOutput = { + chartType: ChartType; + chartSource: string; +}; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/generateFieldMap/types.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/generateFieldMap/types.ts new file mode 100644 index 00000000..e2db8362 --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/generateFieldMap/types.ts @@ -0,0 +1,8 @@ +import { Cell } from '../../types'; +import { GenerateChartTypeContext, GenerateChartTypeOutput } from '../generateChartType/types'; + +export type GenerateFieldMapContext = GenerateChartTypeContext & GenerateChartTypeOutput; + +export type GenerateFieldMapOutput = { + cell: Cell; +}; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/index.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/index.ts index f3cf9596..943d13a9 100644 --- a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/index.ts +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/index.ts @@ -1,7 +1,4 @@ -import { - GenerateChartAndFieldMapContext, - GenerateChartAndFieldMapOutput -} from 'src/applications/chartGeneration/types'; +import { GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput } from '../types'; import { LLMBasedTaskNodeMeta } from 'src/base/metaTypes'; import { TaskNodeType } from 'src/base/taskNode/types'; import { ModelType } from 'src/typings'; @@ -19,6 +16,7 @@ import { patchWordCloud, patchYField } from './patcher'; +import { addChartSource } from '../../utils'; const ChartGenerationTaskNodeGPTMeta: LLMBasedTaskNodeMeta< GenerateChartAndFieldMapContext, @@ -30,6 +28,7 @@ const ChartGenerationTaskNodeGPTMeta: LLMBasedTaskNodeMeta< // At some point, due to the unclear intention of the user's input, fields may lack fields in Cell returned by GPT. // At this time, you need to make up according to the rules patcher: [ + addChartSource, patchAxisField, patchColorField, patchLabelField, diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/patcher/index.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/patcher/index.ts index 530f00d8..acda1d75 100644 --- a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/patcher/index.ts +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/patcher/index.ts @@ -1,12 +1,10 @@ import { FOLD_NAME, FOLD_VALUE } from '@visactor/chart-advisor'; import { isArray, isNil } from 'lodash'; -import { - GenerateChartAndFieldMapContext, - GenerateChartAndFieldMapOutput -} from 'src/applications/chartGeneration/types'; + import { Transformer } from 'src/base/tools/transformer'; import { foldDatasetByYField, getFieldByDataType, getFieldByRole, getRemainedFields } from 'src/common/utils/utils'; import { DataType, ROLE } from 'src/typings'; +import { GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput } from '../../types'; const CARTESIAN_CHART_LIST = [ 'Dynamic Bar Chart', @@ -21,7 +19,7 @@ const CARTESIAN_CHART_LIST = [ export const patchAxisField: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapOutput + Partial > = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { const { cell } = context; @@ -37,27 +35,27 @@ export const patchAxisField: Transformer< } return { - ...context, + //...context, cell: cellNew }; }; export const patchColorField: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapOutput + Partial > = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { const { cell } = context; const cellNew = { ...cell, color: cell.color ?? cell.category }; return { - ...context, + //...context, cell: cellNew }; }; export const patchLabelField: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapOutput + Partial > = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { const { cell } = context; @@ -68,14 +66,14 @@ export const patchLabelField: Transformer< } return { - ...context, + //...context, cell: cellNew }; }; export const patchYField: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapOutput + Partial > = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { const { chartType, cell, dataset, fieldInfo } = context; let cellNew = { ...cell }; @@ -112,7 +110,7 @@ export const patchYField: Transformer< } return { - ...context, + //...context, chartType: chartTypeNew, cell: cellNew, dataset: datasetNew @@ -121,7 +119,7 @@ export const patchYField: Transformer< export const patchBoxPlot: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapOutput + Partial > = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { const { chartType, cell } = context; const cellNew = { @@ -180,12 +178,15 @@ export const patchBoxPlot: Transformer< } } - return { ...context, cell: cellNew }; + return { + //...context, + cell: cellNew + }; }; export const patchDualAxis: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapOutput + Partial > = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { const { chartType, cell } = context; const cellNew: any = { ...cell }; @@ -195,12 +196,15 @@ export const patchDualAxis: Transformer< cellNew.y = [cellNew.yLeft, cellNew.yRight]; } - return { ...context, cell: cellNew }; + return { + //...context, + cell: cellNew + }; }; export const patchPieChart: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapOutput + Partial > = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { const { chartType, cell, fieldInfo } = context; const cellNew = { ...cell }; @@ -234,12 +238,15 @@ export const patchPieChart: Transformer< } } } - return { ...context, cell: cellNew }; + return { + //...context, + cell: cellNew + }; }; export const patchWordCloud: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapOutput + Partial > = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { //Word cloud must have color fields and size fields const { chartType, cell, fieldInfo } = context; @@ -277,12 +284,15 @@ export const patchWordCloud: Transformer< } } } - return { ...context, cell: cellNew }; + return { + //...context, + cell: cellNew + }; }; export const patchDynamicBarChart: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapOutput + Partial > = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { const { chartType, cell, fieldInfo } = context; const cellNew = { ...cell }; @@ -301,12 +311,15 @@ export const patchDynamicBarChart: Transformer< } } - return { ...context, cell: cellNew }; + return { + //...context, + cell: cellNew + }; }; export const patchCartesianXField: Transformer< GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, - GenerateChartAndFieldMapOutput + Partial > = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { const { chartType, cell, fieldInfo } = context; const cellNew = { ...cell }; @@ -324,5 +337,8 @@ export const patchCartesianXField: Transformer< } } } - return { ...context, cell: cellNew }; + return { + //...context, + cell: cellNew + }; }; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/prompt/index.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/prompt/index.ts index 432fcfc6..09cfa3c8 100644 --- a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/prompt/index.ts +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/prompt/index.ts @@ -1,6 +1,6 @@ import { Prompt } from 'src/base/tools/prompt'; -import { GenerateChartAndFieldMapContext } from 'src/applications/chartGeneration/types'; import { ChartAdvisorPromptEnglish } from './template'; +import { GenerateChartAndFieldMapContext } from '../../types'; export class GPTChartGenerationPrompt extends Prompt { constructor() { diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/types.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/types.ts new file mode 100644 index 00000000..fe4d0c00 --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/types.ts @@ -0,0 +1,7 @@ +import { GenerateChartTypeOutput } from '../generateChartType/types'; +import { GenerateFieldMapOutput } from '../generateFieldMap/types'; +import { GetVizSchemaContext, GetVizSchemaOutput } from '../getVizSchema/types'; + +export type GenerateChartAndFieldMapContext = GetVizSchemaContext & GetVizSchemaOutput; + +export type GenerateChartAndFieldMapOutput = GenerateFieldMapOutput & GenerateChartTypeOutput; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/index.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/index.ts index 4ffbc3ef..b7ac9aa2 100644 --- a/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/index.ts +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/index.ts @@ -1,7 +1,7 @@ import { RuleBasedTaskNodeMeta } from 'src/base/metaTypes'; import { TaskNodeType } from 'src/base/taskNode/types'; -import { GetVizSchemaContext, GetVizSchemaOutput } from '../../types'; import { getVizSchema } from './utils'; +import { GetVizSchemaContext, GetVizSchemaOutput } from './types'; const generateVizSchemaTaskNodeMeta: RuleBasedTaskNodeMeta = { type: TaskNodeType.RULE_BASED, diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/types.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/types.ts new file mode 100644 index 00000000..47c02963 --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/types.ts @@ -0,0 +1,8 @@ +import { ChartGenerationContext } from 'src/applications/types'; +import { VizSchema } from 'src/typings'; + +export type GetVizSchemaContext = ChartGenerationContext; + +export type GetVizSchemaOutput = { + vizSchema: VizSchema; +}; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/utils.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/utils.ts new file mode 100644 index 00000000..b551cd99 --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/utils.ts @@ -0,0 +1,10 @@ +import { Transformer } from 'src/base/tools/transformer'; +import { GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput } from './generateTypeAndFieldMap/types'; + +export const addChartSource: Transformer< + GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput, + GenerateChartAndFieldMapOutput +> = (context: GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput) => { + const { llmOptions } = context; + return { ...context, chartSource: llmOptions.model }; +}; diff --git a/packages/vmind/src/applications/chartGeneration/types.ts b/packages/vmind/src/applications/chartGeneration/types.ts index ed825d7b..ede8319c 100644 --- a/packages/vmind/src/applications/chartGeneration/types.ts +++ b/packages/vmind/src/applications/chartGeneration/types.ts @@ -1,6 +1,3 @@ -import { ChartType, SimpleFieldInfo, VMindDataset, VizSchema } from 'src/typings'; -import { ChartGenerationContext } from '../types'; - export type Cell = { //字段映射,可用的视觉通道:["x","y","color","size","angle","time"] x?: string; @@ -15,25 +12,3 @@ export type Cell = { value?: string; category?: string; }; - -export type GetVizSchemaContext = ChartGenerationContext; - -export type GetVizSchemaOutput = { - vizSchema: VizSchema; -}; - -export type GenerateChartTypeContext = GetVizSchemaContext & GetVizSchemaOutput; - -export type GenerateChartTypeOutput = { - chartType: ChartType; -}; - -export type GenerateFieldMapContext = GenerateChartTypeContext & GenerateChartTypeOutput; - -export type GenerateFieldMapOutput = { - cell: Cell; -}; - -export type GenerateChartAndFieldMapContext = GetVizSchemaContext & GetVizSchemaOutput; - -export type GenerateChartAndFieldMapOutput = GenerateFieldMapOutput & GenerateChartTypeOutput; diff --git a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts index 64e26f00..b572621b 100644 --- a/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts +++ b/packages/vmind/src/applications/dataAggregation/taskNodes/executeQuery/transformers.ts @@ -26,7 +26,7 @@ type PatchSQLResult = { validDataset: VMindDataset; columnReplaceMap: Map; sqlReplaceMap: Map; -} & ExecuteQueryContext; +}; export const patchSQLBeforeQuery: Transformer = (context: ExecuteQueryContext) => { const { sql, sourceDataset } = context; const { fieldInfo } = context; @@ -49,7 +49,7 @@ export const patchSQLBeforeQuery: Transformer = (conte const alasqlDataset = alasql(alasqlQuery, new Array(sqlCount).fill(validDataset)); return { - ...context, + //...context, alasqlDataset }; }; -type RestoreResult = QueryResult & { +type RestoreResult = { datasetAfterQuery: VMindDataset; }; /** @@ -88,7 +88,9 @@ type RestoreResult = QueryResult & { * @param context * @returns restored dataset */ -export const restoreDatasetAfterQuery: Transformer = (context: QueryResult) => { +export const restoreDatasetAfterQuery: Transformer = ( + context: QueryResult & PatchSQLResult +) => { const { columnReplaceMap, sqlReplaceMap, alasqlDataset } = context; //restore the dataset const columnReversedMap = swapMap(columnReplaceMap); @@ -97,12 +99,14 @@ export const restoreDatasetAfterQuery: Transformer = const sqlRestoredDataset = replaceDataset(columnRestoredDataset, sqlReversedMap, false); return { - ...context, + //...context, datasetAfterQuery: sqlRestoredDataset }; }; -export const getFinalQueryResult: Transformer = (context: RestoreResult) => { +export const getFinalQueryResult: Transformer = ( + context: RestoreResult & ExecuteQueryContext +) => { const { sourceDataset, fieldInfo, usage, llmFieldInfo: responseFieldInfo, datasetAfterQuery } = context; const fieldInfoNew = parseRespondField(responseFieldInfo, datasetAfterQuery); if (datasetAfterQuery.length === 0) { diff --git a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts index c3692504..44027051 100644 --- a/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/llmBasedTaskNode.ts @@ -82,7 +82,7 @@ export default class LLMBasedTaskNode { const res = pipeline(pre); - return res as Context & DSL; + return { ...pre, ...res } as Context & DSL; }, input); return result; } catch (e: any) { diff --git a/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts b/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts index 01aacd6b..241eca39 100644 --- a/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts @@ -30,7 +30,7 @@ export class RuleBasedTaskNode extends BaseTaskNode) => { const res = transformer(pre); - return res; + return { ...pre, ...res }; }, context); return result; } catch (e: any) { From 8f9f5df27b78758054894112623a6c92a15832b2 Mon Sep 17 00:00:00 2001 From: da730 Date: Mon, 15 Apr 2024 21:16:49 +0800 Subject: [PATCH 27/62] feat: refact gpt chart generation --- .../__tests__/browser/src/pages/DataInput.tsx | 22 +- .../src/applications/chartGeneration/index.ts | 23 + .../taskNodes/chartAdvisor/transformers.ts | 2 +- .../generateTypeAndFieldMap/GPT/utils.ts | 2 +- .../getChartSpec/VChart/chartPipeline.ts | 106 ++ .../getChartSpec/VChart/constants.ts | 24 + .../taskNodes/getChartSpec/VChart/index.ts | 11 + .../getChartSpec/VChart/transformers.ts | 1170 +++++++++++++++++ .../taskNodes/getChartSpec/types.ts | 7 + .../taskNodes/getVizSchema/index.ts | 4 +- .../taskNodes/getVizSchema/utils.ts | 4 +- packages/vmind/src/applications/index.ts | 14 + packages/vmind/src/applications/types.ts | 2 +- packages/vmind/src/base/metaTypes.ts | 4 +- .../src/base/taskNode/ruleBasedTaskNode.ts | 20 +- packages/vmind/src/core/VMind.ts | 66 +- packages/vmind/src/core/applications.ts | 11 - 17 files changed, 1453 insertions(+), 39 deletions(-) create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/VChart/chartPipeline.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/VChart/constants.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/VChart/index.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/VChart/transformers.ts create mode 100644 packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/types.ts create mode 100644 packages/vmind/src/applications/index.ts delete mode 100644 packages/vmind/src/core/applications.ts diff --git a/packages/vmind/__tests__/browser/src/pages/DataInput.tsx b/packages/vmind/__tests__/browser/src/pages/DataInput.tsx index e8f1d012..b8f6e2b7 100644 --- a/packages/vmind/__tests__/browser/src/pages/DataInput.tsx +++ b/packages/vmind/__tests__/browser/src/pages/DataInput.tsx @@ -125,21 +125,21 @@ export function DataInput(props: IPropsType) { const askGPT = useCallback(async () => { //setLoading(true); const { fieldInfo, dataset } = vmind.parseCSVData(csv); - const { fieldInfo: fieldInfoQuery, dataset: datasetQuery } = await vmind?.dataQuery(describe, fieldInfo, dataset); + //const { fieldInfo: fieldInfoQuery, dataset: datasetQuery } = await vmind?.dataQuery(describe, fieldInfo, dataset); //const { fieldInfo, dataset, usage } = await vmind.parseCSVDataWithLLM(csv, describe); //const dataset = mockData4; //const fieldInfo = vmind?.getFieldInfo(dataset); - //const startTime = new Date().getTime(); - ////const chartGenerationRes = await vmind.generateChart(describe, fieldInfo, dataset, true); - //const endTime = new Date().getTime(); - //if (isArray(chartGenerationRes)) { - // props.onSpecListGenerate(chartGenerationRes.map(res => res.spec)); - //} else { - // const { spec, time } = chartGenerationRes; - // const costTime = endTime - startTime; - // props.onSpecGenerate(spec, time as any, costTime); - //} + const startTime = new Date().getTime(); + const chartGenerationRes = await vmind.generateChart(describe, fieldInfo, dataset, true); + const endTime = new Date().getTime(); + if (isArray(chartGenerationRes)) { + props.onSpecListGenerate(chartGenerationRes.map(res => res.spec)); + } else { + const { spec, time } = chartGenerationRes; + const costTime = endTime - startTime; + props.onSpecGenerate(spec, time as any, costTime); + } setLoading(false); }, [vmind, csv, describe, props]); diff --git a/packages/vmind/src/applications/chartGeneration/index.ts b/packages/vmind/src/applications/chartGeneration/index.ts index e69de29b..d309af9a 100644 --- a/packages/vmind/src/applications/chartGeneration/index.ts +++ b/packages/vmind/src/applications/chartGeneration/index.ts @@ -0,0 +1,23 @@ +import { ApplicationMeta } from 'src/base/metaTypes'; +import { ChartGenerationContext, ChartGenerationOutput } from '../types'; +import GetVizSchemaTaskNodeMeta from './taskNodes/getVizSchema'; +import ChartGenerationTaskNodeGPTMeta from './taskNodes/generateTypeAndFieldMap/GPT'; +import ChartAdvisorErrorWrapper from './taskNodes/chartAdvisor/errorWrapper'; +import getVChartSpecTaskNodeMeta from './taskNodes/getChartSpec/VChart'; +import { ModelType } from 'src/typings'; + +const chartGenerationGPTMeta: ApplicationMeta = { + name: 'chartGeneration', + taskNodes: [ + { taskNode: GetVizSchemaTaskNodeMeta, name: 'getVizSchema' }, + { taskNode: ChartGenerationTaskNodeGPTMeta, name: 'generateChart' }, + { taskNode: ChartAdvisorErrorWrapper, name: 'chartAdvisorHandler' }, + { taskNode: getVChartSpecTaskNodeMeta, name: 'getVChartSpec' } + ] +}; + +const chartGenerationMetaByModel = { + [ModelType.GPT]: chartGenerationGPTMeta +}; + +export default chartGenerationMetaByModel; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/transformers.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/transformers.ts index 216f2a57..d955c8ee 100644 --- a/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/transformers.ts +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/chartAdvisor/transformers.ts @@ -77,7 +77,7 @@ export const chartAdvisorTransformer: Transformer = ( diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/utils.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/utils.ts index df591f28..f9a83780 100644 --- a/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/utils.ts +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/generateTypeAndFieldMap/GPT/utils.ts @@ -33,7 +33,7 @@ export const parseChartGenerationResponse: Parser { diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/VChart/chartPipeline.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/VChart/chartPipeline.ts new file mode 100644 index 00000000..3ea41374 --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/VChart/chartPipeline.ts @@ -0,0 +1,106 @@ +import { Transformer } from 'src/base/tools/transformer'; +import { GetChartSpecContext, GetChartSpecOutput } from '../types'; +import { + axis, + cartesianBar, + cartesianLine, + chartType, + color, + data, + legend, + pieField, + scatterField, + wordCloudField, + roseField, + roseAxis, + radarField, + radarDisplayConf, + radarAxis, + sankeyData, + sankeyField, + sankeyLabel, + sankeyLink, + sequenceData, + rankingBarAxis, + rankingBarField, + customMark, + scatterAxis, + animationOneByOne, + animationCartesianBar, + animationCartisianLine, + animationCartesianPie, + wordCloudData, + displayConfBar, + displayConfLine, + colorLine, + colorBar, + colorDynamicBar, + wordCloudDisplayConf, + rankingBarLabel, + funnelField, + funnelData, + dualAxisSeries, + dualAxisAxes, + waterfallField, + waterfallAxes, + waterfallStackLabel, + boxPlotField, + boxPlotStyle, + initSpec +} from './transformers'; + +const pipelineBar = [chartType, data, colorBar, cartesianBar, axis, legend, displayConfBar, animationCartesianBar]; +const pipelineLine = [chartType, data, colorLine, cartesianLine, axis, legend, displayConfLine, animationCartisianLine]; +const pipelinePie = [chartType, data, color, pieField, legend, animationCartesianPie]; +const pipelineRankingBar = [ + chartType, + sequenceData, + colorDynamicBar, + rankingBarField, + rankingBarAxis, + customMark, + rankingBarLabel +]; + +const pipelineWordCloud = [chartType, wordCloudData, color, wordCloudField, wordCloudDisplayConf, animationOneByOne]; + +const pipelineScatterPlot = [chartType, data, color, scatterField, scatterAxis, legend, animationOneByOne]; + +const pipelineFunnel = [chartType, funnelData, color, funnelField, legend]; + +const pipelineDualAxis = [chartType, data, color, dualAxisSeries, dualAxisAxes, legend]; + +const pipelineRose = [chartType, data, color, roseField, roseAxis, legend, animationCartesianPie]; + +const pipelineRadar = [chartType, data, color, radarField, radarDisplayConf, radarAxis, legend, animationCartisianLine]; + +const pipelineSankey = [chartType, sankeyData, color, sankeyField, sankeyLink, sankeyLabel, legend]; + +const pipelineWaterfall = [chartType, data, color, waterfallField, waterfallAxes, waterfallStackLabel, legend]; +const pipelineBoxPlot = [chartType, data, color, boxPlotField, boxPlotStyle, legend]; + +const pipelineMap: { [chartType: string]: any } = { + 'BAR CHART': pipelineBar, + 'LINE CHART': pipelineLine, + 'PIE CHART': pipelinePie, + 'WORD CLOUD': pipelineWordCloud, + 'SCATTER PLOT': pipelineScatterPlot, + 'DYNAMIC BAR CHART': pipelineRankingBar, + 'FUNNEL CHART': pipelineFunnel, + 'DUAL AXIS CHART': pipelineDualAxis, + 'ROSE CHART': pipelineRose, + 'RADAR CHART': pipelineRadar, + 'SANKEY CHART': pipelineSankey, + 'WATERFALL CHART': pipelineWaterfall, + 'BOX PLOT': pipelineBoxPlot +}; + +export const getChartPipelines: ( + context: GetChartSpecContext +) => Transformer[] = (context: GetChartSpecContext) => { + const { chartType } = context; + return [initSpec].concat(pipelineMap[chartType.toUpperCase()]) as Transformer< + GetChartSpecContext, + GetChartSpecOutput + >[]; +}; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/VChart/constants.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/VChart/constants.ts new file mode 100644 index 00000000..8ac8aabc --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/VChart/constants.ts @@ -0,0 +1,24 @@ +export const WORDCLOUD_NUM_LIMIT = 100; + +export const COLOR_THEMES = { + default: ['#1DD0F3', '#2693FF', '#3259F4', '#1B0CA1', '#CB2BC6', '#FF581D', '#FBBB16', '#F6FB17', '#73EC55'] +}; + +export const LINEAR_COLOR_THEMES = [ + ['#1DD0F3', '#73EC55'], + ['#2693FF', '#F6FB17'], + ['#3259F4', '#FBBB16'], + ['#1B0CA1', '#FF581D'], + ['#1DD0F3', '#CB2BC6'] +]; + +export const animationDuration = 500; +export const oneByOneGroupSize = 10; +export const DEFAULT_VIDEO_LENGTH = 2000; +export const DEFAULT_PIE_VIDEO_LENGTH = 5000; +export const DEFAULT_VIDEO_LENGTH_LONG = 10000; +export const VIDEO_LENGTH_BY_CHART_TYPE: Record = { + pie: DEFAULT_PIE_VIDEO_LENGTH, + wordCloud: DEFAULT_VIDEO_LENGTH_LONG, + wordcloud: DEFAULT_VIDEO_LENGTH_LONG +}; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/VChart/index.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/VChart/index.ts new file mode 100644 index 00000000..85ca980f --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/VChart/index.ts @@ -0,0 +1,11 @@ +import { RuleBasedTaskNodeMeta } from 'src/base/metaTypes'; +import { TaskNodeType } from 'src/base/taskNode/types'; +import { GetChartSpecContext, GetChartSpecOutput } from '../types'; +import { getChartPipelines } from './chartPipeline'; + +const getVChartSpecTaskNodeMeta: RuleBasedTaskNodeMeta = { + type: TaskNodeType.RULE_BASED, + pipelines: getChartPipelines +}; + +export default getVChartSpecTaskNodeMeta; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/VChart/transformers.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/VChart/transformers.ts new file mode 100644 index 00000000..b9933908 --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/VChart/transformers.ts @@ -0,0 +1,1170 @@ +import { Transformer } from 'src/base/tools/transformer'; +import { GetChartSpecContext, GetChartSpecOutput } from '../types'; +import { + COLOR_THEMES, + DEFAULT_PIE_VIDEO_LENGTH, + DEFAULT_VIDEO_LENGTH, + DEFAULT_VIDEO_LENGTH_LONG, + LINEAR_COLOR_THEMES, + WORDCLOUD_NUM_LIMIT, + animationDuration, + oneByOneGroupSize +} from './constants'; +import { detectAxesType } from 'src/common/utils/utils'; +import { array } from '@visactor/vutils'; + +type Context = GetChartSpecContext & GetChartSpecOutput; + +const chartTypeMap: { [chartName: string]: string } = { + 'BAR CHART': 'bar', + 'LINE CHART': 'line', + 'PIE CHART': 'pie', + 'WORD CLOUD': 'wordCloud', + 'SCATTER PLOT': 'scatter', + 'DYNAMIC BAR CHART': 'bar', + 'FUNNEL CHART': 'funnel', + 'DUAL AXIS CHART': 'common', + 'ROSE CHART': 'rose', + 'RADAR CHART': 'radar', + 'SANKEY CHART': 'sankey', + 'WATERFALL CHART': 'waterfall', + 'BOX PLOT': 'boxPlot' +}; + +export const initSpec: Transformer = (_context: Context) => { + return { spec: {} }; +}; + +export const chartType: Transformer = (context: Context) => { + const { chartType, spec } = context; + spec.type = chartTypeMap[chartType]; + return { spec }; +}; + +export const data: Transformer = (context: Context) => { + const { dataset, spec } = context; + // spec.data = [dataset] + spec.data = { + id: 'data', + values: dataset.flat(4) + }; + + return { spec }; +}; + +export const funnelData: Transformer = (context: Context) => { + const { dataset, cell, spec } = context; + // spec.data = [dataset] + spec.data = { + id: 'data', + values: dataset.sort((a: any, b: any) => b[cell.y as string] - a[cell.y as string]) + }; + + return { spec }; +}; + +export const wordCloudData: Transformer = (context: Context) => { + const { dataset, spec } = context; + spec.data = { + id: 'data', + values: dataset.slice(0, WORDCLOUD_NUM_LIMIT) + }; + + return { spec }; +}; + +export const sequenceData: Transformer = ( + context: Context & { totalTime: number } +) => { + const { dataset, cell, totalTime, spec } = context; + const timeField = cell.time as string; + const latestData = dataset; + + // group the data by time field + const timeArray: any[] = []; + const contentMap = {} as any; + latestData.forEach((element: any) => { + const time = element[timeField].toString(); + if (!timeArray.includes(time)) { + timeArray.push(time); + contentMap[time] = []; + contentMap[time].push(element); + } else { + contentMap[time].push(element); + } + }); + + //sort the data by valueField in each group + const valueField = cell.y as string; + for (const time in contentMap) { + const contentItem = contentMap[time]; + + contentItem.sort(function (a: any, b: any) { + return b[valueField] - a[valueField]; + }); + } + + const dataSpecs = Object.keys(contentMap).map(year => { + return { + data: [ + { + id: 'id', + values: contentMap[year] + }, + { + id: 'year', + values: [{ year }] + } + ] + }; + }); + + spec.data = dataSpecs[0].data; + + const duration = totalTime ? totalTime / dataSpecs.length : 1000; + + //config the player + spec.player = { + type: 'continuous', + orient: 'bottom', + auto: true, + loop: true, + dx: 0, + position: 'middle', + interval: duration, + specs: dataSpecs, + slider: { + railStyle: { + visible: false, + height: 6 + }, + trackStyle: { + visible: false + }, + handlerStyle: { + visible: false + } + }, + controller: { + backward: { + style: { + visible: false, + size: 12 + } + }, + forward: { + style: { + visible: false, + size: 12 + } + }, + start: { + style: { + visible: false + }, + order: 1, + position: 'end' + }, + pause: { + style: { + visible: false + } + } + } + }; + + spec.animationUpdate = { + bar: [ + { + type: 'update', + options: { excludeChannels: ['x', 'y'] }, + duration: duration, + easing: 'linear' + }, + { + channel: ['x', 'y'], + options: { excludeChannels: ['width'] }, + duration: duration, + easing: 'linear' + } + ], + axis: { + duration: duration, + easing: 'linear' + } + }; + + return { spec }; +}; + +export const sankeyData: Transformer = (context: Context) => { + const { dataset, cell, spec } = context; + const { source, target } = cell; + const linkData = dataset; + const nodes = [ + ...new Set([ + ...linkData.map((item: any) => item[source as string]), + ...linkData.map((item: any) => item[target as string]) + ]) + ]; + const nodeData = nodes.map(node => ({ name: node })); + + spec.data = { + id: 'data', + values: [ + { + nodes: nodeData, + links: linkData + } + ] + }; + + return { spec }; +}; + +export const color: Transformer = (context: Context) => { + const { colors, spec } = context; + // spec.data = [dataset] + if (colors && colors.length > 0) { + spec.color = colors; + } else { + spec.color = COLOR_THEMES.default; + } + + return { spec }; +}; + +export const colorBar: Transformer = (context: Context) => { + const { colors, spec } = context; + // spec.data = [dataset] + let colorThemes = COLOR_THEMES.default; + if (colors && colors.length > 0) { + colorThemes = colors; + } + + //apply transparent gradient + spec.color = colorThemes.map(c => ({ + gradient: 'linear', + x0: 0.01, + y0: 0, + x1: 0.01, + y1: 1, + stops: [ + { + offset: 0, + color: `#${c.split('#')[1]}FF` + }, + { + offset: 1, + color: `#${c.split('#')[1]}00` + } + ] + })); + + return { spec }; +}; + +export const colorDynamicBar: Transformer = (context: Context) => { + const { colors, spec } = context; + // spec.data = [dataset] + let colorThemes = COLOR_THEMES.default; + if (colors && colors.length > 0) { + colorThemes = colors; + } + + //apply transparent gradient + spec.color = colorThemes.map(c => ({ + gradient: 'linear', + x0: 1, + y0: 0.01, + x1: 0.01, + y1: 0.01, + stops: [ + { + offset: 0, + color: `#${c.split('#')[1]}FF` + }, + { + offset: 1, + color: `#${c.split('#')[1]}00` + } + ] + })); + + return { spec }; +}; + +export const colorLine: Transformer = (context: Context) => { + const { colors, spec } = context; + // spec.data = [dataset] + if (colors && colors.length > 0) { + spec.color = colors; + } else { + //应用渐变色 + spec.color = LINEAR_COLOR_THEMES.map(c => ({ + gradient: 'linear', + x0: 0, + y0: 0.5, + x1: 1, + y1: 0.5, + stops: [ + { + offset: 0, + color: c[0] + }, + { + offset: 1, + color: c[1] + } + ] + })); + spec.point = { + style: { + visible: false + } + }; + } + return { spec }; +}; + +export const cartesianLine: Transformer = (context: Context) => { + //assign field in spec according to cell + const { cell, dataset, spec } = context; + spec.xField = cell.x; + spec.yField = cell.y; + if (cell.color) { + spec.seriesField = cell.color; + } else { + //no color field. choose a discrete field among remaining fields + const dataFields = Object.keys(dataset[0]); + const remainedFields = dataFields.filter(f => !spec.xField.includes(f) && spec.yField !== f); + const colorField = remainedFields.find(f => { + const fieldType = detectAxesType(spec.data.values, f); + return fieldType === 'band'; + }); + if (colorField) { + spec.seriesField = colorField; + } + } + return { spec }; +}; + +export const pieField: Transformer = (context: Context) => { + //assign field in spec according to cell + const { cell, spec } = context; + spec.valueField = cell.angle || cell.value; + if (cell.color || (cell as any).category) { + spec.categoryField = cell.color || (cell as any).category; + } + return { spec }; +}; + +export const scatterField: Transformer = (context: Context) => { + //assign field in spec according to cell + const { cell, spec } = context; + spec.xField = cell.x; + spec.yField = cell.y; + if (cell.color) { + spec.seriesField = cell.color; + } + if (cell.size) { + spec.sizeField = cell.size; + spec.size = { + type: 'linear' + }; + } + + return { spec }; +}; + +export const wordCloudField: Transformer = (context: Context) => { + //assign field in spec according to cell + const { cell, spec } = context; + spec.nameField = cell.color; + + if (cell.size) { + spec.valueField = cell.size; + } + + spec.seriesField = spec.nameField; + + return { spec }; +}; + +export const funnelField: Transformer = (context: Context) => { + //assign field in spec according to cell + const { cell, spec } = context; + spec.categoryField = cell.color || cell.x; + spec.valueField = cell.value || cell.y; + + return { spec }; +}; + +export const waterfallField: Transformer = (context: Context) => { + //assign field in spec according to cell + const { cell, spec } = context; + spec.xField = cell.x; + spec.yField = cell.y; + spec.total = { + type: 'end', + text: '总计' + }; + + return { spec }; +}; + +export const waterfallAxes: Transformer = (context: Context) => { + //assign axises + const { spec } = context; + spec.axes = [ + { + orient: 'left', + title: { visible: true, text: 'favorability' }, + label: { + formatMethod: (v: any) => { + return v + '%'; + } + } + }, + { + orient: 'bottom', + label: { visible: true }, + type: 'band', + paddingInner: 0.4, + title: { visible: true, text: 'date' } + } + ]; + return { spec }; +}; + +export const waterfallStackLabel: Transformer = (context: Context) => { + //assign axises + const { spec } = context; + spec.stackLabel = { + valueType: 'absolute', + formatMethod: (text: any) => { + return text + '%'; + } + }; + return { spec }; +}; + +export const dualAxisSeries: Transformer = (context: Context) => { + //assign series in dual-axis chart + const { cell, spec } = context; + spec.series = [ + { + type: 'bar', + id: cell.y[0], + data: { + id: spec.data.id + '_bar', + values: spec.data.values + }, + dataIndex: 0, + label: { visible: true }, + xField: cell.x, + yField: cell.y[0], + bar: { + style: { + fill: spec.color[0] + } + } + }, + { + type: 'line', + id: cell.y[cell.y?.length - 1], + dataIndex: 0, + data: { + id: spec.data.id + '_line', + values: spec.data.values + }, + label: { visible: true }, + xField: cell.x, + yField: cell.y[cell.y?.length - 1], + line: { + style: { + stroke: spec.color[1] + } + }, + point: { + style: { + fill: spec.color[1] + } + } + } + ]; + return { spec }; +}; + +export const dualAxisAxes: Transformer = (context: Context) => { + //assign axes in dual-axis chart + const { spec } = context; + spec.axes = [ + { + type: 'band', + orient: 'bottom' + }, + { + type: 'linear', + orient: 'left' + }, + { + type: 'linear', + orient: 'right' + } + ]; + return { spec }; +}; + +export const wordCloudDisplayConf: Transformer = (context: Context) => { + const { spec } = context; + spec.fontSizeRange = [20, 50]; + spec.fontWeightRange = [800, 800]; + //spec.wordCloudConfig = { + // zoomToFit: { + // enlarge: true + // } + //} + return { spec }; +}; + +export const radarField: Transformer = (context: Context) => { + const { cell, spec } = context; + if (cell.x || cell.angle) { + spec.categoryField = cell.x ?? cell.angle; + } + if (cell.y || cell.value) { + spec.valueField = cell.y ?? cell.value; + } + if (cell.color) { + spec.seriesField = cell.color; + } + return { spec }; +}; + +export const radarDisplayConf: Transformer = (context: Context) => { + const { spec } = context; + + spec.area = { + visible: true // show area + }; + return { spec }; +}; + +export const radarAxis: Transformer = (context: Context) => { + const { spec } = context; + + spec.axes = [ + { + orient: 'radius', // radius axis + zIndex: 100, + + domainLine: { + visible: false + }, + label: { + visible: true, + space: 0, + style: { + textAlign: 'center', + stroke: '#fff', + lineWidth: 4 + } + }, + grid: { + smooth: false, + style: { + lineDash: [0] + } + } + }, + { + orient: 'angle', // angle axis + zIndex: 50, + tick: { + visible: false + }, + domainLine: { + visible: false + }, + label: { + space: 20 + }, + grid: { + style: { + lineDash: [0] + } + } + } + ]; + + return { spec }; +}; + +export const sankeyField: Transformer = (context: Context) => { + const { cell, spec } = context; + spec.sourceField = cell.source; + spec.targetField = cell.target; + spec.valueField = cell.value; + spec.categoryField = 'name'; + spec.nodeKey = (datum: any) => datum.name; + + return { spec }; +}; + +export const boxPlotField: Transformer = (context: Context) => { + const { cell, dataset, spec } = context; + const { x, y } = cell; + const data = dataset as { [key: string]: number }[]; + // assign x field + spec.xField = x; + // assign y field + // 1. sort y field according to its value + array(y).sort((a, b) => data[0][a] - data[0][b]); + const yFieldsLen = y.length; + // 2. Map the maximum, minimum, median, and upper and lower quartiles respectively according to numerical value. + spec.minField = y[0]; // Minimum value field: the field with the smallest value. + spec.q1Field = y[Math.min(1, yFieldsLen - 1)]; // Lower quartile field: the field with the second smallest value. + spec.medianField = y[Math.floor((yFieldsLen - 1) / 2)]; // Median: the field with a value in the middle. + spec.q3Field = y[Math.max(0, yFieldsLen - 2)]; // Upper quartile field: the field with the second highest value. + spec.maxField = y[yFieldsLen - 1]; // Maximum value field: the field with the highest value. + return { spec }; +}; + +export const boxPlotStyle: Transformer = (context: Context) => { + const { spec } = context; + spec.boxPlot = { + ...spec.boxPlot, + style: { + boxWidth: 50, + shaftWidth: 30, + shaftShape: 'bar', + lineWidth: 2, + shaftOpacity: 0.3 + } + }; + return { spec }; +}; + +export const sankeyLabel: Transformer = (context: Context) => { + const { spec } = context; + + spec.label = { + visible: true, + style: { + fontSize: 12, + fill: '#000000' + } + }; + return { spec }; +}; + +export const sankeyLink: Transformer = (context: Context) => { + const { spec } = context; + + spec.link = { + style: { + fillOpacity: 0.1 + }, + state: { + hover: { + fillOpacity: 0.4 + }, + blur: { + fill: '#e8e8e8' + } + } + }; + return { spec }; +}; + +export const cartesianBar: Transformer = (context: Context) => { + //assign fields according to cell + const { cell, dataset, spec } = context; + const flattenedXField = Array.isArray(cell.x) ? cell.x : [cell.x]; + if (cell.color && cell.color.length > 0 && cell.color !== cell.x) { + flattenedXField.push(cell.color); + } + spec.xField = flattenedXField; + spec.yField = cell.y; + if (cell.color) { + spec.seriesField = cell.color; + } else { + //没有分配颜色字段,从剩下的字段里选择一个离散字段分配到颜色上 + const dataFields = Object.keys(dataset[0] ?? {}); + const remainedFields = dataFields.filter(f => !spec.xField.includes(f) && spec.yField !== f); + const colorField = remainedFields.find(f => { + const fieldType = detectAxesType(spec.data.values, f); + return fieldType === 'band'; + }); + if (colorField) { + spec.seriesField = colorField; + spec.xField.push(colorField); + } + } + return { spec }; +}; + +export const rankingBarField: Transformer = (context: Context) => { + //折线图根据cell分配字段 + const { cell, spec } = context; + spec.xField = cell.y; + spec.yField = cell.x; + if (cell.color) { + spec.seriesField = cell.color; + } else { + spec.seriesField = spec.yField; + } + spec.direction = 'horizontal'; + return { spec }; +}; + +export const roseField: Transformer = (context: Context) => { + const { cell, spec } = context; + spec.valueField = cell.radius || cell.angle; + if (cell.color) { + spec.categoryField = cell.color; + spec.seriesField = cell.color; + } + spec.outerRadius = 0.8; + spec.innerRadius = 0.2; + + return { spec }; +}; + +export const roseAxis: Transformer = (context: Context) => { + const { spec } = context; + + spec.axes = [ + { + orient: 'angle', + domainLine: { + visible: false + }, + grid: { + visible: false, + alignWithLabel: false + }, + label: { + visible: true + } + }, + { + orient: 'radius', + grid: { + visible: false, + smooth: true + } + } + ]; + return { spec }; +}; + +export const rankingBarAxis: Transformer = (context: Context) => { + const { spec } = context; + + spec.axes = [ + { + animation: true, + orient: 'bottom', + type: 'linear', + visible: true, + title: { + visible: false, + style: { + fill: '#FFFFFF' + } + }, + label: { + style: { + fill: '#FFFFFF' + } + }, + grid: { + visible: true + } + }, + { + animation: true, + id: 'axis-left', + orient: 'left', + tick: { visible: false }, + title: { + visible: false, + style: { + fill: '#FFFFFF' + } + }, + label: { + style: { + fill: '#FFFFFF' + } + }, + type: 'band' + } + ]; + + return { spec }; +}; + +export const axis: Transformer = (context: Context) => { + const { spec } = context; + + spec.axes = [ + { + orient: 'bottom', + type: 'band', + label: { + style: { + fill: '#FFFFFF' + } + }, + title: { + visible: false, + style: { + fill: '#FFFFFF' + } + } + }, + { + orient: 'left', + type: 'linear', + label: { + style: { + fill: '#FFFFFF' + } + }, + title: { + visible: false, + style: { + fill: '#FFFFFF' + } + } + } + ]; + return { spec }; +}; + +export const legend: Transformer = (context: Context) => { + const { cell, spec } = context; + if (!(cell.color || cell.category) && !spec.seriesField && spec.type !== 'common') { + return { spec }; + } + spec.legends = [ + { + orient: 'right', + type: 'discrete', + item: { + visible: true, + background: { + style: { + fillOpacity: 0 + } + }, + label: { + style: { + fill: '#FFFFFF' + } + }, + shape: { + style: { + symbolType: 'rect' + } + } + } + } + ]; + return { spec }; +}; + +export const customMark: Transformer = (context: Context) => { + const { spec } = context; + + spec.customMark = [ + { + type: 'text', + dataId: 'year', + style: { + textBaseline: 'bottom', + fontSize: 130, + textAlign: 'right', + fontFamily: 'PingFang SC', + fontWeight: 600, + text: (datum: { year: any }) => datum.year, + x: () => 700, + y: () => 480 - 50, + fill: 'grey', + fillOpacity: 0.5 + } + } + ]; + return { spec }; +}; + +export const rankingBarLabel: Transformer = (context: Context) => { + const { spec } = context; + + spec.label = { + visible: true, + style: { + fill: '#FFFFFF', + stroke: null + }, + animation: { + duration: spec.animationUpdate.axis.duration, + easing: 'linear' + } + }; + return { spec }; +}; + +export const scatterAxis: Transformer = (context: Context) => { + const { spec } = context; + + const xField = spec.xField; + const yField = spec.yField; + spec.axes = [ + { + orient: 'bottom', + type: detectAxesType(spec.data.values, xField), + label: { + style: { + fill: '#FFFFFF' + } + }, + title: { + visible: false, + style: { + fill: '#FFFFFF' + } + } + }, + { + orient: 'left', + type: detectAxesType(spec.data.values, yField), + label: { + style: { + fill: '#FFFFFF' + } + }, + title: { + visible: false, + style: { + fill: '#FFFFFF' + } + } + } + ]; + return { spec }; +}; + +const oneByOneDelayFunc = (delay: number) => (datum: any) => { + const group = datum['__CHARTSPACE_DEFAULT_DATA_INDEX'] % oneByOneGroupSize; + return group * delay; +}; + +export const animationOneByOne: Transformer = (context: Context) => { + const { spec } = context; + + if (spec.type === 'wordCloud3d') { + return { spec }; + } + const totalTime = context.totalTime ?? DEFAULT_VIDEO_LENGTH_LONG; + const duration = animationDuration; + const dataLength = spec.data.values.length; + const delay = Math.max(totalTime / dataLength - duration, 0); + const finalDuration = delay === 0 ? totalTime / dataLength : duration; + const finalDelay = delay === 0 ? Number.MIN_VALUE : delay; + + spec.animationAppear = { + //word: [ + // { + // channel: { + // fontSize: { + // from: 0, + // }, + // }, + // duration: animationDuration, + // delay: oneByOneDelayFunc(delay), + // }, + //], + oneByOne: finalDelay, + duration: finalDuration + }; + return { spec }; +}; + +export const animationScatter: Transformer = (context: Context) => { + const { spec } = context; + + const totalTime = context.totalTime ?? DEFAULT_VIDEO_LENGTH; + const dataLength = spec.data.values.length; + const groupNum = Math.ceil(dataLength / oneByOneGroupSize); + const delay = totalTime / groupNum; + spec.animationAppear = { + duration: animationDuration, + delay: oneByOneDelayFunc(delay) + }; + return { spec }; +}; + +function onlyUnique(value: any, index: number, array: any) { + return array.indexOf(value) === index; +} + +export const animationCartesianBar: Transformer = (context: Context) => { + const { spec } = context; + + const totalTime = context.totalTime ?? DEFAULT_VIDEO_LENGTH; + const groupKey = Array.isArray(spec.xField) ? spec.xField[0] : spec.xField; + const dataValues = spec.data.values as any[]; + const groupNum = dataValues.map(d => d[groupKey]).filter(onlyUnique).length; + //const delay = totalTime / groupNum - 1000; + spec.animationAppear = { + oneByOne: Number.MIN_VALUE, + duration: totalTime / groupNum + }; + return { spec }; +}; + +export const animationCartisianLine: Transformer = (context: Context) => { + const { spec } = context; + + const totalTime = context.totalTime ?? DEFAULT_VIDEO_LENGTH; + const groupKey = Array.isArray(spec.xField) ? spec.xField[0] : spec.xField; + const dataValues = spec.data.values as any[]; + const groups = dataValues.map(d => d[groupKey]).filter(onlyUnique); + const groupNum = groups.length; + const lineAnimationTotalTime = totalTime > 2000 ? 2000 : totalTime; + const pointDelay = lineAnimationTotalTime / groupNum; + spec.animationAppear = { + line: { + type: 'clipIn', + duration: lineAnimationTotalTime, + easing: 'linear' + }, + point: { + delay: (datum: any) => { + const groupIndex = groups.findIndex(d => d === datum[groupKey]); + return groupIndex * pointDelay; + } + } + }; + + spec.animationNormal = { + point: { + loop: true, + timeSlices: [ + { + effects: { + channel: { + size: { to: 14 } + }, + easing: 'circInOut' + }, + duration: 1000 + }, + { + effects: { + channel: { + size: { to: 10 } + }, + easing: 'circInOut' + }, + duration: 500 + } + ] + } + }; + return { spec }; +}; + +export const animationCartesianPie: Transformer = (context: Context) => { + const { spec } = context; + + const totalTime = context.totalTime ?? DEFAULT_PIE_VIDEO_LENGTH; + const groupKey = context.cell.color; + const dataValues = spec.data.values as any[]; + const groupNum = dataValues.map(d => d[groupKey!]).filter(onlyUnique).length; + //const delay = totalTime / groupNum - 1000; + const loopTime = 100 + groupNum * 100 + 400; + // 看看是否可以500ms走一个循环 + if (groupNum * 500 + loopTime < totalTime) { + // 前面500ms的oneByone + spec.animationAppear = { + oneByOne: Number.MIN_VALUE, + duration: (totalTime - loopTime) / groupNum, + options: { + overall: false + } + }; + // 然后走循环动画 + spec.animationNormal = { + pie: [ + { + startTime: 100, + oneByOne: 100, + timeSlices: [ + { + delay: 0, + effects: { + channel: { + scaleX: { to: 1.2 }, + scaleY: { to: 1.2 } + }, + easing: 'linear' + }, + duration: 200 + }, + { + effects: { + channel: { + scaleX: { to: 1 }, + scaleY: { to: 1 } + }, + + easing: 'linear' + }, + duration: 200 + } + ] + } + ] + }; + } else { + spec.animationAppear = { + oneByOne: Number.MIN_VALUE, + duration: totalTime / groupNum, + options: { + overall: false + } + }; + } + return { spec }; +}; + +export const displayConfBar: Transformer = (context: Context) => { + const { spec } = context; + + spec.bar = { + style: { + cornerRadius: [8, 8, 0, 0] + } + }; + + return { spec }; +}; + +export const displayConfLine: Transformer = (context: Context) => { + const { spec } = context; + + spec.line = { + style: { + curveType: 'monotone', + lineWidth: 6, + lineCap: 'round' + } + }; + + return { spec }; +}; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/types.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/types.ts new file mode 100644 index 00000000..c92a3bcd --- /dev/null +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/getChartSpec/types.ts @@ -0,0 +1,7 @@ +import { GenerateChartAndFieldMapContext, GenerateChartAndFieldMapOutput } from '../generateTypeAndFieldMap/types'; + +export type GetChartSpecContext = GenerateChartAndFieldMapContext & GenerateChartAndFieldMapOutput; +export type Spec = any; +export type GetChartSpecOutput = { + spec: Spec; +}; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/index.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/index.ts index b7ac9aa2..cabbdeff 100644 --- a/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/index.ts +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/index.ts @@ -3,9 +3,9 @@ import { TaskNodeType } from 'src/base/taskNode/types'; import { getVizSchema } from './utils'; import { GetVizSchemaContext, GetVizSchemaOutput } from './types'; -const generateVizSchemaTaskNodeMeta: RuleBasedTaskNodeMeta = { +const GetVizSchemaTaskNodeMeta: RuleBasedTaskNodeMeta = { type: TaskNodeType.RULE_BASED, pipelines: [getVizSchema] }; -export default generateVizSchemaTaskNodeMeta; +export default GetVizSchemaTaskNodeMeta; diff --git a/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/utils.ts b/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/utils.ts index 88d000b0..fbdc552c 100644 --- a/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/utils.ts +++ b/packages/vmind/src/applications/chartGeneration/taskNodes/getVizSchema/utils.ts @@ -1,6 +1,6 @@ import { LOCATION, SimpleFieldInfo, VizSchema } from 'src/typings'; -import { GetVizSchemaContext } from '../../types'; import { Transformer } from 'src/base/tools/transformer'; +import { GetVizSchemaContext, GetVizSchemaOutput } from './types'; /** * Generate a vizSchema from fieldInfo @@ -24,7 +24,7 @@ const getSchemaFromFieldInfo = (fieldInfo: SimpleFieldInfo[]): Partial = context => { +export const getVizSchema: Transformer = context => { const { fieldInfo } = context; const vizSchema = getSchemaFromFieldInfo(fieldInfo) as VizSchema; diff --git a/packages/vmind/src/applications/index.ts b/packages/vmind/src/applications/index.ts new file mode 100644 index 00000000..e41a20bc --- /dev/null +++ b/packages/vmind/src/applications/index.ts @@ -0,0 +1,14 @@ +import chartGenerationMetaByModel from './chartGeneration'; +import dataAggregationMetaByModel from './dataAggregation'; + +export enum ApplicationType { + DataAggregation = 'dataAggregation', + ChartGeneration = 'chartGeneration' +} + +const applicationMetaList = { + [ApplicationType.DataAggregation]: dataAggregationMetaByModel, + [ApplicationType.ChartGeneration]: chartGenerationMetaByModel +}; + +export default applicationMetaList; diff --git a/packages/vmind/src/applications/types.ts b/packages/vmind/src/applications/types.ts index f2c3c579..93caa78d 100644 --- a/packages/vmind/src/applications/types.ts +++ b/packages/vmind/src/applications/types.ts @@ -21,7 +21,7 @@ export type ChartGenerationContext = { userInput: string; fieldInfo: SimpleFieldInfo[]; dataset: VMindDataset; -}; +} & { totalTime?: number; colors?: string[] }; export type ChartGenerationOutput = { chartType: ChartType; diff --git a/packages/vmind/src/base/metaTypes.ts b/packages/vmind/src/base/metaTypes.ts index 3d8979c3..80625467 100644 --- a/packages/vmind/src/base/metaTypes.ts +++ b/packages/vmind/src/base/metaTypes.ts @@ -13,7 +13,7 @@ export type LLMBasedTaskNodeMeta = { type: TaskNodeType.LLM_BASED; modelType: ModelType; parser: Parser; - patcher: Patcher; + patcher: Patcher>; prompt: Prompt; requester: Requester; }; @@ -23,7 +23,7 @@ export type LLMBasedTaskNodeMeta = { */ export type RuleBasedTaskNodeMeta = { type: TaskNodeType.RULE_BASED; - pipelines: Transformer[]; + pipelines: Transformer[] | ((context: Context) => Transformer[]); }; export type TaskNodeMeta = diff --git a/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts b/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts index 241eca39..7adcabb7 100644 --- a/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts +++ b/packages/vmind/src/base/taskNode/ruleBasedTaskNode.ts @@ -3,13 +3,14 @@ import { BaseTaskNode } from './baseTaskNode'; import { TaskNodeType } from './types'; import { TaskError } from 'src/typings'; import { getObjectProperties } from 'src/common/utils/utils'; +import { isFunction } from 'lodash'; /** * rule-based taskNode, which consists of a series of Pipelines * It completes the transformation from Input to a specific data structure (DSL) */ export class RuleBasedTaskNode extends BaseTaskNode { - pipelines: Transformer[]; + pipelines: Transformer[] | ((context: Context) => Transformer[]); constructor(name: string, pipelines: Transformer[]) { super(name); this.type = TaskNodeType.RULE_BASED; @@ -27,14 +28,23 @@ export class RuleBasedTaskNode extends BaseTaskNode) => { - const res = transformer(pre); - return { ...pre, ...res }; - }, context); + const result: Result = (pipelines as Transformer[]).reduce( + (pre: any, transformer: Transformer) => { + const res = transformer(pre); + return { ...pre, ...res }; + }, + context + ); return result; } catch (e: any) { console.error(`${this.name} error!`); + //throw e return { ...getObjectProperties(e), error: true diff --git a/packages/vmind/src/core/VMind.ts b/packages/vmind/src/core/VMind.ts index 432c4095..3c00594f 100644 --- a/packages/vmind/src/core/VMind.ts +++ b/packages/vmind/src/core/VMind.ts @@ -1,10 +1,20 @@ import { _chatToVideoWasm } from '../chart-to-video'; -import { ILLMOptions, TimeType, Model, SimpleFieldInfo, DataItem, OuterPackages, ModelType } from '../typings'; +import { + ILLMOptions, + TimeType, + Model, + SimpleFieldInfo, + DataItem, + OuterPackages, + ModelType, + VMindDataset +} from '../typings'; import { getFieldInfoFromDataset, parseCSVData as parseCSVDataWithRule } from '../common/dataProcess'; -import applicationMetaList, { ApplicationType } from './applications'; import { VMindApplicationMap } from './types'; import { BaseApplication } from 'src/base/application'; -import { DataAggregationContext } from 'src/applications/types'; +import { ChartGenerationContext, DataAggregationContext } from 'src/applications/types'; +import applicationMetaList, { ApplicationType } from 'src/applications'; +import { calculateTokenUsage } from 'src/common/utils/utils'; class VMind { private _FPS = 30; @@ -85,6 +95,56 @@ class VMind { }; return await this.runApplication(ApplicationType.DataAggregation, modelType, context); } + async generateChart( + userPrompt: string, //user's intent of visualization, usually aspect in data that they want to visualize + fieldInfo: SimpleFieldInfo[], + dataset: VMindDataset, + enableDataQuery = true, + colorPalette?: string[], + animationDuration?: number + ) { + const modelType = this.getModelType(); + let finalDataset = dataset; + let finalFieldInfo = fieldInfo; + let queryDatasetUsage; + try { + if (enableDataQuery) { + //run data aggregation first + const dataAggregationContext: DataAggregationContext = { + userInput: userPrompt, + fieldInfo, + sourceDataset: dataset, + llmOptions: this._options + }; + const { + dataset: queryDataset, + fieldInfo: fieldInfoNew, + usage, + error + } = await this.runApplication(ApplicationType.DataAggregation, modelType, dataAggregationContext); + if (!error) { + finalDataset = queryDataset; + finalFieldInfo = fieldInfoNew; + queryDatasetUsage = usage; + } + } + } catch (err) { + console.error('data query error!'); + console.error(err); + } + const context: ChartGenerationContext = { + userInput: userPrompt, + fieldInfo: finalFieldInfo, + dataset: finalDataset, + llmOptions: this._options, + colors: colorPalette, + totalTime: animationDuration + }; + + const chartGenerationResult = await this.runApplication(ApplicationType.ChartGeneration, modelType, context); + const usage = calculateTokenUsage([queryDatasetUsage, chartGenerationResult.usage]); + return { ...chartGenerationResult, usage }; + } async exportVideo(spec: any, time: TimeType, outerPackages: OuterPackages, mode?: 'node' | 'desktop-browser') { const { VChart, FFmpeg, fetchFile, ManualTicker } = outerPackages; diff --git a/packages/vmind/src/core/applications.ts b/packages/vmind/src/core/applications.ts deleted file mode 100644 index 63783456..00000000 --- a/packages/vmind/src/core/applications.ts +++ /dev/null @@ -1,11 +0,0 @@ -import dataAggregationMetaByModel from 'src/applications/dataAggregation'; - -export enum ApplicationType { - DataAggregation = 'dataAggregation' -} - -const applicationMetaList = { - [ApplicationType.DataAggregation]: dataAggregationMetaByModel -}; - -export default applicationMetaList; From 9c386e506428b3f54f631b8febe30ea5c91c5b42 Mon Sep 17 00:00:00 2001 From: da730 Date: Mon, 15 Apr 2024 21:32:03 +0800 Subject: [PATCH 28/62] feat: refact gpt chart generation --- .../__tests__/browser/src/pages/DataInput.tsx | 12 ++++------ packages/vmind/src/applications/types.ts | 6 ++++- packages/vmind/src/core/VMind.ts | 22 ++++++++++++++----- 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/packages/vmind/__tests__/browser/src/pages/DataInput.tsx b/packages/vmind/__tests__/browser/src/pages/DataInput.tsx index b8f6e2b7..05e9afe1 100644 --- a/packages/vmind/__tests__/browser/src/pages/DataInput.tsx +++ b/packages/vmind/__tests__/browser/src/pages/DataInput.tsx @@ -95,8 +95,8 @@ export function DataInput(props: IPropsType) { const defaultDataKey = Object.keys(demoDataList)[3]; const [describe, setDescribe] = useState(demoDataList[defaultDataKey].input); const [csv, setCsv] = useState(demoDataList[defaultDataKey].csv); - const [spec, setSpec] = useState(''); - const [time, setTime] = useState(1000); + //const [spec, setSpec] = useState(''); + //const [time, setTime] = useState(1000); const [model, setModel] = useState(Model.GPT3_5); const [cache, setCache] = useState(true); const [showThoughts, setShowThoughts] = useState(false); @@ -138,6 +138,7 @@ export function DataInput(props: IPropsType) { } else { const { spec, time } = chartGenerationRes; const costTime = endTime - startTime; + console.log(spec); props.onSpecGenerate(spec, time as any, costTime); } @@ -289,12 +290,7 @@ export function DataInput(props: IPropsType) {