forked from AIObjectives/talk-to-the-city-reports
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtext_to_csv_v0.test.ts
127 lines (117 loc) · 3.39 KB
/
text_to_csv_v0.test.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import { describe, it, expect, beforeEach } from 'vitest';
import TextToCSV, { text_to_csv_node_data } from '$lib/compute/text_to_csv_v0';
import deepCopy from 'deep-copy';
/**
 * Test suite for the TextToCSV compute node.
 *
 * Each test builds a fresh node from a deep copy of `text_to_csv_node_data`
 * and checks the rows returned by `compute` for a given text input.
 */
describe('TextToCSV class', () => {
  // Shared timeout (ms) applied to the setup hook and to every test.
  const TEST_TIMEOUT_MS = 60000;

  const SINGLE_COMMENT =
    'This is a test comment that should be split into chunks based on the specified number of tokens.';
  const COMMENT_LIST = [
    'Another test comment that will be split.',
    'And yet another one, for good measure.'
  ];

  let csvNode;
  let fixtures;

  /**
   * Invokes `compute` on the current node with the given input.
   * All arguments after the input are the same fixed values in every test;
   * their meaning is defined by `TextToCSV.compute`, which is not visible here.
   */
  const runNode = (input: Record<string, unknown>) =>
    csvNode.compute(input, 'run', console.log, console.error, console.log, 'test_slug', null);

  /** Builds the row the tests expect for one comment body and its id. */
  const expectedRow = (body: string, id: string) => ({
    'comment-body': body,
    'comment-id': id,
    interview: 'Alice',
    video: 'https://www.youtube.com/watch?v=1qKz9W3bKbE',
    timestamp: '00:00:00'
  });

  beforeEach(() => {
    // Deep-copy the node data so changes made by one test (e.g. numTokens)
    // cannot carry over into the next.
    csvNode = new TextToCSV(deepCopy(text_to_csv_node_data));
    fixtures = {
      text1: SINGLE_COMMENT,
      text2: [...COMMENT_LIST]
    };
  }, TEST_TIMEOUT_MS);

  it(
    'should convert a single text input to CSV format',
    async () => {
      const rows = await runNode({ text1: fixtures.text1 });

      expect(rows).toEqual([expectedRow(SINGLE_COMMENT, '0')]);
    },
    TEST_TIMEOUT_MS
  );

  it(
    'should convert multiple text inputs to CSV format',
    async () => {
      const rows = await runNode(fixtures);

      // One row per comment, with ids counting up from '0' in input order.
      const wanted = [SINGLE_COMMENT, ...COMMENT_LIST].map((body, index) =>
        expectedRow(body, String(index))
      );
      expect(rows).toEqual(wanted);
    },
    TEST_TIMEOUT_MS
  );

  it(
    'should handle empty text input',
    async () => {
      const rows = await runNode({ text1: '' });

      expect(rows).toEqual([]);
    },
    TEST_TIMEOUT_MS
  );

  it(
    'should split text into chunks if it exceeds the number of tokens',
    async () => {
      // Set a small number of tokens to force splitting
      csvNode.data.numTokens = '10';
      const longText =
        'This is a very long test comment that will definitely be split into multiple chunks because it exceeds the token limit.';

      const rows = await runNode({ text1: longText });

      expect(rows.length).toBeGreaterThan(1);
      expect(rows[0]['comment-body']).not.toEqual(longText);

      // Concatenating the chunk bodies in order must give back the original text.
      let rebuilt = '';
      for (const row of rows) {
        rebuilt = rebuilt + row['comment-body'];
      }
      expect(rebuilt).toEqual(longText);
    },
    TEST_TIMEOUT_MS
  );
});