diff --git a/Gruntfile.js b/Gruntfile.js index 328150ff..954def5e 100644 --- a/Gruntfile.js +++ b/Gruntfile.js @@ -16,7 +16,7 @@ module.exports = function (grunt) { src: 'test/**/*.test.js', options: { timeout: 3000, // not fully supported yet - reporter: 'dotmatrix' + reporter: 'spec' } } }, diff --git a/README.md b/README.md index 6b24e787..88af5da8 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,9 @@ [![build status](https://secure.travis-ci.org/C2FO/fast-csv.png)](http://travis-ci.org/C2FO/fast-csv) # Fast-csv -This is a library is aimed at providing fast CSV parsing. It accomplishes this by not handling some of the more complex -edge cases such as multi line rows. However it does support escaped values, embedded commas, double and single quotes. +This is a library that provides CSV parsing and formatting. + +**NOTE** As of v0.2.0 `fast-csv` supports multi-line values. ## Installation @@ -21,6 +22,9 @@ All methods accept the following `options` * `ignoreEmpty=false`: If you wish to ignore empty rows. * `delimiter=','`: If your data uses an alternate delimiter such as `;` or `\t`. * **NOTE** When specifying an alternate `delimiter` you may only pass in a single character delimeter +* `quote='"'`: The character to use to escape values that contain a delimeter. +* `escape='"'`: The character to use when escaping a value that is `quoted` and contains a `quote` character. + * `i.e`: 'First,"Name"' => '"First,""name"""' * The following are options for parsing only. * `trim=false`: If you want to trim all values parsed set to true. * `rtrim=false`: If you want to right trim all values parsed set to true. @@ -211,12 +215,8 @@ csv `fast-csv` also allows to you to create create a `CSV` from data. -In addition to the options for parsing you can specify the following additional options. - -* `quote='"'`: The character to use to escape values that contain a delimeter. 
-* `escape='"'`: The character to use when escaping a value that is `quoted` and constains a `quote` character. - * `i.e`: 'First,"Name"' => '"First,""name"""' - +Formatting accepts the same options as parsing. +* **Writing Data** Each of the following methods accept an array of values to be written, however each value must be an `array` of `array`s or `object`s. diff --git a/benchmark/assets/100000.csv b/benchmark/assets/100000.csv index 98e7dd90..3b48c178 100644 --- a/benchmark/assets/100000.csv +++ b/benchmark/assets/100000.csv @@ -99998,4 +99998,4 @@ First1,Last1,email1@email.com, ,,, "First'1",Last1,email1@email.com,"1 Street St, State ST, 88888" "First""1",Last1,email1@email.com,"1 Street St, State ST, 88888" -"First""1",Last1,email1@email.com,"1 Street St, State ST, 88888" +"First""1",Last1,email1@email.com,"1 Street St, State ST, 88888" \ No newline at end of file diff --git a/benchmark/assets/1000000.csv b/benchmark/assets/1000000.csv index ed38bf67..12571e72 100644 --- a/benchmark/assets/1000000.csv +++ b/benchmark/assets/1000000.csv @@ -999998,4 +999998,4 @@ First1,Last1,email1@email.com, First1,Last1,email1@email.com,"1 Street St, State ST, 88888" First1,Last1,email1@email.com,"1 ""Street"" St, State ST, 88888" First1,Last1,email1@email.com, -,,, +,,, \ No newline at end of file diff --git a/benchmark/assets/20000.csv b/benchmark/assets/20000.csv index 0ae70a96..622034b3 100644 --- a/benchmark/assets/20000.csv +++ b/benchmark/assets/20000.csv @@ -19998,4 +19998,4 @@ First1,Last1,email1@email.com, "First'1",Last1,email1@email.com,"1 Street St, State ST, 88888" "First""1",Last1,email1@email.com,"1 Street St, State ST, 88888" "First""1",Last1,email1@email.com,"1 Street St, State ST, 88888" -First1,Last1,email1@email.com,"1 Street St, State ST, 88888" +First1,Last1,email1@email.com,"1 Street St, State ST, 88888" \ No newline at end of file diff --git a/benchmark/assets/50000.csv b/benchmark/assets/50000.csv index 41290c88..7aabff69 100644 --- 
a/benchmark/assets/50000.csv +++ b/benchmark/assets/50000.csv @@ -49998,4 +49998,4 @@ First1,Last1,email1@email.com, ,,, "First'1",Last1,email1@email.com,"1 Street St, State ST, 88888" "First""1",Last1,email1@email.com,"1 Street St, State ST, 88888" -"First""1",Last1,email1@email.com,"1 Street St, State ST, 88888" +"First""1",Last1,email1@email.com,"1 Street St, State ST, 88888" \ No newline at end of file diff --git a/benchmark/benchmark.js b/benchmark/benchmark.js index 08d2b96c..b43c423b 100644 --- a/benchmark/benchmark.js +++ b/benchmark/benchmark.js @@ -1,7 +1,7 @@ var fastCsv = require("../lib"), csv = require("csv"), path = require("path"), - COUNT = 20000, + COUNT = 100000, TEST_FILE = path.resolve(__dirname, "./assets/" + COUNT + ".csv"); @@ -23,7 +23,7 @@ function benchmarkFastCsv(done) { ret.address = data.address; return ret; }) - .on("record", function (record) { + .on("record", function () { count++; }) .on("end", function () { @@ -48,7 +48,7 @@ function benchmarkCsv(done) { ["first_name", "last_name", "email_address"].forEach(function (prop, i) { ret[camelize(prop)] = data[i]; }); - ret.address = data[4]; + ret.address = data[3]; return ret; }) .on('record', function () { @@ -67,20 +67,25 @@ function benchmarkCsv(done) { } function benchmark(title, m, done) { - var start = new Date(); + var start = new Date(), runStart = start; m(function (err) { if (err) { done(err); } else { + console.log("%s: RUN 1 %dms", title, (new Date() - runStart)); + runStart = new Date(); m(function (err) { if (err) { done(err); } else { + console.log("%s: RUN 2 %dms", title, (new Date() - runStart)); + runStart = new Date(); m(function (err) { if (err) { done(err); } else { - console.log("%s: %dms", title, (new Date() - start) / 3); + console.log("%s: RUN 3 %dms", title, (new Date() - runStart)); + console.log("%s: 3xAVG %dms", title, (new Date() - start) / 3); done(); } diff --git a/docs/index.html b/docs/index.html index 84061134..aff84fab 100644 --- a/docs/index.html 
+++ b/docs/index.html @@ -171,8 +171,8 @@
This is a library is aimed at providing fast CSV parsing. It accomplishes this by not handling some of the more complex -edge cases such as multi line rows. However it does support escaped values, embedded commas, double and single quotes.
+This is a library that provides CSV parsing and formatting.
+NOTE As of v0.2.0 fast-csv
supports multi-line values.
npm install fast-csv
delimiter
you may only pass in a single character delimiter
quote='"'
: The character to use to escape values that contain a delimiter.
escape='"'
: The character to use when escaping a value that is quoted
and contains a quote
character.
i.e
: 'First,"Name"' => '"First,""Name"""'
trim=false
: If you want to trim all values parsed set to true.
rtrim=false
: If you want to right trim all values parsed set to true.
fast-csv
also allows you to create a CSV
from data.
In addition to the options for parsing you can specify the following additional options.
-quote='"'
: The character to use to escape values that contain a delimeter.escape='"'
: The character to use when escaping a value that is quoted
and constains a quote
character.i.e
: 'First,"Name"' => '"First,""name"""'Writing Data
+Formatting accepts the same options as parsing. + +*Writing Data
Each of the following methods accepts an array of values to be written; however, each value must be an array
of array
s or object
s.
write(arr[, options])
Create a readable stream to read data from.
diff --git a/lib/extended.js b/lib/extended.js index 9f1d8727..071fbfe0 100644 --- a/lib/extended.js +++ b/lib/extended.js @@ -1 +1,5 @@ -module.exports = require("extended")().register(require("is-extended")).register(require("object-extended")).register(require("string-extended")); \ No newline at end of file +module.exports = require("extended")() + .register(require("is-extended")) + .register(require("object-extended")) + .register(require("string-extended")) + .register("LINE_BREAK", (process.platform === 'win32' ? '\r\n' : '\n')); \ No newline at end of file diff --git a/lib/formatter.js b/lib/formatter.js index ee4ded6a..08c6c709 100644 --- a/lib/formatter.js +++ b/lib/formatter.js @@ -3,7 +3,7 @@ var fs = require("fs"), isUndefinedOrNull = extended.isUndefinedOrNull, hash = extended.hash, stream = require("stream"), - LINE_BREAK = (process.platform === 'win32' ? '\r\n' : '\n'); + LINE_BREAK = extended.LINE_BREAK; function createFormatter(options) { options = options || {}; diff --git a/lib/index.js b/lib/index.js index 2af9936e..c7f71a86 100644 --- a/lib/index.js +++ b/lib/index.js @@ -5,8 +5,9 @@ * [![build status](https://secure.travis-ci.org/C2FO/fast-csv.png)](http://travis-ci.org/C2FO/fast-csv) * # Fast-csv * - * This is a library is aimed at providing fast CSV parsing. It accomplishes this by not handling some of the more complex - * edge cases such as multi line rows. However it does support escaped values, embedded commas, double and single quotes. + * This is a library that provides CSV parsing and formatting. + * + * **NOTE** As of v0.2.0 `fast-csv` supports multi-line values. * * ## Installation * @@ -22,6 +23,9 @@ * * `ignoreEmpty=false`: If you wish to ignore empty rows. * * `delimiter=','`: If your data uses an alternate delimiter such as `;` or `\t`. * * **NOTE** When specifying an alternate `delimiter` you may only pass in a single character delimeter + * * `quote='"'`: The character to use to escape values that contain a delimeter. 
+ * * `escape='"'`: The character to use when escaping a value that is `quoted` and contains a `quote` character. + * * `i.e`: 'First,"Name"' => '"First,""name"""' * * The following are options for parsing only. * * `trim=false`: If you want to trim all values parsed set to true. * * `rtrim=false`: If you want to right trim all values parsed set to true. @@ -212,12 +216,8 @@ * * `fast-csv` also allows to you to create create a `CSV` from data. * - * In addition to the options for parsing you can specify the following additional options. - * - * * `quote='"'`: The character to use to escape values that contain a delimeter. - * * `escape='"'`: The character to use when escaping a value that is `quoted` and constains a `quote` character. - * * `i.e`: 'First,"Name"' => '"First,""name"""' - * + * Formatting accepts the same options as parsing. + ** * **Writing Data** * * Each of the following methods accept an array of values to be written, however each value must be an `array` of `array`s or `object`s. 
diff --git a/lib/parser.js b/lib/parser.js index feb3f524..a61e3e9d 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -1,9 +1,9 @@ var extended = require("./extended"), + isUndefinedOrNull = extended.isUndefinedOrNull, trim = extended.trim, trimLeft = extended.trimLeft, trimRight = extended.trimRight, - SINGLE_QUOTE = "'", - DOUBLE_QUOTE = '"'; + LINE_BREAK = extended.LINE_BREAK; function createParser(options) { options = options || {}; @@ -11,10 +11,11 @@ function createParser(options) { doLtrim = options.ltrim || false, doRtrim = options.rtrim || false, doTrim = options.trim || false, + ESCAPE = options.quote || '"', VALUE_REGEXP = new RegExp("([^" + delimiter + "'\"\\s\\\\]*(?:\\s+[^" + delimiter + "'\"\\s\\\\]+)*)"), - SEARCH_REGEXP = new RegExp("[^\\\\]" + delimiter), - ESCAPE_CHAR = "\\", - WHITE_SPACE = /\s/; + SEARCH_REGEXP = new RegExp("(?:\\n|" + delimiter + ")"), + ESCAPE_CHAR = options.escape || '"', + NEXT_TOKEN_REGEXP = new RegExp("([^\\s]|\\\n|" + delimiter + ")"); function formatItem(item) { if (doTrim) { @@ -27,105 +28,132 @@ function createParser(options) { return item; } - function getTokensBetween(str, start, items, cursor) { + function parseEscapedItem(str, items, cursor, hasMoreData) { var depth = 0, ret = []; - str = Object(str); - var startPushing = false, token, i = 0; - if (str.length) { - while ((token = str.charAt(cursor)) !== null) { - if (token === start) { + var startPushing = false, token, i = 0, l = str.length, escapeIsEscape = ESCAPE_CHAR === ESCAPE; + if (l) { + while (cursor < l && (token = str.charAt(cursor))) { + if (token === ESCAPE) { if (!startPushing) { depth++; startPushing = true; - } else if (str.charAt(cursor + 1) === start) { + } else if (escapeIsEscape && str.charAt(cursor + 1) === ESCAPE) { cursor++; - i = ret.push(token) - 1; - } else if (ret[i] === ESCAPE_CHAR) { - ret.pop(); - i = ret.push(token) - 1; + ret[i++] = token; + } else if (!escapeIsEscape && ret[i - 1] === ESCAPE_CHAR) { + ret[i - 1] = token; } else { - 
depth--; - } - if (depth === 0) { - break; + if (!(--depth)) { + ++cursor; + break; + } } } else { - i = ret.push(token) - 1; + ret[i++] = token; } ++cursor; } } - if (++cursor < str.length && getNextToken(str, cursor).token.search(delimiter) !== 0) { - throw new Error("Invalid row " + str); + ret = ret.join(""); + var next = getNextToken(str, cursor), + nextToken = next.token; + if (nextToken && nextToken.search(delimiter) === 0) { + if (hasMoreData && (next.cursor + 1) >= l) { + cursor = null; + } else { + cursor++; + } + } else if (depth && !nextToken) { + if (hasMoreData) { + cursor = null; + } else { + throw new Error("Parse Error: expected: '" + ESCAPE + "' got: '" + nextToken + "'. at '" + str.substr(cursor).replace(/\n/g, "\\n" + "'")); + } + } else if ((!depth && nextToken && nextToken.search(SEARCH_REGEXP) === -1)) { + throw new Error("Parse Error: expected: '" + ESCAPE + "' got: '" + nextToken + "'. at '" + str.substr(cursor, 10).replace(/\n/g, "\\n" + "'")); + } else if (hasMoreData && (!nextToken || nextToken.search(LINE_BREAK) === -1)) { + cursor = null; + } + if (cursor !== null) { + items.push(formatItem(ret)); } - items.push(formatItem(ret.join(""))); - return ++cursor; + return cursor; } - function findNextToken(line, items, cursor) { + function parseItem(line, items, cursor, hasMoreData) { var searchStr = line.substr(cursor), nextIndex = searchStr.search(SEARCH_REGEXP); if (nextIndex === -1) { - if (!searchStr.match(VALUE_REGEXP)) { - throw new Error("Invalid row " + searchStr); + if (!VALUE_REGEXP.test(searchStr)) { + throw new Error("Parse Error: delimiter '" + delimiter + "' not found at '" + searchStr.replace(/\n/g, "\\n" + "'")); } else { - nextIndex = searchStr.length - 1; + nextIndex = searchStr.length; } } - items.push(formatItem(searchStr.substr(0, nextIndex + 1))); - return cursor + (nextIndex + 2); - } - - - function parseSingleQuoteItem(line, items, cursor) { - return getTokensBetween(line, SINGLE_QUOTE, items, cursor); - } - - 
function parseDoubleQuoteItem(line, items, cursor) { - return getTokensBetween(line, DOUBLE_QUOTE, items, cursor); - } + var nextChar = searchStr.charAt(nextIndex); + if (nextChar.search(delimiter) !== -1) { + if (hasMoreData && (cursor + (nextIndex + 1) >= line.length)) { + cursor = null; + } else { + items.push(formatItem(searchStr.substr(0, nextIndex))); + cursor += nextIndex + 1; + } + } else if (nextChar.search(LINE_BREAK) !== -1) { + items.push(formatItem(searchStr.substr(0, nextIndex))); + cursor += nextIndex; + } else if (!hasMoreData) { + items.push(formatItem(searchStr.substr(0, nextIndex))); + cursor += nextIndex + 1; + } else { + cursor = null; + } - function parseItem(line, items, cursor) { - return findNextToken(line, items, cursor); + return cursor; } function getNextToken(line, cursor) { - var l = line.length, ret, token; - do { - token = line[cursor]; - if (token === delimiter || !WHITE_SPACE.test(token)) { - ret = token; - } else { - token = null; - } - - } while (!token && cursor++ < l); - if (!token) { - throw new Error("Invalid row " + line); + var token, nextIndex; + if ((nextIndex = line.substr(cursor).search(NEXT_TOKEN_REGEXP)) !== -1) { + token = line[cursor += nextIndex]; } return {token: token, cursor: cursor}; } - return function parseLine(line) { - var i = 0, l = line.length, items = [], token, nextToken; + return function parseLine(line, hasMoreData) { + var i = 0, l = line.length, rows = [], items = [], token, nextToken, cursor, lastLineI = 0; while (i < l) { nextToken = getNextToken(line, i); token = nextToken.token; - if (token === delimiter) { - items.push(""); - i++; - } else if (token === SINGLE_QUOTE) { - i = parseSingleQuoteItem(line, items, nextToken.cursor); - } else if (token === DOUBLE_QUOTE) { - i = parseDoubleQuoteItem(line, items, nextToken.cursor); + if (isUndefinedOrNull(token)) { + i = lastLineI; + break; + } else if (token === LINE_BREAK) { + i = nextToken.cursor + 1; + if (i < l) { + rows.push(items); + items = []; 
+ lastLineI = i; + } else { + break; + } } else { - i = parseItem(line, items, i); + if (token === ESCAPE) { + cursor = parseEscapedItem(line, items, nextToken.cursor, hasMoreData); + } else { + cursor = parseItem(line, items, i, hasMoreData); + } + if (cursor === null) { + i = lastLineI; + break; + } else { + i = cursor; + } } } - return items; + cursor !== null && rows.push(items); + return {line: line.substr(i), rows: rows}; }; } - module.exports = createParser; diff --git a/lib/parser_stream.js b/lib/parser_stream.js index 0d758599..a903fa12 100644 --- a/lib/parser_stream.js +++ b/lib/parser_stream.js @@ -59,77 +59,78 @@ extended(ParserStream).extend({ __pausedDone: null, - __parseLine: function __parseLineData(data, index, ignore) { + __handleLine: function __parseLineData(line, index, ignore) { var ignoreEmpty = this._ignoreEmpty; - if (extended.isBoolean(ignoreEmpty) && ignoreEmpty && EMPTY.test(data)) { + if (extended.isBoolean(ignoreEmpty) && ignoreEmpty && EMPTY.test(line.join(""))) { return null; } - var a; - try { - a = this.parser(data); - if (!ignore) { - a = this.__transform(a, index); - if (this.__validate(a, index)) { - return a; - } else { - this.emit("data-invalid", a, index); - } + if (!ignore) { + line = this.__transform(line, index); + if (this.__validate(line, index)) { + return line; } else { - return a; + this.emit("data-invalid", line, index); } - } catch (e) { - this.emit("parse-error", e); - return null; + } else { + return line; } }, - _parse: function _parseLine(data) { - var row, parseLine = this.__parseLine.bind(this), - emitRow = this.emit.bind(this, "record"), + _parse: function _parseLine(data, hasMoreData) { + var row, emitData = this._emitData, - count = 0; - if (!this._parsedHeaders) { - var headers = this._headers; - if (extended.isBoolean(headers) && headers) { - headers = parseLine(data.shift(), 0, true); - } - if (extended.isArray(headers)) { - var headersLength = headers.length, - orig = this.__transform.bind(this); - 
this.__transform = function (data, index) { - var ret = {}, i = -1, val; - while (++i < headersLength) { - val = data[i]; - ret[headers[i]] = isUndefined(val) ? '' : val; - } - return orig(ret, index); - }; + count = 0, ret, rows, self = this; + try { + data = this.parser(data, hasMoreData); + ret = data.line; + rows = data.rows; + if (!this._parsedHeaders) { + var headers = this._headers; + if (extended.isBoolean(headers) && headers) { + headers = this.__handleLine(rows.shift(), 0, true); + } + if (extended.isArray(headers)) { + var headersLength = headers.length, + orig = this.__transform.bind(this); + this.__transform = function (data, index) { + var ret = {}, i = -1, val; + if (data.length > headersLength) { + self.emit("error", new Error("Unexpected Error: column header mismatch expected: " + headersLength + " columns got: " + data.length)); + } + while (++i < headersLength) { + val = data[i]; + ret[headers[i]] = isUndefined(val) ? '' : val; + } + return orig(ret, index); + }; + } + this._parsedHeaders = true; } - this._parsedHeaders = true; - } - for (var i = 0, l = data.length; i < l; i++) { - row = data[i]; - if (row) { - var dataRow = parseLine(row, count); - if (dataRow) { - emitRow(dataRow, (count = this._rowCount++)); - if (emitData) { - this.push(JSON.stringify(dataRow)); + for (var i = 0, l = rows.length; i < l; i++) { + row = rows[i]; + if (row) { + var dataRow = this.__handleLine(row, count); + if (dataRow) { + this.emit("record", dataRow, (count = this._rowCount++)); + if (emitData) { + this.push(JSON.stringify(dataRow)); + } } } } + } catch (e) { + this.emit("error", e); } + return ret; }, _transform: function (data, encoding, done) { var lines = this.lines; - var lineData = (lines + data).split(LINE_SPLIT); + var lineData = (lines + data); if (lineData.length > 1) { - this.lines = lineData.pop(); - this._parse(lineData); - } else { - this.lines += data; + lineData = this._parse(lineData, true); } + this.lines = lineData; if (!this.paused) { 
done(); } else { @@ -138,7 +139,9 @@ extended(ParserStream).extend({ }, _flush: function (callback) { - this._parse(this.lines.split(LINE_SPLIT)); + if (this.lines) { + this._parse(this.lines, false); + } this.emit("end", this._rowCount); callback(); }, diff --git a/package.json b/package.json index 7d0bc16a..ee5ca21a 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "fast-csv", - "version": "0.1.2", - "description": "CSV parser for node.js", + "version": "0.2.0", + "description": "CSV parser and writer", "main": "index.js", "scripts": { "test": "grunt" @@ -13,7 +13,10 @@ "keywords": [ "csv", "parser", - "fast" + "fast", + "writer", + "csv writer", + "CSV" ], "homepage": "http://c2fo.github.com/fast-csv/index.html", "author": "Doug Martin", diff --git a/test/assets/test10.csv b/test/assets/test10.csv index 37dd130a..51028da3 100644 --- a/test/assets/test10.csv +++ b/test/assets/test10.csv @@ -1,10 +1,10 @@ first_name,last_name,email_address,address -'First"1',Last1,email1@email.com,"1 Street St, State ST, 88888" -'First"2',Last2,email2@email.com,"2 Street St, State ST, 88888" -'First"3',Last3,email3@email.com,"3 Street St, State ST, 88888" -'First"4',Last4,email4@email.com,"4 Street St, State ST, 88888" -'First"5',Last5,email5@email.com,"5 Street St, State ST, 88888" -'First"6',Last6,email6@email.com,"6 Street St, State ST, 88888" -'First"7',Last7,email7@email.com,"7 Street St, State ST, 88888" -'First"8',Last8,email8@email.com,"8 Street St, State ST, 88888" -'First"9',Last9,email9@email.com,"9 Street St, State ST, 88888" \ No newline at end of file +'First"1',Last1,email1@email.com,'1 Street St, State ST, 88888' +'First"2',Last2,email2@email.com,'2 Street St, State ST, 88888' +'First"3',Last3,email3@email.com,'3 Street St, State ST, 88888' +'First"4',Last4,email4@email.com,'4 Street St, State ST, 88888' +'First"5',Last5,email5@email.com,'5 Street St, State ST, 88888' +'First"6',Last6,email6@email.com,'6 Street St, State ST, 88888' 
+'First"7',Last7,email7@email.com,'7 Street St, State ST, 88888' +'First"8',Last8,email8@email.com,'8 Street St, State ST, 88888' +'First"9',Last9,email9@email.com,'9 Street St, State ST, 88888' \ No newline at end of file diff --git a/test/assets/test12.csv b/test/assets/test12.csv index f96d2622..f393bfad 100644 --- a/test/assets/test12.csv +++ b/test/assets/test12.csv @@ -1,10 +1,10 @@ first_name,last_name,email_address,address -'First\'1',Last1,email1@email.com,"1 Street St, State ST, 88888" -'First\'2',Last2,email2@email.com,"2 Street St, State ST, 88888" -'First\'3',Last3,email3@email.com,"3 Street St, State ST, 88888" -'First\'4',Last4,email4@email.com,"4 Street St, State ST, 88888" -'First\'5',Last5,email5@email.com,"5 Street St, State ST, 88888" -'First\'6',Last6,email6@email.com,"6 Street St, State ST, 88888" -'First\'7',Last7,email7@email.com,"7 Street St, State ST, 88888" -'First\'8',Last8,email8@email.com,"8 Street St, State ST, 88888" -'First\'9',Last9,email9@email.com,"9 Street St, State ST, 88888" \ No newline at end of file +'First\'1',Last1,email1@email.com,'1 Street St, State ST, 88888' +'First\'2',Last2,email2@email.com,'2 Street St, State ST, 88888' +'First\'3',Last3,email3@email.com,'3 Street St, State ST, 88888' +'First\'4',Last4,email4@email.com,'4 Street St, State ST, 88888' +'First\'5',Last5,email5@email.com,'5 Street St, State ST, 88888' +'First\'6',Last6,email6@email.com,'6 Street St, State ST, 88888' +'First\'7',Last7,email7@email.com,'7 Street St, State ST, 88888' +'First\'8',Last8,email8@email.com,'8 Street St, State ST, 88888' +'First\'9',Last9,email9@email.com,'9 Street St, State ST, 88888' \ No newline at end of file diff --git a/test/assets/test14.txt b/test/assets/test14.txt index 4ad62a6a..690b056c 100644 --- a/test/assets/test14.txt +++ b/test/assets/test14.txt @@ -2,7 +2,7 @@ first_name last_name email_address address First1 Last1 email1@email.com "1 Street St, State ST, 88888" "First2" Last2 email2@email.com "2 Street St, 
State ST, 88888" "First""3" Last3 email3@email.com "3 Street St, State ST, 88888" -"First\"4" Last4 email4@email.com "4 Street St, State ST, 88888" +"First""4" Last4 email4@email.com "4 Street St, State ST, 88888" "First'5" Last5 email5@email.com "5 Street St, State ST, 88888" -'First''6' Last6 email6@email.com "6 Street St, State ST, 88888" -'First\'7' Last7 email7@email.com "7 Street St, State ST, 88888" \ No newline at end of file +"First'6" Last6 email6@email.com "6 Street St, State ST, 88888" +"First'7" Last7 email7@email.com "7 Street St, State ST, 88888" \ No newline at end of file diff --git a/test/assets/test15.txt b/test/assets/test15.txt index 39adf44b..ad21e4bc 100644 --- a/test/assets/test15.txt +++ b/test/assets/test15.txt @@ -2,7 +2,7 @@ first_name|last_name|email_address|address First1|Last1|email1@email.com|"1 Street St, State ST, 88888" "First2"|Last2|email2@email.com|"2 Street St, State ST, 88888" "First""3"|Last3|email3@email.com|"3 Street St, State ST, 88888" -"First\"4"|Last4|email4@email.com|"4 Street St, State ST, 88888" +"First""4"|Last4|email4@email.com|"4 Street St, State ST, 88888" "First'5"|Last5|email5@email.com|"5 Street St, State ST, 88888" -'First''6'|Last6|email6@email.com|"6 Street St, State ST, 88888" -'First\'7'|Last7|email7@email.com|"7 Street St, State ST, 88888" \ No newline at end of file +"First'6"|Last6|email6@email.com|"6 Street St, State ST, 88888" +"First'7"|Last7|email7@email.com|"7 Street St, State ST, 88888" \ No newline at end of file diff --git a/test/assets/test16.txt b/test/assets/test16.txt index 9dcc5759..8a527575 100644 --- a/test/assets/test16.txt +++ b/test/assets/test16.txt @@ -2,7 +2,7 @@ first_name;last_name;email_address;address First1;Last1;email1@email.com;"1 Street St, State ST, 88888" "First2";Last2;email2@email.com;"2 Street St, State ST, 88888" "First""3";Last3;email3@email.com;"3 Street St, State ST, 88888" -"First\"4";Last4;email4@email.com;"4 Street St, State ST, 88888" 
+"First""4";Last4;email4@email.com;"4 Street St, State ST, 88888" "First'5";Last5;email5@email.com;"5 Street St, State ST, 88888" -'First''6';Last6;email6@email.com;"6 Street St, State ST, 88888" -'First\'7';Last7;email7@email.com;"7 Street St, State ST, 88888" \ No newline at end of file +"First'6";Last6;email6@email.com;"6 Street St, State ST, 88888" +"First'7";Last7;email7@email.com;"7 Street St, State ST, 88888" \ No newline at end of file diff --git a/test/assets/test21.csv b/test/assets/test21.csv new file mode 100644 index 00000000..62937dac --- /dev/null +++ b/test/assets/test21.csv @@ -0,0 +1,9 @@ +first_name,last_name,email_address,address +"First +1","Last +1","email1@email.com","1 Street St, +State ST, 88888" +"First +2","Last +2","email2@email.com","2 Street St, +State ST, 88888" diff --git a/test/assets/test8.csv b/test/assets/test8.csv index c7f74993..1fc0ea9d 100644 --- a/test/assets/test8.csv +++ b/test/assets/test8.csv @@ -1,10 +1,10 @@ first_name,last_name,email_address First1,Last1,email1@email.com -'','','' +"","","" "","","" ,Last4,email4@email.com First5,,email5@email.com -'',"", +"","", First7,Last7, -"",,'' +"",,"" ,, diff --git a/test/fast-csv.test.js b/test/fast-csv.test.js index c3bcfc83..79bdac05 100644 --- a/test/fast-csv.test.js +++ b/test/fast-csv.test.js @@ -139,7 +139,12 @@ var expected14 = [ {"first_name": "First'7", "last_name": "Last7", "email_address": "email7@email.com", address: "7 Street St, State ST, 88888"} ]; -it.describe("fast-csv parser", function (it) { +var expected21 = [ + {"first_name": "First\n1", "last_name": "Last\n1", "email_address": "email1@email.com", address: "1 Street St,\nState ST, 88888"}, + {"first_name": "First\n2", "last_name": "Last\n2", "email_address": "email2@email.com", address: "2 Street St,\nState ST, 88888"}, +] + +it.describe("fast-csv", function (it) { it.timeout(60000); @@ -216,11 +221,12 @@ it.describe("fast-csv parser", function (it) { it.should("parse a csv with ' escapes", function 
(next) { var actual = []; csv - .fromPath(path.resolve(__dirname, "./assets/test3.csv"), {headers: true}) + .fromPath(path.resolve(__dirname, "./assets/test3.csv"), {headers: true, quote: "'"}) .on("record",function (data, index) { actual[index] = data; - }). - on("end", function (count) { + }) + .on("error", next) + .on("end", function (count) { assert.deepEqual(actual, expected3); assert.equal(count, actual.length); next(); @@ -233,8 +239,9 @@ it.describe("fast-csv parser", function (it) { .fromPath(path.resolve(__dirname, "./assets/test2.csv"), {headers: ["first_name", "last_name", "email_address", "address"]}) .on("record",function (data, index) { actual[index] = data; - }). - on("end", function (count) { + }) + .on("error", next) + .on("end", function (count) { assert.deepEqual(actual, expected1); assert.equal(count, actual.length); next(); @@ -254,6 +261,7 @@ it.describe("fast-csv parser", function (it) { .on("data-invalid", function (data, index) { invalid.push(data); }) + .on("error", next) .on("end", function (count) { assert.deepEqual(invalid, expectedInvalid); assert.deepEqual(actual, expectedValid); @@ -275,8 +283,9 @@ it.describe("fast-csv parser", function (it) { }) .on("record",function (data, index) { actual[index] = data; - }). - on("end", function (count) { + }) + .on("error", next) + .on("end", function (count) { assert.deepEqual(actual, expectedCamelCase); assert.equal(count, actual.length); next(); @@ -289,8 +298,9 @@ it.describe("fast-csv parser", function (it) { .fromStream(fs.createReadStream(path.resolve(__dirname, "./assets/test4.csv")), {headers: true}) .on("record",function (data, index) { actual[index] = data; - }). 
- on("end", function () { + }) + .on("error", next) + .on("end", function () { assert.deepEqual(actual, expected4); next(); }); @@ -303,10 +313,13 @@ it.describe("fast-csv parser", function (it) { .on("record", function (data) { actual.push(data); }) + .on("error", function (err) { + next(); + }) .on("end", function () { assert.deepEqual(actual, expected1.slice(1)); assert.isTrue(parseErrorCalled); - next(); + next(new Error("unexpected end call")); }) .on("parse-error", function (error) { parseErrorCalled = true; @@ -321,6 +334,7 @@ it.describe("fast-csv parser", function (it) { .on("record", function (data, index) { actual.push(data); }) + .on("error", next) .on("end", function (count) { assert.deepEqual(actual, expected7); assert.equal(count, actual.length); @@ -335,6 +349,7 @@ it.describe("fast-csv parser", function (it) { .on("record", function (data, index) { actual.push(data); }) + .on("error", next) .on("end", function (count) { assert.deepEqual(actual, expected8); assert.equal(count, actual.length); @@ -349,6 +364,7 @@ it.describe("fast-csv parser", function (it) { .on("record", function (data) { actual.push(data); }) + .on("error", next) .on("end", function (count) { assert.deepEqual(actual, expected9); assert.equal(count, actual.length); @@ -359,10 +375,11 @@ it.describe("fast-csv parser", function (it) { it.should("handle double quotes inside of single quotes", function (next) { var actual = []; csv - .fromPath(path.resolve(__dirname, "./assets/test10.csv"), {headers: true}) + .fromPath(path.resolve(__dirname, "./assets/test10.csv"), {headers: true, quote: "'"}) .on("record", function (data) { actual.push(data); }) + .on("error", next) .on("end", function (count) { assert.deepEqual(actual, expected10); assert.equal(count, actual.length); @@ -373,10 +390,11 @@ it.describe("fast-csv parser", function (it) { it.should("handle escaped double quotes inside of double quotes", function (next) { var actual = []; csv - .fromPath(path.resolve(__dirname, 
"./assets/test11.csv"), {headers: true}) + .fromPath(path.resolve(__dirname, "./assets/test11.csv"), {headers: true, escape: "\\"}) .on("record", function (data) { actual.push(data); }) + .on("error", next) .on("end", function (count) { assert.deepEqual(actual, expected10); assert.equal(count, actual.length); @@ -387,10 +405,11 @@ it.describe("fast-csv parser", function (it) { it.should("handle escaped single quotes inside of single quotes", function (next) { var actual = []; csv - .fromPath(path.resolve(__dirname, "./assets/test12.csv"), {headers: true}) + .fromPath(path.resolve(__dirname, "./assets/test12.csv"), {headers: true, quote: "'", escape: "\\"}) .on("record", function (data) { actual.push(data); }) + .on("error", next) .on("end", function (count) { assert.deepEqual(actual, expected9); assert.equal(count, actual.length); @@ -405,8 +424,9 @@ it.describe("fast-csv parser", function (it) { .fromPath(path.resolve(__dirname, "./assets/test14.txt"), {headers: true, delimiter: "\t"}) .on("record",function (data, index) { actual[index] = data; - }). - on("end", function (count) { + }) + .on("error", next) + .on("end", function (count) { assert.deepEqual(actual, expected14); assert.equal(count, actual.length); next(); @@ -419,8 +439,9 @@ it.describe("fast-csv parser", function (it) { .fromPath(path.resolve(__dirname, "./assets/test15.txt"), {headers: true, delimiter: "|"}) .on("record",function (data, index) { actual[index] = data; - }). - on("end", function (count) { + }) + .on("error", next) + .on("end", function (count) { assert.deepEqual(actual, expected14); assert.equal(count, actual.length); next(); @@ -433,8 +454,9 @@ it.describe("fast-csv parser", function (it) { .fromPath(path.resolve(__dirname, "./assets/test16.txt"), {headers: true, delimiter: ";"}) .on("record",function (data, index) { actual[index] = data; - }). 
- on("end", function (count) { + }) + .on("error", next) + .on("end", function (count) { assert.deepEqual(actual, expected14); assert.equal(count, actual.length); next(); @@ -499,6 +521,20 @@ it.describe("fast-csv parser", function (it) { }); }); + it.should("handle CSVs with new lines", function (next) { + var actual = []; + csv + .fromPath(path.resolve(__dirname, "./assets/test21.csv"), {headers: true, ignoreEmpty: true}) + .on("record",function (data, index) { + actual[index] = data; + }). + on("end", function (count) { + assert.deepEqual(actual, expected21); + assert.equal(count, actual.length); + next(); + }); + }); + it.should("throw an error if an invalid path or stream is passed in", function () { assert.throws(function () { @@ -530,7 +566,7 @@ it.describe("fast-csv parser", function (it) { ["a", "b"], ["a1", "b1"], ["a2", "b2"] - ], {headers: true}); + ], {headers: true}).on("error", next); }); it.should("write an array of objects", function (next) { @@ -542,7 +578,7 @@ it.describe("fast-csv parser", function (it) { csv.writeToStream(ws, [ {a: "a1", b: "b1"}, {a: "a2", b: "b2"} - ], {headers: true}); + ], {headers: true}).on("error", next); }); }); @@ -576,7 +612,7 @@ it.describe("fast-csv parser", function (it) { ["a", "b"], ["a1", "b1"], ["a2", "b2"] - ], {headers: true}).pipe(ws); + ], {headers: true}).on("error", next).pipe(ws); }); it.should("write an array of objects", function (next) { @@ -588,7 +624,7 @@ it.describe("fast-csv parser", function (it) { csv.write([ {a: "a1", b: "b1"}, {a: "a2", b: "b2"} - ], {headers: true}).pipe(ws); + ], {headers: true}).on("error", next).pipe(ws); }); }); @@ -601,6 +637,7 @@ it.describe("fast-csv parser", function (it) { ["a1", "b1"], ["a2", "b2"] ], {headers: true}) + .on("error", next) .on("finish", function () { assert.equal(fs.readFileSync(path.resolve(__dirname, "assets/test.csv")).toString(), "a,b\na1,b1\na2,b2"); fs.unlinkSync(path.resolve(__dirname, "assets/test.csv")); @@ -614,6 +651,7 @@ 
it.describe("fast-csv parser", function (it) { {a: "a1", b: "b1"}, {a: "a2", b: "b2"} ], {headers: true}) + .on("error", next) .on("finish", function () { assert.equal(fs.readFileSync(path.resolve(__dirname, "assets/test.csv")).toString(), "a,b\na1,b1\na2,b2"); fs.unlinkSync(path.resolve(__dirname, "assets/test.csv")); @@ -621,6 +659,4 @@ it.describe("fast-csv parser", function (it) { }); }); }); -}); - -it.run(); +}); \ No newline at end of file diff --git a/test/parser.test.js b/test/parser.test.js new file mode 100644 index 00000000..e70f8a85 --- /dev/null +++ b/test/parser.test.js @@ -0,0 +1,202 @@ +var it = require("it"), + assert = require("assert"), + parser = require("../lib/parser"); + +it.describe("fast-csv parser", function (it) { + + it.describe("unescaped data", function (it) { + + it.should("parse a block of CSV text", function () { + var data = "first_name,last_name,email_address\nFirst1,Last1,email1@email.com"; + var myParser = parser({delimiter: ","}); + assert.deepEqual(myParser(data, false), {"line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First1", "Last1", "email1@email.com"] + ]}); + }); + + it.should("return the rest of the line if there is more data", function () { + var data = "first_name,last_name,email_address\nFirst1,Last1,email1@email.com"; + var myParser = parser({delimiter: ","}); + assert.deepEqual(myParser(data, true), { + "line": "First1,Last1,email1@email.com", + "rows": [ + ["first_name", "last_name", "email_address"] + ] + }); + }); + + it.should("accept new data and return the result", function () { + var data = "first_name,last_name,email_address\nFirst1,Last1,email1@email.com,"; + var myParser = parser({delimiter: ","}); + var parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": "First1,Last1,email1@email.com,", + "rows": [ + ["first_name", "last_name", "email_address"] + ] + }); + assert.deepEqual(myParser(parsedData.line + "\nFirst2,Last2,email2@email.com", false), {"line": 
"", "rows": [ + ["First1", "Last1", "email1@email.com"], + ["First2", "Last2", "email2@email.com"] + ]}); + }); + + it.should("not parse a row if a new line is not found and there is more data", function () { + var data = "first_name,last_name,email_address"; + var myParser = parser({delimiter: ","}); + var parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": "first_name,last_name,email_address", + "rows": [] + }); + }); + + it.should("not parse a row if there is a trailing delimiter and there is more data", function () { + var data = "first_name,last_name,email_address,"; + var myParser = parser({delimiter: ","}); + var parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": "first_name,last_name,email_address,", + "rows": [] + }); + }); + + it.should("parse a row if a new line is found and there is more data", function () { + var data = "first_name,last_name,email_address\n"; + var myParser = parser({delimiter: ","}); + var parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": "", + "rows": [ + ["first_name", "last_name", "email_address"] + ] + }); + }); + + }); + + it.describe("escaped values", function (it) { + + it.should("parse a block of CSV text", function () { + var data = 'first_name,last_name,email_address\n"First,1","Last,1","email1@email.com"'; + var myParser = parser({delimiter: ","}); + assert.deepEqual(myParser(data, false), {"line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First,1", "Last,1", "email1@email.com"] + ]}); + }); + + it.should("parse a block of CSV text with escaped escaped char", function () { + var data = 'first_name,last_name,email_address\n"First,""1""","Last,""1""","email1@email.com"'; + var myParser = parser({delimiter: ","}); + assert.deepEqual(myParser(data, false), {"line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First,\"1\"", "Last,\"1\"", "email1@email.com"] + ]}); + }); + + it.should("parse a block of CSV 
text with alternate escape char", function () { + var data = 'first_name,last_name,email_address\n"First,\\"1\\"","Last,\\"1\\"","email1@email.com"'; + var myParser = parser({delimiter: ",", escape: "\\"}); + assert.deepEqual(myParser(data, false), {"line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First,\"1\"", "Last,\"1\"", "email1@email.com"] + ]}); + }); + + it.should("return the rest of the line if a complete value is not found", function () { + var data = 'first_name,last_name,email_address\n"First,""1""","Last,""1""","email1@email.com'; + var myParser = parser({delimiter: ","}); + assert.deepEqual(myParser(data, true), { + "line": '"First,""1""","Last,""1""","email1@email.com', + "rows": [ + ["first_name", "last_name", "email_address"] + ] + }); + }); + + it.should("accept more data appended to the returned line with escaped values", function () { + var data = 'first_name,last_name,email_address\n"First,""1""","Last,""1""","email1@email.com'; + var myParser = parser({delimiter: ","}), + parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": '"First,""1""","Last,""1""","email1@email.com', + "rows": [ + ["first_name", "last_name", "email_address"] + ] + }); + assert.deepEqual(myParser(parsedData.line + '"\n"First,""2""","Last,""2""","email2@email.com"', false), { + line: "", + rows: [ + ["First,\"1\"", "Last,\"1\"", "email1@email.com"], + ["First,\"2\"", "Last,\"2\"", "email2@email.com"] + ] + }); + }); + + it.should("throw an error if there is not more data and there is an invalid escape sequence", function () { + var data = 'first_name,last_name,email_address\n"First,""1""","Last,""1""","email1@email.com'; + var myParser = parser({delimiter: ","}), + parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": '"First,""1""","Last,""1""","email1@email.com', + "rows": [ + ["first_name", "last_name", "email_address"] + ] + }); + assert.throws(function () { + 
assert.deepEqual(myParser(parsedData.line + '\n"First,"",2""","Last""2""","email2@email.com"', false), { + line: "", + rows: [ + ["First,\"1\"", "Last,\"1\"", "email1@email.com"], + ["First,\"2\"", "Last,\"2\"", "email2@email.com"] + ] + }); + }, Error, ' Parse Error: expected: \'"\' got: \'F\'. at \'First,""2""","Last""2""","email2@email.com"'); + }); + + it.should("handle empty values properly", function () { + var data = '"","",""\n,Last4,email4@email.com'; + var myParser = parser({delimiter: ","}), + parsedData = myParser(data, false); + assert.deepEqual(parsedData, {"line": "", "rows": [ + ["", "", ""], + ["", "Last4", "email4@email.com"] + ]}); + }); + + it.should("not parse a row if a new line is not found and there is more data", function () { + var data = '"first_name","last_name","email_address"'; + var myParser = parser({delimiter: ","}); + var parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": '"first_name","last_name","email_address"', + "rows": [] + }); + }); + + it.should("not parse a row if there is a trailing delimiter and there is more data", function () { + var data = '"first_name","last_name","email_address",'; + var myParser = parser({delimiter: ","}); + var parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": '"first_name","last_name","email_address",', + "rows": [] + }); + }); + + it.should("parse a row if a new line is found and there is more data", function () { + var data = '"first_name","last_name","email_address"\n'; + var myParser = parser({delimiter: ","}); + var parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": "", + "rows": [ + ["first_name", "last_name", "email_address"] + ] + }); + }); + }); + +}); \ No newline at end of file