diff --git a/README.md b/README.md index 08538d79..6b24e787 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,11 @@ All methods accept the following `options` * `ignoreEmpty=false`: If you wish to ignore empty rows. * `delimiter=','`: If your data uses an alternate delimiter such as `;` or `\t`. * **NOTE** When specifying an alternate `delimiter` you may only pass in a single character delimeter +* The following are options for parsing only. + * `trim=false`: If you want to trim all values parsed set to true. + * `rtrim=false`: If you want to right trim all values parsed set to true. + * `ltrim=false`: If you want to left trim all values parsed set to true. + **events** @@ -223,22 +228,22 @@ Create a readable stream to read data from. ```javascript var ws = fs.createWritableStream("my.csv"); csv - .write([ - ["a", "b"], - ["a1", "b1"], - ["a2", "b2"] - ], {headers: true}) - .pipe(ws); + .write([ + ["a", "b"], + ["a1", "b1"], + ["a2", "b2"] + ], {headers: true}) + .pipe(ws); ``` ```javascript var ws = fs.createWritableStream("my.csv"); csv - .write([ - {a: "a1", b: "b1"}, - {a: "a2", b: "b2"} - ], {headers: true}) - .pipe(ws); + .write([ + {a: "a1", b: "b1"}, + {a: "a2", b: "b2"} + ], {headers: true}) + .pipe(ws); ``` **`writeToStream(stream,arr[, options])`** @@ -247,20 +252,20 @@ Write an array of values to a `WritableStream` ```javascript csv - .writeToStream(fs.createWritableStream("my.csv"), [ - ["a", "b"], - ["a1", "b1"], - ["a2", "b2"] - ], {headers: true}); + .writeToStream(fs.createWritableStream("my.csv"), [ + ["a", "b"], + ["a1", "b1"], + ["a2", "b2"] + ], {headers: true}); ``` ```javascript csv - .writeToStream(fs.createWritableStream("my.csv"), [ - {a: "a1", b: "b1"}, - {a: "a2", b: "b2"} - ], {headers: true}) - .pipe(ws); + .writeToStream(fs.createWritableStream("my.csv"), [ + {a: "a1", b: "b1"}, + {a: "a2", b: "b2"} + ], {headers: true}) + .pipe(ws); ``` **`writeToPath(arr[, options])`** @@ -269,41 +274,41 @@ Write an array of values to the 
specified path ```javascript csv - .writeToPath("my.csv", [ - ["a", "b"], - ["a1", "b1"], - ["a2", "b2"] - ], {headers: true}) - .on("finish", function(){ - console.log("done!"); - }); + .writeToPath("my.csv", [ + ["a", "b"], + ["a1", "b1"], + ["a2", "b2"] + ], {headers: true}) + .on("finish", function(){ + console.log("done!"); + }); ``` ```javascript csv - .writeToStream("my.csv", [ - {a: "a1", b: "b1"}, - {a: "a2", b: "b2"} - ], {headers: true}) - .on("finish", function(){ - console.log("done!"); - }); + .writeToStream("my.csv", [ + {a: "a1", b: "b1"}, + {a: "a2", b: "b2"} + ], {headers: true}) + .on("finish", function(){ + console.log("done!"); + }); ``` **`writeToString(arr[, options])`** ```javascript csv.writeToString([ - ["a", "b"], - ["a1", "b1"], - ["a2", "b2"] + ["a", "b"], + ["a1", "b1"], + ["a2", "b2"] ], {headers: true}); //"a,b\na1,b1\na2,b2\n" ``` ```javascript csv.writeToString([ - {a: "a1", b: "b1"}, - {a: "a2", b: "b2"} + {a: "a1", b: "b1"}, + {a: "a2", b: "b2"} ], {headers: true}); //"a,b\na1,b1\na2,b2\n" ``` diff --git a/benchmark/benchmark.js b/benchmark/benchmark.js index 637d12f0..08d2b96c 100644 --- a/benchmark/benchmark.js +++ b/benchmark/benchmark.js @@ -1,7 +1,7 @@ var fastCsv = require("../lib"), csv = require("csv"), path = require("path"), - COUNT = 1000000, + COUNT = 20000, TEST_FILE = path.resolve(__dirname, "./assets/" + COUNT + ".csv"); diff --git a/docs/index.html b/docs/index.html index bbb75431..84061134 100644 --- a/docs/index.html +++ b/docs/index.html @@ -185,6 +185,12 @@

Parsing

  • NOTE When specifying an alternate delimiter you may only pass in a single character delimiter
  • +
  • The following are options for parsing only. +
  • events

    parse-error: Emitted if there was an error parsing a row. @@ -332,61 +338,61 @@

    Formatting

    Create a readable stream to read data from.

    var ws = fs.createWritableStream("my.csv");
     csv
    -    .write([
    -        ["a", "b"],
    -        ["a1", "b1"],
    -        ["a2", "b2"]
    -    ], {headers: true})
    -    .pipe(ws);
    + .write([ + ["a", "b"], + ["a1", "b1"], + ["a2", "b2"] + ], {headers: true}) + .pipe(ws);
    var ws = fs.createWritableStream("my.csv");
     csv
    -    .write([
    -        {a: "a1", b: "b1"},
    -        {a: "a2", b: "b2"}
    -    ], {headers: true})
    -    .pipe(ws);
    + .write([ + {a: "a1", b: "b1"}, + {a: "a2", b: "b2"} + ], {headers: true}) + .pipe(ws);

    writeToStream(stream,arr[, options])

    Write an array of values to a WritableStream

    csv
    -    .writeToStream(fs.createWritableStream("my.csv"), [
    -        ["a", "b"],
    -        ["a1", "b1"],
    -        ["a2", "b2"]
    -    ], {headers: true});
    + .writeToStream(fs.createWritableStream("my.csv"), [ + ["a", "b"], + ["a1", "b1"], + ["a2", "b2"] + ], {headers: true});
    csv
    -    .writeToStream(fs.createWritableStream("my.csv"), [
    -        {a: "a1", b: "b1"},
    -        {a: "a2", b: "b2"}
    -    ], {headers: true})
    -    .pipe(ws);
    + .writeToStream(fs.createWritableStream("my.csv"), [ + {a: "a1", b: "b1"}, + {a: "a2", b: "b2"} + ], {headers: true}) + .pipe(ws);

    writeToPath(path, arr[, options])

    Write an array of values to the specified path

    csv
    -    .writeToPath("my.csv", [
    -        ["a", "b"],
    -        ["a1", "b1"],
    -        ["a2", "b2"]
    -    ], {headers: true})
    -    .on("finish", function(){
    -        console.log("done!");
    -    });
    + .writeToPath("my.csv", [ + ["a", "b"], + ["a1", "b1"], + ["a2", "b2"] + ], {headers: true}) + .on("finish", function(){ + console.log("done!"); + });
    csv
    -    .writeToStream("my.csv", [
    -        {a: "a1", b: "b1"},
    -        {a: "a2", b: "b2"}
    -    ], {headers: true})
    -    .on("finish", function(){
    -        console.log("done!");
    -    });
    + .writeToStream("my.csv", [ + {a: "a1", b: "b1"}, + {a: "a2", b: "b2"} + ], {headers: true}) + .on("finish", function(){ + console.log("done!"); + });

    writeToString(arr[, options])

    csv.writeToString([
    -    ["a", "b"],
    -    ["a1", "b1"],
    -    ["a2", "b2"]
    +   ["a", "b"],
    +   ["a1", "b1"],
    +   ["a2", "b2"]
     ], {headers: true}); //"a,b\na1,b1\na2,b2\n"
    csv.writeToString([
    -    {a: "a1", b: "b1"},
    -    {a: "a2", b: "b2"}
    +   {a: "a1", b: "b1"},
    +   {a: "a2", b: "b2"}
     ], {headers: true}); //"a,b\na1,b1\na2,b2\n"

    Benchmarks

    Parsing 20000 records AVG over 3 runs

    diff --git a/lib/index.js b/lib/index.js index cb5bfce7..2af9936e 100644 --- a/lib/index.js +++ b/lib/index.js @@ -22,6 +22,11 @@ * * `ignoreEmpty=false`: If you wish to ignore empty rows. * * `delimiter=','`: If your data uses an alternate delimiter such as `;` or `\t`. * * **NOTE** When specifying an alternate `delimiter` you may only pass in a single character delimeter + * * The following are options for parsing only. + * * `trim=false`: If you want to trim all values parsed set to true. + * * `rtrim=false`: If you want to right trim all values parsed set to true. + * * `ltrim=false`: If you want to left trim all values parsed set to true. + * * * **events** * diff --git a/lib/parser.js b/lib/parser.js index a679a20d..feb3f524 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -1,11 +1,31 @@ -var SINGLE_QUOTE = "'", +var extended = require("./extended"), + trim = extended.trim, + trimLeft = extended.trimLeft, + trimRight = extended.trimRight, + SINGLE_QUOTE = "'", DOUBLE_QUOTE = '"'; -function createParser(delimiter) { - - var VALUE_REGEXP = new RegExp("([^" + delimiter + "'\"\\s\\\\]*(?:\\s+[^" + delimiter + "'\"\\s\\\\]+)*)"), +function createParser(options) { + options = options || {}; + var delimiter = options.delimiter || ",", + doLtrim = options.ltrim || false, + doRtrim = options.rtrim || false, + doTrim = options.trim || false, + VALUE_REGEXP = new RegExp("([^" + delimiter + "'\"\\s\\\\]*(?:\\s+[^" + delimiter + "'\"\\s\\\\]+)*)"), SEARCH_REGEXP = new RegExp("[^\\\\]" + delimiter), - ESCAPE_CHAR = "\\"; + ESCAPE_CHAR = "\\", + WHITE_SPACE = /\s/; + + function formatItem(item) { + if (doTrim) { + item = trim(item); + } else if (doLtrim) { + item = trimLeft(item); + } else if (doRtrim) { + item = trimRight(item); + } + return item; + } function getTokensBetween(str, start, items, cursor) { var depth = 0, ret = []; @@ -35,10 +55,10 @@ function createParser(delimiter) { ++cursor; } } - if (++cursor < str.length && str[cursor].search(delimiter) !== 0) { + 
if (++cursor < str.length && getNextToken(str, cursor).token.search(delimiter) !== 0) { throw new Error("Invalid row " + str); } - items.push(ret.join("")); + items.push(formatItem(ret.join(""))); return ++cursor; } @@ -52,7 +72,7 @@ function createParser(delimiter) { nextIndex = searchStr.length - 1; } } - items.push(searchStr.substr(0, nextIndex + 1)); + items.push(formatItem(searchStr.substr(0, nextIndex + 1))); return cursor + (nextIndex + 2); } @@ -69,20 +89,39 @@ function createParser(delimiter) { return findNextToken(line, items, cursor); } + function getNextToken(line, cursor) { + var l = line.length, ret, token; + do { + token = line[cursor]; + if (token === delimiter || !WHITE_SPACE.test(token)) { + ret = token; + } else { + token = null; + } + + } while (!token && cursor++ < l); + if (!token) { + throw new Error("Invalid row " + line); + } + return {token: token, cursor: cursor}; + } + return function parseLine(line) { - var i = 0, l = line.length, items = [], token; + var i = 0, l = line.length, items = [], token, nextToken; while (i < l) { - token = line[i]; + nextToken = getNextToken(line, i); + token = nextToken.token; if (token === delimiter) { items.push(""); i++; } else if (token === SINGLE_QUOTE) { - i = parseSingleQuoteItem(line, items, i); + i = parseSingleQuoteItem(line, items, nextToken.cursor); } else if (token === DOUBLE_QUOTE) { - i = parseDoubleQuoteItem(line, items, i); + i = parseDoubleQuoteItem(line, items, nextToken.cursor); } else { i = parseItem(line, items, i); } + } return items; }; diff --git a/lib/parser_stream.js b/lib/parser_stream.js index ee9bcc9a..ec063a39 100644 --- a/lib/parser_stream.js +++ b/lib/parser_stream.js @@ -27,7 +27,8 @@ function ParserStream(options) { } else { delimiter = DEFAULT_DELIMITER; } - this.parser = createParser(delimiter); + options.delimiter = delimiter; + this.parser = createParser(options); this._headers = options.headers; this._ignoreEmpty = options.ignoreEmpty; return this; diff --git 
a/package.json b/package.json index 657e58ef..b9ea2f4c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "fast-csv", - "version": "0.1.0", + "version": "0.1.1", "description": "CSV parser for node.js", "main": "index.js", "scripts": { diff --git a/test/assets/test17.csv b/test/assets/test17.csv new file mode 100644 index 00000000..909c9a8c --- /dev/null +++ b/test/assets/test17.csv @@ -0,0 +1,10 @@ +first_name,last_name,email_address,address +"First1", "Last1", "email1@email.com", "1 Street St, State ST, 88888" +"First2", "Last2", "email2@email.com", "2 Street St, State ST, 88888" +"First3", "Last3", "email3@email.com", "3 Street St, State ST, 88888" +"First4", "Last4", "email4@email.com", "4 Street St, State ST, 88888" +"First5", "Last5", "email5@email.com", "5 Street St, State ST, 88888" +"First6", "Last6", "email6@email.com", "6 Street St, State ST, 88888" +"First7", "Last7", "email7@email.com", "7 Street St, State ST, 88888" +"First8", "Last8", "email8@email.com", "8 Street St, State ST, 88888" +"First9", "Last9", "email9@email.com", "9 Street St, State ST, 88888" \ No newline at end of file diff --git a/test/assets/test18.csv b/test/assets/test18.csv new file mode 100644 index 00000000..4572de25 --- /dev/null +++ b/test/assets/test18.csv @@ -0,0 +1,10 @@ +first_name,last_name,email_address,address +" First1"," Last1"," email1@email.com"," 1 Street St, State ST, 88888" +" First2"," Last2"," email2@email.com"," 2 Street St, State ST, 88888" +" First3"," Last3"," email3@email.com"," 3 Street St, State ST, 88888" +" First4"," Last4"," email4@email.com"," 4 Street St, State ST, 88888" +" First5"," Last5"," email5@email.com"," 5 Street St, State ST, 88888" +" First6"," Last6"," email6@email.com"," 6 Street St, State ST, 88888" +" First7"," Last7"," email7@email.com"," 7 Street St, State ST, 88888" +" First8"," Last8"," email8@email.com"," 8 Street St, State ST, 88888" +" First9"," Last9"," email9@email.com"," 9 Street St, State ST, 88888" \ No 
newline at end of file diff --git a/test/assets/test19.csv b/test/assets/test19.csv new file mode 100644 index 00000000..bf51359c --- /dev/null +++ b/test/assets/test19.csv @@ -0,0 +1,10 @@ +first_name,last_name,email_address,address +"First1 ","Last1 ","email1@email.com ","1 Street St, State ST, 88888 " +"First2 ","Last2 ","email2@email.com ","2 Street St, State ST, 88888 " +"First3 ","Last3 ","email3@email.com ","3 Street St, State ST, 88888 " +"First4 ","Last4 ","email4@email.com ","4 Street St, State ST, 88888 " +"First5 ","Last5 ","email5@email.com ","5 Street St, State ST, 88888 " +"First6 ","Last6 ","email6@email.com ","6 Street St, State ST, 88888 " +"First7 ","Last7 ","email7@email.com ","7 Street St, State ST, 88888 " +"First8 ","Last8 ","email8@email.com ","8 Street St, State ST, 88888 " +"First9 ","Last9 ","email9@email.com ","9 Street St, State ST, 88888 " \ No newline at end of file diff --git a/test/assets/test20.csv b/test/assets/test20.csv new file mode 100644 index 00000000..2968ad50 --- /dev/null +++ b/test/assets/test20.csv @@ -0,0 +1,10 @@ +first_name,last_name,email_address,address +" First1 "," Last1 "," email1@email.com "," 1 Street St, State ST, 88888 " +" First2 "," Last2 "," email2@email.com "," 2 Street St, State ST, 88888 " +" First3 "," Last3 "," email3@email.com "," 3 Street St, State ST, 88888 " +" First4 "," Last4 "," email4@email.com "," 4 Street St, State ST, 88888 " +" First5 "," Last5 "," email5@email.com "," 5 Street St, State ST, 88888 " +" First6 "," Last6 "," email6@email.com "," 6 Street St, State ST, 88888 " +" First7 "," Last7 "," email7@email.com "," 7 Street St, State ST, 88888 " +" First8 "," Last8 "," email8@email.com "," 8 Street St, State ST, 88888 " +" First9 "," Last9 "," email9@email.com "," 9 Street St, State ST, 88888 " \ No newline at end of file diff --git a/test/fast-csv.test.js b/test/fast-csv.test.js index f87bf4b1..d3f2eece 100644 --- a/test/fast-csv.test.js +++ b/test/fast-csv.test.js @@ -442,6 +442,63 
@@ it.describe("fast-csv parser", function (it) { }); }); + it.should("ignore leading white space in front of a quoted value", function (next) { + var actual = []; + csv + .fromPath(path.resolve(__dirname, "./assets/test17.csv"), {headers: true}) + .on("record",function (data, index) { + actual[index] = data; + }). + on("end", function (count) { + assert.deepEqual(actual, expected1); + assert.equal(count, actual.length); + next(); + }); + }); + + it.should("accept a ltrim parameter", function (next) { + var actual = []; + csv + .fromPath(path.resolve(__dirname, "./assets/test18.csv"), {ltrim: true, trim: false, headers: true}) + .on("record",function (data, index) { + actual[index] = data; + }). + on("end", function (count) { + assert.deepEqual(actual, expected1); + assert.equal(count, actual.length); + next(); + }); + }); + + + it.should("accept a rtrim parameter", function (next) { + var actual = []; + csv + .fromPath(path.resolve(__dirname, "./assets/test19.csv"), {rtrim: true, trim: false, headers: true}) + .on("record",function (data, index) { + actual[index] = data; + }). + on("end", function (count) { + assert.deepEqual(actual, expected1); + assert.equal(count, actual.length); + next(); + }); + }); + + it.should("accept a trim parameter", function (next) { + var actual = []; + csv + .fromPath(path.resolve(__dirname, "./assets/test20.csv"), {trim: true, headers: true}) + .on("record",function (data, index) { + actual[index] = data; + }). + on("end", function (count) { + assert.deepEqual(actual, expected1); + assert.equal(count, actual.length); + next(); + }); + }); + it.should("throw an error if an invalid path or stream is passed in", function () { assert.throws(function () { @@ -535,7 +592,6 @@ it.describe("fast-csv parser", function (it) { }); }); - it.describe(".writeToPath", function (it) { it.should("write an array of arrays", function (next) { @@ -565,8 +621,4 @@ it.describe("fast-csv parser", function (it) { }); }); }); - - }); - -it.run();