diff --git a/README.md b/README.md
index 08538d79..6b24e787 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,11 @@ All methods accept the following `options`
* `ignoreEmpty=false`: If you wish to ignore empty rows.
* `delimiter=','`: If your data uses an alternate delimiter such as `;` or `\t`.
* **NOTE** When specifying an alternate `delimiter` you may only pass in a single character delimeter
+* The following options apply to parsing only.
+ * `trim=false`: Set to `true` to trim whitespace from both ends of every parsed value.
+ * `rtrim=false`: Set to `true` to trim trailing (right-hand) whitespace from every parsed value.
+ * `ltrim=false`: Set to `true` to trim leading (left-hand) whitespace from every parsed value.
+
**events**
@@ -223,22 +228,22 @@ Create a readable stream to read data from.
```javascript
var ws = fs.createWritableStream("my.csv");
csv
- .write([
- ["a", "b"],
- ["a1", "b1"],
- ["a2", "b2"]
- ], {headers: true})
- .pipe(ws);
+ .write([
+ ["a", "b"],
+ ["a1", "b1"],
+ ["a2", "b2"]
+ ], {headers: true})
+ .pipe(ws);
```
```javascript
var ws = fs.createWritableStream("my.csv");
csv
- .write([
- {a: "a1", b: "b1"},
- {a: "a2", b: "b2"}
- ], {headers: true})
- .pipe(ws);
+ .write([
+ {a: "a1", b: "b1"},
+ {a: "a2", b: "b2"}
+ ], {headers: true})
+ .pipe(ws);
```
**`writeToStream(stream,arr[, options])`**
@@ -247,20 +252,20 @@ Write an array of values to a `WritableStream`
```javascript
csv
- .writeToStream(fs.createWritableStream("my.csv"), [
- ["a", "b"],
- ["a1", "b1"],
- ["a2", "b2"]
- ], {headers: true});
+ .writeToStream(fs.createWritableStream("my.csv"), [
+ ["a", "b"],
+ ["a1", "b1"],
+ ["a2", "b2"]
+ ], {headers: true});
```
```javascript
csv
- .writeToStream(fs.createWritableStream("my.csv"), [
- {a: "a1", b: "b1"},
- {a: "a2", b: "b2"}
- ], {headers: true})
- .pipe(ws);
+ .writeToStream(fs.createWritableStream("my.csv"), [
+ {a: "a1", b: "b1"},
+ {a: "a2", b: "b2"}
+ ], {headers: true})
+ .pipe(ws);
```
**`writeToPath(arr[, options])`**
@@ -269,41 +274,41 @@ Write an array of values to the specified path
```javascript
csv
- .writeToPath("my.csv", [
- ["a", "b"],
- ["a1", "b1"],
- ["a2", "b2"]
- ], {headers: true})
- .on("finish", function(){
- console.log("done!");
- });
+ .writeToPath("my.csv", [
+ ["a", "b"],
+ ["a1", "b1"],
+ ["a2", "b2"]
+ ], {headers: true})
+ .on("finish", function(){
+ console.log("done!");
+ });
```
```javascript
csv
- .writeToStream("my.csv", [
- {a: "a1", b: "b1"},
- {a: "a2", b: "b2"}
- ], {headers: true})
- .on("finish", function(){
- console.log("done!");
- });
+ .writeToPath("my.csv", [
+ {a: "a1", b: "b1"},
+ {a: "a2", b: "b2"}
+ ], {headers: true})
+ .on("finish", function(){
+ console.log("done!");
+ });
```
**`writeToString(arr[, options])`**
```javascript
csv.writeToString([
- ["a", "b"],
- ["a1", "b1"],
- ["a2", "b2"]
+ ["a", "b"],
+ ["a1", "b1"],
+ ["a2", "b2"]
], {headers: true}); //"a,b\na1,b1\na2,b2\n"
```
```javascript
csv.writeToString([
- {a: "a1", b: "b1"},
- {a: "a2", b: "b2"}
+ {a: "a1", b: "b1"},
+ {a: "a2", b: "b2"}
], {headers: true}); //"a,b\na1,b1\na2,b2\n"
```
diff --git a/benchmark/benchmark.js b/benchmark/benchmark.js
index 637d12f0..08d2b96c 100644
--- a/benchmark/benchmark.js
+++ b/benchmark/benchmark.js
@@ -1,7 +1,7 @@
var fastCsv = require("../lib"),
csv = require("csv"),
path = require("path"),
- COUNT = 1000000,
+ COUNT = 20000,
TEST_FILE = path.resolve(__dirname, "./assets/" + COUNT + ".csv");
diff --git a/docs/index.html b/docs/index.html
index bbb75431..84061134 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -185,6 +185,12 @@
Parsing
NOTE When specifying an alternate delimiter
you may only pass in a single character delimeter
+The following are options for parsing only.
+trim=false
: If you want to trim all values parsed set to true.
+rtrim=false
: If you want to right trim all values parsed set to true.
+ltrim=false
: If you want to left trim all values parsed set to true.
+
+
events
parse-error
: Emitted if there was an error parsing a row.
@@ -332,61 +338,61 @@
Formatting
Create a readable stream to read data from.
var ws = fs.createWritableStream("my.csv");
csv
- .write([
- ["a", "b"],
- ["a1", "b1"],
- ["a2", "b2"]
- ], {headers: true})
- .pipe(ws);
+ .write([
+ ["a", "b"],
+ ["a1", "b1"],
+ ["a2", "b2"]
+ ], {headers: true})
+ .pipe(ws);
var ws = fs.createWritableStream("my.csv");
csv
- .write([
- {a: "a1", b: "b1"},
- {a: "a2", b: "b2"}
- ], {headers: true})
- .pipe(ws);
+ .write([
+ {a: "a1", b: "b1"},
+ {a: "a2", b: "b2"}
+ ], {headers: true})
+ .pipe(ws);
writeToStream(stream,arr[, options])
Write an array of values to a WritableStream
csv
- .writeToStream(fs.createWritableStream("my.csv"), [
- ["a", "b"],
- ["a1", "b1"],
- ["a2", "b2"]
- ], {headers: true});
+ .writeToStream(fs.createWritableStream("my.csv"), [
+ ["a", "b"],
+ ["a1", "b1"],
+ ["a2", "b2"]
+ ], {headers: true});
csv
- .writeToStream(fs.createWritableStream("my.csv"), [
- {a: "a1", b: "b1"},
- {a: "a2", b: "b2"}
- ], {headers: true})
- .pipe(ws);
+ .writeToStream(fs.createWritableStream("my.csv"), [
+ {a: "a1", b: "b1"},
+ {a: "a2", b: "b2"}
+ ], {headers: true})
+ .pipe(ws);
writeToPath(arr[, options])
Write an array of values to the specified path
csv
- .writeToPath("my.csv", [
- ["a", "b"],
- ["a1", "b1"],
- ["a2", "b2"]
- ], {headers: true})
- .on("finish", function(){
- console.log("done!");
- });
+ .writeToPath("my.csv", [
+ ["a", "b"],
+ ["a1", "b1"],
+ ["a2", "b2"]
+ ], {headers: true})
+ .on("finish", function(){
+ console.log("done!");
+ });
csv
- .writeToStream("my.csv", [
- {a: "a1", b: "b1"},
- {a: "a2", b: "b2"}
- ], {headers: true})
- .on("finish", function(){
- console.log("done!");
- });
+ .writeToPath("my.csv", [
+ {a: "a1", b: "b1"},
+ {a: "a2", b: "b2"}
+ ], {headers: true})
+ .on("finish", function(){
+ console.log("done!");
+ });
writeToString(arr[, options])
csv.writeToString([
- ["a", "b"],
- ["a1", "b1"],
- ["a2", "b2"]
+ ["a", "b"],
+ ["a1", "b1"],
+ ["a2", "b2"]
], {headers: true}); //"a,b\na1,b1\na2,b2\n"
csv.writeToString([
- {a: "a1", b: "b1"},
- {a: "a2", b: "b2"}
+ {a: "a1", b: "b1"},
+ {a: "a2", b: "b2"}
], {headers: true}); //"a,b\na1,b1\na2,b2\n"
Benchmarks
Parsing 20000 records AVG over 3 runs
diff --git a/lib/index.js b/lib/index.js
index cb5bfce7..2af9936e 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -22,6 +22,11 @@
* * `ignoreEmpty=false`: If you wish to ignore empty rows.
* * `delimiter=','`: If your data uses an alternate delimiter such as `;` or `\t`.
* * **NOTE** When specifying an alternate `delimiter` you may only pass in a single character delimeter
+ * * The following options apply to parsing only.
+ * * `trim=false`: Set to `true` to trim whitespace from both ends of every parsed value.
+ * * `rtrim=false`: Set to `true` to trim trailing (right-hand) whitespace from every parsed value.
+ * * `ltrim=false`: Set to `true` to trim leading (left-hand) whitespace from every parsed value.
+ *
*
* **events**
*
diff --git a/lib/parser.js b/lib/parser.js
index a679a20d..feb3f524 100644
--- a/lib/parser.js
+++ b/lib/parser.js
@@ -1,11 +1,31 @@
-var SINGLE_QUOTE = "'",
+var extended = require("./extended"),
+ trim = extended.trim,
+ trimLeft = extended.trimLeft,
+ trimRight = extended.trimRight,
+ SINGLE_QUOTE = "'",
DOUBLE_QUOTE = '"';
-function createParser(delimiter) {
-
- var VALUE_REGEXP = new RegExp("([^" + delimiter + "'\"\\s\\\\]*(?:\\s+[^" + delimiter + "'\"\\s\\\\]+)*)"),
+function createParser(options) {
+ options = options || {};
+ var delimiter = options.delimiter || ",",
+ doLtrim = options.ltrim || false,
+ doRtrim = options.rtrim || false,
+ doTrim = options.trim || false,
+ VALUE_REGEXP = new RegExp("([^" + delimiter + "'\"\\s\\\\]*(?:\\s+[^" + delimiter + "'\"\\s\\\\]+)*)"),
SEARCH_REGEXP = new RegExp("[^\\\\]" + delimiter),
- ESCAPE_CHAR = "\\";
+ ESCAPE_CHAR = "\\",
+ WHITE_SPACE = /\s/;
+
+    /**
+     * Applies the trimming configured through the parser options to one parsed value.
+     *
+     * `trim` takes precedence over the one-sided options. When `trim` is off, `ltrim`
+     * and `rtrim` are applied independently, so enabling both trims both ends.
+     *
+     * @param {String} item the raw value extracted from the line.
+     * @return {String} the trimmed value, or `item` unchanged when no trim option is set.
+     */
+    function formatItem(item) {
+        if (doTrim) {
+            return trim(item);
+        }
+        if (doLtrim) {
+            item = trimLeft(item);
+        }
+        if (doRtrim) {
+            item = trimRight(item);
+        }
+        return item;
+    }
function getTokensBetween(str, start, items, cursor) {
var depth = 0, ret = [];
@@ -35,10 +55,10 @@ function createParser(delimiter) {
++cursor;
}
}
- if (++cursor < str.length && str[cursor].search(delimiter) !== 0) {
+ if (++cursor < str.length && getNextToken(str, cursor).token.search(delimiter) !== 0) {
throw new Error("Invalid row " + str);
}
- items.push(ret.join(""));
+ items.push(formatItem(ret.join("")));
return ++cursor;
}
@@ -52,7 +72,7 @@ function createParser(delimiter) {
nextIndex = searchStr.length - 1;
}
}
- items.push(searchStr.substr(0, nextIndex + 1));
+ items.push(formatItem(searchStr.substr(0, nextIndex + 1)));
return cursor + (nextIndex + 2);
}
@@ -69,20 +89,39 @@ function createParser(delimiter) {
return findNextToken(line, items, cursor);
}
+    /**
+     * Finds the next significant character in `line`, starting at `cursor`.
+     *
+     * Whitespace characters are skipped unless the whitespace character is itself the
+     * configured delimiter (for example a tab delimiter).
+     *
+     * @param {String} line the line being parsed.
+     * @param {Number} cursor the index at which to start looking.
+     * @return {Object} `{token: <character found>, cursor: <index of that character>}`.
+     * @throws {Error} "Invalid row <line>" when no such character exists at or after `cursor`.
+     */
+    function getNextToken(line, cursor) {
+        var l = line.length, token;
+        // bounded scan: never index past the end of the line
+        for (; cursor < l; cursor++) {
+            token = line[cursor];
+            if (token === delimiter || !WHITE_SPACE.test(token)) {
+                return {token: token, cursor: cursor};
+            }
+        }
+        throw new Error("Invalid row " + line);
+    }
+
+    /**
+     * Splits one line into its column values.
+     *
+     * The next significant character (see `getNextToken`) decides how the column is
+     * read: a delimiter produces an empty value, a quote starts a quoted value at the
+     * quote's position, and anything else is passed to `parseItem` from the current
+     * position `i`, leaving any leading whitespace for `parseItem` to handle.
+     *
+     * @param {String} line the line to parse.
+     * @return {Array} the values found, in column order.
+     * @throws {Error} "Invalid row <line>" when `getNextToken` finds nothing but whitespace.
+     */
     return function parseLine(line) {
-        var i = 0, l = line.length, items = [], token;
+        var i = 0, l = line.length, items = [], token, nextToken;
         while (i < l) {
-            token = line[i];
+            nextToken = getNextToken(line, i);
+            token = nextToken.token;
             if (token === delimiter) {
                 items.push("");
-                i++;
+                // The delimiter may sit behind skipped whitespace. Step past the delimiter
+                // itself; advancing a single character would find the same delimiter again
+                // and emit a second, phantom empty column.
+                i = nextToken.cursor + 1;
             } else if (token === SINGLE_QUOTE) {
-                i = parseSingleQuoteItem(line, items, i);
+                i = parseSingleQuoteItem(line, items, nextToken.cursor);
             } else if (token === DOUBLE_QUOTE) {
-                i = parseDoubleQuoteItem(line, items, i);
+                i = parseDoubleQuoteItem(line, items, nextToken.cursor);
             } else {
                 i = parseItem(line, items, i);
             }
         }
         return items;
     };
diff --git a/lib/parser_stream.js b/lib/parser_stream.js
index ee9bcc9a..ec063a39 100644
--- a/lib/parser_stream.js
+++ b/lib/parser_stream.js
@@ -27,7 +27,8 @@ function ParserStream(options) {
} else {
delimiter = DEFAULT_DELIMITER;
}
- this.parser = createParser(delimiter);
+ options.delimiter = delimiter;
+ this.parser = createParser(options);
this._headers = options.headers;
this._ignoreEmpty = options.ignoreEmpty;
return this;
diff --git a/package.json b/package.json
index 657e58ef..b9ea2f4c 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "fast-csv",
- "version": "0.1.0",
+ "version": "0.1.1",
"description": "CSV parser for node.js",
"main": "index.js",
"scripts": {
diff --git a/test/assets/test17.csv b/test/assets/test17.csv
new file mode 100644
index 00000000..909c9a8c
--- /dev/null
+++ b/test/assets/test17.csv
@@ -0,0 +1,10 @@
+first_name,last_name,email_address,address
+"First1", "Last1", "email1@email.com", "1 Street St, State ST, 88888"
+"First2", "Last2", "email2@email.com", "2 Street St, State ST, 88888"
+"First3", "Last3", "email3@email.com", "3 Street St, State ST, 88888"
+"First4", "Last4", "email4@email.com", "4 Street St, State ST, 88888"
+"First5", "Last5", "email5@email.com", "5 Street St, State ST, 88888"
+"First6", "Last6", "email6@email.com", "6 Street St, State ST, 88888"
+"First7", "Last7", "email7@email.com", "7 Street St, State ST, 88888"
+"First8", "Last8", "email8@email.com", "8 Street St, State ST, 88888"
+"First9", "Last9", "email9@email.com", "9 Street St, State ST, 88888"
\ No newline at end of file
diff --git a/test/assets/test18.csv b/test/assets/test18.csv
new file mode 100644
index 00000000..4572de25
--- /dev/null
+++ b/test/assets/test18.csv
@@ -0,0 +1,10 @@
+first_name,last_name,email_address,address
+" First1"," Last1"," email1@email.com"," 1 Street St, State ST, 88888"
+" First2"," Last2"," email2@email.com"," 2 Street St, State ST, 88888"
+" First3"," Last3"," email3@email.com"," 3 Street St, State ST, 88888"
+" First4"," Last4"," email4@email.com"," 4 Street St, State ST, 88888"
+" First5"," Last5"," email5@email.com"," 5 Street St, State ST, 88888"
+" First6"," Last6"," email6@email.com"," 6 Street St, State ST, 88888"
+" First7"," Last7"," email7@email.com"," 7 Street St, State ST, 88888"
+" First8"," Last8"," email8@email.com"," 8 Street St, State ST, 88888"
+" First9"," Last9"," email9@email.com"," 9 Street St, State ST, 88888"
\ No newline at end of file
diff --git a/test/assets/test19.csv b/test/assets/test19.csv
new file mode 100644
index 00000000..bf51359c
--- /dev/null
+++ b/test/assets/test19.csv
@@ -0,0 +1,10 @@
+first_name,last_name,email_address,address
+"First1 ","Last1 ","email1@email.com ","1 Street St, State ST, 88888 "
+"First2 ","Last2 ","email2@email.com ","2 Street St, State ST, 88888 "
+"First3 ","Last3 ","email3@email.com ","3 Street St, State ST, 88888 "
+"First4 ","Last4 ","email4@email.com ","4 Street St, State ST, 88888 "
+"First5 ","Last5 ","email5@email.com ","5 Street St, State ST, 88888 "
+"First6 ","Last6 ","email6@email.com ","6 Street St, State ST, 88888 "
+"First7 ","Last7 ","email7@email.com ","7 Street St, State ST, 88888 "
+"First8 ","Last8 ","email8@email.com ","8 Street St, State ST, 88888 "
+"First9 ","Last9 ","email9@email.com ","9 Street St, State ST, 88888 "
\ No newline at end of file
diff --git a/test/assets/test20.csv b/test/assets/test20.csv
new file mode 100644
index 00000000..2968ad50
--- /dev/null
+++ b/test/assets/test20.csv
@@ -0,0 +1,10 @@
+first_name,last_name,email_address,address
+" First1 "," Last1 "," email1@email.com "," 1 Street St, State ST, 88888 "
+" First2 "," Last2 "," email2@email.com "," 2 Street St, State ST, 88888 "
+" First3 "," Last3 "," email3@email.com "," 3 Street St, State ST, 88888 "
+" First4 "," Last4 "," email4@email.com "," 4 Street St, State ST, 88888 "
+" First5 "," Last5 "," email5@email.com "," 5 Street St, State ST, 88888 "
+" First6 "," Last6 "," email6@email.com "," 6 Street St, State ST, 88888 "
+" First7 "," Last7 "," email7@email.com "," 7 Street St, State ST, 88888 "
+" First8 "," Last8 "," email8@email.com "," 8 Street St, State ST, 88888 "
+" First9 "," Last9 "," email9@email.com "," 9 Street St, State ST, 88888 "
\ No newline at end of file
diff --git a/test/fast-csv.test.js b/test/fast-csv.test.js
index f87bf4b1..d3f2eece 100644
--- a/test/fast-csv.test.js
+++ b/test/fast-csv.test.js
@@ -442,6 +442,63 @@ it.describe("fast-csv parser", function (it) {
});
});
+    it.should("ignore leading white space in front of a quoted value", function (next) {
+        // test17.csv places a space after each delimiter, before the opening quote.
+        var fixture = path.resolve(__dirname, "./assets/test17.csv"),
+            records = [];
+        csv
+            .fromPath(fixture, {headers: true})
+            .on("record", function (record, position) {
+                records[position] = record;
+            })
+            .on("end", function (total) {
+                assert.deepEqual(records, expected1);
+                assert.equal(total, records.length);
+                next();
+            });
+    });
+
+    it.should("accept a ltrim parameter", function (next) {
+        // test18.csv values carry a leading space inside the quotes.
+        var fixture = path.resolve(__dirname, "./assets/test18.csv"),
+            records = [];
+        csv
+            .fromPath(fixture, {ltrim: true, trim: false, headers: true})
+            .on("record", function (record, position) {
+                records[position] = record;
+            })
+            .on("end", function (total) {
+                assert.deepEqual(records, expected1);
+                assert.equal(total, records.length);
+                next();
+            });
+    });
+
+
+    it.should("accept a rtrim parameter", function (next) {
+        // test19.csv values carry a trailing space inside the quotes.
+        var fixture = path.resolve(__dirname, "./assets/test19.csv"),
+            records = [];
+        csv
+            .fromPath(fixture, {rtrim: true, trim: false, headers: true})
+            .on("record", function (record, position) {
+                records[position] = record;
+            })
+            .on("end", function (total) {
+                assert.deepEqual(records, expected1);
+                assert.equal(total, records.length);
+                next();
+            });
+    });
+
+    it.should("accept a trim parameter", function (next) {
+        // test20.csv values carry both a leading and a trailing space inside the quotes.
+        var fixture = path.resolve(__dirname, "./assets/test20.csv"),
+            records = [];
+        csv
+            .fromPath(fixture, {trim: true, headers: true})
+            .on("record", function (record, position) {
+                records[position] = record;
+            })
+            .on("end", function (total) {
+                assert.deepEqual(records, expected1);
+                assert.equal(total, records.length);
+                next();
+            });
+    });
+
it.should("throw an error if an invalid path or stream is passed in", function () {
assert.throws(function () {
@@ -535,7 +592,6 @@ it.describe("fast-csv parser", function (it) {
});
});
-
it.describe(".writeToPath", function (it) {
it.should("write an array of arrays", function (next) {
@@ -565,8 +621,4 @@ it.describe("fast-csv parser", function (it) {
});
});
});
-
-
});
-
-it.run();