From 7d230e8b118d2d01fe109fd0dbb5ae7c2b2b96dc Mon Sep 17 00:00:00 2001 From: Doug Martin Date: Mon, 10 Mar 2014 15:32:09 -0500 Subject: [PATCH] v0.2.0 * Added multiline value support * Updated escaping logic * More performance enhancements * More robusts test cases * Removed support for having two quote types instead it just supports a single quote and escape sequence. --- Gruntfile.js | 2 +- README.md | 16 +-- benchmark/assets/100000.csv | 2 +- benchmark/assets/1000000.csv | 2 +- benchmark/assets/20000.csv | 2 +- benchmark/assets/50000.csv | 2 +- benchmark/benchmark.js | 15 ++- docs/index.html | 21 ++-- lib/extended.js | 6 +- lib/formatter.js | 2 +- lib/index.js | 16 +-- lib/parser.js | 162 ++++++++++++++++------------ lib/parser_stream.js | 107 ++++++++++--------- package.json | 9 +- test/assets/test10.csv | 18 ++-- test/assets/test12.csv | 18 ++-- test/assets/test14.txt | 6 +- test/assets/test15.txt | 6 +- test/assets/test16.txt | 6 +- test/assets/test21.csv | 9 ++ test/assets/test8.csv | 6 +- test/fast-csv.test.js | 90 +++++++++++----- test/parser.test.js | 202 +++++++++++++++++++++++++++++++++++ 23 files changed, 507 insertions(+), 218 deletions(-) create mode 100644 test/assets/test21.csv create mode 100644 test/parser.test.js diff --git a/Gruntfile.js b/Gruntfile.js index 328150ff..954def5e 100644 --- a/Gruntfile.js +++ b/Gruntfile.js @@ -16,7 +16,7 @@ module.exports = function (grunt) { src: 'test/**/*.test.js', options: { timeout: 3000, // not fully supported yet - reporter: 'dotmatrix' + reporter: 'spec' } } }, diff --git a/README.md b/README.md index 6b24e787..88af5da8 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,9 @@ [![build status](https://secure.travis-ci.org/C2FO/fast-csv.png)](http://travis-ci.org/C2FO/fast-csv) # Fast-csv -This is a library is aimed at providing fast CSV parsing. It accomplishes this by not handling some of the more complex -edge cases such as multi line rows. 
However it does support escaped values, embedded commas, double and single quotes. +This is a library that provides CSV parsing and formatting. + +**NOTE** As of v0.2.0 `fast-csv` supports multi-line values. ## Installation @@ -21,6 +22,9 @@ All methods accept the following `options` * `ignoreEmpty=false`: If you wish to ignore empty rows. * `delimiter=','`: If your data uses an alternate delimiter such as `;` or `\t`. * **NOTE** When specifying an alternate `delimiter` you may only pass in a single character delimeter +* `quote='"'`: The character to use to escape values that contain a delimeter. +* `escape='"'`: The character to use when escaping a value that is `quoted` and contains a `quote` character. + * `i.e`: 'First,"Name"' => '"First,""name"""' * The following are options for parsing only. * `trim=false`: If you want to trim all values parsed set to true. * `rtrim=false`: If you want to right trim all values parsed set to true. @@ -211,12 +215,8 @@ csv `fast-csv` also allows to you to create create a `CSV` from data. -In addition to the options for parsing you can specify the following additional options. - -* `quote='"'`: The character to use to escape values that contain a delimeter. -* `escape='"'`: The character to use when escaping a value that is `quoted` and constains a `quote` character. - * `i.e`: 'First,"Name"' => '"First,""name"""' - +Formatting accepts the same options as parsing. +* **Writing Data** Each of the following methods accept an array of values to be written, however each value must be an `array` of `array`s or `object`s. 
diff --git a/benchmark/assets/100000.csv b/benchmark/assets/100000.csv index 98e7dd90..3b48c178 100644 --- a/benchmark/assets/100000.csv +++ b/benchmark/assets/100000.csv @@ -99998,4 +99998,4 @@ First1,Last1,email1@email.com, ,,, "First'1",Last1,email1@email.com,"1 Street St, State ST, 88888" "First""1",Last1,email1@email.com,"1 Street St, State ST, 88888" -"First""1",Last1,email1@email.com,"1 Street St, State ST, 88888" +"First""1",Last1,email1@email.com,"1 Street St, State ST, 88888" \ No newline at end of file diff --git a/benchmark/assets/1000000.csv b/benchmark/assets/1000000.csv index ed38bf67..12571e72 100644 --- a/benchmark/assets/1000000.csv +++ b/benchmark/assets/1000000.csv @@ -999998,4 +999998,4 @@ First1,Last1,email1@email.com, First1,Last1,email1@email.com,"1 Street St, State ST, 88888" First1,Last1,email1@email.com,"1 ""Street"" St, State ST, 88888" First1,Last1,email1@email.com, -,,, +,,, \ No newline at end of file diff --git a/benchmark/assets/20000.csv b/benchmark/assets/20000.csv index 0ae70a96..622034b3 100644 --- a/benchmark/assets/20000.csv +++ b/benchmark/assets/20000.csv @@ -19998,4 +19998,4 @@ First1,Last1,email1@email.com, "First'1",Last1,email1@email.com,"1 Street St, State ST, 88888" "First""1",Last1,email1@email.com,"1 Street St, State ST, 88888" "First""1",Last1,email1@email.com,"1 Street St, State ST, 88888" -First1,Last1,email1@email.com,"1 Street St, State ST, 88888" +First1,Last1,email1@email.com,"1 Street St, State ST, 88888" \ No newline at end of file diff --git a/benchmark/assets/50000.csv b/benchmark/assets/50000.csv index 41290c88..7aabff69 100644 --- a/benchmark/assets/50000.csv +++ b/benchmark/assets/50000.csv @@ -49998,4 +49998,4 @@ First1,Last1,email1@email.com, ,,, "First'1",Last1,email1@email.com,"1 Street St, State ST, 88888" "First""1",Last1,email1@email.com,"1 Street St, State ST, 88888" -"First""1",Last1,email1@email.com,"1 Street St, State ST, 88888" +"First""1",Last1,email1@email.com,"1 Street St, State ST, 
88888" \ No newline at end of file diff --git a/benchmark/benchmark.js b/benchmark/benchmark.js index 08d2b96c..b43c423b 100644 --- a/benchmark/benchmark.js +++ b/benchmark/benchmark.js @@ -1,7 +1,7 @@ var fastCsv = require("../lib"), csv = require("csv"), path = require("path"), - COUNT = 20000, + COUNT = 100000, TEST_FILE = path.resolve(__dirname, "./assets/" + COUNT + ".csv"); @@ -23,7 +23,7 @@ function benchmarkFastCsv(done) { ret.address = data.address; return ret; }) - .on("record", function (record) { + .on("record", function () { count++; }) .on("end", function () { @@ -48,7 +48,7 @@ function benchmarkCsv(done) { ["first_name", "last_name", "email_address"].forEach(function (prop, i) { ret[camelize(prop)] = data[i]; }); - ret.address = data[4]; + ret.address = data[3]; return ret; }) .on('record', function () { @@ -67,20 +67,25 @@ function benchmarkCsv(done) { } function benchmark(title, m, done) { - var start = new Date(); + var start = new Date(), runStart = start; m(function (err) { if (err) { done(err); } else { + console.log("%s: RUN 1 %dms", title, (new Date() - runStart)); + runStart = new Date(); m(function (err) { if (err) { done(err); } else { + console.log("%s: RUN 2 %dms", title, (new Date() - runStart)); + runStart = new Date(); m(function (err) { if (err) { done(err); } else { - console.log("%s: %dms", title, (new Date() - start) / 3); + console.log("%s: RUN 3 %dms", title, (new Date() - runStart)); + console.log("%s: 3xAVG %dms", title, (new Date() - start) / 3); done(); } diff --git a/docs/index.html b/docs/index.html index 84061134..aff84fab 100644 --- a/docs/index.html +++ b/docs/index.html @@ -171,8 +171,8 @@

build status

Fast-csv

-

This is a library is aimed at providing fast CSV parsing. It accomplishes this by not handling some of the more complex -edge cases such as multi line rows. However it does support escaped values, embedded commas, double and single quotes.

+

This is a library that provides CSV parsing and formatting.

+

NOTE As of v0.2.0 fast-csv supports multi-line values.

Installation

npm install fast-csv

Usage

@@ -185,6 +185,11 @@

Parsing

  • NOTE When specifying an alternate delimiter you may only pass in a single-character delimiter
  • +
  • quote='"': The character to use to escape values that contain a delimiter.
  • +
  • escape='"': The character to use when escaping a value that is quoted and contains a quote character. +
  • The following are options for parsing only.