diff --git a/lib/parser.js b/lib/parser.js index 3a99a9bf..735cbcbf 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -14,8 +14,8 @@ function createParser(options) { VALUE_REGEXP = new RegExp("([^" + delimiter + "'\"\\s\\\\]*(?:\\s+[^" + delimiter + "'\"\\s\\\\]+)*)"), SEARCH_REGEXP = new RegExp("(?:\\n|\\r|" + delimiter + ")"), ESCAPE_CHAR = options.escape || '"', - NEXT_TOKEN_REGEXP = new RegExp("([^\\s]|\\n|\\r|" + delimiter + ")"), - LINE_BREAK = /[\r\n]/; + NEXT_TOKEN_REGEXP = new RegExp("([^\\s]|\\r\\n|\\n|\\r|" + delimiter + ")"), + LINE_BREAK = /(\r\n|\n|\r)/; function formatItem(item) { if (doTrim) { @@ -112,9 +112,10 @@ function createParser(options) { } function getNextToken(line, cursor) { - var token, nextIndex; - if ((nextIndex = line.substr(cursor).search(NEXT_TOKEN_REGEXP)) !== -1) { + var token, nextIndex, subStr = line.substr(cursor); + if ((nextIndex = subStr.search(NEXT_TOKEN_REGEXP)) !== -1) { token = line[cursor += nextIndex]; + cursor += subStr.match(NEXT_TOKEN_REGEXP)[1].length - 1; } return {token: token, cursor: cursor}; } diff --git a/package.json b/package.json index 678e7918..98a1383e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "fast-csv", - "version": "0.2.1", + "version": "0.2.2", "description": "CSV parser and writer", "main": "index.js", "scripts": { diff --git a/test/parser.test.js b/test/parser.test.js index 00461587..f97fc409 100644 --- a/test/parser.test.js +++ b/test/parser.test.js @@ -200,7 +200,6 @@ it.describe("fast-csv parser", function (it) { }); }); }); - }); it.describe("with \\r", function (it) { @@ -402,4 +401,203 @@ it.describe("fast-csv parser", function (it) { }); + it.describe("with \\r\\n", function (it) { + + it.describe("unescaped data", function (it) { + + it.should("parse a block of CSV text", function () { + var data = "first_name,last_name,email_address\r\nFirst1,Last1,email1@email.com"; + var myParser = parser({delimiter: ","}); + assert.deepEqual(myParser(data, false), {"line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First1", "Last1", "email1@email.com"] + ]}); + }); + + it.should("return the rest of the line if there is more data", function () { + var data = "first_name,last_name,email_address\r\nFirst1,Last1,email1@email.com"; + var myParser = parser({delimiter: ","}); + assert.deepEqual(myParser(data, true), { + "line": "First1,Last1,email1@email.com", + "rows": [ + ["first_name", "last_name", "email_address"] + ] + }); + }); + + it.should("accept new data and return the result", function () { + var data = "first_name,last_name,email_address\r\nFirst1,Last1,email1@email.com,"; + var myParser = parser({delimiter: ","}); + var parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": "First1,Last1,email1@email.com,", + "rows": [ + ["first_name", "last_name", "email_address"] + ] + }); + assert.deepEqual(myParser(parsedData.line + "\r\nFirst2,Last2,email2@email.com", false), {"line": "", "rows": [ + ["First1", "Last1", "email1@email.com"], + ["First2", "Last2", "email2@email.com"] + ]}); + }); + + it.should("not parse a row if a new line is not found and there is more data", function () { + var data = "first_name,last_name,email_address"; + var myParser = parser({delimiter: ","}); + var parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": "first_name,last_name,email_address", + "rows": [] + }); + }); + + it.should("not parse a row if there is a trailing delimiter and there is more data", function () { + var data = "first_name,last_name,email_address,"; + var myParser = parser({delimiter: ","}); + var parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": "first_name,last_name,email_address,", + "rows": [] + }); + }); + + it.should("parse a row if a new line is found and there is more data", function () { + var data = "first_name,last_name,email_address\r\n"; + var myParser = parser({delimiter: ","}); + var parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": "", + "rows": [ + ["first_name", "last_name", "email_address"] + ] + }); + }); + + }); + + it.describe("escaped values", function (it) { + + it.should("parse a block of CSV text", function () { + var data = 'first_name,last_name,email_address\r\n"First,1","Last,1","email1@email.com"'; + var myParser = parser({delimiter: ","}); + assert.deepEqual(myParser(data, false), {"line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First,1", "Last,1", "email1@email.com"] + ]}); + }); + + it.should("parse a block of CSV text with escaped escaped char", function () { + var data = 'first_name,last_name,email_address\r\n"First,""1""","Last,""1""","email1@email.com"'; + var myParser = parser({delimiter: ","}); + assert.deepEqual(myParser(data, false), {"line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First,\"1\"", "Last,\"1\"", "email1@email.com"] + ]}); + }); + + it.should("parse a block of CSV text with alternate escape char", function () { + var data = 'first_name,last_name,email_address\r\n"First,\\"1\\"","Last,\\"1\\"","email1@email.com"'; + var myParser = parser({delimiter: ",", escape: "\\"}); + assert.deepEqual(myParser(data, false), {"line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First,\"1\"", "Last,\"1\"", "email1@email.com"] + ]}); + }); + + it.should("return the rest of the line if a complete value is not found", function () { + var data = 'first_name,last_name,email_address\r\n"First,""1""","Last,""1""","email1@email.com'; + var myParser = parser({delimiter: ","}); + assert.deepEqual(myParser(data, true), { + "line": '"First,""1""","Last,""1""","email1@email.com', + "rows": [ + ["first_name", "last_name", "email_address"] + ] + }); + }); + + it.should("accept more data appended to the returned line with escaped values", function () { + var data = 'first_name,last_name,email_address\r\n"First,""1""","Last,""1""","email1@email.com'; + var myParser = parser({delimiter: ","}), + parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": '"First,""1""","Last,""1""","email1@email.com', + "rows": [ + ["first_name", "last_name", "email_address"] + ] + }); + assert.deepEqual(myParser(parsedData.line + '"\r\n"First,""2""","Last,""2""","email2@email.com"', false), { + line: "", + rows: [ + ["First,\"1\"", "Last,\"1\"", "email1@email.com"], + ["First,\"2\"", "Last,\"2\"", "email2@email.com"] + ] + }); + }); + + it.should("throw an error if there is not more data and there is an invalid escape sequence", function () { + var data = 'first_name,last_name,email_address\r\n"First,""1""","Last,""1""","email1@email.com'; + var myParser = parser({delimiter: ","}), + parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": '"First,""1""","Last,""1""","email1@email.com', + "rows": [ + ["first_name", "last_name", "email_address"] + ] + }); + assert.throws(function () { + assert.deepEqual(myParser(parsedData.line + '\r\n"First,"",2""","Last""2""","email2@email.com"', false), { + line: "", + rows: [ + ["First,\"1\"", "Last,\"1\"", "email1@email.com"], + ["First,\"2\"", "Last,\"2\"", "email2@email.com"] + ] + }); + }, Error, ' Parse Error: expected: \'"\' got: \'F\'. at \'First,""2""","Last""2""","email2@email.com"'); + }); + + it.should("handle empty values properly", function () { + var data = '"","",""\r\n,Last4,email4@email.com'; + var myParser = parser({delimiter: ","}), + parsedData = myParser(data, false); + assert.deepEqual(parsedData, {"line": "", "rows": [ + ["", "", ""], + ["", "Last4", "email4@email.com"] + ]}); + }); + + it.should("not parse a row if a new line is not found and there is more data", function () { + var data = '"first_name","last_name","email_address"'; + var myParser = parser({delimiter: ","}); + var parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": '"first_name","last_name","email_address"', + "rows": [] + }); + }); + + it.should("not parse a row if there is a trailing delimiter and there is more data", function () { + var data = '"first_name","last_name","email_address",'; + var myParser = parser({delimiter: ","}); + var parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": '"first_name","last_name","email_address",', + "rows": [] + }); + }); + + it.should("parse a row if a new line is found and there is more data", function () { + var data = '"first_name","last_name","email_address"\r\n'; + var myParser = parser({delimiter: ","}); + var parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": "", + "rows": [ + ["first_name", "last_name", "email_address"] + ] + }); + }); + }); + + }); + }); \ No newline at end of file