From a4627a6fb4ee9ce91134cdedec2263cf3651ef46 Mon Sep 17 00:00:00 2001
From: German Zhyvotnikov
Date: Mon, 27 Feb 2023 23:23:02 +0200
Subject: [PATCH] fix: handle case when input for field parser is a single opening double-quote

---
NOTE(review): line breaks and indentation were reconstructed from a whitespace-collapsed copy; confirm hunk whitespace against the target files before applying.

 src/Data/Csv/Parser.hs | 25 ++++++++++++++-----------
 tests/UnitTests.hs     |  2 +-
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/src/Data/Csv/Parser.hs b/src/Data/Csv/Parser.hs
index d028167..82fe4d2 100644
--- a/src/Data/Csv/Parser.hs
+++ b/src/Data/Csv/Parser.hs
@@ -154,17 +154,20 @@ field !delim = do
 escapedField :: AL.Parser S.ByteString
 escapedField = do
     _ <- dquote
-    -- The scan state is 'True' if the previous character was a double
-    -- quote. We need to drop a trailing double quote left by scan.
-    s <- S.init <$> (A.scan False $ \s c -> if c == doubleQuote
-                                            then Just (not s)
-                                            else if s then Nothing
-                                                 else Just False)
-    if doubleQuote `S.elem` s
-        then case Z.parse unescape s of
-            Right r -> return r
-            Left err -> fail err
-        else return s
+    -- The scan state is 'True' if the previous character was a double quote.
+    s' <- A.scan False $ \s c -> if c == doubleQuote
+                                 then Just (not s)
+                                 else if s then Nothing
+                                      else Just False
+    -- We need to drop a trailing double quote left by scan.
+    if S.null s'
+        then fail "trailing double quote"
+        else let s = S.init s'
+             in if doubleQuote `S.elem` s
+                    then case Z.parse unescape s of
+                        Right r -> return r
+                        Left err -> fail err
+                    else return s
 
 unescapedField :: Word8 -> AL.Parser S.ByteString
 unescapedField !delim = A.takeWhile (\ c -> c /= doubleQuote &&
diff --git a/tests/UnitTests.hs b/tests/UnitTests.hs
index ee2c975..2bf35fa 100644
--- a/tests/UnitTests.hs
+++ b/tests/UnitTests.hs
@@ -226,7 +226,7 @@ positionalTests =
       testCase "escapedMalformed1" $
       "\"x,\"y" `decodeFailsWith` "endOfInput",
       testCase "escapedMalformed0" $
-      "baz,\"" `decodeFailsWith` "endOfInput"
+      "baz,\"" `decodeFailsWith` "Failed reading: trailing double quote"
     ]
 
 nameBasedTests :: [TF.Test]