Skip to content

Commit

Permalink
Proper exception on hanging doublequote (PR #222)
Browse files Browse the repository at this point in the history
Squashed from commits:

- task: add test cases for decoding escaped input

  Test case `escapedMalformed0` is failing, throwing uncaught `error`

- fix: handle case when input for field parser is a single opening double-quote

- Fix output of escapedMalformed1 test
  (this contribution got deleted)

- Fixup PR #222: remove failing test, drop testing for GHC < 8.4

Co-authored-by: German Zhyvotnikov <[email protected]>
  • Loading branch information
gzh authored and andreasabel committed Aug 3, 2024
1 parent c1a60a0 commit ef014ec
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 18 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/haskell-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ jobs:
echo "CABAL=$HOME/.ghcup/bin/cabal-3.12.1.0 -vnormal+nowrap" >> "$GITHUB_ENV"
HCNUMVER=$(${HC} --numeric-version|perl -ne '/^(\d+)\.(\d+)\.(\d+)(\.(\d+))?$/; print(10000 * $1 + 100 * $2 + ($3 == 0 ? $5 != 1 : $3))')
echo "HCNUMVER=$HCNUMVER" >> "$GITHUB_ENV"
echo "ARG_TESTS=--enable-tests" >> "$GITHUB_ENV"
if [ $((HCNUMVER >= 80400)) -ne 0 ] ; then echo "ARG_TESTS=--enable-tests" >> "$GITHUB_ENV" ; else echo "ARG_TESTS=--disable-tests" >> "$GITHUB_ENV" ; fi
echo "ARG_BENCH=--enable-benchmarks" >> "$GITHUB_ENV"
echo "HEADHACKAGE=false" >> "$GITHUB_ENV"
echo "ARG_COMPILER=--$HCKIND --with-compiler=$HC" >> "$GITHUB_ENV"
Expand Down Expand Up @@ -230,7 +230,7 @@ jobs:
$CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --write-ghc-environment-files=always
- name: tests
run: |
$CABAL v2-test $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --test-show-details=direct
if [ $((HCNUMVER >= 80400)) -ne 0 ] ; then $CABAL v2-test $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --test-show-details=direct ; fi
- name: cabal check
run: |
cd ${PKGDIR_cassava} || false
Expand Down
4 changes: 3 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
## Version 0.5.3.2

* Proper exception on hanging doublequote ([PR #222](https://github.com/haskell-hvr/cassava/pull/222)).
* Allow latest `hashable`.
* Tested with GHC 8.0 - 9.10.1.
* Build tested with GHC 8.0 - 9.10.1.
* Functionality tested with GHC 8.4 - 9.10.1.

## Version 0.5.3.1

Expand Down
2 changes: 2 additions & 0 deletions cabal.haskell-ci
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
branches: master

tests: >= 8.4

-- constraint-set containers-0.7
-- ghc: >=8.2
-- constraints: containers ^>=0.7
Expand Down
6 changes: 2 additions & 4 deletions cassava.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ Test-suite unit-tests
Main-is: UnitTests.hs
-- dependencies with version constraints inherited via lib:cassava
Build-depends: attoparsec
, base
, base >= 4.11 && < 5
, bytestring
, cassava
, hashable
Expand All @@ -158,12 +158,10 @@ Test-suite unit-tests
-Wall
-- https://ghc.haskell.org/trac/ghc/wiki/Migration/8.0#Recommendationsforforward-compatibility
-Wcompat
-Wcpp-undef
-Wnoncanonical-monad-instances

if impl(ghc >= 8.8)
ghc-options: -Wno-star-is-type
else
ghc-options: -Wnoncanonical-monadfail-instances

if impl(ghc >= 8.2)
ghc-options: -Wcpp-undef
25 changes: 14 additions & 11 deletions src/Data/Csv/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -149,17 +149,20 @@ field !delim = do
-- | Parse a field that starts with a double quote and return its contents
-- without the surrounding quotes. If the contents still contain a double
-- quote they are run through 'unescape'.
--
-- Fails with @"trailing double quote"@ when the scan after the opening
-- quote yields nothing (e.g. the input @baz,\"@), rather than applying a
-- partial function to an empty 'S.ByteString'.
escapedField :: AL.Parser S.ByteString
escapedField = do
    _ <- dquote
    scanned <- A.scan False step
    -- We need to drop a trailing double quote left by scan. 'S.unsnoc' does
    -- that and, unlike 'S.init', is total: an empty chunk gives 'Nothing'.
    case S.unsnoc scanned of
        Nothing -> fail "trailing double quote"
        Just (s, _)
            | doubleQuote `S.elem` s -> either fail return (Z.parse unescape s)
            | otherwise              -> return s
  where
    -- The scan state is 'True' if the previous character was a double quote.
    -- Returning 'Nothing' stops the scan: that happens on the first
    -- character other than a double quote seen while the state is 'True'.
    step prevWasQuote c
        | c == doubleQuote = Just (not prevWasQuote)
        | prevWasQuote     = Nothing
        | otherwise        = Just False

unescapedField :: Word8 -> AL.Parser S.ByteString
unescapedField !delim = A.takeWhile (\ c -> c /= doubleQuote &&
Expand Down
18 changes: 18 additions & 0 deletions tests/UnitTests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import qualified Data.Text as T
import qualified Data.Text.Lazy as LT
import qualified Data.Vector as V
import qualified Data.Foldable as F
import Data.List (isPrefixOf)
import Data.Word
import Numeric.Natural
import GHC.Generics (Generic)
Expand Down Expand Up @@ -48,6 +49,14 @@ assertResult input expected res = case res of
" input: " ++ show (BL8.unpack input) ++ "\n" ++
"parse error: " ++ err

-- | Assert that decoding @input@ with 'NoHeader' fails and that the error
-- message starts with @"parse error (" ++ expected ++ ")"@.
--
-- The assertion fails when decoding succeeds (the decoded records are shown
-- in the failure message) or when the error message lacks that prefix.
decodeFailsWith :: BL.ByteString -> String -> Assertion
decodeFailsWith input expected = case decode NoHeader input of
    Right r -> assertFailure $
        "input: " ++ show (BL8.unpack input) ++ "\n" ++
        "returned: " ++ show (r :: V.Vector (V.Vector B.ByteString)) ++ "\n" ++
        "whereas should have failed with " ++ expected
    Left err -> assertBool ("got " ++ err ++ "\ninstead of " ++ expected) $
        ("parse error (" ++ expected ++ ")") `isPrefixOf` err

encodesAs :: [[B.ByteString]] -> BL.ByteString -> Assertion
encodesAs input expected =
encode (map V.fromList input) @?= expected
Expand Down Expand Up @@ -158,6 +167,11 @@ positionalTests =
[ testGroup "decode" $ map streamingDecodeTest decodeTests
, testGroup "decodeWith" $ map streamingDecodeWithTest decodeWithTests
]
, testGroup "failing"
[ testCase "escapedMalformed0" $
"baz,\"" `decodeFailsWith` "Failed reading: trailing double quote"
]

]
where
rfc4180Input = BL8.pack $
Expand All @@ -178,6 +192,10 @@ positionalTests =
[["a", "b", "c"], ["d", "e", "f"]])
, ("leadingSpace", " a, b, c\n", [[" a", " b", " c"]])
, ("rfc4180", rfc4180Input, rfc4180Output)
, ("escapedDoubleQuotes"
, "\"x,y\",z\nbaz,\"bar\nfoo,\""
, [["x,y", "z"], ["baz", "bar\nfoo,"]]
)
]
decodeWithTests =
[ ("tab-delim", defDec { decDelimiter = 9 }, "1\t2", [["1", "2"]])
Expand Down

0 comments on commit ef014ec

Please sign in to comment.