Skip to content

Commit

Permalink
Remove Maybe indirection in PatternSet (+ LambdaCase)
Browse files Browse the repository at this point in the history
Instead of `Maybe . Set` just have `Set`, collapsing the two null
values `Nothing` and `Just mempty`.  The distinction of these wasn't
used anywhere.

We drop GHC 7.4 in favor of using LambdaCase.
  • Loading branch information
andreasabel committed Jul 18, 2022
1 parent e7398f4 commit 6d66351
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 50 deletions.
5 changes: 0 additions & 5 deletions .github/workflows/haskell-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,6 @@ jobs:
compilerVersion: 7.6.3
setup-method: hvr-ppa
allow-failure: false
- compiler: ghc-7.4.2
compilerKind: ghc
compilerVersion: 7.4.2
setup-method: hvr-ppa
allow-failure: false
fail-fast: false
steps:
- name: apt
Expand Down
83 changes: 62 additions & 21 deletions lib/Text/Regex/TDFA/Pattern.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,31 @@
-- the parsed form of a Regular Expression.

module Text.Regex.TDFA.Pattern
(Pattern(..)
,PatternSet(..)
,PatternSetCharacterClass(..)
,PatternSetCollatingElement(..)
,PatternSetEquivalenceClass(..)
,GroupIndex
,DoPa(..)
,showPattern
-- ** Internal use
,starTrans
-- ** Internal use, Operations to support debugging under ghci
,starTrans',simplify',dfsPattern
( Pattern(..)
, PatternSet(..)
, patternSetChars
, patternSetCharacterClasses
, patternSetCollatingElements
, patternSetEquivalenceClasses
, PatternSetCharacterClass(..)
, PatternSetCollatingElement(..)
, PatternSetEquivalenceClass(..)
, GroupIndex
, DoPa(..)
, showPattern
-- ** Internal use
, starTrans
-- ** Internal use, Operations to support debugging under ghci
, starTrans', simplify', dfsPattern
) where

{- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -}

import Data.List(intersperse,partition)
import qualified Data.Set as Set(toAscList,toList)
import Data.Set(Set) -- XXX EnumSet

import Utils
import Text.Regex.TDFA.Common(DoPa(..),GroupIndex,common_error)

err :: String -> a
Expand Down Expand Up @@ -92,19 +98,54 @@ showPattern pIn =
-}
paren s = ('(':s)++")"

data PatternSet = PatternSet (Maybe (Set Char))
(Maybe (Set PatternSetCharacterClass))
(Maybe (Set PatternSetCollatingElement))
(Maybe (Set PatternSetEquivalenceClass))
deriving (Eq)
-- | Processed content of a bracket expression.
data PatternSet = PatternSet
{ _patternSetChars :: Set Char
-- ^ Characters included in the pattern.
, _patternSetCharacterClasses :: Set PatternSetCharacterClass
-- ^ POSIX character classes included in the pattern.
, _patternSetCollatingElements :: Set PatternSetCollatingElement
-- ^ Collating elements included in the pattern.
, _patternSetEquivalenceClasses :: Set PatternSetEquivalenceClass
-- ^ Equivalence classes included in the pattern.
}
deriving (Eq)

instance Semigroup PatternSet where
PatternSet a b c d <> PatternSet a' b' c' d' =
PatternSet (a <> a') (b <> b') (c <> c') (d <> d')

instance Monoid PatternSet where
mempty = PatternSet mempty mempty mempty mempty
mappend = (<>)

-- | Lens for '_patternSetChars'.
patternSetChars :: Lens' PatternSet (Set Char)
patternSetChars f ps =
f (_patternSetChars ps) <&> \ i -> ps{ _patternSetChars = i }

-- | Lens for '_patternSetCharacterClasses'.
patternSetCharacterClasses :: Lens' PatternSet (Set PatternSetCharacterClass)
patternSetCharacterClasses f ps =
f (_patternSetCharacterClasses ps) <&> \ i -> ps{ _patternSetCharacterClasses = i }

-- | Lens for '_patternSetCollatingElements'.
patternSetCollatingElements :: Lens' PatternSet (Set PatternSetCollatingElement)
patternSetCollatingElements f ps =
f (_patternSetCollatingElements ps) <&> \ i -> ps{ _patternSetCollatingElements = i }

-- | Lens for '_patternSetEquivalenceClasses'.
patternSetEquivalenceClasses :: Lens' PatternSet (Set PatternSetEquivalenceClass)
patternSetEquivalenceClasses f ps =
f (_patternSetEquivalenceClasses ps) <&> \ i -> ps{ _patternSetEquivalenceClasses = i }

instance Show PatternSet where
showsPrec i (PatternSet s scc sce sec) =
let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s
let (special,normal) = partition (`elem` "]-") $ Set.toAscList s
charSpec = (if ']' `elem` special then (']':) else id) (byRange normal)
scc' = maybe "" ((concatMap show) . Set.toList) scc
sce' = maybe "" ((concatMap show) . Set.toList) sce
sec' = maybe "" ((concatMap show) . Set.toList) sec
scc' = concatMap show $ Set.toList scc
sce' = concatMap show $ Set.toList sce
sec' = concatMap show $ Set.toList sec
in shows charSpec
. showsPrec i scc' . showsPrec i sce' . showsPrec i sec'
. if '-' `elem` special then showChar '-' else id
Expand Down
44 changes: 28 additions & 16 deletions lib/Text/Regex/TDFA/ReadRegex.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@ import Text.ParserCombinators.Parsec((<|>), (<?>),
try, runParser, many, getState, setState, CharParser, ParseError,
sepBy1, option, notFollowedBy, many1, lookAhead, eof, between,
string, noneOf, digit, char, anyChar)
import Utils

import Control.Monad (liftM, guard)

import Data.Foldable (asum)
import qualified Data.Set as Set(fromList)
import Data.Foldable (asum, foldl')
import qualified Data.Set as Set

-- | An element inside @[...]@, denoting a character class.
data BracketElement
Expand Down Expand Up @@ -132,20 +133,31 @@ p_bracket :: P Pattern
p_bracket = (char '[') >> ( (char '^' >> p_set True) <|> (p_set False) )

p_set :: Bool -> P Pattern
p_set invert = do initial <- option "" (char ']' >> return "]")
values <- if null initial then many1 p_set_elem else many p_set_elem
_ <- char ']'
ci <- char_index
let chars = maybe'set $ concat $
initial :
[ c | BEChar c <- values ] :
[ [start..end] | BERange start end <- values ]
colls = maybe'set [PatternSetCollatingElement coll | BEColl coll <- values ]
equivs = maybe'set [PatternSetEquivalenceClass equiv | BEEquiv equiv <- values]
class's = maybe'set [PatternSetCharacterClass a'class | BEClass a'class <- values]
maybe'set x = if null x then Nothing else Just (Set.fromList x)
sets = PatternSet chars class's colls equivs
sets `seq` return $ if invert then PAnyNot ci sets else PAny ci sets
p_set invert = do
-- A ] as first character after the opening [ is treated as alternative ']'
-- rather than the closing bracket.
initial <- option mempty $ Set.singleton <$> char ']'
-- Parse remaining content of bracket expression.
values <- if Set.null initial then many1 p_set_elem else many p_set_elem
_ <- char ']'
ci <- char_index
-- Process the content of bracket expression into a PatternSet.
let !sets = foldl' (flip addBracketElement) (mempty{ _patternSetChars = initial }) values
return $ if invert then PAnyNot ci sets else PAny ci sets

addBracketElement :: BracketElement -> PatternSet -> PatternSet
addBracketElement = \case
BEChar c ->
over patternSetChars $ Set.insert c
BERange start end ->
over patternSetChars $ (`Set.union` Set.fromDistinctAscList [start..end])
-- Set.union is left-biased, [start..end] is considered the smaller set
BEClass s ->
over patternSetCharacterClasses $ Set.insert $ PatternSetCharacterClass s
BEColl s ->
over patternSetCollatingElements $ Set.insert $ PatternSetCollatingElement s
BEEquiv s ->
over patternSetEquivalenceClasses $ Set.insert $ PatternSetEquivalenceClass s

-- From here down the code is the parser and functions for pattern [ ] set things

Expand Down
12 changes: 6 additions & 6 deletions lib/Text/Regex/TDFA/TNFA.hs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ import Data.IntSet.EnumSet2(EnumSet)
import qualified Data.IntSet.EnumSet2 as Set(singleton,toList,insert)
import Data.Maybe(catMaybes,isNothing)
import Data.Monoid as Mon(Monoid(..))
import qualified Data.Set as S(Set,insert,toAscList,empty)
import qualified Data.Set as S

import Text.Regex.TDFA.Common(QT(..),QNFA(..),QTrans,TagTask(..),TagUpdate(..),DoPa(..)
,CompOption(..)
Expand Down Expand Up @@ -793,11 +793,11 @@ ADD ORPHAN ID check and make this a fatal error while testing
-- | @decodePatternSet@ cannot handle collating element and treats
-- equivalence classes as just their definition and nothing more.
decodePatternSet :: PatternSet -> S.Set Char
decodePatternSet (PatternSet msc mscc _ msec) =
let baseMSC = maybe S.empty id msc
withMSCC = foldl (flip S.insert) baseMSC (maybe [] (concatMap decodeCharacterClass . S.toAscList) mscc)
withMSEC = foldl (flip S.insert) withMSCC (maybe [] (concatMap unSEC . S.toAscList) msec)
in withMSEC
decodePatternSet (PatternSet chars ccs _ eqcs) = S.unions
[ chars
, foldMap (S.fromList . decodeCharacterClass) ccs
, foldMap (S.fromList . unSEC) eqcs
]

-- | This returns the strictly ascending list of characters
-- represented by @[: :]@ POSIX character classes.
Expand Down
5 changes: 3 additions & 2 deletions regex-tdfa.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ tested-with:
GHC == 7.10.3
GHC == 7.8.4
GHC == 7.6.3
GHC == 7.4.2

source-repository head
type: git
Expand Down Expand Up @@ -99,7 +98,8 @@ library
build-depends: fail == 4.9.*
, semigroups == 0.18.* || == 0.19.*
build-depends: array >= 0.4 && < 0.6
, base >= 4.5 && < 5
, base >= 4.6 && < 5
-- GHC 7.6 required for LambdaCase
, bytestring >= 0.9.2 && < 0.12
, containers >= 0.4.2 && < 0.7
, mtl >= 2.1.3 && < 2.4
Expand All @@ -114,6 +114,7 @@ library
FlexibleInstances
ForeignFunctionInterface
FunctionalDependencies
LambdaCase
MagicHash
MultiParamTypeClasses
NondecreasingIndentation
Expand Down

0 comments on commit 6d66351

Please sign in to comment.