-
Notifications
You must be signed in to change notification settings - Fork 0
/
Lex.hs
54 lines (43 loc) · 1.48 KB
/
Lex.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
module Lex
( tokenize
, main
) where
import Text.Regex
import IOUtils
-- | Compile a token pattern that is anchored to the start of the input.
--
-- The pattern is prefixed with @^@ and compiled case-sensitively.  The first
-- flag passed to 'mkRegexWithOpts' is 'False' so that @^@ matches only at the
-- very beginning of the string being scanned.  With 'True' (newline-sensitive
-- matching) @^@ would also match right after every newline, which lets
-- 'matchRegexAll' report a token found on a later line while the text in
-- front of it is silently skipped.
mkRegex' :: String -> Regex
mkRegex' s = mkRegexWithOpts ('^' : s) False True
-- Token patterns.  Each one is compiled through mkRegex', which prefixes the
-- pattern with "^".

-- | Comparison operators.
compOp :: Regex
compOp = mkRegex' "(==|!=|>|>=|<|<=)"

-- | Boolean connectives @&&@ and @||@.
boolOp :: Regex
boolOp = mkRegex' "(&&|\\|\\|)"

-- | Arithmetic operators and the plain @=@ sign.
arithOp :: Regex
arithOp = mkRegex' "(\\+|\\*|=|-|/|%)"

-- | Reserved words.
keyword :: Regex
keyword = mkRegex' "(if|else)"

-- | Boolean literals.
bool :: Regex
bool = mkRegex' "(true|false)"

-- | Integer literals; only non-negative integers are recognised for now.
int :: Regex
int = mkRegex' "([[:digit:]]+)"

-- | Identifiers: a lowercase letter or underscore, then letters, digits or
-- underscores.
ident :: Regex
ident = mkRegex' "[_a-z][_a-zA-Z0-9]*"

-- | Parentheses and braces.
delim :: Regex
delim = mkRegex' "(\\(|\\)|{|})"

-- | Every token pattern, in the order the lexer tries them.
regexes :: [Regex]
regexes =
  [ compOp
  , boolOp
  , arithOp
  , keyword
  , delim
  , bool
  , int
  , ident
  ]

-- | The statement terminator; compiled with plain 'mkRegex', so it is not
-- anchored to the start of the input.
lineEnding :: Regex
lineEnding = mkRegex ";"
-- | Split program text into a flat list of tokens.
--
-- The input is normalised once with 'removeWeirdness' and then cut at every
-- @;@.  Each piece is lexed with 'tokenizeLine', and a @";"@ token is emitted
-- for every terminator that was found.  Text after the last @;@ is lexed as
-- well.  An empty input yields no tokens.
--
-- Normalising up front and splitting with 'break' keeps the work linear in
-- the input size; re-normalising and regex-searching the whole remainder for
-- every statement is quadratic.
tokenize :: String -> [String]
tokenize = splitStatements . removeWeirdness
  where
    splitStatements [] = []
    splitStatements src =
      case break (== ';') src of
        -- No terminator left: the remainder is one final statement.
        (stmt, [])       -> tokenizeLine stmt
        -- Drop the ';' itself and re-emit it as its own token.
        (stmt, _ : rest) -> tokenizeLine stmt ++ ";" : splitStatements rest
-- | Lex a single statement (text that contains no @;@) into tokens.
--
-- Spaces and newlines between tokens are skipped.  At every other position
-- 'subtokenizeLine' is asked for the next token.  If it reports an empty
-- token, i.e. no pattern in 'regexes' matched, the offending character is
-- emitted as a one-character token and lexing continues after it, so that
-- unrecognised input is surfaced to the caller instead of the rest of the
-- statement being silently dropped.
tokenizeLine :: String -> [String]
tokenizeLine [] = []
tokenizeLine line@(c:cs)
  | c == ' ' || c == '\n' = tokenizeLine cs
  | null tok              = [c] : tokenizeLine cs
  | otherwise             = tok : tokenizeLine rest
  where
    (tok, rest) = subtokenizeLine regexes line
-- | Find the token at the very start of the input.
--
-- Every pattern is tried and the longest match wins; when two patterns match
-- the same length, the one listed first is kept.  Choosing the longest match
-- keeps an identifier such as @iffy@ in one piece instead of splitting it
-- into a keyword and a leftover.
--
-- A match is accepted only when nothing precedes it in the input; a match
-- found further along is ignored rather than used with the skipped text
-- thrown away.  Empty matches are ignored as well.
--
-- Returns @(token, remainingInput)@, or @("", "")@ when no pattern matches.
subtokenizeLine :: [Regex] -> String -> (String, String)
subtokenizeLine candidates input = go candidates ("", "")
  where
    go [] best = best
    go (re:res) best@(bestTok, _) =
      case matchRegexAll re input of
        Just ("", matched, remainder, _)
          -- Strictly longer only: ties stay with the earlier pattern.
          | length matched > length bestTok -> go res (matched, remainder)
        _ -> go res best
-- | Normalise whitespace characters that the lexer does not skip by itself.
--
-- Tabs, vertical tabs, carriage returns and form feeds are each replaced by
-- a single space.  Every other character, including newlines, is kept as is,
-- so the result always has the same length as the input.  Handling the
-- carriage return lets input with CRLF line endings be lexed.
removeWeirdness :: String -> String
removeWeirdness = map normalise
  where
    normalise c
      | c `elem` "\t\v\r\f" = ' '
      | otherwise           = c
-- | Fetch the input with 'getInput' (imported from IOUtils), join the pieces
-- it returns into one string, tokenize that string and print the token list.
main = getInput >>= print . tokenize . concat