-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add more lex tests #36
Changes from 10 commits
b1a049f
bfee002
a076961
3a33bee
b9cc2de
869bfc9
5fc13ba
74f6a0d
2971779
07c772d
a99613b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -83,10 +83,10 @@ int is_valid_numeric_or_id_char(char c) { | |
return isalnum(c) || (c == '_') || (c == '.'); | ||
} | ||
|
||
int real_lex(Lexer*, Token*); | ||
int real_lex(Lexer *, Token *); | ||
|
||
/** | ||
* This produces a list of tokens after having been processed by the | ||
* This produces a list of tokens after having been processed by the | ||
* preprocessor. For example, if the code is | ||
* #define MAX_ARRAY 5 | ||
* int arr[MAX_ARRAY]; | ||
|
@@ -98,7 +98,7 @@ int real_lex(Lexer*, Token*); | |
* ] | ||
* ; | ||
*/ | ||
int lex(Lexer* l, Token* t) { | ||
int lex(Lexer *l, Token *t) { | ||
// For now, all we need to do is skip newlines | ||
for (;;) { | ||
real_lex(l, t); | ||
|
@@ -277,6 +277,8 @@ int skip_to_token(Lexer *l) { | |
return -1; // EOF was reached | ||
} | ||
|
||
// This is a function for parsing single char tokens | ||
// Now handles all cases of single char tokens | ||
TokenType ttype_one_char(char c) { | ||
switch (c) { | ||
case '(': | ||
|
@@ -330,11 +332,15 @@ TokenType ttype_one_char(char c) { | |
case '?': | ||
return TT_QMARK; | ||
default: | ||
PRINT_ERROR("Token type for token '%c' not recognized", c); | ||
return TT_NO_TOKEN; | ||
if (isdigit(c)) { | ||
return TT_LITERAL; | ||
} else { | ||
return TT_IDENTIFIER; | ||
} | ||
} | ||
} | ||
|
||
// This is a function for parsing exclusively tokens with more than one char | ||
TokenType ttype_many_chars(const char *contents) { | ||
if (STREQ(contents, "auto")) { | ||
return TT_AUTO; | ||
|
@@ -504,6 +510,7 @@ TokenType ttype_many_chars(const char *contents) { | |
return TT_IDENTIFIER; | ||
} | ||
|
||
// This is the function for parsing all tokens from strings | ||
TokenType ttype_from_string(const char *contents) { | ||
int len; | ||
|
||
|
@@ -512,10 +519,7 @@ TokenType ttype_from_string(const char *contents) { | |
// Single character contents | ||
if (len == 1) { | ||
TokenType token = ttype_one_char(contents[0]); | ||
|
||
if (token != TT_NO_TOKEN) { | ||
return token; | ||
} | ||
return token; | ||
} | ||
|
||
return ttype_many_chars(contents); | ||
|
@@ -612,9 +616,48 @@ static const char *ttype_names[] = { | |
|
||
const char *ttype_name(TokenType tt) { return ttype_names[tt]; } | ||
|
||
int test_ttype_many_chars() { | ||
testing_func_setup(); | ||
|
||
tassert(ttype_many_chars("foo") == TT_IDENTIFIER); | ||
tassert(ttype_many_chars("struct") == TT_STRUCT); | ||
tassert(ttype_many_chars("while") == TT_WHILE); | ||
|
||
return 0; | ||
} | ||
|
||
int test_ttype_one_char() { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would it be possible to get some coverage of failure paths, e.g. invalid tokens? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What do you mean? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I mean as in testing that it fails gracefully and doesn't segfault or something if we ask … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, testing doesn't segfault; it just prints the assert that failed. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't mean that the testing code itself might be broken; I mean testing whether … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ohh I see, yup, we definitely should have that added. Same PR or later? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Later is probably fine. |
||
testing_func_setup(); | ||
|
||
// Use ttype_from_string | ||
tassert(ttype_one_char('a') == TT_IDENTIFIER); | ||
tassert(ttype_one_char('1') == TT_LITERAL); | ||
|
||
tassert(ttype_one_char('+') == TT_PLUS); | ||
tassert(ttype_one_char('-') == TT_MINUS); | ||
tassert(ttype_one_char('>') == TT_GREATER); | ||
tassert(ttype_one_char('~') == TT_BNOT); | ||
|
||
return 0; | ||
} | ||
|
||
int test_ttype_name() { | ||
testing_func_setup(); | ||
|
||
tassert(strcmp(ttype_name(TT_LITERAL), "literal") == 0); | ||
tassert(strcmp(ttype_name(TT_PLUS), "+") == 0); | ||
tassert(strcmp(ttype_name(TT_SIZEOF), "sizeof") == 0); | ||
tassert(strcmp(ttype_name(TT_WHILE), "while") == 0); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Testing this function is great! I am worried, though, that as a lookup table with no information about the ordering of the enum, it is a bit fragile. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That's true. Having this test is a good canary in the coal mine for names not working: if the tests break, any code that relies on the function will also break. It's a general problem with how naming enums works in C, so we should be careful with the use of the enum + name function. |
||
return 0; | ||
} | ||
|
||
int test_ttype_from_string() { | ||
testing_func_setup(); | ||
|
||
tassert(ttype_from_string("+") == TT_PLUS); | ||
tassert(ttype_from_string("=") == TT_ASSIGN); | ||
|
||
tassert(ttype_from_string("1") == TT_LITERAL); | ||
tassert(ttype_from_string("1.2") == TT_LITERAL); | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we know that everything not listed is an identifier?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great question. I think we might need to check for one-character symbols that definitely aren't tokens or identifiers. The only example I can think of is something like the `@` symbol, which is neither, so we might want to add error handling for that later.