Skip to content

Commit

Permalink
Merge pull request #35 from adamhutchings/token
Browse files Browse the repository at this point in the history
Add operator lexing
  • Loading branch information
adamhutchings authored Jun 25, 2024
2 parents 2891e98 + 6fa630b commit d493287
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 8 deletions.
100 changes: 92 additions & 8 deletions src/lexer/lex.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,66 @@ int in_string(char c, char s[]) {
return 0;
}

// We will need to add more of these later, for sure
char single_char_tokens[] = "(){}[];";
// Characters the lexer treats as single-character tokens. Each character is
// listed exactly once; the string is only used as a membership set.
char single_char_tokens[] = "(){}[];~#,.:?";

// Table of every string that counts as an operator. The final NULL entry is a
// sentinel so the table can be walked without knowing its length.
char *operator_strings[] = {
    "-", "+", "*", "/", "=", ":", "%",
    "&", "&&", "|", "||",
    "-=", "+=", "++", "--", "/=", "*=", "%=", "&=", "|=", "&&=", "||=",
    ">", "<", "<=", ">=", "<<", ">>",
    "!", "==", "!=", "^", "^=", "->",
    "<<=", ">>=",
    NULL, // sentinel: end of table
};

// Report whether `c` is one of the characters that can begin an operator.
// Returns 1 if so, 0 otherwise (including for the NUL character).
int starts_operator(char c) {
    static const char leading_chars[] = "-+*/=:%&|<>!~^";

    // Stop at the terminator so that c == '\0' is never reported as a match.
    for (const char *candidate = leading_chars; *candidate != '\0'; ++candidate) {
        if (*candidate == c) {
            return 1;
        }
    }
    return 0;
}

// Check `op` against every entry of operator_strings, stopping at the table's
// NULL sentinel. Returns 1 as soon as STREQ reports a match, 0 if none does.
int valid_operator_sequence(char* op) {
    char **entry = operator_strings;

    while (*entry) {
        if (STREQ(*entry, op)) {
            return 1;
        }
        ++entry;
    }
    return 0;
}

int is_valid_numeric_or_id_char(char c) {
return isalnum(c) || (c == '_') || (c == '.');
Expand Down Expand Up @@ -143,6 +201,22 @@ int real_lex(Lexer *l, Token *t) {
return 0;
}

// Lex an operator. We do this by lexing characters from the buffer until
// the resulting string is no longer an operator, then we cut our losses and
// return.
if (starts_operator(init)) {
while (valid_operator_sequence(t->contents)) {
t->contents[pos++] = (c = getc(l->fp));
}
// We've ended!
// Can we reduce this code duplication from above in a smart way?
ungetc(c, l->fp);
t->contents[pos - 1] = '\0';
t->type = ttype_from_string(t->contents);
t->length = pos;
return 0;
}

// TODO - parse character or string literal

return 0;
Expand Down Expand Up @@ -251,9 +325,14 @@ TokenType ttype_one_char(char c) {
return TT_BNOT; // ~
case '^':
return TT_XOR; // ^
case '#':
return TT_POUND;
case '?':
return TT_QMARK;
default:
PRINT_ERROR("Token type for token '%c' not recognized", c);
return TT_NO_TOKEN;
}

return TT_NO_TOKEN;
}

TokenType ttype_many_chars(const char *contents) {
Expand Down Expand Up @@ -316,9 +395,9 @@ TokenType ttype_many_chars(const char *contents) {
} else if (STREQ(contents, "unsigned")) {
return TT_UNSIGNED;
} else if (STREQ(contents, "void")) {
return TT_SIZEOF;
} else if (STREQ(contents, "volitile")) {
return TT_SIZEOF;
return TT_VOID;
} else if (STREQ(contents, "volatile")) {
return TT_VOLATILE;
} else if (STREQ(contents, "while")) {
return TT_WHILE;
} else if (STREQ(contents, "&&")) {
Expand Down Expand Up @@ -365,6 +444,8 @@ TokenType ttype_many_chars(const char *contents) {
return TT_LEFTSHIFTEQUALS;
} else if (STREQ(contents, ">>=")) {
return TT_RIGHTSHIFTEQUALS;
} else if (STREQ(contents, "!=")) {
return TT_NOTEQ;
}

// Includes only numbers
Expand Down Expand Up @@ -453,8 +534,10 @@ static const char *ttype_names[] = {
"no token", // Not a token
"end of file", // End-of-file, lex until we hit the end of the file
"newline", // Newline, used in preprocessing
"pound",
".",
",",
"?",
"-",
"+",
"*",
Expand Down Expand Up @@ -486,6 +569,7 @@ static const char *ttype_names[] = {
"!",
"~",
"==",
"!=",
"^",
"^=",
"->",
Expand Down Expand Up @@ -522,7 +606,7 @@ static const char *ttype_names[] = {
"unsigned",
"union",
"void",
"volitile",
"volatile",
"while",
};

Expand Down
3 changes: 3 additions & 0 deletions src/lexer/token.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@ typedef enum {
TT_NO_TOKEN, // Not a token
TT_EOF, // End-of-file, so we can lex until we hit the end of the file
TT_NEWLINE, // Newline, used in preprocessing
TT_POUND, // # for preprocessing

TT_PERIOD, // .
TT_COMMA, // ,
TT_QMARK, // ?
TT_MINUS, // -
TT_PLUS, // +
TT_STAR, // *
Expand Down Expand Up @@ -53,6 +55,7 @@ typedef enum {
TT_LNOT, // !
TT_BNOT, // ~
TT_EQUALS, // ==
TT_NOTEQ, // !=
TT_XOR, // ^
TT_XOREQ, // ^=
TT_POINT, // ->
Expand Down
12 changes: 12 additions & 0 deletions tests/optest.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/* Credit - djb2 */
/*
 * String hash: starts from 5381 and, for every character of `key` up to the
 * terminating NUL, replaces the running value with hash * 33 + c.
 * Returns the final accumulated value.
 * NOTE(review): this file looks like input for the operator lexing tests
 * rather than code that is called -- confirm before restructuring it.
 */
static unsigned long hash(const char * key) {

/* The local accumulator shares its name with the function. */
unsigned long hash = 5381;
int c;

/* The assigned value is 0 at the terminating NUL, which ends the loop. */
/* NOTE(review): *key is a plain char, so c may be negative for bytes above
   0x7F where char is signed -- confirm whether that matters here. */
while ((c = *key++))
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */

return hash;

}

0 comments on commit d493287

Please sign in to comment.