Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More tokens #19

Merged
merged 7 commits into from
Jun 22, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 85 additions & 19 deletions src/lexer/lex.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#include "lex.h"

#include <string.h> // memcpy
#include <assert.h> // assert
#include <string.h> // memcpy

#define STREQ(a, b) (!strcmp((a), (b)))

#include <util/out.h> // error reporting

Expand All @@ -20,10 +22,8 @@ int lex(Lexer *l, Token *t) {
int unlex(Lexer *l, Token *t) {
// First, make sure we can actually fit it in the buffer.
if (l->unlexed_count >= TOKEN_PUTBACKS) {
PRINT_ERROR(
"internal: tried to unlex more than %d tokens at a time",
TOKEN_PUTBACKS
);
PRINT_ERROR("internal: tried to unlex more than %d tokens at a time",
TOKEN_PUTBACKS);
return -1; // Error return code
}
memcpy(&l->unlexed[l->unlexed_count], t, sizeof(Token));
Expand Down Expand Up @@ -53,7 +53,73 @@ TokenType ttype_one_char(char c) {
}

TokenType ttype_many_chars(const char *contents) {
// TODO: Handle operations
if (STREQ(contents, "auto")) {
return TT_AUTO;
} else if (STREQ(contents, "break")) {
return TT_BREAK;
} else if (STREQ(contents, "continue")) {
return TT_CONTINUE;
} else if (STREQ(contents, "const")) {
return TT_CONST;
} else if (STREQ(contents, "case")) {
return TT_CASE;
} else if (STREQ(contents, "char")) {
return TT_CHAR;
} else if (STREQ(contents, "do")) {
return TT_DO;
} else if (STREQ(contents, "double")) {
return TT_DOUBLE;
} else if (STREQ(contents, "default")) {
return TT_DEFAULT;
} else if (STREQ(contents, "enum")) {
return TT_ENUM;
} else if (STREQ(contents, "else")) {
return TT_ELSE;
} else if (STREQ(contents, "extern")) {
return TT_EXTERN;
} else if (STREQ(contents, "float")) {
return TT_FLOAT;
} else if (STREQ(contents, "for")) {
return TT_FOR;
} else if (STREQ(contents, "goto")) {
return TT_GOTO;
} else if (STREQ(contents, "int")) {
return TT_INT;
} else if (STREQ(contents, "if")) {
return TT_IF;
} else if (STREQ(contents, "long")) {
return TT_LONG;
} else if (STREQ(contents, "return")) {
return TT_RETURN;
} else if (STREQ(contents, "register")) {
return TT_REGISTER;
} else if (STREQ(contents, "struct")) {
return TT_STRUCT;
} else if (STREQ(contents, "signed")) {
return TT_SIGNED;
} else if (STREQ(contents, "sizeof")) {
return TT_SIZEOF;
} else if (STREQ(contents, "static")) {
return TT_STATIC;
} else if (STREQ(contents, "short")) {
return TT_SHORT;
} else if (STREQ(contents, "switch")) {
return TT_SWITCH;
} else if (STREQ(contents, "typedef")) {
return TT_TYPEDEF;
} else if (STREQ(contents, "union")) {
return TT_UNION;
} else if (STREQ(contents, "unsigned")) {
return TT_UNSIGNED;
} else if (STREQ(contents, "void")) {
return TT_SIZEOF;
} else if (STREQ(contents, "volitile")) {
return TT_SIZEOF;
} else if (STREQ(contents, "while")) {
return TT_WHILE;
}

// TODO: Handle operations

// Includes only numbers
int all_numeric = 1;
Expand Down Expand Up @@ -129,23 +195,23 @@ TokenType ttype_from_string(const char *contents) {
}

// Self-test for ttype_from_string(): each assert checks that a sample lexeme
// is classified as the expected TokenType. Returns 0 once every assertion
// has passed.
// NOTE(review): every statement below appears twice; this looks like the old
// and new sides of a whitespace-only diff shown together -- confirm against
// the real file before deduplicating.
int test_ttype_from_string() {
// Plain integer and floating-point literals.
assert(ttype_from_string("1") == TT_LITERAL);
assert(ttype_from_string("1.2") == TT_LITERAL);
assert(ttype_from_string("1") == TT_LITERAL);
assert(ttype_from_string("1.2") == TT_LITERAL);

// Numeric literals carrying a suffix (u, f), including "1." with no
// fractional digits.
assert(ttype_from_string("1u") == TT_LITERAL);
assert(ttype_from_string("1.2f") == TT_LITERAL);
assert(ttype_from_string("1.f") == TT_LITERAL);
assert(ttype_from_string("1u") == TT_LITERAL);
assert(ttype_from_string("1.2f") == TT_LITERAL);
assert(ttype_from_string("1.f") == TT_LITERAL);

// Double-quoted and single-quoted text is also expected to be a literal.
assert(ttype_from_string("\"Planck\"") == TT_LITERAL);
assert(ttype_from_string("'Language'") == TT_LITERAL);
assert(ttype_from_string("\"Planck\"") == TT_LITERAL);
assert(ttype_from_string("'Language'") == TT_LITERAL);

// Names that are not keywords, including one with a trailing underscore.
assert(ttype_from_string("Jaba") == TT_IDENTIFIER);
assert(ttype_from_string("cat_") == TT_IDENTIFIER);
assert(ttype_from_string("Jaba") == TT_IDENTIFIER);
assert(ttype_from_string("cat_") == TT_IDENTIFIER);

// Single-character punctuation tokens.
assert(ttype_from_string("(") == TT_OPAREN);
assert(ttype_from_string("}") == TT_CBRACE);
assert(ttype_from_string("(") == TT_OPAREN);
assert(ttype_from_string("}") == TT_CBRACE);

assert(ttype_from_string(";") == TT_SEMI);
assert(ttype_from_string(";") == TT_SEMI);

return 0;
return 0;
}
67 changes: 67 additions & 0 deletions src/lexer/token.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,73 @@ typedef enum {
TT_CBRACKET, // ]
TT_SEMI, // ;
TT_NO_TOKEN, // Not a token

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought of a couple more that need to be added:

TT_XOR, // ^
TT_XOREQ, // ^=
TT_POINT, // ->

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just realized -- also TT_LEFTSHIFTEQUALS and TT_RIGHTSHIFTEQUALS for <<= and >>=.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh one more!!! (Hopefully this is the last one) there's also the dot operator `.`.

TT_PERIOD, // .
TT_COMMA, // ,
TT_MINUS, // -
TT_PLUS, // +
TT_STAR, // *
TT_SLASH, // /
TT_ASSIGN, // =
TT_COLON, // :
TT_MOD, // %
TT_BAND, // &
TT_LAND, // &&
TT_BOR, // |
TT_LOR, // ||
TT_DEC, // -=
TT_INC, // +=
TT_PLUSPLUS, // ++
TT_MINUSMINUS, // --
TT_DIVEQ, // /=
TT_MULEQ, // *=
TT_MODEQ, // %=
TT_BANDEQ, // &=
TT_BOREQ, // |=
TT_LANDEQ, // &&=
TT_LOREQ, // ||=
TT_GREATER, // >
TT_LESS, // <
TT_LESSEQ, // <=
TT_GREATEREQ, // >=
TT_LEFTSHIFT, // <<
TT_RIGHTSHIFT, // >>
TT_LNOT, // !
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also this is minor, but tilde is a bitwise NOT, so if we want to have naming consistent we could call them LNOT and BNOT respectively.

TT_LTILDE, // ~
TT_EQUALS, // ==

TT_AUTO, // auto
TT_BREAK, // break
TT_CHAR, // char
TT_CONST, // const
TT_CASE, // case
TT_CONTINUE, // continue
TT_DOUBLE, // double
TT_DO, // do
TT_DEFAULT, // default
TT_ENUM, // enum
TT_ELSE, // else
TT_EXTERN, // extern
TT_FLOAT, // float
TT_FOR, // for
TT_GOTO, // goto
TT_IF, // if
TT_INT, // int
TT_LONG, // long
TT_RETURN, // return
TT_REGISTER, // register
TT_STATIC, // static
TT_SWITCH, // switch
TT_SHORT, // short
TT_SIGNED, // signed
TT_STRUCT, // struct
TT_SIZEOF, // sizeof
TT_TYPEDEF, // typedef
TT_UNSIGNED, // unsigned
TT_UNION, // union
TT_VOID, // void
TT_VOLATILE, // volatile
TT_WHILE, // while
} TokenType;

// The maximum possible length of a token.
Expand Down
Loading