Merge branch 'main' of https://github.com/NicoBliss/jccc

jabacat · Jun 25, 2024 · 78486dc · 78486dc
2 parents 67ca863 + 556a3ff
commit 78486dc
Show file tree

Hide file tree

Showing 15 changed files with 280 additions and 19 deletions.
diff --git a/src/codegen/RISCV/codegen.c b/src/codegen/RISCV/codegen.c
@@ -0,0 +1 @@
+// Placeholder: RISC-V code generation is a possibility for later.
diff --git a/src/codegen/RISCV/codegen.h b/src/codegen/RISCV/codegen.h
@@ -0,0 +1,3 @@
+// Placeholder: RISC-V code generation is a possibility for later.
+
+#pragma once
diff --git a/src/codegen/x86/codegen.c b/src/codegen/x86/codegen.c
@@ -0,0 +1,80 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "codegen.h"
+
+#include <testing/tassert.h> // tassert
+
+/*
+ * Mutable state used by the code-generation routines in this file.
+ * GEN_STATE is the single instance, defined together with the struct.
+ */
+struct GenState {
+    // Bitmask of registers in use: bit N corresponds to register N (0-31),
+    // with register 0 in the least-significant bit.
+    uint32_t registers_in_use;
+
+    // Offset from rsp; init_int_literal advances it by 8 for each literal.
+    unsigned int rsp_offset;
+} GEN_STATE;
+
+// Reset GEN_STATE: no registers marked in use and a zero rsp offset.
+void code_gen_init() {
+    GEN_STATE = (struct GenState){
+        .registers_in_use = 0,
+        .rsp_offset = 0,
+    };
+}
+
+/*
+ * Return the assembly text that opens the program: the `global _start`
+ * directive, the `section .text` directive, a blank line, and the `_start:`
+ * label, each on its own line. There is no trailing newline.
+ *
+ * The returned pointer refers to a static buffer owned by this function; the
+ * caller must not free it.
+ */
+char *start_main() {
+    // Adjacent literals with explicit "\n": a backslash-newline inside a string
+    // literal only splices source lines and would emit no line break at all.
+    static char start[256] =
+        "global _start\n"
+        "section .text\n"
+        "\n"
+        "_start:";
+
+    return start;
+}
+
+/*
+ * Return the assembly text that ends the program: load 60 into rax and 0 into
+ * rdi, then issue `syscall`, one instruction per line with no trailing newline.
+ * NOTE(review): presumably this is the Linux x86-64 exit(0) call -- confirm
+ * against the target ABI.
+ *
+ * The returned pointer refers to a static buffer owned by this function; the
+ * caller must not free it.
+ */
+char *end_main() {
+    // Explicit "\n" separators: backslash-newline continuation inside a string
+    // literal would glue the instructions together on one line.
+    static char end[256] =
+        "mov rax, 60\n"
+        "mov rdi, 0\n"
+        "syscall";
+
+    return end;
+}
+
+/*
+ * Return the assembly text emitted at the start of a function: reserve 32
+ * bytes of stack and store r12, r13, r14 and r15 at [rsp], [rsp+8], [rsp+16]
+ * and [rsp+24]. One instruction per line, no trailing newline.
+ *
+ * The returned pointer refers to a static buffer owned by this function; the
+ * caller must not free it.
+ */
+char *start_func() {
+    // Explicit "\n" separators: backslash-newline continuation inside a string
+    // literal would glue the instructions together on one line.
+    static char start[256] =
+        "sub rsp, 32\n"
+        "mov [rsp], r12\n"
+        "mov [rsp+8], r13\n"
+        "mov [rsp+16], r14\n"
+        "mov [rsp+24], r15";
+
+    return start;
+}
+
+/*
+ * Return the assembly text emitted at the end of a function: reload r12, r13,
+ * r14 and r15 from [rsp], [rsp+8], [rsp+16] and [rsp+24], then release the 32
+ * bytes of stack. One instruction per line, no trailing newline.
+ *
+ * The returned pointer refers to a static buffer owned by this function; the
+ * caller must not free it.
+ */
+char *end_func() {
+    // Explicit "\n" separators: backslash-newline continuation inside a string
+    // literal would glue the instructions together on one line.
+    static char end[256] =
+        "mov r12, [rsp]\n"
+        "mov r13, [rsp+8]\n"
+        "mov r14, [rsp+16]\n"
+        "mov r15, [rsp+24]\n"
+        "add rsp, 32";
+
+    return end;
+}
+
+/*
+ * Generate the instruction that stores the integer literal `val` into the
+ * next stack slot. GEN_STATE.rsp_offset is advanced by 8 and the new offset is
+ * used in the generated text, e.g. "mov [rsp+8], 100".
+ *
+ * Returns a heap-allocated, NUL-terminated string that the caller must
+ * free(), or NULL if the allocation fails (rsp_offset is then left unchanged).
+ */
+char *init_int_literal(int val) {
+    enum { INIT_BUF_SIZE = 256 };
+
+    char *init = malloc(INIT_BUF_SIZE);
+    if (init == NULL) {
+        return NULL;
+    }
+
+    GEN_STATE.rsp_offset += 8;
+
+    // rsp_offset is unsigned, so it is printed with %u; snprintf keeps the
+    // write inside the buffer.
+    snprintf(init, INIT_BUF_SIZE, "mov [rsp+%u], %d", GEN_STATE.rsp_offset,
+             val);
+
+    return init;
+}
+
+// Check that the first literal generated after code_gen_init() is stored at
+// [rsp+8]. Always returns 0; failures are reported through tassert.
+int test_init_int_literal() {
+    testing_func_setup();
+    code_gen_init();
+
+    // init_int_literal returns a malloc'd string: keep the pointer so it can
+    // be checked before use and released after the comparison.
+    char *generated = init_int_literal(100);
+    tassert(generated != NULL);
+    tassert(strcmp(generated, "mov [rsp+8], 100") == 0);
+    free(generated);
+
+    return 0;
+}
diff --git a/src/codegen/x86/codegen.h b/src/codegen/x86/codegen.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include <testing/test_utils.h>
+
+// Assembly text that opens the program. Returns a pointer to a static buffer;
+// do not free it.
+char *start_main(void);
+
+// Assembly text that ends the program. Returns a pointer to a static buffer;
+// do not free it.
+char *end_main(void);
+
+// Assembly text emitted at the start of a function (stores r12-r15 on the
+// stack). Returns a pointer to a static buffer; do not free it.
+char *start_func(void);
+
+// Assembly text emitted at the end of a function (reloads r12-r15 from the
+// stack). Returns a pointer to a static buffer; do not free it.
+char *end_func(void);
+
+// Instruction that stores the integer literal `val` in the next stack slot.
+// Returns a malloc'd string; the caller is responsible for free()ing it.
+char *init_int_literal(int val);
+
+// Unit test for init_int_literal. Returns 0.
+int test_init_int_literal(void);
diff --git a/src/codegen/x86/test_x86.c b/src/codegen/x86/test_x86.c
@@ -0,0 +1,15 @@
+/*
+ * Run all of the tests at the end of each C file in the x86 codegen module
+ */
+
+#include "codegen.h"
+#include <testing/test_utils.h>
+
+// Run the x86 codegen unit tests (currently only test_init_int_literal)
+// between the module-level setup and cleanup calls. Always returns 0; the
+// return value of the individual test is not inspected.
+int test_x86() {
+    testing_module_setup();
+
+    test_init_int_literal();
+
+    testing_module_cleanup();
+    return 0;
+}
diff --git a/src/codegen/x86/test_x86.h b/src/codegen/x86/test_x86.h
@@ -0,0 +1,10 @@
+/*
+ * Run all of the tests at the end of each C file in the x86 codegen module
+ */
+
+#pragma once
+
+#include "codegen.h"
+#include <testing/test_utils.h>
+
+// Run the x86 codegen unit tests. Returns 0.
+int test_x86(void);
diff --git a/src/lexer/lex.c b/src/lexer/lex.c
@@ -3,6 +3,7 @@
 #include <assert.h>
 #include <stdio.h>
 #include <testing/tassert.h> // tassert
+#include <testing/test_utils.h>
 
 #include <ctype.h>
 #include <string.h> // memcpy
@@ -19,8 +20,67 @@ int in_string(char c, char s[]) {
     }
     return 0;
 }
-// We will need to add more of these later, for sure
-char single_char_tokens[] = "(){}[];";
+
+// Characters treated as single-character tokens. The string is used as a set,
+// so each character is listed exactly once.
+char single_char_tokens[] = "(){}[];~#,.:?";
+
+// All strings which represent operators.
+// All strings which represent operators, terminated by NULL so the table can
+// be walked without a separate length.
+//
+// Every character accepted by starts_operator has a one-character entry here,
+// and every proper prefix of a multi-character entry is itself an entry, so an
+// operator can be recognized by extending it one character at a time.
+// NOTE(review): "&&=" and "||=" are not C operators -- confirm they are wanted.
+char* operator_strings[] = {
+    // Arithmetic, assignment and the colon
+    "-", "+", "*", "/", "=", ":", "%",
+    // Bitwise and logical and/or
+    "&", "&&", "|", "||",
+    // Compound assignment, increment and decrement
+    "-=", "+=", "++", "--", "/=", "*=", "%=", "&=", "|=", "&&=", "||=",
+    // Comparison and shifts
+    ">", "<", "<=", ">=", "<<", ">>",
+    // Negation, equality, xor, member access, shift-assignment
+    "!", "==", "!=", "^", "^=", "->", "<<=", ">>=",
+    // Bitwise not: accepted by starts_operator, so it needs an entry too.
+    // Appended last so the positions of the existing entries do not change.
+    "~",
+    NULL, // for iterating
+};
+
+// Return 1 when `c` is one of - + * / = : % & | < > ! ~ ^ and 0 otherwise.
+int starts_operator(char c) {
+    static const char first_chars[] = "-+*/=:%&|<>!~^";
+
+    // strchr also matches the terminating NUL, so rule it out explicitly.
+    if (c == '\0')
+        return 0;
+    return strchr(first_chars, c) != NULL ? 1 : 0;
+}
+
+// Return 1 when STREQ reports `op` equal to some entry of operator_strings,
+// and 0 when the NULL terminator is reached without a match.
+int valid_operator_sequence(char* op) {
+    int idx = 0;
+
+    while (operator_strings[idx] != NULL) {
+        if (STREQ(operator_strings[idx], op)) {
+            return 1;
+        }
+        idx++;
+    }
+
+    return 0;
+}
 
 int is_valid_numeric_or_id_char(char c) {
     return isalnum(c) || (c == '_') || (c == '.');
@@ -183,6 +243,22 @@ int real_lex(Lexer *l, Token *t) {
         return 0;
     }
 
+    // Lex an operator. We do this by lexing characters from the buffer until
+    // the resulting string is no longer an operator, then we cut our losses and
+    // return.
+    if (starts_operator(init)) {
+        while (valid_operator_sequence(t->contents)) {
+            t->contents[pos++] = (c = getc(l->fp));
+        }
+        // We've ended!
+        // Can we reduce this code duplication from above in a smart way?
+        ungetc(c, l->fp);
+        t->contents[pos - 1] = '\0';
+        t->type = ttype_from_string(t->contents);
+        t->length = pos;
+        return 0;
+    }
+
     // TODO - parse character or string literal
 
     PRINT_ERROR("lexer unable to identify token starting with: %c", init);
@@ -291,9 +367,14 @@ TokenType ttype_one_char(char c) {
         return TT_BNOT; // ~
     case '^':
         return TT_XOR; // ^
+    case '#':
+        return TT_POUND;
+    case '?':
+        return TT_QMARK;
+    default:
+        PRINT_ERROR("Token type for token '%c' not recognized", c);
+        return TT_NO_TOKEN;
     }
-
-    return TT_NO_TOKEN;
 }
 
 TokenType ttype_many_chars(const char *contents) {
@@ -356,9 +437,9 @@ TokenType ttype_many_chars(const char *contents) {
     } else if (STREQ(contents, "unsigned")) {
         return TT_UNSIGNED;
     } else if (STREQ(contents, "void")) {
-        return TT_SIZEOF;
-    } else if (STREQ(contents, "volitile")) {
-        return TT_SIZEOF;
+        return TT_VOID;
+    } else if (STREQ(contents, "volatile")) {
+        return TT_VOLATILE;
     } else if (STREQ(contents, "while")) {
         return TT_WHILE;
     } else if (STREQ(contents, "&&")) {
@@ -405,6 +486,8 @@ TokenType ttype_many_chars(const char *contents) {
         return TT_LEFTSHIFTEQUALS;
     } else if (STREQ(contents, ">>=")) {
         return TT_RIGHTSHIFTEQUALS;
+    } else if (STREQ(contents, "!=")) {
+        return TT_NOTEQ;
     }
 
     // Includes only numbers
@@ -493,8 +576,10 @@ static const char *ttype_names[] = {
     "no token",      // Not a token
     "end of file",   // End-of-file, lex until we hit the end of the file
     "newline",       // Newline, used in preprocessing
+    "pound",
     ".",
     ",",
+    "?",
     "-",
     "+",
     "*",
@@ -526,6 +611,7 @@ static const char *ttype_names[] = {
     "!",
     "~",
     "==",
+    "!=",
     "^",
     "^=",
     "->",
@@ -562,13 +648,15 @@ static const char *ttype_names[] = {
     "unsigned",
     "union",
     "void",
-    "volitile",
+    "volatile",
     "while",
 };
 
+// Look up the printable name of token type `tt` in ttype_names. The value is
+// used directly as the array index; no bounds check is performed.
 const char *ttype_name(TokenType tt) { return ttype_names[tt]; }
 
 int test_ttype_from_string() {
+    testing_func_setup();
+
     tassert(ttype_from_string("1") == TT_LITERAL);
     tassert(ttype_from_string("1.2") == TT_LITERAL);
 

diff --git a/src/lexer/test_lexer.c b/src/lexer/test_lexer.c
@@ -6,10 +6,10 @@
 #include <testing/test_utils.h>
 
+// Run the lexer unit tests (currently only test_ttype_from_string) between
+// the module-level setup and cleanup calls. Always returns 0.
 int test_lexer() {
-    testing_setup();
+    testing_module_setup();
 
     test_ttype_from_string();
 
-    testing_cleanup();
+    testing_module_cleanup();
     return 0;
 }
diff --git a/src/lexer/test_lexer.h b/src/lexer/test_lexer.h
@@ -1,6 +1,8 @@
 /*
  * Run all of the tests at the end of each C file in the lexer module */
 
+#pragma once
+
 #include "lex.h"
 
 int test_lexer();
diff --git a/src/lexer/token.h b/src/lexer/token.h
@@ -19,9 +19,11 @@ typedef enum {
     TT_NO_TOKEN,   // Not a token
     TT_EOF,     // End-of-file, so we can lex until we hit the end of the file
     TT_NEWLINE, // Newline, used in preprocessing
+    TT_POUND,      // # for preprocessing
 
     TT_PERIOD,           // .
     TT_COMMA,            // ,
+    TT_QMARK,            // ?
     TT_MINUS,            // -
     TT_PLUS,             // +
     TT_STAR,             // *
@@ -53,6 +55,7 @@ typedef enum {
     TT_LNOT,             // !
     TT_BNOT,             // ~
     TT_EQUALS,           // ==
+    TT_NOTEQ,            // !=
     TT_XOR,              // ^
     TT_XOREQ,            // ^=
     TT_POINT,            // ->

diff --git a/src/testing/main.c b/src/testing/main.c
@@ -1,7 +1,9 @@
 #include "lexer/test_lexer.h"
+#include "codegen/x86/test_x86.h"
 
+// Test-runner entry point: runs the lexer tests, then the x86 codegen tests.
+// Always returns 0; the return values of the test suites are ignored.
 int main() {
 	test_lexer();
+	test_x86();
 
 	return 0;
 }
diff --git a/src/testing/tassert.h b/src/testing/tassert.h
@@ -1,3 +1,5 @@
+#pragma once
+
 #include <assert.h>
 
+// tassert(e): evaluate e and, when it is false, call __assert with the
+// stringified expression, the file name and the line number.
+// NOTE(review): __assert is not a standard C name; presumably it comes from
+// the platform's <assert.h> -- confirm it is available on every target.
 #define tassert(e) ((void)((e) ? 0 : __assert(#e, __FILE__, __LINE__)))

diff --git a/src/testing/test_utils.c b/src/testing/test_utils.c
@@ -2,14 +2,21 @@
 #include <util/out.h>
 
+// Print the banner that opens a group of tests: the text "Running tests from ",
+// then func_name in double quotes through PRINT_WITH_COLOR with CODE_GREEN,
+// then " ..." and a newline.
 void testing_setup_internal(const char *func_name) {
-    printf("Running test ");
+    printf("Running tests from ");
+    // Flush stdout before the colored print.
+    // NOTE(review): presumably PRINT_WITH_COLOR writes via another path and
+    // ordering matters -- confirm.
     fflush(stdout);
     PRINT_WITH_COLOR(CODE_GREEN, "\"%s\"", func_name);
     printf(" ...\n");
 }
 
+// Print the banner that closes a group of tests: the text
+// "Concluded tests from ", then func_name in double quotes through
+// PRINT_WITH_COLOR with CODE_GREEN, then a newline.
 void testing_cleanup_internal(const char *func_name) {
-    printf("Concluded test ");
+    printf("Concluded tests from ");
+    // Flush stdout before the colored print.
+    // NOTE(review): presumably PRINT_WITH_COLOR writes via another path and
+    // ordering matters -- confirm.
+    fflush(stdout);
+    PRINT_WITH_COLOR(CODE_GREEN, "\"%s\"", func_name);
+    printf("\n");
+}
+
+void testing_single_test_internal(const char *func_name) {
+    printf("Running ");
     fflush(stdout);
     PRINT_WITH_COLOR(CODE_GREEN, "\"%s\"", func_name);
     printf("\n");