Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/NicoBliss/jccc
Browse files Browse the repository at this point in the history
  • Loading branch information
NicoBliss committed Jun 25, 2024
2 parents 67ca863 + 556a3ff commit 78486dc
Show file tree
Hide file tree
Showing 15 changed files with 280 additions and 19 deletions.
1 change: 1 addition & 0 deletions src/codegen/RISCV/codegen.c
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// Possibility later
3 changes: 3 additions & 0 deletions src/codegen/RISCV/codegen.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Possibility later

#pragma once
80 changes: 80 additions & 0 deletions src/codegen/x86/codegen.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "codegen.h"

#include <testing/tassert.h> // tassert

// Mutable bookkeeping shared by the x86 code generator.
struct GenState {
  // Bitmask of registers currently in use: bit i stands for register i
  // (registers 0-31), with register 0 in the least significant bit.
  uint32_t registers_in_use;

  // Running offset from rsp; init_int_literal advances it by 8 per literal.
  unsigned int rsp_offset;
};

// The one generator-state instance; code_gen_init() resets it.
struct GenState GEN_STATE;

void code_gen_init() {
GEN_STATE.registers_in_use = 0;
GEN_STATE.rsp_offset = 0;
}

// Return the assembly text that opens the program: the `_start` symbol is
// exported, the text section is selected, and the `_start:` label is emitted.
//
// Each directive sits on its own line, separated by '\n'. (A backslash at the
// end of a source line splices the lines together before the string literal
// is formed, so it cannot be used to embed line breaks.) There is no trailing
// newline, matching init_int_literal.
//
// Returns a pointer to a static 256-byte buffer shared by all calls; the
// caller must not free it.
char *start_main() {
  static char start[256] = "global _start\n"
                           "section .text\n"
                           "\n"
                           "_start:";

  return start;
}

// Return the assembly text that ends the program: load 60 into rax and 0
// into rdi, then execute `syscall` (on x86-64 Linux, system call 60 is exit,
// so this exits with status 0).
//
// Each instruction sits on its own line, separated by '\n'; a backslash at
// the end of a source line would splice the instructions together instead.
// There is no trailing newline.
//
// Returns a pointer to a static 256-byte buffer shared by all calls; the
// caller must not free it.
char *end_main() {
  static char end[256] = "mov rax, 60\n"
                         "mov rdi, 0\n"
                         "syscall";

  return end;
}

// Return the assembly text emitted on function entry: reserve 32 bytes of
// stack and store r12, r13, r14 and r15 at [rsp], [rsp+8], [rsp+16] and
// [rsp+24]. end_func() emits the matching restore sequence.
//
// Each instruction sits on its own line, separated by '\n'; a backslash at
// the end of a source line would splice the instructions together instead.
// There is no trailing newline.
//
// Returns a pointer to a static 256-byte buffer shared by all calls; the
// caller must not free it.
char *start_func() {
  static char start[256] = "sub rsp, 32\n"
                           "mov [rsp], r12\n"
                           "mov [rsp+8], r13\n"
                           "mov [rsp+16], r14\n"
                           "mov [rsp+24], r15";

  return start;
}

// Return the assembly text emitted on function exit: reload r12, r13, r14
// and r15 from [rsp], [rsp+8], [rsp+16] and [rsp+24], then release the 32
// bytes of stack that start_func() reserved.
//
// Each instruction sits on its own line, separated by '\n'; a backslash at
// the end of a source line would splice the instructions together instead.
// There is no trailing newline.
//
// Returns a pointer to a static 256-byte buffer shared by all calls; the
// caller must not free it.
char *end_func() {
  static char end[256] = "mov r12, [rsp]\n"
                         "mov r13, [rsp+8]\n"
                         "mov r14, [rsp+16]\n"
                         "mov r15, [rsp+24]\n"
                         "add rsp, 32";

  return end;
}

// Emit the instruction that stores the integer literal `val` into the next
// 8-byte slot relative to rsp.
//
// Advances GEN_STATE.rsp_offset by 8 and formats
// "mov [rsp+<offset>], <val>" with the new offset (no trailing newline).
//
// Returns a heap-allocated, NUL-terminated string that the caller owns and
// must free(), or NULL if the allocation fails (in which case the offset is
// left unchanged).
char *init_int_literal(int val) {
  enum { INIT_BUF_SIZE = 256 };

  char *init = malloc(INIT_BUF_SIZE);
  if (init == NULL) {
    return NULL;
  }

  GEN_STATE.rsp_offset += 8;

  // rsp_offset is unsigned, so it must be printed with %u, not %d.
  snprintf(init, INIT_BUF_SIZE, "mov [rsp+%u], %d", GEN_STATE.rsp_offset,
           val);

  return init;
}

// Unit test for init_int_literal: after a fresh code_gen_init(), the first
// literal must land at [rsp+8]. Always returns 0; a failed check is reported
// through tassert.
int test_init_int_literal() {
  testing_func_setup();
  code_gen_init();

  // init_int_literal hands back a malloc'd buffer, so keep the pointer and
  // free it once it has been checked instead of leaking it.
  char *generated = init_int_literal(100);
  tassert(generated != NULL);
  tassert(strcmp(generated, "mov [rsp+8], 100") == 0);
  free(generated);

  return 0;
}
15 changes: 15 additions & 0 deletions src/codegen/x86/codegen.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#pragma once

#include <testing/test_utils.h>

// Return the assembly text that opens the program's entry point (`_start`).
// The result points at a static buffer inside codegen.c; do not free it.
char *start_main();

// Return the assembly text that ends the program's entry point.
// The result points at a static buffer inside codegen.c; do not free it.
char *end_main();

// Return the function-entry text, which reserves 32 bytes of stack and
// stores r12-r15 there. Static buffer; do not free it.
char *start_func();

// Return the function-exit text, which reloads r12-r15 and releases the
// 32 bytes of stack. Static buffer; do not free it.
char *end_func();

// Advance the generator's rsp offset by 8 and return a newly malloc'd string
// holding a `mov [rsp+<offset>], <val>` instruction. The buffer is not freed
// by codegen.c, so the caller is responsible for releasing it.
char *init_int_literal(int val);

// Unit test for init_int_literal; returns 0.
int test_init_int_literal();
15 changes: 15 additions & 0 deletions src/codegen/x86/test_x86.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/*
* Run all of the tests at the end of each C file in the x86 codegen module
*/

#include "codegen.h"
#include <testing/test_utils.h>

// Run every unit test belonging to the x86 codegen module, bracketed by the
// module-level setup and cleanup hooks. Always returns 0.
int test_x86() {
  testing_module_setup();
  (void)test_init_int_literal();
  testing_module_cleanup();

  return 0;
}
10 changes: 10 additions & 0 deletions src/codegen/x86/test_x86.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/*
* Run all of the tests at the end of each C file in the x86 codegen module
*/

#pragma once

#include "codegen.h"
#include <testing/test_utils.h>

int test_x86();
104 changes: 96 additions & 8 deletions src/lexer/lex.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <assert.h>
#include <stdio.h>
#include <testing/tassert.h> // tassert
#include <testing/test_utils.h>

#include <ctype.h>
#include <string.h> // memcpy
Expand All @@ -19,8 +20,67 @@ int in_string(char c, char s[]) {
}
return 0;
}
// We will need to add more of these later, for sure
char single_char_tokens[] = "(){}[];";

// Characters that form a complete token on their own. Each character is
// listed exactly once ('~' previously appeared twice).
char single_char_tokens[] = "(){}[];~#,.:?";

// All strings which represent operators.
// Every character sequence that valid_operator_sequence() accepts as an
// operator. The list ends with a NULL sentinel so it can be walked without a
// separate length. Entry order is unchanged; only the layout is grouped.
char *operator_strings[] = {
    // single-character arithmetic, assignment, colon and modulo
    "-", "+", "*", "/", "=", ":", "%",
    // bitwise and logical and / or
    "&", "&&", "|", "||",
    // compound assignment, increment and decrement
    "-=", "+=", "++", "--", "/=", "*=", "%=", "&=", "|=", "&&=", "||=",
    // relational operators and shifts
    ">", "<", "<=", ">=", "<<", ">>",
    // logical not and (in)equality
    "!", "==", "!=",
    // xor, arrow and shift-assignment
    "^", "^=", "->", "<<=", ">>=",
    NULL, // sentinel marking the end of the list
};

// Report whether `c` can be the first character of an operator.
// Returns 1 for any of - + * / = : % & | < > ! ~ ^ and 0 for everything else.
int starts_operator(char c) {
  static const char leading_chars[] = "-+*/=:%&|<>!~^";

  // Search only the listed characters (the terminating NUL is excluded), so
  // c == '\0' is correctly reported as not starting an operator.
  return memchr(leading_chars, c, sizeof(leading_chars) - 1) != NULL;
}

// Report whether `op` is exactly one of the entries in operator_strings.
// Returns 1 on a match and 0 otherwise. The table is scanned linearly up to
// its NULL sentinel.
int valid_operator_sequence(char *op) {
  size_t idx = 0;

  while (operator_strings[idx] != NULL) {
    if (STREQ(operator_strings[idx], op)) {
      return 1;
    }
    idx++;
  }

  return 0;
}

int is_valid_numeric_or_id_char(char c) {
return isalnum(c) || (c == '_') || (c == '.');
Expand Down Expand Up @@ -183,6 +243,22 @@ int real_lex(Lexer *l, Token *t) {
return 0;
}

// Lex an operator. We do this by lexing characters from the buffer until
// the resulting string is no longer an operator, then we cut our losses and
// return.
if (starts_operator(init)) {
while (valid_operator_sequence(t->contents)) {
t->contents[pos++] = (c = getc(l->fp));
}
// We've ended!
// Can we reduce this code duplication from above in a smart way?
ungetc(c, l->fp);
t->contents[pos - 1] = '\0';
t->type = ttype_from_string(t->contents);
t->length = pos;
return 0;
}

// TODO - parse character or string literal

PRINT_ERROR("lexer unable to identify token starting with: %c", init);
Expand Down Expand Up @@ -291,9 +367,14 @@ TokenType ttype_one_char(char c) {
return TT_BNOT; // ~
case '^':
return TT_XOR; // ^
case '#':
return TT_POUND;
case '?':
return TT_QMARK;
default:
PRINT_ERROR("Token type for token '%c' not recognized", c);
return TT_NO_TOKEN;
}

return TT_NO_TOKEN;
}

TokenType ttype_many_chars(const char *contents) {
Expand Down Expand Up @@ -356,9 +437,9 @@ TokenType ttype_many_chars(const char *contents) {
} else if (STREQ(contents, "unsigned")) {
return TT_UNSIGNED;
} else if (STREQ(contents, "void")) {
return TT_SIZEOF;
} else if (STREQ(contents, "volitile")) {
return TT_SIZEOF;
return TT_VOID;
} else if (STREQ(contents, "volatile")) {
return TT_VOLATILE;
} else if (STREQ(contents, "while")) {
return TT_WHILE;
} else if (STREQ(contents, "&&")) {
Expand Down Expand Up @@ -405,6 +486,8 @@ TokenType ttype_many_chars(const char *contents) {
return TT_LEFTSHIFTEQUALS;
} else if (STREQ(contents, ">>=")) {
return TT_RIGHTSHIFTEQUALS;
} else if (STREQ(contents, "!=")) {
return TT_NOTEQ;
}

// Includes only numbers
Expand Down Expand Up @@ -493,8 +576,10 @@ static const char *ttype_names[] = {
"no token", // Not a token
"end of file", // End-of-file, lex until we hit the end of the file
"newline", // Newline, used in preprocessing
"pound",
".",
",",
"?",
"-",
"+",
"*",
Expand Down Expand Up @@ -526,6 +611,7 @@ static const char *ttype_names[] = {
"!",
"~",
"==",
"!=",
"^",
"^=",
"->",
Expand Down Expand Up @@ -562,13 +648,15 @@ static const char *ttype_names[] = {
"unsigned",
"union",
"void",
"volitile",
"volatile",
"while",
};

// Return the printable name of token type `tt`, looked up in ttype_names.
// A value that falls outside the table (for example a corrupted or
// newly added enumerator without a name entry) yields a fixed placeholder
// string instead of reading past the end of the array.
const char *ttype_name(TokenType tt) {
  const size_t name_count = sizeof(ttype_names) / sizeof(ttype_names[0]);

  // The cast also turns any negative enum value into an out-of-range index.
  if ((size_t)tt >= name_count) {
    return "unknown token type";
  }

  return ttype_names[tt];
}

int test_ttype_from_string() {
testing_func_setup();

tassert(ttype_from_string("1") == TT_LITERAL);
tassert(ttype_from_string("1.2") == TT_LITERAL);

Expand Down
4 changes: 2 additions & 2 deletions src/lexer/test_lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
#include <testing/test_utils.h>

// Run the lexer module's unit tests (currently test_ttype_from_string)
// between the setup and cleanup hooks of the test harness. Always returns 0.
// NOTE(review): both the testing_setup/testing_cleanup pair and the
// testing_module_setup/testing_module_cleanup pair appear below; this looks
// like the old and new lines of a diff shown together - confirm which pair
// the real file keeps.
int test_lexer() {
testing_setup();
testing_module_setup();

test_ttype_from_string();

testing_cleanup();
testing_module_cleanup();
return 0;
}
2 changes: 2 additions & 0 deletions src/lexer/test_lexer.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
/*
* Run all of the tests at the end of each C file in the lexer module */

#pragma once

#include "lex.h"

int test_lexer();
3 changes: 3 additions & 0 deletions src/lexer/token.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@ typedef enum {
TT_NO_TOKEN, // Not a token
TT_EOF, // End-of-file, so we can lex until we hit the end of the file
TT_NEWLINE, // Newline, used in preprocessing
TT_POUND, // # for preprocessing

TT_PERIOD, // .
TT_COMMA, // ,
TT_QMARK, // ?
TT_MINUS, // -
TT_PLUS, // +
TT_STAR, // *
Expand Down Expand Up @@ -53,6 +55,7 @@ typedef enum {
TT_LNOT, // !
TT_BNOT, // ~
TT_EQUALS, // ==
TT_NOTEQ, // !=
TT_XOR, // ^
TT_XOREQ, // ^=
TT_POINT, // ->
Expand Down
2 changes: 2 additions & 0 deletions src/testing/main.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#include "lexer/test_lexer.h"
#include "codegen/x86/test_x86.h"

// Test-runner entry point: execute each module's test suite in turn.
// The suites' return values are not inspected; the process always exits
// with status 0 unless a suite terminates it first.
int main() {
  (void)test_lexer();
  (void)test_x86();
  return 0;
}
2 changes: 2 additions & 0 deletions src/testing/tassert.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#pragma once

#include <assert.h>

// Test assertion: evaluates `e` once and, when it is false, reports the
// failed expression text together with its file and line through __assert.
// The "true" branch is (void)0 rather than 0 so that both arms of the
// conditional are void expressions; mixing an int arm with a void arm is not
// valid ISO C. (Assumes __assert is a void expression, as it is in the libcs
// that provide it - confirm for your target.)
#define tassert(e) ((void)((e) ? (void)0 : __assert(#e, __FILE__, __LINE__)))
Expand Down
11 changes: 9 additions & 2 deletions src/testing/test_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,21 @@
#include <util/out.h>

// Print the banner that announces the start of a group of tests.
// `func_name` is printed in double quotes in green via PRINT_WITH_COLOR,
// followed by " ...". stdout is flushed before the colored part is written
// (presumably so the plain prefix is ordered ahead of it - TODO confirm how
// PRINT_WITH_COLOR writes its output).
// NOTE(review): the two consecutive prefix printf calls look like the old
// and new versions of one line from a diff - confirm which one the real
// file keeps.
void testing_setup_internal(const char *func_name) {
printf("Running test ");
printf("Running tests from ");
fflush(stdout);
PRINT_WITH_COLOR(CODE_GREEN, "\"%s\"", func_name);
printf(" ...\n");
}

// Print the banner that announces the end of a group of tests.
// `func_name` is printed in double quotes in green via PRINT_WITH_COLOR,
// followed by a newline. stdout is flushed before the colored part is
// written (presumably so the plain prefix is ordered ahead of it - TODO
// confirm how PRINT_WITH_COLOR writes its output).
// NOTE(review): the two consecutive prefix printf calls look like the old
// and new versions of one line from a diff - confirm which one the real
// file keeps.
void testing_cleanup_internal(const char *func_name) {
printf("Concluded test ");
printf("Concluded tests from ");
fflush(stdout);
PRINT_WITH_COLOR(CODE_GREEN, "\"%s\"", func_name);
printf("\n");
}

void testing_single_test_internal(const char *func_name) {
printf("Running ");
fflush(stdout);
PRINT_WITH_COLOR(CODE_GREEN, "\"%s\"", func_name);
printf("\n");
Expand Down
Loading

0 comments on commit 78486dc

Please sign in to comment.