* [PATCH olang v1 1/3] lexer: add tokenize support for '+' '/' '*' '-'
2024-03-13 21:21 [PATCH olang v1 0/3] frontend: add basic arithmetic expr support Johnny Richard
@ 2024-03-13 21:21 ` Johnny Richard
2024-03-13 21:21 ` [PATCH olang v1 2/3] ast: create binary operation ast node Johnny Richard
` (2 subsequent siblings)
3 siblings, 0 replies; 6+ messages in thread
From: Johnny Richard @ 2024-03-13 21:21 UTC (permalink / raw)
To: ~johnnyrichard/olang-devel; +Cc: Johnny Richard
Signed-off-by: Johnny Richard <johnny@johnnyrichard.com>
---
examples/expression.ol | 3 ++
src/lexer.c | 24 ++++++++++++++-
src/lexer.h | 4 +++
tests/integration/cli_test.c | 56 +++++++++++++++++++++++++++++++++--
tests/integration/proc_exec.h | 3 +-
5 files changed, 86 insertions(+), 4 deletions(-)
create mode 100644 examples/expression.ol
diff --git a/examples/expression.ol b/examples/expression.ol
new file mode 100644
index 0000000..efa4ab5
--- /dev/null
+++ b/examples/expression.ol
@@ -0,0 +1,3 @@
+fn main(): u32 {
+ return (10 + 1 * 2) - (10 - (1 + 1) / 2)
+}
diff --git a/src/lexer.c b/src/lexer.c
index dd6f11d..45a3d63 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -126,6 +126,26 @@ lexer_next_token(lexer_t *lexer, token_t *token)
lexer_skip_char(lexer);
return;
}
+ case '+': {
+ lexer_init_char_value_token(lexer, token, TOKEN_PLUS);
+ lexer_skip_char(lexer);
+ return;
+ }
+ case '-': {
+ lexer_init_char_value_token(lexer, token, TOKEN_DASH);
+ lexer_skip_char(lexer);
+ return;
+ }
+ case '*': {
+ lexer_init_char_value_token(lexer, token, TOKEN_STAR);
+ lexer_skip_char(lexer);
+ return;
+ }
+ case '/': {
+ lexer_init_char_value_token(lexer, token, TOKEN_SLASH);
+ lexer_skip_char(lexer);
+ return;
+ }
case '\n': {
lexer_init_char_value_token(lexer, token, TOKEN_LF);
lexer_skip_char(lexer);
@@ -151,7 +171,9 @@ static char *token_kind_str_table[] = {
[TOKEN_RETURN] = "return", [TOKEN_LF] = "line_feed",
[TOKEN_OPAREN] = "(", [TOKEN_CPAREN] = ")",
[TOKEN_COLON] = ":", [TOKEN_OCURLY] = "{",
- [TOKEN_CCURLY] = "}", [TOKEN_EOF] = "EOF",
+ [TOKEN_CCURLY] = "}", [TOKEN_PLUS] = "+",
+ [TOKEN_DASH] = "-", [TOKEN_STAR] = "*",
+ [TOKEN_SLASH] = "/", [TOKEN_EOF] = "EOF",
};
char *
diff --git a/src/lexer.h b/src/lexer.h
index cb91d7e..5ab9ccc 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -40,6 +40,10 @@ typedef enum token_kind
TOKEN_RETURN,
// Single char
+ TOKEN_PLUS,
+ TOKEN_DASH,
+ TOKEN_SLASH,
+ TOKEN_STAR,
TOKEN_LF,
TOKEN_OPAREN,
TOKEN_CPAREN,
diff --git a/tests/integration/cli_test.c b/tests/integration/cli_test.c
index 8cc22f9..8481ba3 100644
--- a/tests/integration/cli_test.c
+++ b/tests/integration/cli_test.c
@@ -20,7 +20,7 @@
#include <stdio.h>
static MunitResult
-test_cli_dump_tokens(const MunitParameter params[], void *user_data_or_fixture)
+test_cli_dump_tokens_exmaple_main_exit(const MunitParameter params[], void *user_data_or_fixture)
{
cli_result_t compilation_result = cli_runner_compiler_dump_tokens("../../examples/main_exit.ol");
munit_assert_int(compilation_result.exec.exit_code, ==, 0);
@@ -42,6 +42,47 @@ test_cli_dump_tokens(const MunitParameter params[], void *user_data_or_fixture)
return MUNIT_OK;
}
+static MunitResult
+test_cli_dump_tokens_exmaple_expression(const MunitParameter params[], void *user_data_or_fixture)
+{
+ cli_result_t compilation_result = cli_runner_compiler_dump_tokens("../../examples/expression.ol");
+ munit_assert_int(compilation_result.exec.exit_code, ==, 0);
+ munit_assert_string_equal(compilation_result.exec.stdout_buf,
+ "../../examples/expression.ol:1:1: <fn>\n"
+ "../../examples/expression.ol:1:4: <identifier>\n"
+ "../../examples/expression.ol:1:8: <(>\n"
+ "../../examples/expression.ol:1:9: <)>\n"
+ "../../examples/expression.ol:1:10: <:>\n"
+ "../../examples/expression.ol:1:12: <identifier>\n"
+ "../../examples/expression.ol:1:16: <{>\n"
+ "../../examples/expression.ol:1:17: <line_feed>\n"
+ "../../examples/expression.ol:2:3: <return>\n"
+ "../../examples/expression.ol:2:10: <(>\n"
+ "../../examples/expression.ol:2:11: <number>\n"
+ "../../examples/expression.ol:2:14: <+>\n"
+ "../../examples/expression.ol:2:16: <number>\n"
+ "../../examples/expression.ol:2:18: <*>\n"
+ "../../examples/expression.ol:2:20: <number>\n"
+ "../../examples/expression.ol:2:21: <)>\n"
+ "../../examples/expression.ol:2:23: <->\n"
+ "../../examples/expression.ol:2:25: <(>\n"
+ "../../examples/expression.ol:2:26: <number>\n"
+ "../../examples/expression.ol:2:29: <->\n"
+ "../../examples/expression.ol:2:31: <(>\n"
+ "../../examples/expression.ol:2:32: <number>\n"
+ "../../examples/expression.ol:2:34: <+>\n"
+ "../../examples/expression.ol:2:36: <number>\n"
+ "../../examples/expression.ol:2:37: <)>\n"
+ "../../examples/expression.ol:2:39: </>\n"
+ "../../examples/expression.ol:2:41: <number>\n"
+ "../../examples/expression.ol:2:42: <)>\n"
+ "../../examples/expression.ol:2:43: <line_feed>\n"
+ "../../examples/expression.ol:3:1: <}>\n"
+ "../../examples/expression.ol:3:2: <line_feed>\n"
+ "../../examples/expression.ol:4:1: <EOF>\n");
+ return MUNIT_OK;
+}
+
static MunitResult
test_cli_compile_minimal_program(const MunitParameter params[], void *user_data_or_fixture)
{
@@ -62,7 +103,18 @@ test_cli_compile_minimal_program(const MunitParameter params[], void *user_data_
}
static MunitTest tests[] = {
- { "/test_cli_dump_tokens", test_cli_dump_tokens, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
+ { "/test_cli_dump_tokens_exmaple_main_exit",
+ test_cli_dump_tokens_exmaple_main_exit,
+ NULL,
+ NULL,
+ MUNIT_TEST_OPTION_NONE,
+ NULL },
+ { "/test_cli_dump_tokens_exmaple_expression",
+ test_cli_dump_tokens_exmaple_expression,
+ NULL,
+ NULL,
+ MUNIT_TEST_OPTION_NONE,
+ NULL },
{ "/test_cli_compile_minimal_program", test_cli_compile_minimal_program, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
{ NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
};
diff --git a/tests/integration/proc_exec.h b/tests/integration/proc_exec.h
index 135aa6a..45c2977 100644
--- a/tests/integration/proc_exec.h
+++ b/tests/integration/proc_exec.h
@@ -21,7 +21,8 @@
typedef struct proc_exec_result
{
int exit_code;
- char stdout_buf[1024];
+ // FIXME: output buffer shouldn't be fixed size
+ char stdout_buf[2048];
} proc_exec_result_t;
typedef struct proc_exec_command
--
2.44.0
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH olang v1 2/3] ast: create binary operation ast node
2024-03-13 21:21 [PATCH olang v1 0/3] frontend: add basic arithmetic expr support Johnny Richard
2024-03-13 21:21 ` [PATCH olang v1 1/3] lexer: add tokenize support for '+' '/' '*' '-' Johnny Richard
@ 2024-03-13 21:21 ` Johnny Richard
2024-03-13 21:21 ` [PATCH olang v1 3/3] parser: add basic arithmetic expressions '+' '*' '/' '-' Johnny Richard
2024-03-17 21:37 ` [PATCH olang v1 0/3] frontend: add basic arithmetic expr support Johnny Richard
3 siblings, 0 replies; 6+ messages in thread
From: Johnny Richard @ 2024-03-13 21:21 UTC (permalink / raw)
To: ~johnnyrichard/olang-devel; +Cc: Johnny Richard
Signed-off-by: Johnny Richard <johnny@johnnyrichard.com>
---
src/ast.c | 14 ++++++++++++++
src/ast.h | 20 ++++++++++++++++++++
2 files changed, 34 insertions(+)
diff --git a/src/ast.c b/src/ast.c
index ab56c96..c9f33a4 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -52,6 +52,20 @@ ast_new_node_fn_def(arena_t *arena, string_view_t identifier, type_t return_type
return node_fn_def;
}
+ast_node_t *
+ast_new_node_bin_op(arena_t *arena, ast_binary_op_kind_t kind, ast_node_t *lhs, ast_node_t *rhs)
+{
+ ast_node_t *node_bin_op = (ast_node_t *)arena_alloc(arena, sizeof(ast_node_t));
+ assert(node_bin_op);
+
+ node_bin_op->kind = AST_NODE_BINARY_OP;
+ node_bin_op->data.as_bin_op.kind = kind;
+ node_bin_op->data.as_bin_op.lhs = lhs;
+ node_bin_op->data.as_bin_op.rhs = rhs;
+
+ return node_bin_op;
+}
+
ast_node_t *
ast_new_node_literal_u32(arena_t *arena, uint32_t value)
{
diff --git a/src/ast.h b/src/ast.h
index 2b42781..206a827 100644
--- a/src/ast.h
+++ b/src/ast.h
@@ -30,6 +30,7 @@ typedef enum
AST_NODE_PROGRAM,
AST_NODE_BLOCK,
AST_NODE_FN_DEF,
+ AST_NODE_BINARY_OP,
AST_NODE_RETURN_STMT,
AST_NODE_LITERAL,
AST_NODE_UNKNOWN
@@ -73,6 +74,21 @@ typedef struct ast_literal
ast_literal_value_t value;
} ast_literal_t;
+typedef enum ast_binary_op_kind
+{
+ AST_BINOP_ADDITION,
+ AST_BINOP_SUBTRACTION,
+ AST_BINOP_MULTIPLICATION,
+ AST_BINOP_DIVISION,
+} ast_binary_op_kind_t;
+
+typedef struct ast_binary_op
+{
+ ast_binary_op_kind_t kind;
+ ast_node_t *lhs;
+ ast_node_t *rhs;
+} ast_binary_op_t;
+
typedef struct ast_return_stmt
{
ast_node_t *data;
@@ -82,6 +98,7 @@ typedef union
{
ast_program_t as_program;
ast_fn_definition_t as_fn_def;
+ ast_binary_op_t as_bin_op;
ast_literal_t as_literal;
ast_block_t as_block;
ast_return_stmt_t as_return_stmt;
@@ -99,6 +116,9 @@ ast_new_program(arena_t *arena, ast_node_t *fn_def);
ast_node_t *
ast_new_node_fn_def(arena_t *arena, string_view_t identifier, type_t return_type, ast_node_t *block);
+ast_node_t *
+ast_new_node_bin_op(arena_t *arena, ast_binary_op_kind_t kind, ast_node_t *lhs, ast_node_t *rhs);
+
ast_node_t *
ast_new_node_literal_u32(arena_t *arena, uint32_t value);
--
2.44.0
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH olang v1 3/3] parser: add basic arithmetic expressions '+' '*' '/' '-'
2024-03-13 21:21 [PATCH olang v1 0/3] frontend: add basic arithmetic expr support Johnny Richard
2024-03-13 21:21 ` [PATCH olang v1 1/3] lexer: add tokenize support for '+' '/' '*' '-' Johnny Richard
2024-03-13 21:21 ` [PATCH olang v1 2/3] ast: create binary operation ast node Johnny Richard
@ 2024-03-13 21:21 ` Johnny Richard
2024-03-13 20:29 ` [olang/patches/.build.yml] build success builds.sr.ht
2024-03-17 21:37 ` [PATCH olang v1 0/3] frontend: add basic arithmetic expr support Johnny Richard
3 siblings, 1 reply; 6+ messages in thread
From: Johnny Richard @ 2024-03-13 21:21 UTC (permalink / raw)
To: ~johnnyrichard/olang-devel; +Cc: Johnny Richard
This implementation uses a call chain to guarantee calling precedence.
Whenever we try to add more expression types and it becomes complexer,
we will design a solution based on priority.
Now is pretty difficult to visualize the ast tree since we don't have a
ast pretty printer. I have validated it by running **gdb**.
Testing the ast with unit tests is quite annoying without a good "DSL".
We can design such DSL in the near future. =^)
Signed-off-by: Johnny Richard <johnny@johnnyrichard.com>
---
src/parser.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 127 insertions(+), 13 deletions(-)
diff --git a/src/parser.c b/src/parser.c
index 5b7d39a..ab7051d 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -39,6 +39,15 @@ parser_parse_block(parser_t *parser);
ast_node_t *
parser_parse_fn_definition(parser_t *parser);
+static ast_node_t *
+parser_parse_expr(parser_t *parser);
+
+static ast_node_t *
+parser_parse_term(parser_t *parser);
+
+static ast_node_t *
+parser_parse_factor(parser_t *parser);
+
static void
skip_line_feeds(lexer_t *lexer);
@@ -57,10 +66,117 @@ ast_node_t *
parser_parse_program(parser_t *parser)
{
ast_node_t *fn = parser_parse_fn_definition(parser);
+ if (fn == NULL) {
+ return NULL;
+ }
return ast_new_program(parser->arena, fn);
}
+static ast_binary_op_kind_t
+token_kind_to_binary_op_kind(token_kind_t kind)
+{
+ switch (kind) {
+ case TOKEN_PLUS:
+ return AST_BINOP_ADDITION;
+ case TOKEN_DASH:
+ return AST_BINOP_SUBTRACTION;
+ case TOKEN_SLASH:
+ return AST_BINOP_DIVISION;
+ case TOKEN_STAR:
+ return AST_BINOP_MULTIPLICATION;
+ default: {
+ fprintf(stderr, "error: token kind (%s) not compatible with binary op kind\n", token_kind_to_cstr(kind));
+ assert(false);
+ }
+ }
+}
+
+static ast_node_t *
+parser_parse_expr(parser_t *parser)
+{
+ ast_node_t *node = parser_parse_term(parser);
+ if (node == NULL) {
+ return NULL;
+ }
+
+ token_t token;
+ lexer_peek_next(parser->lexer, &token);
+
+ while (token.kind == TOKEN_PLUS || token.kind == TOKEN_DASH) {
+ lexer_next_token(parser->lexer, &token);
+
+ ast_node_t *lhs = node;
+ ast_node_t *rhs = parser_parse_term(parser);
+ if (rhs == NULL) {
+ return NULL;
+ }
+
+ node = ast_new_node_bin_op(parser->arena, token_kind_to_binary_op_kind(token.kind), lhs, rhs);
+
+ lexer_peek_next(parser->lexer, &token);
+ }
+
+ return node;
+}
+
+static ast_node_t *
+parser_parse_term(parser_t *parser)
+{
+ ast_node_t *node = parser_parse_factor(parser);
+ if (node == NULL) {
+ return NULL;
+ }
+
+ token_t token;
+ lexer_peek_next(parser->lexer, &token);
+
+ while (token.kind == TOKEN_STAR || token.kind == TOKEN_SLASH) {
+ lexer_next_token(parser->lexer, &token);
+
+ ast_node_t *lhs = node;
+ ast_node_t *rhs = parser_parse_factor(parser);
+ if (rhs == NULL) {
+ return NULL;
+ }
+
+ node = ast_new_node_bin_op(parser->arena, token_kind_to_binary_op_kind(token.kind), lhs, rhs);
+
+ lexer_peek_next(parser->lexer, &token);
+ }
+
+ return node;
+}
+
+static ast_node_t *
+parser_parse_factor(parser_t *parser)
+{
+ token_t token;
+ lexer_next_token(parser->lexer, &token);
+
+ switch (token.kind) {
+ case TOKEN_NUMBER:
+ return ast_new_node_literal_u32(parser->arena, string_view_to_u32(token.value));
+
+ case TOKEN_OPAREN: {
+ ast_node_t *expr = parser_parse_expr(parser);
+ if (expr == NULL) {
+ return NULL;
+ }
+
+ if (!skip_expected_token(parser, TOKEN_CPAREN)) {
+ return NULL;
+ }
+
+ return expr;
+ }
+ default: {
+ fprintf(stderr, "error: parse_factor: unsupported or invalid token (%s)\n", token_kind_to_cstr(token.kind));
+ assert(false);
+ }
+ }
+}
+
ast_node_t *
parser_parse_fn_definition(parser_t *parser)
{
@@ -131,41 +247,39 @@ parser_parse_type(parser_t *parser, type_t *type)
static ast_node_t *
parser_parse_block(parser_t *parser)
{
- token_t number_token;
if (!skip_expected_token(parser, TOKEN_OCURLY)) {
- return false;
+ return NULL;
}
skip_line_feeds(parser->lexer);
ast_node_t *node_block = ast_new_node_block(parser->arena);
+ if (node_block == NULL) {
+ return NULL;
+ }
if (!skip_expected_token(parser, TOKEN_RETURN)) {
- return false;
+ return NULL;
}
ast_node_t *node_return_stmt = ast_new_node_return_stmt(parser->arena);
assert(node_return_stmt);
- if (!expected_token(parser, &number_token, TOKEN_NUMBER)) {
- return false;
+ ast_node_t *expr = parser_parse_expr(parser);
+ if (expr == NULL) {
+ return NULL;
}
- ast_node_t *literal_node = ast_new_node_literal_u32(parser->arena, string_view_to_u32(number_token.value));
- assert(literal_node);
-
- node_return_stmt->data.as_return_stmt.data = literal_node;
+ node_return_stmt->data.as_return_stmt.data = expr;
list_append(node_block->data.as_block.nodes, node_return_stmt);
-
if (!skip_expected_token(parser, TOKEN_LF)) {
- return false;
+ return NULL;
}
skip_line_feeds(parser->lexer);
-
if (!skip_expected_token(parser, TOKEN_CCURLY)) {
- return false;
+ return NULL;
}
return node_block;
--
2.44.0
^ permalink raw reply [flat|nested] 6+ messages in thread