From: Carlos Maniero <carlos@maniero.me>
To: ~johnnyrichard/olang-devel@lists.sr.ht
Cc: Carlos Maniero <carlos@maniero.me>
Subject: [PATCH olang v1 2/6] semantics: resolve variable symbols
Date: Thu, 24 Oct 2024 12:38:38 +0000 (UTC) [thread overview]
Message-ID: <20241024123825.120390-3-carlos@maniero.me> (raw)
In-Reply-To: <20241024123825.120390-1-carlos@maniero.me>
This is a first step toward semantic checking. It adds the resolved
symbol to ast_id_t. It does not yet check whether types match, nor does
it print helpful errors.
Why add the symbol to ast_id_t?
-------------------------------
The semantic pass has to resolve the symbol in order to perform type
checking and to verify that the reference can be resolved. Checking
that the symbol exists and then throwing the found symbol away would be
a waste of computation.
Additionally, adding the symbol to the id removes complexity from
codegen, since symbol lookups will no longer be required there.
Why create the resolve_symbols function?
----------------------------------------
If you look at resolve_symbols and populate_scope you may wonder "why
are there two functions that traverse the entire AST?". The main reason
is that olang allows reference before definition. With a single pass,
if something such as a function call refers to a function before it is
defined, the checker would fail because the symbol is not registered
yet. This means it is required to first populate the scope, registering
the symbols for the entire AST, and only afterwards resolve the symbols.
Signed-off-by: Carlos Maniero <carlos@maniero.me>
---
src/ast.h | 1 +
src/codegen_x86_64.c | 30 ++++-------
src/type_checker.c | 123 ++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 132 insertions(+), 22 deletions(-)
diff --git a/src/ast.h b/src/ast.h
index c55ecf1..2181849 100644
--- a/src/ast.h
+++ b/src/ast.h
@@ -72,6 +72,7 @@ typedef struct ast_id
{
string_view_t name;
scope_t *scope;
+ symbol_t *symbol;
} ast_id_t;
typedef struct ast_translation_unit
diff --git a/src/codegen_x86_64.c b/src/codegen_x86_64.c
index c7122be..1758902 100644
--- a/src/codegen_x86_64.c
+++ b/src/codegen_x86_64.c
@@ -143,12 +143,10 @@ codegen_x86_64_emit_expression(codegen_x86_64_t *codegen, ast_node_t *expr_node)
case AST_NODE_REF: {
ast_ref_t ref = expr_node->as_ref;
- symbol_t *symbol = scope_lookup(ref.id.scope, ref.id.name);
- assert(symbol);
-
- size_t offset = codegen_x86_64_get_stack_offset(codegen, symbol);
+ size_t offset =
+ codegen_x86_64_get_stack_offset(codegen, ref.id.symbol);
- size_t bytes = type_to_bytes(symbol->type);
+ size_t bytes = type_to_bytes(ref.id.symbol->type);
fprintf(codegen->out,
" mov -%ld(%%rbp), %s\n",
@@ -597,17 +595,14 @@ codegen_x86_64_emit_expression(codegen_x86_64_t *codegen, ast_node_t *expr_node)
switch (bin_op.lhs->kind) {
case AST_NODE_REF: {
ast_ref_t ref = bin_op.lhs->as_ref;
- scope_t *scope = ref.id.scope;
-
- symbol_t *symbol = scope_lookup(scope, ref.id.name);
- assert(symbol);
size_t offset = codegen_x86_64_get_stack_offset(
- codegen, symbol);
+ codegen, ref.id.symbol);
codegen_x86_64_emit_expression(codegen, bin_op.rhs);
- size_t type_size = type_to_bytes(symbol->type);
+ size_t type_size =
+ type_to_bytes(ref.id.symbol->type);
fprintf(codegen->out,
" mov %s, -%ld(%%rbp)\n",
get_reg_for(REG_ACCUMULATOR, type_size),
@@ -668,11 +663,8 @@ codegen_x86_64_emit_expression(codegen_x86_64_t *codegen, ast_node_t *expr_node)
ast_ref_t ref = unary_op.operand->as_ref;
- symbol_t *symbol = scope_lookup(ref.id.scope, ref.id.name);
- assert(symbol);
-
size_t offset =
- codegen_x86_64_get_stack_offset(codegen, symbol);
+ codegen_x86_64_get_stack_offset(codegen, ref.id.symbol);
fprintf(
codegen->out, " lea -%ld(%%rbp), %%rax\n", offset);
@@ -722,16 +714,12 @@ codegen_x86_64_emit_block(codegen_x86_64_t *codegen, ast_block_t *block)
case AST_NODE_VAR_DEF: {
ast_var_definition_t var_def = node->as_var_def;
- scope_t *scope = var_def.id.scope;
-
- symbol_t *symbol = scope_lookup(scope, var_def.id.name);
- assert(symbol);
- size_t type_size = type_to_bytes(symbol->type);
+ size_t type_size = type_to_bytes(var_def.id.symbol->type);
codegen->base_offset += type_size;
codegen_x86_64_put_stack_offset(
- codegen, symbol, codegen->base_offset);
+ codegen, var_def.id.symbol, codegen->base_offset);
if (var_def.value) {
codegen_x86_64_emit_expression(codegen, var_def.value);
diff --git a/src/type_checker.c b/src/type_checker.c
index a2ffdd6..daccecf 100644
--- a/src/type_checker.c
+++ b/src/type_checker.c
@@ -23,6 +23,9 @@
static void
populate_scope(checker_t *checker, scope_t *scope, ast_node_t *ast);
+static void
+resolve_symbols(checker_t *checker, ast_node_t *ast);
+
checker_t *
checker_new(arena_t *arena)
{
@@ -98,6 +101,7 @@ checker_check(checker_t *checker, ast_node_t *ast)
scope_t *scope = scope_new(checker->arena);
populate_scope(checker, scope, ast);
+ resolve_symbols(checker, ast);
// TODO: traverse the ast tree to verify semantics
}
@@ -105,12 +109,27 @@ checker_check(checker_t *checker, ast_node_t *ast)
static void
register_id(checker_t *checker, scope_t *scope, ast_id_t *id, type_t *type)
{
- id->scope = scope;
symbol_t *symbol = symbol_new(checker->arena, id->name, type);
+ id->scope = scope;
+ id->symbol = symbol;
+
scope_insert(scope, symbol);
}
+static void
+resolve_id(checker_t *checker, ast_id_t *id)
+{
+ assert(checker);
+
+ symbol_t *symbol = scope_lookup(id->scope, id->name);
+
+ // FIXME: assert types and print a friendly error message
+ assert(symbol);
+
+ id->symbol = symbol;
+}
+
static void
populate_scope(checker_t *checker, scope_t *scope, ast_node_t *ast)
{
@@ -244,3 +263,105 @@ populate_scope(checker_t *checker, scope_t *scope, ast_node_t *ast)
return;
}
}
+
+static void
+resolve_symbols(checker_t *checker, ast_node_t *ast)
+{
+ assert(checker);
+
+ switch (ast->kind) {
+ case AST_NODE_TRANSLATION_UNIT: {
+ list_item_t *item = list_head(ast->as_translation_unit.decls);
+
+ while (item != NULL) {
+ resolve_symbols(checker, (ast_node_t *)item->value);
+ item = list_next(item);
+ }
+ return;
+ }
+
+ case AST_NODE_FN_DEF: {
+ if (ast->as_fn_def.block != NULL) {
+ resolve_symbols(checker, ast->as_fn_def.block);
+ }
+ return;
+ }
+
+ case AST_NODE_FN_CALL: {
+ list_item_t *item = list_head(ast->as_fn_call.args);
+
+ while (item != NULL) {
+ resolve_symbols(checker, (ast_node_t *)item->value);
+ item = list_next(item);
+ }
+
+ return;
+ }
+
+ case AST_NODE_IF_STMT: {
+ resolve_symbols(checker, ast->as_if_stmt.cond);
+ resolve_symbols(checker, ast->as_if_stmt.then);
+
+ if (ast->as_if_stmt._else) {
+ resolve_symbols(checker, ast->as_if_stmt._else);
+ }
+
+ return;
+ }
+
+ case AST_NODE_WHILE_STMT: {
+ resolve_symbols(checker, ast->as_while_stmt.cond);
+ resolve_symbols(checker, ast->as_while_stmt.then);
+
+ return;
+ }
+
+ case AST_NODE_BINARY_OP: {
+ ast_binary_op_t bin_op = ast->as_bin_op;
+
+ resolve_symbols(checker, bin_op.lhs);
+ resolve_symbols(checker, bin_op.rhs);
+ return;
+ }
+
+ case AST_NODE_UNARY_OP: {
+ ast_unary_op_t unary_op = ast->as_unary_op;
+
+ resolve_symbols(checker, unary_op.operand);
+ return;
+ }
+
+ case AST_NODE_RETURN_STMT: {
+ ast_return_stmt_t return_stmt = ast->as_return_stmt;
+
+ resolve_symbols(checker, return_stmt.value);
+ return;
+ }
+
+ case AST_NODE_BLOCK: {
+ ast_block_t block = ast->as_block;
+
+ list_item_t *item = list_head(block.nodes);
+
+ while (item != NULL) {
+ resolve_symbols(checker, (ast_node_t *)item->value);
+ item = list_next(item);
+ }
+
+ return;
+ }
+
+ case AST_NODE_VAR_DEF: {
+ resolve_symbols(checker, ast->as_var_def.value);
+ return;
+ }
+
+ case AST_NODE_REF: {
+ resolve_id(checker, &ast->as_ref.id);
+ return;
+ }
+ case AST_NODE_LITERAL:
+ case AST_NODE_UNKNOWN:
+ return;
+ }
+}
--
2.46.1
next prev parent reply other threads:[~2024-10-24 12:38 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-24 12:38 [PATCH olang v1 0/6] Remove symbol lookups from codegen Carlos Maniero
2024-10-24 12:38 ` [PATCH olang v1 1/6] ast: create the ast_id_t and apply it to var_def and ref Carlos Maniero
2024-10-24 12:38 ` Carlos Maniero [this message]
2024-10-24 12:38 ` [PATCH olang v1 3/6] semantics: refactor: use the ast_id_t into the fn_call node Carlos Maniero
2024-10-24 12:38 ` [PATCH olang v1 4/6] semantics: refactor: use the ast_id_t into the fn_def.params Carlos Maniero
2024-10-24 12:38 ` [PATCH olang v1 5/6] type: refactor: rename type.id to type.name Carlos Maniero
2024-10-24 12:38 ` [PATCH olang v1 6/6] ast: remove dead code from var_assign ast node Carlos Maniero
2024-10-24 12:39 ` [olang/patches/.build.yml] build success builds.sr.ht
2024-10-31 3:16 ` [PATCH olang v1 0/6] Remove symbol lookups from codegen Carlos Maniero
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241024123825.120390-3-carlos@maniero.me \
--to=carlos@maniero.me \
--cc=~johnnyrichard/olang-devel@lists.sr.ht \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.johnnyrichard.com/olang.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox