public inbox for ~johnnyrichard/olang-devel@lists.sr.ht
 help / color / mirror / code / Atom feed
From: Carlos Maniero <carlos@maniero.me>
To: ~johnnyrichard/olang-devel@lists.sr.ht
Cc: Carlos Maniero <carlos@maniero.me>
Subject: [PATCH olang v2 2/6] semantics: resolve variable symbols
Date: Thu, 31 Oct 2024 03:13:13 +0000 (UTC)	[thread overview]
Message-ID: <20241031031302.136553-3-carlos@maniero.me> (raw)
In-Reply-To: <20241031031302.136553-1-carlos@maniero.me>

This is a first step towards semantic checking. It adds the resolved
symbol to ast_ident_t. It does not yet check whether types match, nor
does it print helpful errors.

Why add the symbol to ast_ident_t?
----------------------------------

The semantic pass has to resolve each symbol anyway, both to perform
type checking and to verify that the reference can be resolved. Checking
that the symbol exists and then throwing the found symbol away would be
wasted computation.

Additionally, storing the symbol in the identifier removes complexity
from codegen, since symbol lookups are no longer required there.

Why create the resolve_symbols function?
----------------------------------------

If you look at resolve_symbols and populate_scope you may wonder why
there are two functions that traverse the entire AST. The main reason is
that olang allows a reference to appear before its definition. If symbols
were resolved in the same pass that registers them, something such as a
call to a function that is defined later would make the checker fail.
It is therefore necessary to first populate the scope and register the
symbols for the entire AST, and only afterwards resolve the references.

Signed-off-by: Carlos Maniero <carlos@maniero.me>
---
 src/ast.c              |   2 +-
 src/ast.h              |   3 +-
 src/codegen_x86_64.c   |  34 ++++-------
 src/pretty_print_ast.c |   2 +-
 src/type_checker.c     | 125 ++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 137 insertions(+), 29 deletions(-)

diff --git a/src/ast.c b/src/ast.c
index 7bb5277..e6b518e 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -106,7 +106,7 @@ ast_new_node_var_def(arena_t *arena,
     node_var_def->loc = loc;
     ast_var_definition_t *var_def = &node_var_def->as_var_def;
 
-    var_def->id.name = id;
+    var_def->ident.name = id;
     var_def->type = type;
     var_def->value = value;
 
diff --git a/src/ast.h b/src/ast.h
index 33496a8..9d128d7 100644
--- a/src/ast.h
+++ b/src/ast.h
@@ -72,6 +72,7 @@ typedef struct ast_ident
 {
     string_view_t name;
     scope_t *scope;
+    symbol_t *symbol;
 } ast_ident_t;
 
 typedef struct ast_translation_unit
@@ -108,7 +109,7 @@ typedef struct ast_fn_call
 typedef struct ast_var_definition
 {
     AST_NODE_HEAD;
-    ast_ident_t id;
+    ast_ident_t ident;
     type_t *type;
     ast_node_t *value;
 } ast_var_definition_t;
diff --git a/src/codegen_x86_64.c b/src/codegen_x86_64.c
index b695cc0..9b101f8 100644
--- a/src/codegen_x86_64.c
+++ b/src/codegen_x86_64.c
@@ -143,12 +143,10 @@ codegen_x86_64_emit_expression(codegen_x86_64_t *codegen, ast_node_t *expr_node)
         case AST_NODE_REF: {
             ast_ref_t ref = expr_node->as_ref;
 
-            symbol_t *symbol = scope_lookup(ref.ident.scope, ref.ident.name);
-            assert(symbol);
-
-            size_t offset = codegen_x86_64_get_stack_offset(codegen, symbol);
+            size_t offset =
+                codegen_x86_64_get_stack_offset(codegen, ref.ident.symbol);
 
-            size_t bytes = type_to_bytes(symbol->type);
+            size_t bytes = type_to_bytes(ref.ident.symbol->type);
 
             fprintf(codegen->out,
                     "    mov -%ld(%%rbp), %s\n",
@@ -597,18 +595,14 @@ codegen_x86_64_emit_expression(codegen_x86_64_t *codegen, ast_node_t *expr_node)
                     switch (bin_op.lhs->kind) {
                         case AST_NODE_REF: {
                             ast_ref_t ref = bin_op.lhs->as_ref;
-                            scope_t *scope = ref.ident.scope;
-
-                            symbol_t *symbol =
-                                scope_lookup(scope, ref.ident.name);
-                            assert(symbol);
 
                             size_t offset = codegen_x86_64_get_stack_offset(
-                                codegen, symbol);
+                                codegen, ref.ident.symbol);
 
                             codegen_x86_64_emit_expression(codegen, bin_op.rhs);
 
-                            size_t type_size = type_to_bytes(symbol->type);
+                            size_t type_size =
+                                type_to_bytes(ref.ident.symbol->type);
                             fprintf(codegen->out,
                                     "    mov %s, -%ld(%%rbp)\n",
                                     get_reg_for(REG_ACCUMULATOR, type_size),
@@ -669,12 +663,8 @@ codegen_x86_64_emit_expression(codegen_x86_64_t *codegen, ast_node_t *expr_node)
 
                     ast_ref_t ref = unary_op.operand->as_ref;
 
-                    symbol_t *symbol =
-                        scope_lookup(ref.ident.scope, ref.ident.name);
-                    assert(symbol);
-
-                    size_t offset =
-                        codegen_x86_64_get_stack_offset(codegen, symbol);
+                    size_t offset = codegen_x86_64_get_stack_offset(
+                        codegen, ref.ident.symbol);
 
                     fprintf(
                         codegen->out, "    lea -%ld(%%rbp), %%rax\n", offset);
@@ -724,16 +714,12 @@ codegen_x86_64_emit_block(codegen_x86_64_t *codegen, ast_block_t *block)
 
             case AST_NODE_VAR_DEF: {
                 ast_var_definition_t var_def = node->as_var_def;
-                scope_t *scope = var_def.id.scope;
-
-                symbol_t *symbol = scope_lookup(scope, var_def.id.name);
-                assert(symbol);
 
-                size_t type_size = type_to_bytes(symbol->type);
+                size_t type_size = type_to_bytes(var_def.ident.symbol->type);
                 codegen->base_offset += type_size;
 
                 codegen_x86_64_put_stack_offset(
-                    codegen, symbol, codegen->base_offset);
+                    codegen, var_def.ident.symbol, codegen->base_offset);
 
                 if (var_def.value) {
                     codegen_x86_64_emit_expression(codegen, var_def.value);
diff --git a/src/pretty_print_ast.c b/src/pretty_print_ast.c
index 9c0e607..06b93f1 100644
--- a/src/pretty_print_ast.c
+++ b/src/pretty_print_ast.c
@@ -288,7 +288,7 @@ ast_node_to_pretty_print_node(ast_node_t *ast, arena_t *arena)
             char name[256];
             sprintf(name,
                     "Var_Definition <name:" SV_FMT "> <kind:" SV_FMT ">",
-                    SV_ARG(var.id.name),
+                    SV_ARG(var.ident.name),
                     SV_ARG(var.type->id));
             node->name =
                 (char *)arena_alloc(arena, sizeof(char) * (strlen(name) + 1));
diff --git a/src/type_checker.c b/src/type_checker.c
index abf7ae9..081034d 100644
--- a/src/type_checker.c
+++ b/src/type_checker.c
@@ -23,6 +23,9 @@
 static void
 populate_scope(checker_t *checker, scope_t *scope, ast_node_t *ast);
 
+static void
+resolve_symbols(checker_t *checker, ast_node_t *ast);
+
 checker_t *
 checker_new(arena_t *arena)
 {
@@ -98,6 +101,7 @@ checker_check(checker_t *checker, ast_node_t *ast)
 
     scope_t *scope = scope_new(checker->arena);
     populate_scope(checker, scope, ast);
+    resolve_symbols(checker, ast);
 
     // TODO: traverse the ast tree to verify semantics
 }
@@ -108,12 +112,27 @@ register_id(checker_t *checker,
             ast_ident_t *ident,
             type_t *type)
 {
-    ident->scope = scope;
     symbol_t *symbol = symbol_new(checker->arena, ident->name, type);
 
+    ident->scope = scope;
+    ident->symbol = symbol;
+
     scope_insert(scope, symbol);
 }
 
+static void
+resolve_id(checker_t *checker, ast_ident_t *id)
+{
+    assert(checker);
+
+    symbol_t *symbol = scope_lookup(id->scope, id->name);
+
+    // FIXME: assert types and print a friendly error message
+    assert(symbol);
+
+    id->symbol = symbol;
+}
+
 static void
 populate_scope(checker_t *checker, scope_t *scope, ast_node_t *ast)
 {
@@ -231,7 +250,7 @@ populate_scope(checker_t *checker, scope_t *scope, ast_node_t *ast)
             type_resolve(ast->as_var_def.type);
 
             register_id(
-                checker, scope, &ast->as_var_def.id, ast->as_var_def.type);
+                checker, scope, &ast->as_var_def.ident, ast->as_var_def.type);
 
             populate_scope(checker, scope, ast->as_var_def.value);
             return;
@@ -247,3 +266,105 @@ populate_scope(checker_t *checker, scope_t *scope, ast_node_t *ast)
             return;
     }
 }
+
+static void
+resolve_symbols(checker_t *checker, ast_node_t *ast)
+{
+    assert(checker);
+
+    switch (ast->kind) {
+        case AST_NODE_TRANSLATION_UNIT: {
+            list_item_t *item = list_head(ast->as_translation_unit.decls);
+
+            while (item != NULL) {
+                resolve_symbols(checker, (ast_node_t *)item->value);
+                item = list_next(item);
+            }
+            return;
+        }
+
+        case AST_NODE_FN_DEF: {
+            if (ast->as_fn_def.block != NULL) {
+                resolve_symbols(checker, ast->as_fn_def.block);
+            }
+            return;
+        }
+
+        case AST_NODE_FN_CALL: {
+            list_item_t *item = list_head(ast->as_fn_call.args);
+
+            while (item != NULL) {
+                resolve_symbols(checker, (ast_node_t *)item->value);
+                item = list_next(item);
+            }
+
+            return;
+        }
+
+        case AST_NODE_IF_STMT: {
+            resolve_symbols(checker, ast->as_if_stmt.cond);
+            resolve_symbols(checker, ast->as_if_stmt.then);
+
+            if (ast->as_if_stmt._else) {
+                resolve_symbols(checker, ast->as_if_stmt._else);
+            }
+
+            return;
+        }
+
+        case AST_NODE_WHILE_STMT: {
+            resolve_symbols(checker, ast->as_while_stmt.cond);
+            resolve_symbols(checker, ast->as_while_stmt.then);
+
+            return;
+        }
+
+        case AST_NODE_BINARY_OP: {
+            ast_binary_op_t bin_op = ast->as_bin_op;
+
+            resolve_symbols(checker, bin_op.lhs);
+            resolve_symbols(checker, bin_op.rhs);
+            return;
+        }
+
+        case AST_NODE_UNARY_OP: {
+            ast_unary_op_t unary_op = ast->as_unary_op;
+
+            resolve_symbols(checker, unary_op.operand);
+            return;
+        }
+
+        case AST_NODE_RETURN_STMT: {
+            ast_return_stmt_t return_stmt = ast->as_return_stmt;
+
+            resolve_symbols(checker, return_stmt.value);
+            return;
+        }
+
+        case AST_NODE_BLOCK: {
+            ast_block_t block = ast->as_block;
+
+            list_item_t *item = list_head(block.nodes);
+
+            while (item != NULL) {
+                resolve_symbols(checker, (ast_node_t *)item->value);
+                item = list_next(item);
+            }
+
+            return;
+        }
+
+        case AST_NODE_VAR_DEF: {
+            resolve_symbols(checker, ast->as_var_def.value);
+            return;
+        }
+
+        case AST_NODE_REF: {
+            resolve_id(checker, &ast->as_ref.ident);
+            return;
+        }
+        case AST_NODE_LITERAL:
+        case AST_NODE_UNKNOWN:
+            return;
+    }
+}
-- 
2.46.1


  parent reply	other threads:[~2024-10-31  3:13 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-31  3:13 [PATCH olang v2 0/6] Remove symbol lookups from codegen Carlos Maniero
2024-10-31  3:13 ` [PATCH olang v2 1/6] ast: create the ast_ident_t and apply it to var_def and ref Carlos Maniero
2024-10-31  3:13 ` Carlos Maniero [this message]
2024-10-31  3:13 ` [PATCH olang v2 3/6] semantics: refactor: use the ast_ident_t into the fn_call node Carlos Maniero
2024-10-31  3:13 ` [PATCH olang v2 4/6] semantics: refactor: use the ast_ident_t into the fn_def.params Carlos Maniero
2024-10-31  3:13 ` [PATCH olang v2 5/6] type: refactor: rename type.id to type.name Carlos Maniero
2024-10-31  3:13 ` [PATCH olang v2 6/6] ast: remove dead code from var_assign ast node Carlos Maniero
2024-10-31  3:14   ` [olang/patches/.build.yml] build success builds.sr.ht
2024-11-01  2:17 ` [PATCH olang v2 0/6] Remove symbol lookups from codegen Johnny Richard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241031031302.136553-3-carlos@maniero.me \
    --to=carlos@maniero.me \
    --cc=~johnnyrichard/olang-devel@lists.sr.ht \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.johnnyrichard.com/olang.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox