From: Carlos Maniero <carlos@maniero.me>
To: ~johnnyrichard/olang-devel@lists.sr.ht
Cc: Carlos Maniero <carlos@maniero.me>
Subject: [PATCH olang] ast: add token location at the ast nodes
Date: Fri, 04 Oct 2024 23:02:41 +0000 (UTC) [thread overview]
Message-ID: <20241004230210.184300-1-carlos@maniero.me> (raw)
This is an important step toward future semantic error reporting and
binary debug information.
Signed-off-by: Carlos Maniero <carlos@maniero.me>
---
src/ast.c | 29 +++++++++++++++++++--------
src/ast.h | 55 ++++++++++++++++++++++++++++++++++------------------
src/parser.c | 29 ++++++++++++++-------------
3 files changed, 73 insertions(+), 40 deletions(-)
diff --git a/src/ast.c b/src/ast.c
index db18426..1224305 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -40,7 +40,12 @@ ast_new_translation_unit(arena_t *arena)
}
ast_node_t *
-ast_new_node_fn_def(arena_t *arena, string_view_t id, list_t *params, string_view_t return_type, ast_node_t *block)
+ast_new_node_fn_def(arena_t *arena,
+ token_loc_t loc,
+ string_view_t id,
+ list_t *params,
+ string_view_t return_type,
+ ast_node_t *block)
{
assert(arena);
assert(params);
@@ -50,6 +55,7 @@ ast_new_node_fn_def(arena_t *arena, string_view_t id, list_t *params, string_vie
assert(node_fn_def);
node_fn_def->kind = AST_NODE_FN_DEF;
+ node_fn_def->loc = loc;
ast_fn_definition_t *fn_def = &node_fn_def->as_fn_def;
fn_def->id = id;
@@ -61,7 +67,7 @@ ast_new_node_fn_def(arena_t *arena, string_view_t id, list_t *params, string_vie
}
ast_node_t *
-ast_new_node_fn_call(arena_t *arena, string_view_t id, list_t *args)
+ast_new_node_fn_call(arena_t *arena, token_loc_t loc, string_view_t id, list_t *args)
{
assert(arena);
assert(args);
@@ -70,6 +76,7 @@ ast_new_node_fn_call(arena_t *arena, string_view_t id, list_t *args)
assert(node_fn_call);
node_fn_call->kind = AST_NODE_FN_CALL;
+ node_fn_call->loc = loc;
ast_fn_call_t *fn_call = &node_fn_call->as_fn_call;
fn_call->id = id;
@@ -79,12 +86,13 @@ ast_new_node_fn_call(arena_t *arena, string_view_t id, list_t *args)
}
ast_node_t *
-ast_new_node_var_def(arena_t *arena, string_view_t id, string_view_t type, ast_node_t *value)
+ast_new_node_var_def(arena_t *arena, token_loc_t loc, string_view_t id, string_view_t type, ast_node_t *value)
{
ast_node_t *node_var_def = (ast_node_t *)arena_alloc(arena, sizeof(ast_node_t));
assert(node_var_def);
node_var_def->kind = AST_NODE_VAR_DEF;
+ node_var_def->loc = loc;
ast_var_definition_t *var_def = &node_var_def->as_var_def;
var_def->id = id;
@@ -95,12 +103,13 @@ ast_new_node_var_def(arena_t *arena, string_view_t id, string_view_t type, ast_n
}
ast_node_t *
-ast_new_node_bin_op(arena_t *arena, ast_binary_op_kind_t kind, ast_node_t *lhs, ast_node_t *rhs)
+ast_new_node_bin_op(arena_t *arena, token_loc_t loc, ast_binary_op_kind_t kind, ast_node_t *lhs, ast_node_t *rhs)
{
ast_node_t *node_bin_op = (ast_node_t *)arena_alloc(arena, sizeof(ast_node_t));
assert(node_bin_op);
node_bin_op->kind = AST_NODE_BINARY_OP;
+ node_bin_op->loc = loc;
node_bin_op->as_bin_op.kind = kind;
node_bin_op->as_bin_op.lhs = lhs;
node_bin_op->as_bin_op.rhs = rhs;
@@ -109,12 +118,13 @@ ast_new_node_bin_op(arena_t *arena, ast_binary_op_kind_t kind, ast_node_t *lhs,
}
ast_node_t *
-ast_new_node_literal_u32(arena_t *arena, uint32_t value)
+ast_new_node_literal_u32(arena_t *arena, token_loc_t loc, uint32_t value)
{
ast_node_t *node_literal = (ast_node_t *)arena_alloc(arena, sizeof(ast_node_t));
assert(node_literal);
node_literal->kind = AST_NODE_LITERAL;
+ node_literal->loc = loc;
node_literal->as_literal.kind = AST_LITERAL_U32;
node_literal->as_literal.as_u32 = value;
@@ -122,36 +132,39 @@ ast_new_node_literal_u32(arena_t *arena, uint32_t value)
}
ast_node_t *
-ast_new_node_ref(arena_t *arena, string_view_t id)
+ast_new_node_ref(arena_t *arena, token_loc_t loc, string_view_t id)
{
ast_node_t *node_ref = (ast_node_t *)arena_alloc(arena, sizeof(ast_node_t));
assert(node_ref);
node_ref->kind = AST_NODE_REF;
+ node_ref->loc = loc;
node_ref->as_ref.id = id;
return node_ref;
}
ast_node_t *
-ast_new_node_return_stmt(arena_t *arena, ast_node_t *expr)
+ast_new_node_return_stmt(arena_t *arena, token_loc_t loc, ast_node_t *expr)
{
ast_node_t *node_return_stmt = (ast_node_t *)arena_alloc(arena, sizeof(ast_node_t));
assert(node_return_stmt);
node_return_stmt->kind = AST_NODE_RETURN_STMT;
+ node_return_stmt->loc = loc;
node_return_stmt->as_return_stmt.expr = expr;
return node_return_stmt;
}
ast_node_t *
-ast_new_node_if_stmt(arena_t *arena, ast_node_t *cond, ast_node_t *then, ast_node_t *_else)
+ast_new_node_if_stmt(arena_t *arena, token_loc_t loc, ast_node_t *cond, ast_node_t *then, ast_node_t *_else)
{
ast_node_t *node_if_stmt = arena_alloc(arena, sizeof(ast_node_t));
assert(node_if_stmt);
node_if_stmt->kind = AST_NODE_IF_STMT;
+ node_if_stmt->loc = loc;
node_if_stmt->as_if_stmt.cond = cond;
node_if_stmt->as_if_stmt.then = then;
node_if_stmt->as_if_stmt._else = _else;
diff --git a/src/ast.h b/src/ast.h
index 4791d6b..f9a23b5 100644
--- a/src/ast.h
+++ b/src/ast.h
@@ -20,6 +20,7 @@
#include <stdint.h>
#include "arena.h"
+#include "lexer.h"
#include "list.h"
#include "scope.h"
#include "string_view.h"
@@ -42,15 +43,21 @@ typedef enum
AST_NODE_UNKNOWN
} ast_node_kind_t;
+typedef struct ast_node_meta
+{
+ ast_node_kind_t kind;
+ token_loc_t loc;
+} ast_node_meta_t;
+
typedef struct ast_block
{
- ast_node_kind_t node_kind;
+ ast_node_meta_t meta;
list_t *nodes;
} ast_block_t;
typedef struct ast_translation_unit
{
- ast_node_kind_t node_kind;
+ ast_node_meta_t meta;
list_t *decls;
} ast_translation_unit_t;
@@ -62,7 +69,7 @@ typedef struct ast_fn_param
typedef struct ast_fn_definition
{
- ast_node_kind_t node_kind;
+ ast_node_meta_t meta;
string_view_t id;
list_t *params;
string_view_t return_type;
@@ -72,7 +79,7 @@ typedef struct ast_fn_definition
typedef struct ast_fn_call
{
- ast_node_kind_t node_kind;
+ ast_node_meta_t meta;
string_view_t id;
list_t *args;
scope_t *scope;
@@ -80,7 +87,7 @@ typedef struct ast_fn_call
typedef struct ast_var_definition
{
- ast_node_kind_t node_kind;
+ ast_node_meta_t meta;
string_view_t id;
string_view_t type;
ast_node_t *value;
@@ -94,7 +101,7 @@ typedef enum
typedef struct ast_literal
{
- ast_node_kind_t node_kind;
+ ast_node_meta_t meta;
ast_literal_kind_t kind;
union
{
@@ -104,7 +111,7 @@ typedef struct ast_literal
typedef struct ast_ref
{
- ast_node_kind_t node_kind;
+ ast_node_meta_t meta;
string_view_t id;
scope_t *scope;
} ast_ref_t;
@@ -133,7 +140,7 @@ typedef enum ast_binary_op_kind
typedef struct ast_binary_op
{
- ast_node_kind_t node_kind;
+ ast_node_meta_t meta;
ast_binary_op_kind_t kind;
ast_node_t *lhs;
ast_node_t *rhs;
@@ -141,13 +148,13 @@ typedef struct ast_binary_op
typedef struct ast_return_stmt
{
- ast_node_kind_t node_kind;
+ ast_node_meta_t meta;
ast_node_t *expr;
} ast_return_stmt_t;
typedef struct ast_if_stmt
{
- ast_node_kind_t node_kind;
+ ast_node_meta_t meta;
ast_node_t *cond;
ast_node_t *then;
ast_node_t *_else;
@@ -155,7 +162,12 @@ typedef struct ast_if_stmt
typedef union ast_node
{
- ast_node_kind_t kind;
+ // inlined ast_node_meta_t struct.
+ struct
+ {
+ ast_node_kind_t kind;
+ token_loc_t loc;
+ };
ast_translation_unit_t as_translation_unit;
ast_fn_definition_t as_fn_def;
ast_fn_call_t as_fn_call;
@@ -172,28 +184,33 @@ ast_node_t *
ast_new_translation_unit(arena_t *arena);
ast_node_t *
-ast_new_node_fn_def(arena_t *arena, string_view_t id, list_t *params, string_view_t return_type, ast_node_t *block);
+ast_new_node_fn_def(arena_t *arena,
+ token_loc_t loc,
+ string_view_t id,
+ list_t *params,
+ string_view_t return_type,
+ ast_node_t *block);
ast_node_t *
-ast_new_node_fn_call(arena_t *arena, string_view_t id, list_t *args);
+ast_new_node_fn_call(arena_t *arena, token_loc_t loc, string_view_t id, list_t *args);
ast_node_t *
-ast_new_node_var_def(arena_t *arena, string_view_t id, string_view_t type, ast_node_t *value);
+ast_new_node_var_def(arena_t *arena, token_loc_t loc, string_view_t id, string_view_t type, ast_node_t *value);
ast_node_t *
-ast_new_node_bin_op(arena_t *arena, ast_binary_op_kind_t kind, ast_node_t *lhs, ast_node_t *rhs);
+ast_new_node_bin_op(arena_t *arena, token_loc_t loc, ast_binary_op_kind_t kind, ast_node_t *lhs, ast_node_t *rhs);
ast_node_t *
-ast_new_node_literal_u32(arena_t *arena, uint32_t value);
+ast_new_node_literal_u32(arena_t *arena, token_loc_t loc, uint32_t value);
ast_node_t *
-ast_new_node_ref(arena_t *arena, string_view_t id);
+ast_new_node_ref(arena_t *arena, token_loc_t loc, string_view_t id);
ast_node_t *
-ast_new_node_return_stmt(arena_t *arena, ast_node_t *expr);
+ast_new_node_return_stmt(arena_t *arena, token_loc_t loc, ast_node_t *expr);
ast_node_t *
-ast_new_node_if_stmt(arena_t *arena, ast_node_t *cond, ast_node_t *then, ast_node_t *_else);
+ast_new_node_if_stmt(arena_t *arena, token_loc_t loc, ast_node_t *cond, ast_node_t *then, ast_node_t *_else);
ast_node_t *
ast_new_node_block(arena_t *arena);
diff --git a/src/parser.c b/src/parser.c
index ecc10f0..35c8107 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -224,7 +224,7 @@ parser_parse_expr_1(parser_t *parser, ast_node_t *lhs, size_t prev_precedence)
lexer_peek_next(parser->lexer, &lookahead_token);
}
- lhs = ast_new_node_bin_op(parser->arena, token_kind_to_binary_op_kind(token_op.kind), lhs, rhs);
+ lhs = ast_new_node_bin_op(parser->arena, token_op.loc, token_kind_to_binary_op_kind(token_op.kind), lhs, rhs);
if (lhs == NULL) {
return NULL;
}
@@ -252,19 +252,19 @@ parser_parse_factor(parser_t *parser)
switch (token.kind) {
case TOKEN_NUMBER:
- return ast_new_node_literal_u32(parser->arena, string_view_to_u32(token.value));
+ return ast_new_node_literal_u32(parser->arena, token.loc, string_view_to_u32(token.value));
case TOKEN_ID: {
- string_view_t id = token.value;
+ token_t token_id = token;
lexer_peek_next(parser->lexer, &token);
if (token.kind == TOKEN_OPAREN) {
list_t *args = parser_parse_fn_args(parser);
- return ast_new_node_fn_call(parser->arena, id, args);
+ return ast_new_node_fn_call(parser->arena, token_id.loc, token_id.value, args);
}
- return ast_new_node_ref(parser->arena, id);
+ return ast_new_node_ref(parser->arena, token_id.loc, token_id.value);
}
case TOKEN_OPAREN: {
@@ -411,7 +411,7 @@ parser_parse_fn_definition(parser_t *parser)
return NULL;
}
- return ast_new_node_fn_def(parser->arena, fn_name_token.value, params, fn_return_type, block);
+ return ast_new_node_fn_def(parser->arena, fn_name_token.loc, fn_name_token.value, params, fn_return_type, block);
}
static bool
@@ -498,7 +498,9 @@ EndLoop:
static ast_node_t *
parser_parse_return_stmt(parser_t *parser)
{
- if (!skip_expected_token(parser, TOKEN_RETURN)) {
+ token_t token_ret;
+
+ if (!expected_next_token(parser, &token_ret, TOKEN_RETURN)) {
return NULL;
}
@@ -507,7 +509,7 @@ parser_parse_return_stmt(parser_t *parser)
return NULL;
}
- ast_node_t *node_return_stmt = ast_new_node_return_stmt(parser->arena, expr);
+ ast_node_t *node_return_stmt = ast_new_node_return_stmt(parser->arena, token_ret.loc, expr);
assert(node_return_stmt);
if (!skip_expected_token(parser, TOKEN_LF)) {
@@ -521,7 +523,8 @@ parser_parse_return_stmt(parser_t *parser)
static ast_node_t *
parser_parse_if_stmt(parser_t *parser)
{
- if (!skip_expected_token(parser, TOKEN_IF)) {
+ token_t token_if;
+ if (!expected_next_token(parser, &token_if, TOKEN_IF)) {
return NULL;
}
@@ -557,7 +560,7 @@ parser_parse_if_stmt(parser_t *parser)
return NULL;
}
- ast_node_t *node_if_stmt = ast_new_node_if_stmt(parser->arena, cond, then, _else);
+ ast_node_t *node_if_stmt = ast_new_node_if_stmt(parser->arena, token_if.loc, cond, then, _else);
assert(node_if_stmt);
@@ -573,8 +576,8 @@ parser_parse_var_def(parser_t *parser)
return NULL;
}
- token_t id_token;
- if (!expected_next_token(parser, &id_token, TOKEN_ID)) {
+ token_t token_id;
+ if (!expected_next_token(parser, &token_id, TOKEN_ID)) {
return NULL;
}
@@ -593,7 +596,7 @@ parser_parse_var_def(parser_t *parser)
return NULL;
}
- ast_node_t *var_node = ast_new_node_var_def(parser->arena, id_token.value, var_type, expr);
+ ast_node_t *var_node = ast_new_node_var_def(parser->arena, token_id.loc, token_id.value, var_type, expr);
skip_line_feeds(parser->lexer);
base-commit: 832f13d2ed2762bb9582eb1b633a30af608e028f
--
2.34.1
next reply other threads:[~2024-10-04 23:03 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-04 23:02 Carlos Maniero [this message]
2024-10-04 23:03 ` [olang/patches/.build.yml] build success builds.sr.ht
2024-10-05 1:04 ` [PATCH olang] ast: add token location at the ast nodes Johnny Richard
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241004230210.184300-1-carlos@maniero.me \
--to=carlos@maniero.me \
--cc=~johnnyrichard/olang-devel@lists.sr.ht \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.johnnyrichard.com/olang.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox