[PATCH olang] lexer: create token location structure

public inbox for ~johnnyrichard/olang-devel@lists.sr.ht
 help / color / mirror / code / Atom feed

From: Carlos Maniero <carlos@maniero.me>
To: ~johnnyrichard/olang-devel@lists.sr.ht
Cc: Carlos Maniero <carlos@maniero.me>
Subject: [PATCH olang] lexer: create token location structure
Date: Fri, 04 Oct 2024 22:12:18 +0000 (UTC)	[thread overview]
Message-ID: <20241004221157.153364-1-carlos@maniero.me> (raw)

To find the source line and column when printing errors, we used to get
the filepath from the parser, the cursor from the token and the source
code from the lexer.

Now, instead of navigating through these three data structures, the same
information is accessible from a single place: the token's location,
*token_loc_t*.

The source_code.h file was removed because the source_code struct was
moved into lexer.h.

Signed-off-by: Carlos Maniero <carlos@maniero.me>
---
 src/lexer.c              | 26 +++++++++++++++++++-------
 src/lexer.h              | 23 ++++++++++++++++++++---
 src/main.c               | 18 +++++++++---------
 src/parser.c             | 33 +++++++++++++++------------------
 src/parser.h             |  4 +---
 src/source_code.h        | 28 ----------------------------
 tests/unit/parser_test.c |  2 +-
 7 files changed, 65 insertions(+), 69 deletions(-)
 delete mode 100644 src/source_code.h

diff --git a/src/lexer.c b/src/lexer.c
index 523822f..4784f1c 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -396,21 +396,21 @@ static void
 lexer_init_char_value_token(lexer_t *lexer, token_t *token, token_kind_t kind)
 {
     string_view_t str = { .chars = lexer->src.code.chars + lexer->cur.offset, .size = 1 };
-    *token = (token_t){ .kind = kind, .value = str, .cur = lexer->cur };
+    *token = (token_t){ .kind = kind, .value = str, .loc = (token_loc_t){ .src = lexer->src, .cur = lexer->cur } };
 }
 
 static void
 lexer_init_str_value_token(lexer_t *lexer, token_t *token, token_kind_t kind, lexer_cursor_t cur)
 {
     string_view_t str = { .chars = lexer->src.code.chars + cur.offset, .size = lexer->cur.offset - cur.offset };
-    *token = (token_t){ .kind = kind, .value = str, .cur = cur };
+    *token = (token_t){ .kind = kind, .value = str, .loc = (token_loc_t){ .src = lexer->src, .cur = cur } };
 }
 
 static void
 lexer_init_eof_token(lexer_t *lexer, token_t *token)
 {
     string_view_t str = { 0 };
-    *token = (token_t){ .kind = TOKEN_EOF, .value = str, .cur = lexer->cur };
+    *token = (token_t){ .kind = TOKEN_EOF, .value = str, .loc = (token_loc_t){ .src = lexer->src, .cur = lexer->cur } };
 }
 
 static token_kind_t
@@ -458,14 +458,26 @@ lexer_lookahead(lexer_t *lexer, token_t *token, size_t n)
 }
 
 string_view_t
-lexer_get_token_line(lexer_t *lexer, token_t *token)
+token_loc_to_line(token_loc_t loc)
 {
-    size_t offset = token->cur.bol;
-    string_view_t line = { .chars = lexer->src.code.chars + offset, .size = 0 };
+    size_t offset = loc.cur.bol;
+    string_view_t line = { .chars = loc.src.code.chars + offset, .size = 0 };
 
-    while ((line.size + offset) < lexer->src.code.size && line.chars[line.size] != '\n' && line.chars[line.size] != 0) {
+    while ((line.size + offset) < loc.src.code.size && line.chars[line.size] != '\n' && line.chars[line.size] != 0) {
         ++line.size;
     }
 
     return line;
 }
+
+size_t
+token_loc_to_lineno(token_loc_t loc)
+{
+    return loc.cur.row + 1;
+}
+
+size_t
+token_loc_to_colno(token_loc_t loc)
+{
+    return loc.cur.offset - loc.cur.bol + 1;
+}
diff --git a/src/lexer.h b/src/lexer.h
index c5a342a..bb39a70 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -17,11 +17,16 @@
 #ifndef LEXER_H
 #define LEXER_H
 
-#include "source_code.h"
 #include "string_view.h"
 #include <stdint.h>
 #include <stdio.h>
 
+typedef struct source_code
+{
+    char *filepath;
+    string_view_t code;
+} source_code_t;
+
 typedef struct lexer_cursor
 {
     size_t offset;
@@ -85,13 +90,25 @@ typedef enum token_kind
     TOKEN_EOF
 } token_kind_t;
 
+typedef struct token_loc
+{
+    source_code_t src;
+    lexer_cursor_t cur;
+} token_loc_t;
+
 typedef struct token
 {
     token_kind_t kind;
     string_view_t value;
-    lexer_cursor_t cur;
+    token_loc_t loc;
 } token_t;
 
+size_t
+token_loc_to_lineno(token_loc_t loc);
+
+size_t
+token_loc_to_colno(token_loc_t loc);
+
 void
 lexer_init(lexer_t *lexer, source_code_t src);
 
@@ -111,6 +128,6 @@ bool
 token_kind_is_binary_op(token_kind_t kind);
 
 string_view_t
-lexer_get_token_line(lexer_t *lexer, token_t *token);
+token_loc_to_line(token_loc_t loc);
 
 #endif /* LEXER_H */
diff --git a/src/main.c b/src/main.c
index 4c8f2a5..d1c76e3 100644
--- a/src/main.c
+++ b/src/main.c
@@ -44,7 +44,7 @@ void
 handle_codegen_linux(cli_opts_t *opts);
 
 static void
-print_token(char *filepath, token_t *token);
+print_token(token_t *token);
 
 source_code_t
 read_entire_file(char *filepath, arena_t *arena);
@@ -89,10 +89,10 @@ handle_dump_tokens(cli_opts_t *opts)
     token_t token = { 0 };
     lexer_next_token(&lexer, &token);
     while (token.kind != TOKEN_EOF) {
-        print_token(opts->filepath, &token);
+        print_token(&token);
         lexer_next_token(&lexer, &token);
     }
-    print_token(opts->filepath, &token);
+    print_token(&token);
 
     arena_free(&arena);
 }
@@ -112,7 +112,7 @@ handle_dump_ast(cli_opts_t *opts)
     source_code_t src = read_entire_file(opts->filepath, &arena);
 
     lexer_init(&lexer, src);
-    parser_init(&parser, &lexer, &arena, opts->filepath);
+    parser_init(&parser, &lexer, &arena);
 
     ast_node_t *ast = parser_parse_translation_unit(&parser);
 
@@ -133,7 +133,7 @@ handle_codegen_linux(cli_opts_t *opts)
 
     source_code_t src = read_entire_file(opts->filepath, &arena);
     lexer_init(&lexer, src);
-    parser_init(&parser, &lexer, &arena, opts->filepath);
+    parser_init(&parser, &lexer, &arena);
 
     ast_node_t *ast = parser_parse_translation_unit(&parser);
 
@@ -242,11 +242,11 @@ read_entire_file(char *filepath, arena_t *arena)
 }
 
 static void
-print_token(char *filepath, token_t *token)
+print_token(token_t *token)
 {
     printf("%s:%lu:%lu: <%s>\n",
-           filepath,
-           token->cur.row + 1,
-           (token->cur.offset - token->cur.bol) + 1,
+           token->loc.src.filepath,
+           token_loc_to_lineno(token->loc),
+           token_loc_to_colno(token->loc),
            token_kind_to_cstr(token->kind));
 }
diff --git a/src/parser.c b/src/parser.c
index 26e5465..ecc10f0 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -32,7 +32,7 @@ static bool
 expected_next_token(parser_t *parser, token_t *token, token_kind_t kind);
 
 static bool
-expected_token(parser_t *parser, token_t *token, token_kind_t kind);
+expected_token(token_t *token, token_kind_t kind);
 
 static bool
 parser_parse_type(parser_t *parser, string_view_t *type);
@@ -68,14 +68,12 @@ static void
 skip_line_feeds(lexer_t *lexer);
 
 void
-parser_init(parser_t *parser, lexer_t *lexer, arena_t *arena, char *file_path)
+parser_init(parser_t *parser, lexer_t *lexer, arena_t *arena)
 {
     assert(parser && "parser is required");
     assert(lexer && "lexer is required");
-    assert(file_path && "file_path is required");
     parser->lexer = lexer;
     parser->arena = arena;
-    parser->file_path = file_path;
 }
 
 ast_node_t *
@@ -311,7 +309,7 @@ parser_parse_fn_args(parser_t *parser)
     bool is_not_first_arg = false;
 
     while (token.kind != TOKEN_CPAREN && token.kind != TOKEN_EOF) {
-        if (is_not_first_arg && expected_token(parser, &token, TOKEN_COMMA)) {
+        if (is_not_first_arg && expected_token(&token, TOKEN_COMMA)) {
             lexer_next_token(parser->lexer, &token);
         }
 
@@ -353,11 +351,11 @@ parser_parse_fn_params(parser_t *parser)
     bool is_not_first_param = false;
 
     while (token.kind != TOKEN_CPAREN && token.kind != TOKEN_EOF) {
-        if (is_not_first_param && expected_token(parser, &token, TOKEN_COMMA)) {
+        if (is_not_first_param && expected_token(&token, TOKEN_COMMA)) {
             lexer_next_token(parser->lexer, &token);
         }
 
-        if (!expected_token(parser, &token, TOKEN_ID)) {
+        if (!expected_token(&token, TOKEN_ID)) {
             return NULL;
         }
 
@@ -372,7 +370,7 @@ parser_parse_fn_params(parser_t *parser)
         is_not_first_param = true;
     }
 
-    if (!expected_token(parser, &token, TOKEN_CPAREN)) {
+    if (!expected_token(&token, TOKEN_CPAREN)) {
         return NULL;
     }
 
@@ -555,7 +553,7 @@ parser_parse_if_stmt(parser_t *parser)
             return NULL;
         }
 
-    } else if (!expected_token(parser, &next_token, TOKEN_LF)) {
+    } else if (!expected_token(&next_token, TOKEN_LF)) {
         return NULL;
     }
 
@@ -613,24 +611,23 @@ static bool
 expected_next_token(parser_t *parser, token_t *token, token_kind_t expected_kind)
 {
     lexer_next_token(parser->lexer, token);
-    return expected_token(parser, token, expected_kind);
+    return expected_token(token, expected_kind);
 }
 
 static bool
-expected_token(parser_t *parser, token_t *token, token_kind_t expected_kind)
+expected_token(token_t *token, token_kind_t expected_kind)
 {
     if (token->kind != expected_kind) {
         fprintf(stderr,
-                "%s:%lu:%lu: error: got '" SV_FMT "' token but expect <%s>\n",
-                parser->file_path,
-                token->cur.row + 1,
-                (token->cur.offset - token->cur.bol) + 1,
+                "%s:%lu:%lu: syntax error: got '" SV_FMT "' token but expect '%s'\n",
+                token->loc.src.filepath,
+                token_loc_to_lineno(token->loc),
+                token_loc_to_colno(token->loc),
                 SV_ARG(token->value),
                 token_kind_to_cstr(expected_kind));
 
-        string_view_t line = lexer_get_token_line(parser->lexer, token);
-        fprintf(stderr, "" SV_FMT "\n", SV_ARG(line));
-        fprintf(stderr, "%*s\n", (int)(token->cur.offset - token->cur.bol + 1), "^");
+        fprintf(stderr, SV_FMT "\n", SV_ARG(token_loc_to_line(token->loc)));
+        fprintf(stderr, "%*s\n", (int)token_loc_to_colno(token->loc), "^");
 
         exit(EXIT_FAILURE);
     }
diff --git a/src/parser.h b/src/parser.h
index 31c0dc3..7db2b74 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -25,12 +25,10 @@ typedef struct parser
 {
     lexer_t *lexer;
     arena_t *arena;
-    // TODO: we should define a better place to file_path string
-    char *file_path;
 } parser_t;
 
 void
-parser_init(parser_t *parser, lexer_t *lexer, arena_t *arena, char *file_path);
+parser_init(parser_t *parser, lexer_t *lexer, arena_t *arena);
 
 ast_node_t *
 parser_parse_translation_unit(parser_t *parser);
diff --git a/src/source_code.h b/src/source_code.h
deleted file mode 100644
index 2c774c7..0000000
--- a/src/source_code.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2024 olang maintainers
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- */
-#include "string_view.h"
-
-#ifndef SOURCE_CODE_H
-#define SOURCE_CODE_H
-
-typedef struct source_code
-{
-    char *filepath;
-    string_view_t code;
-} source_code_t;
-
-#endif
diff --git a/tests/unit/parser_test.c b/tests/unit/parser_test.c
index 9eb56fd..c834261 100644
--- a/tests/unit/parser_test.c
+++ b/tests/unit/parser_test.c
@@ -39,7 +39,7 @@ parse_translation_unit_test(const MunitParameter params[], void *user_data_or_fi
     lexer_init(&lexer, (source_code_t){ .code = code, .filepath = filepath });
 
     parser_t parser;
-    parser_init(&parser, &lexer, &arena, filepath);
+    parser_init(&parser, &lexer, &arena);
 
     ast_node_t *translation_unit_node = parser_parse_translation_unit(&parser);
     assert_not_null(translation_unit_node);

base-commit: 83fd6730450617e85376acdec1f4bb396fff26c8
-- 
2.34.1

next             reply	other threads:[~2024-10-04 22:12 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-04 22:12 Carlos Maniero [this message]
2024-10-04 22:12 ` [olang/patches/.build.yml] build success builds.sr.ht
2024-10-05  0:15 ` [PATCH olang] lexer: create token location structure Johnny Richard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241004221157.153364-1-carlos@maniero.me \
    --to=carlos@maniero.me \
    --cc=~johnnyrichard/olang-devel@lists.sr.ht \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

Code repositories for project(s) associated with this public inbox

	https://git.johnnyrichard.com/olang.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox