public inbox for ~johnnyrichard/olang-devel@lists.sr.ht
 help / color / mirror / code / Atom feed
From: Carlos Maniero <carlos@maniero.me>
To: ~johnnyrichard/olang-devel@lists.sr.ht
Cc: Carlos Maniero <carlos@maniero.me>
Subject: [PATCH olang] lexer: add source code abstraction
Date: Fri, 04 Oct 2024 16:34:02 +0000 (UTC)	[thread overview]
Message-ID: <20241004163339.46748-1-carlos@maniero.me> (raw)

This struct stores the source code string and its filepath. It will be
used in the near future to create token locations.

Signed-off-by: Carlos Maniero <carlos@maniero.me>
---
 src/cli.c                |  2 +-
 src/cli.h                |  2 +-
 src/lexer.c              | 20 ++++++-------
 src/lexer.h              |  5 ++--
 src/main.c               | 64 ++++++++++++++++++++--------------------
 src/source_code.h        | 28 ++++++++++++++++++
 tests/unit/parser_test.c |  8 ++---
 7 files changed, 79 insertions(+), 50 deletions(-)
 create mode 100644 src/source_code.h

diff --git a/src/cli.c b/src/cli.c
index fa73b60..9d0f875 100644
--- a/src/cli.c
+++ b/src/cli.c
@@ -57,7 +57,7 @@ cli_parse_args(int argc, char **argv)
             opts.options |= CLI_OPT_SYSROOT;
             cli_opts_parse_sysroot(&opts, &args);
         } else {
-            opts.file_path = arg;
+            opts.filepath = arg;
         }
         arg = cli_args_shift(&args);
     }
diff --git a/src/cli.h b/src/cli.h
index 3f4c3a9..1a93443 100644
--- a/src/cli.h
+++ b/src/cli.h
@@ -32,7 +32,7 @@ typedef struct cli_opts
     char *arch;
     char *sysroot;
     char *compiler_path;
-    char *file_path;
+    char *filepath;
     string_view_t output_bin;
 } cli_opts_t;
 
diff --git a/src/lexer.c b/src/lexer.c
index 8de40a0..523822f 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -22,10 +22,10 @@
 #include <stdio.h>
 
 void
-lexer_init(lexer_t *lexer, string_view_t source)
+lexer_init(lexer_t *lexer, source_code_t src)
 {
     assert(lexer);
-    lexer->source = source;
+    lexer->src = src;
     lexer->cur.offset = 0;
     lexer->cur.row = 0;
     lexer->cur.bol = 0;
@@ -90,7 +90,7 @@ lexer_next_token(lexer_t *lexer, token_t *token)
                 current_char = lexer_current_char(lexer);
             }
 
-            string_view_t text = { .chars = lexer->source.chars + start_cur.offset,
+            string_view_t text = { .chars = lexer->src.code.chars + start_cur.offset,
                                    .size = lexer->cur.offset - start_cur.offset };
 
             lexer_init_str_value_token(lexer, token, lexer_str_to_token_kind(text), start_cur);
@@ -359,13 +359,13 @@ token_kind_is_binary_op(token_kind_t kind)
 static char
 lexer_current_char(lexer_t *lexer)
 {
-    return lexer->source.chars[lexer->cur.offset];
+    return lexer->src.code.chars[lexer->cur.offset];
 }
 
 static void
 lexer_skip_char(lexer_t *lexer)
 {
-    assert(lexer->cur.offset < lexer->source.size);
+    assert(lexer->cur.offset < lexer->src.code.size);
     if (lexer_current_char(lexer) == '\n') {
         lexer->cur.row++;
         lexer->cur.bol = ++lexer->cur.offset;
@@ -377,7 +377,7 @@ lexer_skip_char(lexer_t *lexer)
 static bool
 lexer_is_eof(lexer_t *lexer)
 {
-    return lexer->cur.offset >= lexer->source.size;
+    return lexer->cur.offset >= lexer->src.code.size;
 }
 
 static bool
@@ -395,14 +395,14 @@ _isspace(char c)
 static void
 lexer_init_char_value_token(lexer_t *lexer, token_t *token, token_kind_t kind)
 {
-    string_view_t str = { .chars = lexer->source.chars + lexer->cur.offset, .size = 1 };
+    string_view_t str = { .chars = lexer->src.code.chars + lexer->cur.offset, .size = 1 };
     *token = (token_t){ .kind = kind, .value = str, .cur = lexer->cur };
 }
 
 static void
 lexer_init_str_value_token(lexer_t *lexer, token_t *token, token_kind_t kind, lexer_cursor_t cur)
 {
-    string_view_t str = { .chars = lexer->source.chars + cur.offset, .size = lexer->cur.offset - cur.offset };
+    string_view_t str = { .chars = lexer->src.code.chars + cur.offset, .size = lexer->cur.offset - cur.offset };
     *token = (token_t){ .kind = kind, .value = str, .cur = cur };
 }
 
@@ -461,9 +461,9 @@ string_view_t
 lexer_get_token_line(lexer_t *lexer, token_t *token)
 {
     size_t offset = token->cur.bol;
-    string_view_t line = { .chars = lexer->source.chars + offset, .size = 0 };
+    string_view_t line = { .chars = lexer->src.code.chars + offset, .size = 0 };
 
-    while ((line.size + offset) < lexer->source.size && line.chars[line.size] != '\n' && line.chars[line.size] != 0) {
+    while ((line.size + offset) < lexer->src.code.size && line.chars[line.size] != '\n' && line.chars[line.size] != 0) {
         ++line.size;
     }
 
diff --git a/src/lexer.h b/src/lexer.h
index 1aecb11..c5a342a 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -17,6 +17,7 @@
 #ifndef LEXER_H
 #define LEXER_H
 
+#include "source_code.h"
 #include "string_view.h"
 #include <stdint.h>
 #include <stdio.h>
@@ -30,7 +31,7 @@ typedef struct lexer_cursor
 
 typedef struct lexer
 {
-    string_view_t source;
+    source_code_t src;
     lexer_cursor_t cur;
 } lexer_t;
 
@@ -92,7 +93,7 @@ typedef struct token
 } token_t;
 
 void
-lexer_init(lexer_t *lexer, string_view_t source);
+lexer_init(lexer_t *lexer, source_code_t src);
 
 void
 lexer_next_token(lexer_t *lexer, token_t *token);
diff --git a/src/main.c b/src/main.c
index 9d66455..4c8f2a5 100644
--- a/src/main.c
+++ b/src/main.c
@@ -44,10 +44,10 @@ void
 handle_codegen_linux(cli_opts_t *opts);
 
 static void
-print_token(char *file_path, token_t *token);
+print_token(char *filepath, token_t *token);
 
-string_view_t
-read_entire_file(char *file_path, arena_t *arena);
+source_code_t
+read_entire_file(char *filepath, arena_t *arena);
 
 int
 main(int argc, char **argv)
@@ -75,24 +75,24 @@ main(int argc, char **argv)
 void
 handle_dump_tokens(cli_opts_t *opts)
 {
-    if (opts->file_path == NULL) {
+    if (opts->filepath == NULL) {
         cli_print_usage(stderr, opts->compiler_path);
         exit(EXIT_FAILURE);
     }
 
     arena_t arena = arena_new(ARENA_CAPACITY);
-    string_view_t file_content = read_entire_file(opts->file_path, &arena);
+    source_code_t src = read_entire_file(opts->filepath, &arena);
 
     lexer_t lexer = { 0 };
-    lexer_init(&lexer, file_content);
+    lexer_init(&lexer, src);
 
     token_t token = { 0 };
     lexer_next_token(&lexer, &token);
     while (token.kind != TOKEN_EOF) {
-        print_token(opts->file_path, &token);
+        print_token(opts->filepath, &token);
         lexer_next_token(&lexer, &token);
     }
-    print_token(opts->file_path, &token);
+    print_token(opts->filepath, &token);
 
     arena_free(&arena);
 }
@@ -100,7 +100,7 @@ handle_dump_tokens(cli_opts_t *opts)
 void
 handle_dump_ast(cli_opts_t *opts)
 {
-    if (opts->file_path == NULL) {
+    if (opts->filepath == NULL) {
         cli_print_usage(stderr, opts->compiler_path);
         exit(EXIT_FAILURE);
     }
@@ -109,10 +109,10 @@ handle_dump_ast(cli_opts_t *opts)
     lexer_t lexer = { 0 };
     parser_t parser = { 0 };
 
-    string_view_t file_content = read_entire_file(opts->file_path, &arena);
+    source_code_t src = read_entire_file(opts->filepath, &arena);
 
-    lexer_init(&lexer, file_content);
-    parser_init(&parser, &lexer, &arena, opts->file_path);
+    lexer_init(&lexer, src);
+    parser_init(&parser, &lexer, &arena, opts->filepath);
 
     ast_node_t *ast = parser_parse_translation_unit(&parser);
 
@@ -122,7 +122,7 @@ handle_dump_ast(cli_opts_t *opts)
 void
 handle_codegen_linux(cli_opts_t *opts)
 {
-    if (opts->file_path == NULL) {
+    if (opts->filepath == NULL) {
         cli_print_usage(stderr, opts->compiler_path);
         exit(EXIT_FAILURE);
     }
@@ -131,9 +131,9 @@ handle_codegen_linux(cli_opts_t *opts)
     lexer_t lexer = { 0 };
     parser_t parser = { 0 };
 
-    string_view_t file_content = read_entire_file(opts->file_path, &arena);
-    lexer_init(&lexer, file_content);
-    parser_init(&parser, &lexer, &arena, opts->file_path);
+    source_code_t src = read_entire_file(opts->filepath, &arena);
+    lexer_init(&lexer, src);
+    parser_init(&parser, &lexer, &arena, opts->filepath);
 
     ast_node_t *ast = parser_parse_translation_unit(&parser);
 
@@ -204,48 +204,48 @@ handle_codegen_linux(cli_opts_t *opts)
     arena_free(&arena);
 }
 
-string_view_t
-read_entire_file(char *file_path, arena_t *arena)
+source_code_t
+read_entire_file(char *filepath, arena_t *arena)
 {
-    FILE *stream = fopen(file_path, "rb");
+    FILE *stream = fopen(filepath, "rb");
 
     if (stream == NULL) {
-        fprintf(stderr, "error: could not open file %s: %s\n", file_path, strerror(errno));
+        fprintf(stderr, "error: could not open file %s: %s\n", filepath, strerror(errno));
         exit(EXIT_FAILURE);
     }
 
-    string_view_t file_content = { 0 };
+    string_view_t code = { 0 };
 
     fseek(stream, 0, SEEK_END);
-    file_content.size = ftell(stream);
+    code.size = ftell(stream);
     fseek(stream, 0, SEEK_SET);
 
-    assert(file_content.size * 2 < ARENA_CAPACITY);
+    assert(code.size * 2 < ARENA_CAPACITY);
 
-    file_content.chars = (char *)arena_alloc(arena, (size_t)file_content.size);
+    code.chars = (char *)arena_alloc(arena, (size_t)code.size);
 
-    if (file_content.chars == NULL) {
-        fprintf(stderr, "error: could not read file %s: %s\n", file_path, strerror(errno));
+    if (code.chars == NULL) {
+        fprintf(stderr, "error: could not read file %s: %s\n", filepath, strerror(errno));
         exit(EXIT_FAILURE);
     }
 
-    size_t read_bytes = fread(file_content.chars, 1, file_content.size, stream);
+    size_t read_bytes = fread(code.chars, 1, code.size, stream);
 
-    if (read_bytes != file_content.size) {
-        fprintf(stderr, "error: failed to read all file bytes %s\n", file_path);
+    if (read_bytes != code.size) {
+        fprintf(stderr, "error: failed to read all file bytes %s\n", filepath);
         exit(EXIT_FAILURE);
     }
 
     fclose(stream);
 
-    return file_content;
+    return (source_code_t){ .filepath = filepath, .code = code };
 }
 
 static void
-print_token(char *file_path, token_t *token)
+print_token(char *filepath, token_t *token)
 {
     printf("%s:%lu:%lu: <%s>\n",
-           file_path,
+           filepath,
            token->cur.row + 1,
            (token->cur.offset - token->cur.bol) + 1,
            token_kind_to_cstr(token->kind));
diff --git a/src/source_code.h b/src/source_code.h
new file mode 100644
index 0000000..2c774c7
--- /dev/null
+++ b/src/source_code.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2024 olang maintainers
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+#include "string_view.h"
+
+#ifndef SOURCE_CODE_H
+#define SOURCE_CODE_H
+
+typedef struct source_code
+{
+    char *filepath;
+    string_view_t code;
+} source_code_t;
+
+#endif
diff --git a/tests/unit/parser_test.c b/tests/unit/parser_test.c
index a7c60d1..9eb56fd 100644
--- a/tests/unit/parser_test.c
+++ b/tests/unit/parser_test.c
@@ -31,15 +31,15 @@ parse_translation_unit_test(const MunitParameter params[], void *user_data_or_fi
 {
     arena_t arena = arena_new(ARENA_CAPACITY);
 
-    char *file_path = "main.0";
+    char *filepath = "main.0";
     char *source_value = "fn main(): u32 {\n\treturn 69\n}";
 
     lexer_t lexer;
-    string_view_t source = { .chars = source_value, .size = strlen(source_value) };
-    lexer_init(&lexer, source);
+    string_view_t code = { .chars = source_value, .size = strlen(source_value) };
+    lexer_init(&lexer, (source_code_t){ .code = code, .filepath = filepath });
 
     parser_t parser;
-    parser_init(&parser, &lexer, &arena, file_path);
+    parser_init(&parser, &lexer, &arena, filepath);
 
     ast_node_t *translation_unit_node = parser_parse_translation_unit(&parser);
     assert_not_null(translation_unit_node);

base-commit: 978a9914a9abc98ed9f866d528e9a094fbc1b35e
-- 
2.34.1


             reply	other threads:[~2024-10-04 16:34 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-04 16:34 Carlos Maniero [this message]
2024-10-04 16:34 ` [olang/patches/.build.yml] build success builds.sr.ht
2024-10-04 18:37 ` [PATCH olang] lexer: add source code abstraction Johnny Richard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241004163339.46748-1-carlos@maniero.me \
    --to=carlos@maniero.me \
    --cc=~johnnyrichard/olang-devel@lists.sr.ht \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

Code repositories for project(s) associated with this public inbox

	https://git.johnnyrichard.com/olang.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox