From: Carlos Maniero <carlos@maniero.me>
To: ~johnnyrichard/olang-devel@lists.sr.ht
Cc: Carlos Maniero <carlos@maniero.me>
Subject: [PATCH olang] lexer: add source code abstraction
Date: Fri, 04 Oct 2024 16:34:02 +0000 (UTC) [thread overview]
Message-ID: <20241004163339.46748-1-carlos@maniero.me> (raw)
This struct stores the source code string and its filepath. This will be
used in the near future to create token locations.
Signed-off-by: Carlos Maniero <carlos@maniero.me>
---
src/cli.c | 2 +-
src/cli.h | 2 +-
src/lexer.c | 20 ++++++-------
src/lexer.h | 5 ++--
src/main.c | 64 ++++++++++++++++++++--------------------
src/source_code.h | 28 ++++++++++++++++++
tests/unit/parser_test.c | 8 ++---
7 files changed, 79 insertions(+), 50 deletions(-)
create mode 100644 src/source_code.h
diff --git a/src/cli.c b/src/cli.c
index fa73b60..9d0f875 100644
--- a/src/cli.c
+++ b/src/cli.c
@@ -57,7 +57,7 @@ cli_parse_args(int argc, char **argv)
opts.options |= CLI_OPT_SYSROOT;
cli_opts_parse_sysroot(&opts, &args);
} else {
- opts.file_path = arg;
+ opts.filepath = arg;
}
arg = cli_args_shift(&args);
}
diff --git a/src/cli.h b/src/cli.h
index 3f4c3a9..1a93443 100644
--- a/src/cli.h
+++ b/src/cli.h
@@ -32,7 +32,7 @@ typedef struct cli_opts
char *arch;
char *sysroot;
char *compiler_path;
- char *file_path;
+ char *filepath;
string_view_t output_bin;
} cli_opts_t;
diff --git a/src/lexer.c b/src/lexer.c
index 8de40a0..523822f 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -22,10 +22,10 @@
#include <stdio.h>
void
-lexer_init(lexer_t *lexer, string_view_t source)
+lexer_init(lexer_t *lexer, source_code_t src)
{
assert(lexer);
- lexer->source = source;
+ lexer->src = src;
lexer->cur.offset = 0;
lexer->cur.row = 0;
lexer->cur.bol = 0;
@@ -90,7 +90,7 @@ lexer_next_token(lexer_t *lexer, token_t *token)
current_char = lexer_current_char(lexer);
}
- string_view_t text = { .chars = lexer->source.chars + start_cur.offset,
+ string_view_t text = { .chars = lexer->src.code.chars + start_cur.offset,
.size = lexer->cur.offset - start_cur.offset };
lexer_init_str_value_token(lexer, token, lexer_str_to_token_kind(text), start_cur);
@@ -359,13 +359,13 @@ token_kind_is_binary_op(token_kind_t kind)
static char
lexer_current_char(lexer_t *lexer)
{
- return lexer->source.chars[lexer->cur.offset];
+ return lexer->src.code.chars[lexer->cur.offset];
}
static void
lexer_skip_char(lexer_t *lexer)
{
- assert(lexer->cur.offset < lexer->source.size);
+ assert(lexer->cur.offset < lexer->src.code.size);
if (lexer_current_char(lexer) == '\n') {
lexer->cur.row++;
lexer->cur.bol = ++lexer->cur.offset;
@@ -377,7 +377,7 @@ lexer_skip_char(lexer_t *lexer)
static bool
lexer_is_eof(lexer_t *lexer)
{
- return lexer->cur.offset >= lexer->source.size;
+ return lexer->cur.offset >= lexer->src.code.size;
}
static bool
@@ -395,14 +395,14 @@ _isspace(char c)
static void
lexer_init_char_value_token(lexer_t *lexer, token_t *token, token_kind_t kind)
{
- string_view_t str = { .chars = lexer->source.chars + lexer->cur.offset, .size = 1 };
+ string_view_t str = { .chars = lexer->src.code.chars + lexer->cur.offset, .size = 1 };
*token = (token_t){ .kind = kind, .value = str, .cur = lexer->cur };
}
static void
lexer_init_str_value_token(lexer_t *lexer, token_t *token, token_kind_t kind, lexer_cursor_t cur)
{
- string_view_t str = { .chars = lexer->source.chars + cur.offset, .size = lexer->cur.offset - cur.offset };
+ string_view_t str = { .chars = lexer->src.code.chars + cur.offset, .size = lexer->cur.offset - cur.offset };
*token = (token_t){ .kind = kind, .value = str, .cur = cur };
}
@@ -461,9 +461,9 @@ string_view_t
lexer_get_token_line(lexer_t *lexer, token_t *token)
{
size_t offset = token->cur.bol;
- string_view_t line = { .chars = lexer->source.chars + offset, .size = 0 };
+ string_view_t line = { .chars = lexer->src.code.chars + offset, .size = 0 };
- while ((line.size + offset) < lexer->source.size && line.chars[line.size] != '\n' && line.chars[line.size] != 0) {
+ while ((line.size + offset) < lexer->src.code.size && line.chars[line.size] != '\n' && line.chars[line.size] != 0) {
++line.size;
}
diff --git a/src/lexer.h b/src/lexer.h
index 1aecb11..c5a342a 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -17,6 +17,7 @@
#ifndef LEXER_H
#define LEXER_H
+#include "source_code.h"
#include "string_view.h"
#include <stdint.h>
#include <stdio.h>
@@ -30,7 +31,7 @@ typedef struct lexer_cursor
typedef struct lexer
{
- string_view_t source;
+ source_code_t src;
lexer_cursor_t cur;
} lexer_t;
@@ -92,7 +93,7 @@ typedef struct token
} token_t;
void
-lexer_init(lexer_t *lexer, string_view_t source);
+lexer_init(lexer_t *lexer, source_code_t src);
void
lexer_next_token(lexer_t *lexer, token_t *token);
diff --git a/src/main.c b/src/main.c
index 9d66455..4c8f2a5 100644
--- a/src/main.c
+++ b/src/main.c
@@ -44,10 +44,10 @@ void
handle_codegen_linux(cli_opts_t *opts);
static void
-print_token(char *file_path, token_t *token);
+print_token(char *filepath, token_t *token);
-string_view_t
-read_entire_file(char *file_path, arena_t *arena);
+source_code_t
+read_entire_file(char *filepath, arena_t *arena);
int
main(int argc, char **argv)
@@ -75,24 +75,24 @@ main(int argc, char **argv)
void
handle_dump_tokens(cli_opts_t *opts)
{
- if (opts->file_path == NULL) {
+ if (opts->filepath == NULL) {
cli_print_usage(stderr, opts->compiler_path);
exit(EXIT_FAILURE);
}
arena_t arena = arena_new(ARENA_CAPACITY);
- string_view_t file_content = read_entire_file(opts->file_path, &arena);
+ source_code_t src = read_entire_file(opts->filepath, &arena);
lexer_t lexer = { 0 };
- lexer_init(&lexer, file_content);
+ lexer_init(&lexer, src);
token_t token = { 0 };
lexer_next_token(&lexer, &token);
while (token.kind != TOKEN_EOF) {
- print_token(opts->file_path, &token);
+ print_token(opts->filepath, &token);
lexer_next_token(&lexer, &token);
}
- print_token(opts->file_path, &token);
+ print_token(opts->filepath, &token);
arena_free(&arena);
}
@@ -100,7 +100,7 @@ handle_dump_tokens(cli_opts_t *opts)
void
handle_dump_ast(cli_opts_t *opts)
{
- if (opts->file_path == NULL) {
+ if (opts->filepath == NULL) {
cli_print_usage(stderr, opts->compiler_path);
exit(EXIT_FAILURE);
}
@@ -109,10 +109,10 @@ handle_dump_ast(cli_opts_t *opts)
lexer_t lexer = { 0 };
parser_t parser = { 0 };
- string_view_t file_content = read_entire_file(opts->file_path, &arena);
+ source_code_t src = read_entire_file(opts->filepath, &arena);
- lexer_init(&lexer, file_content);
- parser_init(&parser, &lexer, &arena, opts->file_path);
+ lexer_init(&lexer, src);
+ parser_init(&parser, &lexer, &arena, opts->filepath);
ast_node_t *ast = parser_parse_translation_unit(&parser);
@@ -122,7 +122,7 @@ handle_dump_ast(cli_opts_t *opts)
void
handle_codegen_linux(cli_opts_t *opts)
{
- if (opts->file_path == NULL) {
+ if (opts->filepath == NULL) {
cli_print_usage(stderr, opts->compiler_path);
exit(EXIT_FAILURE);
}
@@ -131,9 +131,9 @@ handle_codegen_linux(cli_opts_t *opts)
lexer_t lexer = { 0 };
parser_t parser = { 0 };
- string_view_t file_content = read_entire_file(opts->file_path, &arena);
- lexer_init(&lexer, file_content);
- parser_init(&parser, &lexer, &arena, opts->file_path);
+ source_code_t src = read_entire_file(opts->filepath, &arena);
+ lexer_init(&lexer, src);
+ parser_init(&parser, &lexer, &arena, opts->filepath);
ast_node_t *ast = parser_parse_translation_unit(&parser);
@@ -204,48 +204,48 @@ handle_codegen_linux(cli_opts_t *opts)
arena_free(&arena);
}
-string_view_t
-read_entire_file(char *file_path, arena_t *arena)
+source_code_t
+read_entire_file(char *filepath, arena_t *arena)
{
- FILE *stream = fopen(file_path, "rb");
+ FILE *stream = fopen(filepath, "rb");
if (stream == NULL) {
- fprintf(stderr, "error: could not open file %s: %s\n", file_path, strerror(errno));
+ fprintf(stderr, "error: could not open file %s: %s\n", filepath, strerror(errno));
exit(EXIT_FAILURE);
}
- string_view_t file_content = { 0 };
+ string_view_t code = { 0 };
fseek(stream, 0, SEEK_END);
- file_content.size = ftell(stream);
+ code.size = ftell(stream);
fseek(stream, 0, SEEK_SET);
- assert(file_content.size * 2 < ARENA_CAPACITY);
+ assert(code.size * 2 < ARENA_CAPACITY);
- file_content.chars = (char *)arena_alloc(arena, (size_t)file_content.size);
+ code.chars = (char *)arena_alloc(arena, (size_t)code.size);
- if (file_content.chars == NULL) {
- fprintf(stderr, "error: could not read file %s: %s\n", file_path, strerror(errno));
+ if (code.chars == NULL) {
+ fprintf(stderr, "error: could not read file %s: %s\n", filepath, strerror(errno));
exit(EXIT_FAILURE);
}
- size_t read_bytes = fread(file_content.chars, 1, file_content.size, stream);
+ size_t read_bytes = fread(code.chars, 1, code.size, stream);
- if (read_bytes != file_content.size) {
- fprintf(stderr, "error: failed to read all file bytes %s\n", file_path);
+ if (read_bytes != code.size) {
+ fprintf(stderr, "error: failed to read all file bytes %s\n", filepath);
exit(EXIT_FAILURE);
}
fclose(stream);
- return file_content;
+ return (source_code_t){ .filepath = filepath, .code = code };
}
static void
-print_token(char *file_path, token_t *token)
+print_token(char *filepath, token_t *token)
{
printf("%s:%lu:%lu: <%s>\n",
- file_path,
+ filepath,
token->cur.row + 1,
(token->cur.offset - token->cur.bol) + 1,
token_kind_to_cstr(token->kind));
diff --git a/src/source_code.h b/src/source_code.h
new file mode 100644
index 0000000..2c774c7
--- /dev/null
+++ b/src/source_code.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2024 olang maintainers
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+#include "string_view.h"
+
+#ifndef SOURCE_CODE_H
+#define SOURCE_CODE_H
+
+typedef struct source_code
+{
+ char *filepath;
+ string_view_t code;
+} source_code_t;
+
+#endif
diff --git a/tests/unit/parser_test.c b/tests/unit/parser_test.c
index a7c60d1..9eb56fd 100644
--- a/tests/unit/parser_test.c
+++ b/tests/unit/parser_test.c
@@ -31,15 +31,15 @@ parse_translation_unit_test(const MunitParameter params[], void *user_data_or_fi
{
arena_t arena = arena_new(ARENA_CAPACITY);
- char *file_path = "main.0";
+ char *filepath = "main.0";
char *source_value = "fn main(): u32 {\n\treturn 69\n}";
lexer_t lexer;
- string_view_t source = { .chars = source_value, .size = strlen(source_value) };
- lexer_init(&lexer, source);
+ string_view_t code = { .chars = source_value, .size = strlen(source_value) };
+ lexer_init(&lexer, (source_code_t){ .code = code, .filepath = filepath });
parser_t parser;
- parser_init(&parser, &lexer, &arena, file_path);
+ parser_init(&parser, &lexer, &arena, filepath);
ast_node_t *translation_unit_node = parser_parse_translation_unit(&parser);
assert_not_null(translation_unit_node);
base-commit: 978a9914a9abc98ed9f866d528e9a094fbc1b35e
--
2.34.1
next reply other threads:[~2024-10-04 16:34 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-04 16:34 Carlos Maniero [this message]
2024-10-04 16:34 ` [olang/patches/.build.yml] build success builds.sr.ht
2024-10-04 18:37 ` [PATCH olang] lexer: add source code abstraction Johnny Richard
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241004163339.46748-1-carlos@maniero.me \
--to=carlos@maniero.me \
--cc=~johnnyrichard/olang-devel@lists.sr.ht \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.johnnyrichard.com/olang.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox