public inbox for ~johnnyrichard/olang-devel@lists.sr.ht
 help / color / mirror / code / Atom feed
From: Johnny Richard <johnny@johnnyrichard.com>
To: Carlos Maniero <carlos@maniero.me>
Cc: ~johnnyrichard/olang-devel@lists.sr.ht
Subject: Re: [PATCH olang v3 2/2] lexer: create --dump-tokens cli command
Date: Mon, 19 Feb 2024 20:51:28 +0100	[thread overview]
Message-ID: <dmx5lusa4pq6vtapqsu2gqiax6qfjhw47ercxuhivlchovbiel@7yjlecggkdw4> (raw)
In-Reply-To: <CZ8QKMHJU0JB.2GEWC56YMGTMN@maniero.me>

Thanks for reviewing it.  Here I have the changes you requested.  Let me
know if these changes are enough.

    Johnny Richard

-------->8--------
Subject: fixup: review comments

---
 0c.c    |   20 +++++++++++++++-----
 lexer.c |   57 ++++++++++++++++++++++++++++++++++-----------------------
 2 files changed, 49 insertions(+), 28 deletions(-)

diff --git a/src/0c.c b/src/0c.c
index e5199a7..0af9caa 100644
--- a/src/0c.c
+++ b/src/0c.c
@@ -42,6 +42,9 @@ typedef struct cli_opts
 void
 print_usage(FILE *stream, char *prog);
 
+static void
+print_token(char *file_path, token_t *token);
+
 string_view_t
 read_entire_file(char *file_path);
 
@@ -80,13 +83,10 @@ main(int argc, char **argv)
     token_t token = { 0 };
     lexer_next_token(&lexer, &token);
     while (token.kind != TOKEN_EOF) {
-        printf("%s:%lu:%lu: <%s>\n",
-               opts.file_path,
-               token.location.row + 1,
-               (token.location.offset - token.location.bol) + 1,
-               token_kind_to_cstr(token.kind));
+        print_token(opts.file_path, &token);
         lexer_next_token(&lexer, &token);
     }
+    print_token(opts.file_path, &token);
 
     free(file_content.chars);
 
@@ -136,3 +136,13 @@ read_entire_file(char *file_path)
 
     return file_content;
 }
+
+static void
+print_token(char *file_path, token_t *token)
+{
+    printf("%s:%lu:%lu: <%s>\n",
+           file_path,
+           token->location.row + 1,
+           (token->location.offset - token->location.bol) + 1,
+           token_kind_to_cstr(token->kind));
+}
diff --git a/src/lexer.c b/src/lexer.c
index 544a54d..b107762 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -31,7 +31,7 @@ lexer_init(lexer_t *lexer, string_view_t source)
 }
 
 static char
-lexer_next_char(lexer_t *lexer);
+lexer_current_char(lexer_t *lexer);
 
 static void
 lexer_skip_char(lexer_t *lexer);
@@ -46,10 +46,13 @@ static bool
 _isspace(char c);
 
 static void
-lexer_init_char_token(lexer_t *lexer, token_t *token, token_kind_t kind);
+lexer_init_char_value_token(lexer_t *lexer, token_t *token, token_kind_t kind);
 
 static void
-lexer_init_str_token(lexer_t *lexer, token_t *token, token_kind_t kind, size_t start_offset);
+lexer_init_str_value_token(lexer_t *lexer, token_t *token, token_kind_t kind, size_t start_offset);
+
+static void
+lexer_init_eof_token(lexer_t *lexer, token_t *token);
 
 static token_kind_t
 lexer_str_to_token_kind(string_view_t text);
@@ -58,16 +61,16 @@ void
 lexer_next_token(lexer_t *lexer, token_t *token)
 {
     if (lexer_is_eof(lexer)) {
-        *token = (token_t){ .kind = TOKEN_EOF };
+        lexer_init_eof_token(lexer, token);
         return;
     }
 
-    char current_char = lexer_next_char(lexer);
+    char current_char = lexer_current_char(lexer);
 
     if (_isspace(current_char)) {
         while (_isspace(current_char) && lexer_is_not_eof(lexer)) {
             lexer_skip_char(lexer);
-            current_char = lexer_next_char(lexer);
+            current_char = lexer_current_char(lexer);
         }
     }
 
@@ -76,12 +79,12 @@ lexer_next_token(lexer_t *lexer, token_t *token)
             size_t start_offset = lexer->offset;
             while (isalnum(current_char) && lexer_is_not_eof(lexer)) {
                 lexer_skip_char(lexer);
-                current_char = lexer_next_char(lexer);
+                current_char = lexer_current_char(lexer);
             }
 
             string_view_t text = { .chars = lexer->source.chars + start_offset, .size = lexer->offset - start_offset };
 
-            lexer_init_str_token(lexer, token, lexer_str_to_token_kind(text), start_offset);
+            lexer_init_str_value_token(lexer, token, lexer_str_to_token_kind(text), start_offset);
             return;
         }
 
@@ -89,46 +92,46 @@ lexer_next_token(lexer_t *lexer, token_t *token)
             size_t start_offset = lexer->offset;
             while (isdigit(current_char) && lexer_is_not_eof(lexer)) {
                 lexer_skip_char(lexer);
-                current_char = lexer_next_char(lexer);
+                current_char = lexer_current_char(lexer);
             }
 
-            lexer_init_str_token(lexer, token, TOKEN_NUMBER, start_offset);
+            lexer_init_str_value_token(lexer, token, TOKEN_NUMBER, start_offset);
             return;
         }
 
         switch (current_char) {
             case '(': {
-                lexer_init_char_token(lexer, token, TOKEN_OPAREN);
+                lexer_init_char_value_token(lexer, token, TOKEN_OPAREN);
                 lexer_skip_char(lexer);
                 return;
             }
             case ')': {
-                lexer_init_char_token(lexer, token, TOKEN_CPAREN);
+                lexer_init_char_value_token(lexer, token, TOKEN_CPAREN);
                 lexer_skip_char(lexer);
                 return;
             }
             case ':': {
-                lexer_init_char_token(lexer, token, TOKEN_COLON);
+                lexer_init_char_value_token(lexer, token, TOKEN_COLON);
                 lexer_skip_char(lexer);
                 return;
             }
             case '{': {
-                lexer_init_char_token(lexer, token, TOKEN_OCURLY);
+                lexer_init_char_value_token(lexer, token, TOKEN_OCURLY);
                 lexer_skip_char(lexer);
                 return;
             }
             case '}': {
-                lexer_init_char_token(lexer, token, TOKEN_CCURLY);
+                lexer_init_char_value_token(lexer, token, TOKEN_CCURLY);
                 lexer_skip_char(lexer);
                 return;
             }
             case '\n': {
-                lexer_init_char_token(lexer, token, TOKEN_LF);
+                lexer_init_char_value_token(lexer, token, TOKEN_LF);
                 lexer_skip_char(lexer);
                 return;
             }
             default: {
-                lexer_init_char_token(lexer, token, TOKEN_UNKNOWN);
+                lexer_init_char_value_token(lexer, token, TOKEN_UNKNOWN);
                 lexer_skip_char(lexer);
                 return;
             }
@@ -136,7 +139,7 @@ lexer_next_token(lexer_t *lexer, token_t *token)
     }
 
     if (lexer_is_eof(lexer)) {
-        *token = (token_t){ .kind = TOKEN_EOF };
+        lexer_init_eof_token(lexer, token);
         return;
     }
 }
@@ -158,7 +161,7 @@ token_kind_to_cstr(token_kind_t kind)
 }
 
 static char
-lexer_next_char(lexer_t *lexer)
+lexer_current_char(lexer_t *lexer)
 {
     return lexer->source.chars[lexer->offset];
 }
@@ -167,7 +170,7 @@ static void
 lexer_skip_char(lexer_t *lexer)
 {
     assert(lexer->offset < lexer->source.size);
-    if (lexer->source.chars[lexer->offset] == '\n') {
+    if (lexer_current_char(lexer) == '\n') {
         lexer->row++;
         lexer->bol = ++lexer->offset;
     } else {
@@ -190,11 +193,11 @@ lexer_is_not_eof(lexer_t *lexer)
 static bool
 _isspace(char c)
 {
-    return c == ' ' || c == '\f' || c == '\r' || c == '\t' || c == '\v';
+    return c != '\n' && isspace(c);
 }
 
 static void
-lexer_init_char_token(lexer_t *lexer, token_t *token, token_kind_t kind)
+lexer_init_char_value_token(lexer_t *lexer, token_t *token, token_kind_t kind)
 {
     string_view_t str = { .chars = lexer->source.chars + lexer->offset, .size = 1 };
     token_loc_t location = { .offset = lexer->offset, .row = lexer->row, .bol = lexer->bol };
@@ -202,13 +205,21 @@ lexer_init_char_token(lexer_t *lexer, token_t *token, token_kind_t kind)
 }
 
 static void
-lexer_init_str_token(lexer_t *lexer, token_t *token, token_kind_t kind, size_t start_offset)
+lexer_init_str_value_token(lexer_t *lexer, token_t *token, token_kind_t kind, size_t start_offset)
 {
     string_view_t str = { .chars = lexer->source.chars + start_offset, .size = lexer->offset - start_offset };
     token_loc_t location = { .offset = start_offset, .row = lexer->row, .bol = lexer->bol };
     *token = (token_t){ .kind = kind, .value = str, .location = location };
 }
 
+static void
+lexer_init_eof_token(lexer_t *lexer, token_t *token)
+{
+    string_view_t str = { 0 };
+    token_loc_t location = { .offset = lexer->offset, .row = lexer->row, .bol = lexer->bol };
+    *token = (token_t){ .kind = TOKEN_EOF, .value = str, .location = location };
+}
+
 static token_kind_t
 lexer_str_to_token_kind(string_view_t text)
 {

  reply	other threads:[~2024-02-19 18:52 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-19  1:38 [PATCH olang v3 0/2] Create --dump-tokens on compiler cli Johnny Richard
2024-02-19  1:38 ` [PATCH olang v3 1/2] utils: create string_view data structure Johnny Richard
2024-02-19  1:44 ` [PATCH olang v3 2/2] lexer: create --dump-tokens cli command Johnny Richard
2024-02-19  0:47   ` [olang/patches/.build.yml] build success builds.sr.ht
2024-02-19  3:30   ` [PATCH olang v3 2/2] lexer: create --dump-tokens cli command Carlos Maniero
2024-02-19 19:51     ` Johnny Richard [this message]
2024-02-19 19:17       ` Carlos Maniero
2024-02-19 10:01   ` Carlos Maniero
2024-02-19 21:07 ` [PATCH olang v3 0/2] Create --dump-tokens on compiler cli Johnny Richard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=dmx5lusa4pq6vtapqsu2gqiax6qfjhw47ercxuhivlchovbiel@7yjlecggkdw4 \
    --to=johnny@johnnyrichard.com \
    --cc=carlos@maniero.me \
    --cc=~johnnyrichard/olang-devel@lists.sr.ht \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.johnnyrichard.com/olang.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox