From mboxrd@z Thu Jan  1 00:00:00 1970
Authentication-Results: mail-a.sr.ht; dkim=pass header.d=johnnyrichard.com header.i=@johnnyrichard.com
Received: from out-176.mta0.migadu.com (out-176.mta0.migadu.com [IPv6:2001:41d0:1004:224b::b0])
	by mail-a.sr.ht (Postfix) with ESMTPS id 122BA200D9
	for <~johnnyrichard/olang-devel@lists.sr.ht>; Mon, 19 Feb 2024 18:52:37 +0000 (UTC)
Date: Mon, 19 Feb 2024 20:51:28 +0100
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=johnnyrichard.com;
	s=key1; t=1708368755;
	h=from:from:reply-to:subject:subject:date:date:message-id:message-id:
	 to:to:cc:cc:mime-version:mime-version:content-type:content-type:
	 in-reply-to:in-reply-to:references:references;
	bh=QFBgEgZVYOqnlPePQ8Km2AcoGCjKWR3LIKeNdE97ejQ=;
	b=tHmyidFnd7q9Mv6puVUaB4Ua35L59OJst1VQiBJ5/nF/Ktwg9wRjf1CaEH5ErA2fDTu37B
	9Uebg2dRLuEkX55un/5Fv2BLp98TGKPXievUC9DBp/2gEb6OxXCRc0ef7l3q36DEq9l9G/
	NSJINeIWpOXl0DG028dY9N9AH9hkipKQ/LUD7eAO9KXI1vRCQ3Xz1wxu8hC6eMs1TnZA6b
	9PvhImdavgTaNc3yZMFqNRjgNwjITfCfCgK/ykmDZM3gnTrL4+fLZpCQQCMVZXjbBxsj3x
	KoerFlFVYalbOVudFSPI/EYtv+yroDB8a4hIZuKuaU9YFhVI2qu0AHuxm5mylw==
X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers.
From: Johnny Richard <johnny@johnnyrichard.com>
To: Carlos Maniero <carlos@maniero.me>
Cc: ~johnnyrichard/olang-devel@lists.sr.ht
Subject: Re: [PATCH olang v3 2/2] lexer: create --dump-tokens cli command
Message-ID: <dmx5lusa4pq6vtapqsu2gqiax6qfjhw47ercxuhivlchovbiel@7yjlecggkdw4>
References: <20240219013843.15707-1-johnny@johnnyrichard.com>
 <20240219013843.15707-4-johnny@johnnyrichard.com>
 <CZ8QKMHJU0JB.2GEWC56YMGTMN@maniero.me>
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
In-Reply-To: <CZ8QKMHJU0JB.2GEWC56YMGTMN@maniero.me>
X-Migadu-Flow: FLOW_OUT
X-TUID: zIofyubhgbiH

Thanks for reviewing it.  Here are the changes you requested.  Let me
know if these changes are enough.

    Johnny Richard

-------->8--------
Subject: fixup: review comments

---
 0c.c    |   20 +++++++++++++++-----
 lexer.c |   57 ++++++++++++++++++++++++++++++++++-----------------------
 2 files changed, 49 insertions(+), 28 deletions(-)

diff --git a/src/0c.c b/src/0c.c
index e5199a7..0af9caa 100644
--- a/src/0c.c
+++ b/src/0c.c
@@ -42,6 +42,9 @@ typedef struct cli_opts
 void
 print_usage(FILE *stream, char *prog);
 
+static void
+print_token(char *file_path, token_t *token);
+
 string_view_t
 read_entire_file(char *file_path);
 
@@ -80,13 +83,10 @@ main(int argc, char **argv)
     token_t token = { 0 };
     lexer_next_token(&lexer, &token);
     while (token.kind != TOKEN_EOF) {
-        printf("%s:%lu:%lu: <%s>\n",
-               opts.file_path,
-               token.location.row + 1,
-               (token.location.offset - token.location.bol) + 1,
-               token_kind_to_cstr(token.kind));
+        print_token(opts.file_path, &token);
         lexer_next_token(&lexer, &token);
     }
+    print_token(opts.file_path, &token);
 
     free(file_content.chars);
 
@@ -136,3 +136,13 @@ read_entire_file(char *file_path)
 
     return file_content;
 }
+
+static void
+print_token(char *file_path, token_t *token)
+{
+    printf("%s:%lu:%lu: <%s>\n",
+           file_path,
+           token->location.row + 1,
+           (token->location.offset - token->location.bol) + 1,
+           token_kind_to_cstr(token->kind));
+}
diff --git a/src/lexer.c b/src/lexer.c
index 544a54d..b107762 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -31,7 +31,7 @@ lexer_init(lexer_t *lexer, string_view_t source)
 }
 
 static char
-lexer_next_char(lexer_t *lexer);
+lexer_current_char(lexer_t *lexer);
 
 static void
 lexer_skip_char(lexer_t *lexer);
@@ -46,10 +46,13 @@ static bool
 _isspace(char c);
 
 static void
-lexer_init_char_token(lexer_t *lexer, token_t *token, token_kind_t kind);
+lexer_init_char_value_token(lexer_t *lexer, token_t *token, token_kind_t kind);
 
 static void
-lexer_init_str_token(lexer_t *lexer, token_t *token, token_kind_t kind, size_t start_offset);
+lexer_init_str_value_token(lexer_t *lexer, token_t *token, token_kind_t kind, size_t start_offset);
+
+static void
+lexer_init_eof_token(lexer_t *lexer, token_t *token);
 
 static token_kind_t
 lexer_str_to_token_kind(string_view_t text);
@@ -58,16 +61,16 @@ void
 lexer_next_token(lexer_t *lexer, token_t *token)
 {
     if (lexer_is_eof(lexer)) {
-        *token = (token_t){ .kind = TOKEN_EOF };
+        lexer_init_eof_token(lexer, token);
         return;
     }
 
-    char current_char = lexer_next_char(lexer);
+    char current_char = lexer_current_char(lexer);
 
     if (_isspace(current_char)) {
         while (_isspace(current_char) && lexer_is_not_eof(lexer)) {
             lexer_skip_char(lexer);
-            current_char = lexer_next_char(lexer);
+            current_char = lexer_current_char(lexer);
         }
     }
 
@@ -76,12 +79,12 @@ lexer_next_token(lexer_t *lexer, token_t *token)
             size_t start_offset = lexer->offset;
             while (isalnum(current_char) && lexer_is_not_eof(lexer)) {
                 lexer_skip_char(lexer);
-                current_char = lexer_next_char(lexer);
+                current_char = lexer_current_char(lexer);
             }
 
             string_view_t text = { .chars = lexer->source.chars + start_offset, .size = lexer->offset - start_offset };
 
-            lexer_init_str_token(lexer, token, lexer_str_to_token_kind(text), start_offset);
+            lexer_init_str_value_token(lexer, token, lexer_str_to_token_kind(text), start_offset);
             return;
         }
 
@@ -89,46 +92,46 @@ lexer_next_token(lexer_t *lexer, token_t *token)
             size_t start_offset = lexer->offset;
             while (isdigit(current_char) && lexer_is_not_eof(lexer)) {
                 lexer_skip_char(lexer);
-                current_char = lexer_next_char(lexer);
+                current_char = lexer_current_char(lexer);
             }
 
-            lexer_init_str_token(lexer, token, TOKEN_NUMBER, start_offset);
+            lexer_init_str_value_token(lexer, token, TOKEN_NUMBER, start_offset);
             return;
         }
 
         switch (current_char) {
             case '(': {
-                lexer_init_char_token(lexer, token, TOKEN_OPAREN);
+                lexer_init_char_value_token(lexer, token, TOKEN_OPAREN);
                 lexer_skip_char(lexer);
                 return;
             }
             case ')': {
-                lexer_init_char_token(lexer, token, TOKEN_CPAREN);
+                lexer_init_char_value_token(lexer, token, TOKEN_CPAREN);
                 lexer_skip_char(lexer);
                 return;
             }
             case ':': {
-                lexer_init_char_token(lexer, token, TOKEN_COLON);
+                lexer_init_char_value_token(lexer, token, TOKEN_COLON);
                 lexer_skip_char(lexer);
                 return;
             }
             case '{': {
-                lexer_init_char_token(lexer, token, TOKEN_OCURLY);
+                lexer_init_char_value_token(lexer, token, TOKEN_OCURLY);
                 lexer_skip_char(lexer);
                 return;
             }
             case '}': {
-                lexer_init_char_token(lexer, token, TOKEN_CCURLY);
+                lexer_init_char_value_token(lexer, token, TOKEN_CCURLY);
                 lexer_skip_char(lexer);
                 return;
             }
             case '\n': {
-                lexer_init_char_token(lexer, token, TOKEN_LF);
+                lexer_init_char_value_token(lexer, token, TOKEN_LF);
                 lexer_skip_char(lexer);
                 return;
             }
             default: {
-                lexer_init_char_token(lexer, token, TOKEN_UNKNOWN);
+                lexer_init_char_value_token(lexer, token, TOKEN_UNKNOWN);
                 lexer_skip_char(lexer);
                 return;
             }
@@ -136,7 +139,7 @@ lexer_next_token(lexer_t *lexer, token_t *token)
     }
 
     if (lexer_is_eof(lexer)) {
-        *token = (token_t){ .kind = TOKEN_EOF };
+        lexer_init_eof_token(lexer, token);
         return;
     }
 }
@@ -158,7 +161,7 @@ token_kind_to_cstr(token_kind_t kind)
 }
 
 static char
-lexer_next_char(lexer_t *lexer)
+lexer_current_char(lexer_t *lexer)
 {
     return lexer->source.chars[lexer->offset];
 }
@@ -167,7 +170,7 @@ static void
 lexer_skip_char(lexer_t *lexer)
 {
     assert(lexer->offset < lexer->source.size);
-    if (lexer->source.chars[lexer->offset] == '\n') {
+    if (lexer_current_char(lexer) == '\n') {
         lexer->row++;
         lexer->bol = ++lexer->offset;
     } else {
@@ -190,11 +193,11 @@ lexer_is_not_eof(lexer_t *lexer)
 static bool
 _isspace(char c)
 {
-    return c == ' ' || c == '\f' || c == '\r' || c == '\t' || c == '\v';
+    return c != '\n' && isspace(c);
 }
 
 static void
-lexer_init_char_token(lexer_t *lexer, token_t *token, token_kind_t kind)
+lexer_init_char_value_token(lexer_t *lexer, token_t *token, token_kind_t kind)
 {
     string_view_t str = { .chars = lexer->source.chars + lexer->offset, .size = 1 };
     token_loc_t location = { .offset = lexer->offset, .row = lexer->row, .bol = lexer->bol };
@@ -202,13 +205,21 @@ lexer_init_char_token(lexer_t *lexer, token_t *token, token_kind_t kind)
 }
 
 static void
-lexer_init_str_token(lexer_t *lexer, token_t *token, token_kind_t kind, size_t start_offset)
+lexer_init_str_value_token(lexer_t *lexer, token_t *token, token_kind_t kind, size_t start_offset)
 {
     string_view_t str = { .chars = lexer->source.chars + start_offset, .size = lexer->offset - start_offset };
     token_loc_t location = { .offset = start_offset, .row = lexer->row, .bol = lexer->bol };
     *token = (token_t){ .kind = kind, .value = str, .location = location };
 }
 
+static void
+lexer_init_eof_token(lexer_t *lexer, token_t *token)
+{
+    string_view_t str = { 0 };
+    token_loc_t location = { .offset = lexer->offset, .row = lexer->row, .bol = lexer->bol };
+    *token = (token_t){ .kind = TOKEN_EOF, .value = str, .location = location };
+}
+
 static token_kind_t
 lexer_str_to_token_kind(string_view_t text)
 {