Date: Mon, 19 Feb 2024 20:51:28 +0100
From: Johnny Richard
To: Carlos Maniero
Cc: ~johnnyrichard/olang-devel@lists.sr.ht
Subject: Re: [PATCH olang v3 2/2] lexer: create --dump-tokens cli command
References: <20240219013843.15707-1-johnny@johnnyrichard.com> <20240219013843.15707-4-johnny@johnnyrichard.com>

Thanks for reviewing it. Here are the changes you requested.

Let me know if these changes are enough.

Johnny Richard

-------->8--------
Subject: fixup: review comments

---
 0c.c    | 20 +++++++++++++++-----
 lexer.c | 57 ++++++++++++++++++++++++++++++++++-----------------------
 2 files changed, 49 insertions(+), 28 deletions(-)

diff --git a/src/0c.c b/src/0c.c
index e5199a7..0af9caa 100644
--- a/src/0c.c
+++ b/src/0c.c
@@ -42,6 +42,9 @@ typedef struct cli_opts
 void
 print_usage(FILE *stream, char *prog);
 
+static void
+print_token(char *file_path, token_t *token);
+
 string_view_t
 read_entire_file(char *file_path);
 
@@ -80,13 +83,10 @@ main(int argc, char **argv)
     token_t token = { 0 };
     lexer_next_token(&lexer, &token);
     while (token.kind != TOKEN_EOF) {
-        printf("%s:%lu:%lu: <%s>\n",
-               opts.file_path,
-               token.location.row + 1,
-               (token.location.offset - token.location.bol) + 1,
-               token_kind_to_cstr(token.kind));
+        print_token(opts.file_path, &token);
         lexer_next_token(&lexer, &token);
     }
+    print_token(opts.file_path, &token);
 
     free(file_content.chars);
 
@@ -136,3 +136,13 @@ read_entire_file(char *file_path)
 
     return file_content;
 }
+
+static void
+print_token(char *file_path, token_t *token)
+{
+    printf("%s:%lu:%lu: <%s>\n",
+           file_path,
+           token->location.row + 1,
+           (token->location.offset - token->location.bol) + 1,
+           token_kind_to_cstr(token->kind));
+}
diff --git a/src/lexer.c b/src/lexer.c
index 544a54d..b107762 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -31,7 +31,7 @@ lexer_init(lexer_t *lexer, string_view_t source)
 }
 
 static char
-lexer_next_char(lexer_t *lexer);
+lexer_current_char(lexer_t *lexer);
 
 static void
 lexer_skip_char(lexer_t *lexer);
@@ -46,10 +46,13 @@ static bool
 _isspace(char c);
 
 static void
-lexer_init_char_token(lexer_t *lexer, token_t *token, token_kind_t kind);
+lexer_init_char_value_token(lexer_t *lexer, token_t *token, token_kind_t kind);
 
 static void
-lexer_init_str_token(lexer_t *lexer, token_t *token, token_kind_t kind, size_t start_offset);
+lexer_init_str_value_token(lexer_t *lexer, token_t *token, token_kind_t kind, size_t start_offset);
+
+static void
+lexer_init_eof_token(lexer_t *lexer, token_t *token);
 
 static token_kind_t
 lexer_str_to_token_kind(string_view_t text);
@@ -58,16 +61,16 @@ void
 lexer_next_token(lexer_t *lexer, token_t *token)
 {
     if (lexer_is_eof(lexer)) {
-        *token = (token_t){ .kind = TOKEN_EOF };
+        lexer_init_eof_token(lexer, token);
         return;
     }
 
-    char current_char = lexer_next_char(lexer);
+    char current_char = lexer_current_char(lexer);
 
     if (_isspace(current_char)) {
         while (_isspace(current_char) && lexer_is_not_eof(lexer)) {
             lexer_skip_char(lexer);
-            current_char = lexer_next_char(lexer);
+            current_char = lexer_current_char(lexer);
         }
     }
 
@@ -76,12 +79,12 @@ lexer_next_token(lexer_t *lexer, token_t *token)
         size_t start_offset = lexer->offset;
         while (isalnum(current_char) && lexer_is_not_eof(lexer)) {
             lexer_skip_char(lexer);
-            current_char = lexer_next_char(lexer);
+            current_char = lexer_current_char(lexer);
         }
 
         string_view_t text = { .chars = lexer->source.chars + start_offset, .size = lexer->offset - start_offset };
 
-        lexer_init_str_token(lexer, token, lexer_str_to_token_kind(text), start_offset);
+        lexer_init_str_value_token(lexer, token, lexer_str_to_token_kind(text), start_offset);
         return;
     }
 
@@ -89,46 +92,46 @@ lexer_next_token(lexer_t *lexer, token_t *token)
         size_t start_offset = lexer->offset;
         while (isdigit(current_char) && lexer_is_not_eof(lexer)) {
             lexer_skip_char(lexer);
-            current_char = lexer_next_char(lexer);
+            current_char = lexer_current_char(lexer);
         }
 
-        lexer_init_str_token(lexer, token, TOKEN_NUMBER, start_offset);
+        lexer_init_str_value_token(lexer, token, TOKEN_NUMBER, start_offset);
         return;
     }
 
     switch (current_char) {
         case '(': {
-            lexer_init_char_token(lexer, token, TOKEN_OPAREN);
+            lexer_init_char_value_token(lexer, token, TOKEN_OPAREN);
             lexer_skip_char(lexer);
             return;
         }
         case ')': {
-            lexer_init_char_token(lexer, token, TOKEN_CPAREN);
+            lexer_init_char_value_token(lexer, token, TOKEN_CPAREN);
             lexer_skip_char(lexer);
             return;
         }
         case ':': {
-            lexer_init_char_token(lexer, token, TOKEN_COLON);
+            lexer_init_char_value_token(lexer, token, TOKEN_COLON);
             lexer_skip_char(lexer);
             return;
         }
         case '{': {
-            lexer_init_char_token(lexer, token, TOKEN_OCURLY);
+            lexer_init_char_value_token(lexer, token, TOKEN_OCURLY);
             lexer_skip_char(lexer);
             return;
         }
        case '}': {
-            lexer_init_char_token(lexer, token, TOKEN_CCURLY);
+            lexer_init_char_value_token(lexer, token, TOKEN_CCURLY);
             lexer_skip_char(lexer);
             return;
         }
         case '\n': {
-            lexer_init_char_token(lexer, token, TOKEN_LF);
+            lexer_init_char_value_token(lexer, token, TOKEN_LF);
             lexer_skip_char(lexer);
             return;
         }
         default: {
-            lexer_init_char_token(lexer, token, TOKEN_UNKNOWN);
+            lexer_init_char_value_token(lexer, token, TOKEN_UNKNOWN);
             lexer_skip_char(lexer);
             return;
         }
@@ -136,7 +139,7 @@ lexer_next_token(lexer_t *lexer, token_t *token)
     }
 
     if (lexer_is_eof(lexer)) {
-        *token = (token_t){ .kind = TOKEN_EOF };
+        lexer_init_eof_token(lexer, token);
         return;
     }
 }
@@ -158,7 +161,7 @@ token_kind_to_cstr(token_kind_t kind)
 }
 
 static char
-lexer_next_char(lexer_t *lexer)
+lexer_current_char(lexer_t *lexer)
 {
     return lexer->source.chars[lexer->offset];
 }
@@ -167,7 +170,7 @@ static void
 lexer_skip_char(lexer_t *lexer)
 {
     assert(lexer->offset < lexer->source.size);
-    if (lexer->source.chars[lexer->offset] == '\n') {
+    if (lexer_current_char(lexer) == '\n') {
         lexer->row++;
         lexer->bol = ++lexer->offset;
     } else {
@@ -190,11 +193,11 @@ lexer_is_not_eof(lexer_t *lexer)
 static bool
 _isspace(char c)
 {
-    return c == ' ' || c == '\f' || c == '\r' || c == '\t' || c == '\v';
+    return c != '\n' && isspace(c);
 }
 
 static void
-lexer_init_char_token(lexer_t *lexer, token_t *token, token_kind_t kind)
+lexer_init_char_value_token(lexer_t *lexer, token_t *token, token_kind_t kind)
 {
     string_view_t str = { .chars = lexer->source.chars + lexer->offset, .size = 1 };
     token_loc_t location = { .offset = lexer->offset, .row = lexer->row, .bol = lexer->bol };
@@ -202,13 +205,21 @@ lexer_init_char_token(lexer_t *lexer, token_t *token, token_kind_t kind)
 }
 
 static void
-lexer_init_str_token(lexer_t *lexer, token_t *token, token_kind_t kind, size_t start_offset)
+lexer_init_str_value_token(lexer_t *lexer, token_t *token, token_kind_t kind, size_t start_offset)
 {
     string_view_t str = { .chars = lexer->source.chars + start_offset, .size = lexer->offset - start_offset };
     token_loc_t location = { .offset = start_offset, .row = lexer->row, .bol = lexer->bol };
     *token = (token_t){ .kind = kind, .value = str, .location = location };
 }
 
+static void
+lexer_init_eof_token(lexer_t *lexer, token_t *token)
+{
+    string_view_t str = { 0 };
+    token_loc_t location = { .offset = lexer->offset, .row = lexer->row, .bol = lexer->bol };
+    *token = (token_t){ .kind = TOKEN_EOF, .value = str, .location = location };
+}
+
 static token_kind_t
 lexer_str_to_token_kind(string_view_t text)
 {
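
Side note, not part of the patch: a small standalone sketch of the two behaviour points above, namely that _isspace now defers to isspace(3) while keeping '\n' out (so line feeds still surface as TOKEN_LF), and that print_token reports 1-based positions as row + 1 and (offset - bol) + 1. The loc_t struct here is a hypothetical stand-in for the project's token_loc_t, used only for illustration.

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>

/* Essentially the same predicate as the fixup (with an unsigned char
 * cast for strict ctype correctness): any whitespace except '\n',
 * so the newline is left in place and later emitted as TOKEN_LF. */
static bool
_isspace(char c)
{
    return c != '\n' && isspace((unsigned char)c);
}

/* Hypothetical stand-in for token_loc_t: absolute offset, row,
 * and offset of the beginning of the current line (bol). */
typedef struct
{
    size_t offset;
    size_t row;
    size_t bol;
} loc_t;

int
main(void)
{
    printf("_isspace(' ')  -> %d\n", _isspace(' '));  /* 1: skipped by the lexer */
    printf("_isspace('\\n') -> %d\n", _isspace('\n')); /* 0: kept, becomes TOKEN_LF */

    /* Same arithmetic print_token uses for 1-based row:column. */
    loc_t loc = { .offset = 12, .row = 1, .bol = 10 };
    printf("%zu:%zu\n", loc.row + 1, (loc.offset - loc.bol) + 1); /* prints 2:3 */
    return 0;
}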