public inbox for ~johnnyrichard/olang-devel@lists.sr.ht
 help / color / mirror / code / Atom feed
From: Johnny Richard <johnny@johnnyrichard.com>
To: ~johnnyrichard/olang-devel@lists.sr.ht
Cc: Johnny Richard <johnny@johnnyrichard.com>
Subject: [PATCH olang v2 1/3] lexer: add tokenize support to binary op tokens
Date: Sun, 17 Mar 2024 22:29:22 +0100	[thread overview]
Message-ID: <20240317213638.131057-2-johnny@johnnyrichard.com> (raw)
In-Reply-To: <20240317213638.131057-1-johnny@johnnyrichard.com>

In order to tokenize the not-equals comparison operator (`!=`), I also added
the unary not token (`!`).

Signed-off-by: Johnny Richard <johnny@johnnyrichard.com>
---
v2: Add support for tokenizing every binary operation token

 examples/expression.ol        |   3 +
 src/lexer.c                   | 182 ++++++++++++++++++++++++++++++++--
 src/lexer.h                   |  26 +++++
 tests/integration/cli_test.c  |  56 ++++++++++-
 tests/integration/proc_exec.h |   3 +-
 5 files changed, 261 insertions(+), 9 deletions(-)
 create mode 100644 examples/expression.ol

diff --git a/examples/expression.ol b/examples/expression.ol
new file mode 100644
index 0000000..efa4ab5
--- /dev/null
+++ b/examples/expression.ol
@@ -0,0 +1,3 @@
+fn main(): u32 {
+  return (10 + 1 * 2) - (10 - (1 + 1) / 2)
+}
diff --git a/src/lexer.c b/src/lexer.c
index dd6f11d..14c2962 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -37,6 +37,9 @@ lexer_current_char(lexer_t *lexer);
 static void
 lexer_skip_char(lexer_t *lexer);
 
+static char
+lexer_peek_next_char(lexer_t *lexer);
+
 static bool
 lexer_is_eof(lexer_t *lexer);
 
@@ -101,6 +104,118 @@ lexer_next_token(lexer_t *lexer, token_t *token)
         }
 
         switch (current_char) {
+            case '=': {
+                size_t start_offset = lexer->offset;
+
+                if (lexer_peek_next_char(lexer) == '=') {
+                    lexer_skip_char(lexer);
+                    lexer_skip_char(lexer);
+                    lexer_init_str_value_token(lexer, token, TOKEN_CMP_EQ, start_offset);
+                    return;
+                }
+
+                lexer_init_char_value_token(lexer, token, TOKEN_EQ);
+                lexer_skip_char(lexer);
+                return;
+            }
+            case '!': {
+                size_t start_offset = lexer->offset;
+
+                if (lexer_peek_next_char(lexer) == '=') {
+                    lexer_skip_char(lexer);
+                    lexer_skip_char(lexer);
+                    lexer_init_str_value_token(lexer, token, TOKEN_CMP_NEQ, start_offset);
+                    return;
+                }
+
+                lexer_init_char_value_token(lexer, token, TOKEN_BANG);
+                lexer_skip_char(lexer);
+                return;
+            }
+            case '&': {
+                size_t start_offset = lexer->offset;
+
+                if (lexer_peek_next_char(lexer) == '&') {
+                    lexer_skip_char(lexer);
+                    lexer_skip_char(lexer);
+                    lexer_init_str_value_token(lexer, token, TOKEN_LOGICAL_AND, start_offset);
+                    return;
+                }
+
+                lexer_init_char_value_token(lexer, token, TOKEN_AND);
+                lexer_skip_char(lexer);
+                return;
+            }
+            case '|': {
+                size_t start_offset = lexer->offset;
+
+                if (lexer_peek_next_char(lexer) == '|') {
+                    lexer_skip_char(lexer);
+                    lexer_skip_char(lexer);
+                    lexer_init_str_value_token(lexer, token, TOKEN_LOGICAL_OR, start_offset);
+                    return;
+                }
+
+                lexer_init_char_value_token(lexer, token, TOKEN_PIPE);
+                lexer_skip_char(lexer);
+                return;
+            }
+            case '<': {
+                size_t start_offset = lexer->offset;
+
+                switch (lexer_peek_next_char(lexer)) {
+                    case '<': {
+                        lexer_skip_char(lexer);
+                        lexer_skip_char(lexer);
+                        lexer_init_str_value_token(lexer, token, TOKEN_BITWISE_LSHIFT, start_offset);
+                        return;
+                    }
+                    case '=': {
+                        lexer_skip_char(lexer);
+                        lexer_skip_char(lexer);
+                        lexer_init_str_value_token(lexer, token, TOKEN_CMP_LEQ, start_offset);
+                        return;
+                    }
+                    default: {
+                        lexer_init_char_value_token(lexer, token, TOKEN_LT);
+                        lexer_skip_char(lexer);
+                        return;
+                    }
+                }
+            }
+            case '>': {
+                size_t start_offset = lexer->offset;
+
+                switch (lexer_peek_next_char(lexer)) {
+                    case '>': {
+                        lexer_skip_char(lexer);
+                        lexer_skip_char(lexer);
+                        lexer_init_str_value_token(lexer, token, TOKEN_BITWISE_RSHIFT, start_offset);
+                        return;
+                    }
+                    case '=': {
+                        lexer_skip_char(lexer);
+                        lexer_skip_char(lexer);
+                        lexer_init_str_value_token(lexer, token, TOKEN_CMP_GEQ, start_offset);
+                        return;
+                    }
+                    default: {
+                        lexer_init_char_value_token(lexer, token, TOKEN_GT);
+                        lexer_skip_char(lexer);
+                        return;
+                    }
+                }
+            }
+            case '^': {
+                lexer_init_char_value_token(lexer, token, TOKEN_CIRCUMFLEX);
+                lexer_skip_char(lexer);
+                return;
+            }
+            case '%': {
+                lexer_init_char_value_token(lexer, token, TOKEN_PERCENT);
+                lexer_skip_char(lexer);
+                return;
+            }
             case '(': {
                 lexer_init_char_value_token(lexer, token, TOKEN_OPAREN);
                 lexer_skip_char(lexer);
@@ -126,6 +241,26 @@ lexer_next_token(lexer_t *lexer, token_t *token)
                 lexer_skip_char(lexer);
                 return;
             }
+            case '+': {
+                lexer_init_char_value_token(lexer, token, TOKEN_PLUS);
+                lexer_skip_char(lexer);
+                return;
+            }
+            case '-': {
+                lexer_init_char_value_token(lexer, token, TOKEN_DASH);
+                lexer_skip_char(lexer);
+                return;
+            }
+            case '*': {
+                lexer_init_char_value_token(lexer, token, TOKEN_STAR);
+                lexer_skip_char(lexer);
+                return;
+            }
+            case '/': {
+                lexer_init_char_value_token(lexer, token, TOKEN_SLASH);
+                lexer_skip_char(lexer);
+                return;
+            }
             case '\n': {
                 lexer_init_char_value_token(lexer, token, TOKEN_LF);
                 lexer_skip_char(lexer);
@@ -146,12 +281,38 @@ lexer_next_token(lexer_t *lexer, token_t *token)
 }
 
 static char *token_kind_str_table[] = {
-    [TOKEN_UNKNOWN] = "unknown", [TOKEN_IDENTIFIER] = "identifier",
-    [TOKEN_NUMBER] = "number",   [TOKEN_FN] = "fn",
-    [TOKEN_RETURN] = "return",   [TOKEN_LF] = "line_feed",
-    [TOKEN_OPAREN] = "(",        [TOKEN_CPAREN] = ")",
-    [TOKEN_COLON] = ":",         [TOKEN_OCURLY] = "{",
-    [TOKEN_CCURLY] = "}",        [TOKEN_EOF] = "EOF",
+    [TOKEN_UNKNOWN] = "unknown",
+    [TOKEN_IDENTIFIER] = "identifier",
+    [TOKEN_NUMBER] = "number",
+    [TOKEN_FN] = "fn",
+    [TOKEN_RETURN] = "return",
+    [TOKEN_LF] = "line_feed",
+    [TOKEN_OPAREN] = "(",
+    [TOKEN_CPAREN] = ")",
+    [TOKEN_COLON] = ":",
+    [TOKEN_OCURLY] = "{",
+    [TOKEN_CCURLY] = "}",
+    [TOKEN_PLUS] = "+",
+    [TOKEN_DASH] = "-",
+    [TOKEN_STAR] = "*",
+    [TOKEN_SLASH] = "/",
+    [TOKEN_EQ] = "=",
+    [TOKEN_CMP_EQ] = "==",
+    [TOKEN_BANG] = "!",
+    [TOKEN_CMP_NEQ] = "!=",
+    [TOKEN_LT] = "<",
+    [TOKEN_GT] = ">",
+    [TOKEN_CMP_LEQ] = "<=",
+    [TOKEN_CMP_GEQ] = ">=",
+    [TOKEN_PERCENT] = "%",
+    [TOKEN_BITWISE_LSHIFT] = "<<",
+    [TOKEN_BITWISE_RSHIFT] = ">>",
+    [TOKEN_CIRCUMFLEX] = "^",
+    [TOKEN_PIPE] = "|",
+    [TOKEN_LOGICAL_OR] = "||",
+    [TOKEN_AND] = "&",
+    [TOKEN_LOGICAL_AND] = "&&",
+    [TOKEN_EOF] = "EOF",
 };
 
 char *
@@ -167,6 +328,15 @@ lexer_current_char(lexer_t *lexer)
     return lexer->source.chars[lexer->offset];
 }
 
+static char
+lexer_peek_next_char(lexer_t *lexer)
+{
+    if (lexer->offset + 1 >= lexer->source.size) {
+        return 0;
+    }
+    return lexer->source.chars[lexer->offset + 1];
+}
+
 static void
 lexer_skip_char(lexer_t *lexer)
 {
diff --git a/src/lexer.h b/src/lexer.h
index cb91d7e..5ed777b 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -39,7 +39,33 @@ typedef enum token_kind
     TOKEN_FN,
     TOKEN_RETURN,
 
+    // Equality operators
+    TOKEN_CMP_EQ,
+    TOKEN_CMP_NEQ,
+    TOKEN_CMP_LEQ,
+    TOKEN_CMP_GEQ,
+
+    // Logical Operators
+    TOKEN_LOGICAL_OR,
+    TOKEN_LOGICAL_AND,
+
+    // Bitwise Operators
+    TOKEN_BITWISE_LSHIFT,
+    TOKEN_BITWISE_RSHIFT,
+
     // Single char
+    TOKEN_BANG,
+    TOKEN_GT,
+    TOKEN_LT,
+    TOKEN_PERCENT,
+    TOKEN_AND,
+    TOKEN_PIPE,
+    TOKEN_CIRCUMFLEX,
+    TOKEN_EQ,
+    TOKEN_PLUS,
+    TOKEN_DASH,
+    TOKEN_SLASH,
+    TOKEN_STAR,
     TOKEN_LF,
     TOKEN_OPAREN,
     TOKEN_CPAREN,
diff --git a/tests/integration/cli_test.c b/tests/integration/cli_test.c
index 8cc22f9..d46471b 100644
--- a/tests/integration/cli_test.c
+++ b/tests/integration/cli_test.c
@@ -20,7 +20,7 @@
 #include <stdio.h>
 
 static MunitResult
-test_cli_dump_tokens(const MunitParameter params[], void *user_data_or_fixture)
+test_cli_dump_tokens_example_main_exit(const MunitParameter params[], void *user_data_or_fixture)
 {
     cli_result_t compilation_result = cli_runner_compiler_dump_tokens("../../examples/main_exit.ol");
     munit_assert_int(compilation_result.exec.exit_code, ==, 0);
@@ -42,6 +42,47 @@ test_cli_dump_tokens(const MunitParameter params[], void *user_data_or_fixture)
     return MUNIT_OK;
 }
 
+static MunitResult
+test_cli_dump_tokens_example_expression(const MunitParameter params[], void *user_data_or_fixture)
+{
+    cli_result_t compilation_result = cli_runner_compiler_dump_tokens("../../examples/expression.ol");
+    munit_assert_int(compilation_result.exec.exit_code, ==, 0);
+    munit_assert_string_equal(compilation_result.exec.stdout_buf,
+                              "../../examples/expression.ol:1:1: <fn>\n"
+                              "../../examples/expression.ol:1:4: <identifier>\n"
+                              "../../examples/expression.ol:1:8: <(>\n"
+                              "../../examples/expression.ol:1:9: <)>\n"
+                              "../../examples/expression.ol:1:10: <:>\n"
+                              "../../examples/expression.ol:1:12: <identifier>\n"
+                              "../../examples/expression.ol:1:16: <{>\n"
+                              "../../examples/expression.ol:1:17: <line_feed>\n"
+                              "../../examples/expression.ol:2:3: <return>\n"
+                              "../../examples/expression.ol:2:10: <(>\n"
+                              "../../examples/expression.ol:2:11: <number>\n"
+                              "../../examples/expression.ol:2:14: <+>\n"
+                              "../../examples/expression.ol:2:16: <number>\n"
+                              "../../examples/expression.ol:2:18: <*>\n"
+                              "../../examples/expression.ol:2:20: <number>\n"
+                              "../../examples/expression.ol:2:21: <)>\n"
+                              "../../examples/expression.ol:2:23: <->\n"
+                              "../../examples/expression.ol:2:25: <(>\n"
+                              "../../examples/expression.ol:2:26: <number>\n"
+                              "../../examples/expression.ol:2:29: <->\n"
+                              "../../examples/expression.ol:2:31: <(>\n"
+                              "../../examples/expression.ol:2:32: <number>\n"
+                              "../../examples/expression.ol:2:34: <+>\n"
+                              "../../examples/expression.ol:2:36: <number>\n"
+                              "../../examples/expression.ol:2:37: <)>\n"
+                              "../../examples/expression.ol:2:39: </>\n"
+                              "../../examples/expression.ol:2:41: <number>\n"
+                              "../../examples/expression.ol:2:42: <)>\n"
+                              "../../examples/expression.ol:2:43: <line_feed>\n"
+                              "../../examples/expression.ol:3:1: <}>\n"
+                              "../../examples/expression.ol:3:2: <line_feed>\n"
+                              "../../examples/expression.ol:4:1: <EOF>\n");
+    return MUNIT_OK;
+}
+
 static MunitResult
 test_cli_compile_minimal_program(const MunitParameter params[], void *user_data_or_fixture)
 {
@@ -62,7 +103,18 @@ test_cli_compile_minimal_program(const MunitParameter params[], void *user_data_
 }
 
 static MunitTest tests[] = {
-    { "/test_cli_dump_tokens", test_cli_dump_tokens, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
+    { "/test_cli_dump_tokens_example_main_exit",
+      test_cli_dump_tokens_example_main_exit,
+      NULL,
+      NULL,
+      MUNIT_TEST_OPTION_NONE,
+      NULL },
+    { "/test_cli_dump_tokens_example_expression",
+      test_cli_dump_tokens_example_expression,
+      NULL,
+      NULL,
+      MUNIT_TEST_OPTION_NONE,
+      NULL },
     { "/test_cli_compile_minimal_program", test_cli_compile_minimal_program, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
     { NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
 };
diff --git a/tests/integration/proc_exec.h b/tests/integration/proc_exec.h
index 135aa6a..45c2977 100644
--- a/tests/integration/proc_exec.h
+++ b/tests/integration/proc_exec.h
@@ -21,7 +21,8 @@
 typedef struct proc_exec_result
 {
     int exit_code;
-    char stdout_buf[1024];
+    // FIXME: output buffer shouldn't be fixed size
+    char stdout_buf[2048];
 } proc_exec_result_t;
 
 typedef struct proc_exec_command
-- 
2.44.0


  reply	other threads:[~2024-03-17 20:37 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-17 21:29 [PATCH olang v2 0/3] frontend: add binary operation expr support Johnny Richard
2024-03-17 21:29 ` Johnny Richard [this message]
2024-03-18  0:30   ` [PATCH olang v2 1/3] lexer: add tokenize support to binary op tokens Carlos Maniero
2024-03-18  8:49     ` Johnny Richard
2024-03-17 21:29 ` [PATCH olang v2 2/3] ast: create binary operation ast node Johnny Richard
2024-03-17 21:29 ` [PATCH olang v2 3/3] parser: add all binary operation expressions Johnny Richard
2024-03-17 20:37   ` [olang/patches/.build.yml] build success builds.sr.ht

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240317213638.131057-2-johnny@johnnyrichard.com \
    --to=johnny@johnnyrichard.com \
    --cc=~johnnyrichard/olang-devel@lists.sr.ht \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.johnnyrichard.com/olang.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox