From: Johnny Richard <johnny@johnnyrichard.com>
To: ~johnnyrichard/olang-devel@lists.sr.ht
Cc: Johnny Richard <johnny@johnnyrichard.com>
Subject: [PATCH olang v2 1/3] lexer: add tokenize support to binary op tokens
Date: Sun, 17 Mar 2024 22:29:22 +0100 [thread overview]
Message-ID: <20240317213638.131057-2-johnny@johnnyrichard.com> (raw)
In-Reply-To: <20240317213638.131057-1-johnny@johnnyrichard.com>
In order to tokenize the not-equals comparison operator (!=), I also added the unary not token (!).
Signed-off-by: Johnny Richard <johnny@johnnyrichard.com>
---
v2: Add support for tokenizing every binary operation token
examples/expression.ol | 3 +
src/lexer.c | 182 ++++++++++++++++++++++++++++++++--
src/lexer.h | 26 +++++
tests/integration/cli_test.c | 56 ++++++++++-
tests/integration/proc_exec.h | 3 +-
5 files changed, 261 insertions(+), 9 deletions(-)
create mode 100644 examples/expression.ol
diff --git a/examples/expression.ol b/examples/expression.ol
new file mode 100644
index 0000000..efa4ab5
--- /dev/null
+++ b/examples/expression.ol
@@ -0,0 +1,3 @@
+fn main(): u32 {
+ return (10 + 1 * 2) - (10 - (1 + 1) / 2)
+}
diff --git a/src/lexer.c b/src/lexer.c
index dd6f11d..14c2962 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -37,6 +37,9 @@ lexer_current_char(lexer_t *lexer);
static void
lexer_skip_char(lexer_t *lexer);
+static char
+lexer_peek_next_char(lexer_t *lexer);
+
static bool
lexer_is_eof(lexer_t *lexer);
@@ -101,6 +104,118 @@ lexer_next_token(lexer_t *lexer, token_t *token)
}
switch (current_char) {
+ case '=': {
+ size_t start_offset = lexer->offset;
+
+ if (lexer_peek_next_char(lexer) == '=') {
+ lexer_skip_char(lexer);
+ lexer_skip_char(lexer);
+ lexer_init_str_value_token(lexer, token, TOKEN_CMP_EQ, start_offset);
+ return;
+ }
+
+ lexer_init_char_value_token(lexer, token, TOKEN_EQ);
+ lexer_skip_char(lexer);
+ return;
+ }
+ case '!': {
+ size_t start_offset = lexer->offset;
+
+ if (lexer_peek_next_char(lexer) == '=') {
+ lexer_skip_char(lexer);
+ lexer_skip_char(lexer);
+ lexer_init_str_value_token(lexer, token, TOKEN_CMP_NEQ, start_offset);
+ return;
+ }
+
+ lexer_init_char_value_token(lexer, token, TOKEN_BANG);
+ lexer_skip_char(lexer);
+ return;
+ }
+ case '&': {
+ size_t start_offset = lexer->offset;
+
+ if (lexer_peek_next_char(lexer) == '&') {
+ lexer_skip_char(lexer);
+ lexer_skip_char(lexer);
+ lexer_init_str_value_token(lexer, token, TOKEN_LOGICAL_AND, start_offset);
+ return;
+ }
+
+ lexer_init_char_value_token(lexer, token, TOKEN_AND);
+ lexer_skip_char(lexer);
+ return;
+ }
+ case '|': {
+ size_t start_offset = lexer->offset;
+
+ if (lexer_peek_next_char(lexer) == '|') {
+ lexer_skip_char(lexer);
+ lexer_skip_char(lexer);
+ lexer_init_str_value_token(lexer, token, TOKEN_LOGICAL_OR, start_offset);
+ return;
+ }
+
+ lexer_init_char_value_token(lexer, token, TOKEN_PIPE);
+ lexer_skip_char(lexer);
+ return;
+ }
+ case '<': {
+ size_t start_offset = lexer->offset;
+
+ switch (lexer_peek_next_char(lexer)) {
+ case '<': {
+ lexer_skip_char(lexer);
+ lexer_skip_char(lexer);
+ lexer_init_str_value_token(lexer, token, TOKEN_BITWISE_LSHIFT, start_offset);
+ return;
+ }
+ case '=': {
+ lexer_skip_char(lexer);
+ lexer_skip_char(lexer);
+ lexer_init_str_value_token(lexer, token, TOKEN_CMP_LEQ, start_offset);
+ return;
+ }
+ default: {
+ lexer_init_char_value_token(lexer, token, TOKEN_LT);
+ lexer_skip_char(lexer);
+ return;
+ }
+ }
+ }
+ case '>': {
+ size_t start_offset = lexer->offset;
+
+ switch (lexer_peek_next_char(lexer)) {
+ case '>': {
+ lexer_skip_char(lexer);
+ lexer_skip_char(lexer);
+ lexer_init_str_value_token(lexer, token, TOKEN_BITWISE_RSHIFT, start_offset);
+ return;
+ }
+ case '=': {
+ lexer_skip_char(lexer);
+ lexer_skip_char(lexer);
+ lexer_init_str_value_token(lexer, token, TOKEN_CMP_GEQ, start_offset);
+ return;
+ }
+ default: {
+ lexer_init_char_value_token(lexer, token, TOKEN_GT);
+ lexer_skip_char(lexer);
+ return;
+ }
+ }
+ }
+ case '^': {
+ lexer_init_char_value_token(lexer, token, TOKEN_CIRCUMFLEX);
+ lexer_skip_char(lexer);
+ return;
+ }
+ case '%': {
+ lexer_init_char_value_token(lexer, token, TOKEN_PERCENT);
+ lexer_skip_char(lexer);
+ return;
+ }
case '(': {
lexer_init_char_value_token(lexer, token, TOKEN_OPAREN);
lexer_skip_char(lexer);
@@ -126,6 +241,26 @@ lexer_next_token(lexer_t *lexer, token_t *token)
lexer_skip_char(lexer);
return;
}
+ case '+': {
+ lexer_init_char_value_token(lexer, token, TOKEN_PLUS);
+ lexer_skip_char(lexer);
+ return;
+ }
+ case '-': {
+ lexer_init_char_value_token(lexer, token, TOKEN_DASH);
+ lexer_skip_char(lexer);
+ return;
+ }
+ case '*': {
+ lexer_init_char_value_token(lexer, token, TOKEN_STAR);
+ lexer_skip_char(lexer);
+ return;
+ }
+ case '/': {
+ lexer_init_char_value_token(lexer, token, TOKEN_SLASH);
+ lexer_skip_char(lexer);
+ return;
+ }
case '\n': {
lexer_init_char_value_token(lexer, token, TOKEN_LF);
lexer_skip_char(lexer);
@@ -146,12 +281,38 @@ lexer_next_token(lexer_t *lexer, token_t *token)
}
static char *token_kind_str_table[] = {
- [TOKEN_UNKNOWN] = "unknown", [TOKEN_IDENTIFIER] = "identifier",
- [TOKEN_NUMBER] = "number", [TOKEN_FN] = "fn",
- [TOKEN_RETURN] = "return", [TOKEN_LF] = "line_feed",
- [TOKEN_OPAREN] = "(", [TOKEN_CPAREN] = ")",
- [TOKEN_COLON] = ":", [TOKEN_OCURLY] = "{",
- [TOKEN_CCURLY] = "}", [TOKEN_EOF] = "EOF",
+ [TOKEN_UNKNOWN] = "unknown",
+ [TOKEN_IDENTIFIER] = "identifier",
+ [TOKEN_NUMBER] = "number",
+ [TOKEN_FN] = "fn",
+ [TOKEN_RETURN] = "return",
+ [TOKEN_LF] = "line_feed",
+ [TOKEN_OPAREN] = "(",
+ [TOKEN_CPAREN] = ")",
+ [TOKEN_COLON] = ":",
+ [TOKEN_OCURLY] = "{",
+ [TOKEN_CCURLY] = "}",
+ [TOKEN_PLUS] = "+",
+ [TOKEN_DASH] = "-",
+ [TOKEN_STAR] = "*",
+ [TOKEN_SLASH] = "/",
+ [TOKEN_EQ] = "=",
+ [TOKEN_CMP_EQ] = "==",
+ [TOKEN_BANG] = "!",
+ [TOKEN_CMP_NEQ] = "!=",
+ [TOKEN_LT] = "<",
+ [TOKEN_GT] = ">",
+ [TOKEN_CMP_LEQ] = "<=",
+ [TOKEN_CMP_GEQ] = ">=",
+ [TOKEN_PERCENT] = "%",
+ [TOKEN_BITWISE_LSHIFT] = "<<",
+ [TOKEN_BITWISE_RSHIFT] = ">>",
+ [TOKEN_CIRCUMFLEX] = "^",
+ [TOKEN_PIPE] = "|",
+ [TOKEN_LOGICAL_OR] = "||",
+ [TOKEN_AND] = "&",
+ [TOKEN_LOGICAL_AND] = "&&",
+ [TOKEN_EOF] = "EOF",
};
char *
@@ -167,6 +328,15 @@ lexer_current_char(lexer_t *lexer)
return lexer->source.chars[lexer->offset];
}
+static char
+lexer_peek_next_char(lexer_t *lexer)
+{
+ if (lexer->offset + 1 >= lexer->source.size) {
+ return 0;
+ }
+ return lexer->source.chars[lexer->offset + 1];
+}
+
static void
lexer_skip_char(lexer_t *lexer)
{
diff --git a/src/lexer.h b/src/lexer.h
index cb91d7e..5ed777b 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -39,7 +39,33 @@ typedef enum token_kind
TOKEN_FN,
TOKEN_RETURN,
+ // Comparison operators
+ TOKEN_CMP_EQ,
+ TOKEN_CMP_NEQ,
+ TOKEN_CMP_LEQ,
+ TOKEN_CMP_GEQ,
+
+ // Logical Operators
+ TOKEN_LOGICAL_OR,
+ TOKEN_LOGICAL_AND,
+
+ // Bitwise Operators
+ TOKEN_BITWISE_LSHIFT,
+ TOKEN_BITWISE_RSHIFT,
+
// Single char
+ TOKEN_BANG,
+ TOKEN_GT,
+ TOKEN_LT,
+ TOKEN_PERCENT,
+ TOKEN_AND,
+ TOKEN_PIPE,
+ TOKEN_CIRCUMFLEX,
+ TOKEN_EQ,
+ TOKEN_PLUS,
+ TOKEN_DASH,
+ TOKEN_SLASH,
+ TOKEN_STAR,
TOKEN_LF,
TOKEN_OPAREN,
TOKEN_CPAREN,
diff --git a/tests/integration/cli_test.c b/tests/integration/cli_test.c
index 8cc22f9..d46471b 100644
--- a/tests/integration/cli_test.c
+++ b/tests/integration/cli_test.c
@@ -20,7 +20,7 @@
#include <stdio.h>
static MunitResult
-test_cli_dump_tokens(const MunitParameter params[], void *user_data_or_fixture)
+test_cli_dump_tokens_example_main_exit(const MunitParameter params[], void *user_data_or_fixture)
{
cli_result_t compilation_result = cli_runner_compiler_dump_tokens("../../examples/main_exit.ol");
munit_assert_int(compilation_result.exec.exit_code, ==, 0);
@@ -42,6 +42,47 @@ test_cli_dump_tokens(const MunitParameter params[], void *user_data_or_fixture)
return MUNIT_OK;
}
+static MunitResult
+test_cli_dump_tokens_example_expression(const MunitParameter params[], void *user_data_or_fixture)
+{
+ cli_result_t compilation_result = cli_runner_compiler_dump_tokens("../../examples/expression.ol");
+ munit_assert_int(compilation_result.exec.exit_code, ==, 0);
+ munit_assert_string_equal(compilation_result.exec.stdout_buf,
+ "../../examples/expression.ol:1:1: <fn>\n"
+ "../../examples/expression.ol:1:4: <identifier>\n"
+ "../../examples/expression.ol:1:8: <(>\n"
+ "../../examples/expression.ol:1:9: <)>\n"
+ "../../examples/expression.ol:1:10: <:>\n"
+ "../../examples/expression.ol:1:12: <identifier>\n"
+ "../../examples/expression.ol:1:16: <{>\n"
+ "../../examples/expression.ol:1:17: <line_feed>\n"
+ "../../examples/expression.ol:2:3: <return>\n"
+ "../../examples/expression.ol:2:10: <(>\n"
+ "../../examples/expression.ol:2:11: <number>\n"
+ "../../examples/expression.ol:2:14: <+>\n"
+ "../../examples/expression.ol:2:16: <number>\n"
+ "../../examples/expression.ol:2:18: <*>\n"
+ "../../examples/expression.ol:2:20: <number>\n"
+ "../../examples/expression.ol:2:21: <)>\n"
+ "../../examples/expression.ol:2:23: <->\n"
+ "../../examples/expression.ol:2:25: <(>\n"
+ "../../examples/expression.ol:2:26: <number>\n"
+ "../../examples/expression.ol:2:29: <->\n"
+ "../../examples/expression.ol:2:31: <(>\n"
+ "../../examples/expression.ol:2:32: <number>\n"
+ "../../examples/expression.ol:2:34: <+>\n"
+ "../../examples/expression.ol:2:36: <number>\n"
+ "../../examples/expression.ol:2:37: <)>\n"
+ "../../examples/expression.ol:2:39: </>\n"
+ "../../examples/expression.ol:2:41: <number>\n"
+ "../../examples/expression.ol:2:42: <)>\n"
+ "../../examples/expression.ol:2:43: <line_feed>\n"
+ "../../examples/expression.ol:3:1: <}>\n"
+ "../../examples/expression.ol:3:2: <line_feed>\n"
+ "../../examples/expression.ol:4:1: <EOF>\n");
+ return MUNIT_OK;
+}
+
static MunitResult
test_cli_compile_minimal_program(const MunitParameter params[], void *user_data_or_fixture)
{
@@ -62,7 +103,18 @@ test_cli_compile_minimal_program(const MunitParameter params[], void *user_data_
}
static MunitTest tests[] = {
- { "/test_cli_dump_tokens", test_cli_dump_tokens, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
+ { "/test_cli_dump_tokens_example_main_exit",
+ test_cli_dump_tokens_example_main_exit,
+ NULL,
+ NULL,
+ MUNIT_TEST_OPTION_NONE,
+ NULL },
+ { "/test_cli_dump_tokens_example_expression",
+ test_cli_dump_tokens_example_expression,
+ NULL,
+ NULL,
+ MUNIT_TEST_OPTION_NONE,
+ NULL },
{ "/test_cli_compile_minimal_program", test_cli_compile_minimal_program, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
{ NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
};
diff --git a/tests/integration/proc_exec.h b/tests/integration/proc_exec.h
index 135aa6a..45c2977 100644
--- a/tests/integration/proc_exec.h
+++ b/tests/integration/proc_exec.h
@@ -21,7 +21,8 @@
typedef struct proc_exec_result
{
int exit_code;
- char stdout_buf[1024];
+ // FIXME: output buffer shouldn't be fixed size
+ char stdout_buf[2048];
} proc_exec_result_t;
typedef struct proc_exec_command
--
2.44.0
next prev parent reply other threads:[~2024-03-17 20:37 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-03-17 21:29 [PATCH olang v2 0/3] frontend: add binary operation expr support Johnny Richard
2024-03-17 21:29 ` Johnny Richard [this message]
2024-03-18 0:30 ` [PATCH olang v2 1/3] lexer: add tokenize support to binary op tokens Carlos Maniero
2024-03-18 8:49 ` Johnny Richard
2024-03-17 21:29 ` [PATCH olang v2 2/3] ast: create binary operation ast node Johnny Richard
2024-03-17 21:29 ` [PATCH olang v2 3/3] parser: add all binary operation expressions Johnny Richard
2024-03-17 20:37 ` [olang/patches/.build.yml] build success builds.sr.ht
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240317213638.131057-2-johnny@johnnyrichard.com \
--to=johnny@johnnyrichard.com \
--cc=~johnnyrichard/olang-devel@lists.sr.ht \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.johnnyrichard.com/olang.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox