public inbox for ~johnnyrichard/olang-devel@lists.sr.ht
 help / color / mirror / code / Atom feed
From: Johnny Richard <johnny@johnnyrichard.com>
To: ~johnnyrichard/olang-devel@lists.sr.ht
Cc: Johnny Richard <johnny@johnnyrichard.com>
Subject: [PATCH olang v1 1/2] cli: add new option to pretty print AST tree
Date: Mon, 25 Mar 2024 23:36:11 +0100	[thread overview]
Message-ID: <20240325223715.459442-2-johnny@johnnyrichard.com> (raw)
In-Reply-To: <20240325223715.459442-1-johnny@johnnyrichard.com>

Understanding the intricacies of a parser can be challenging without
adequate visualization tools. This commit addresses this by introducing
a new CLI option to the compiler, enabling the generation of
ASCII-formatted AST trees for easy inspection post-syntax analysis.

It's worth noting that due to current limitations, the tool is optimized
for trees with up to 64 levels.  Beyond 64 levels this tool won't bring
much value I believe.

Signed-off-by: Johnny Richard <johnny@johnnyrichard.com>
---
 docs/manpages/olang.md |   4 +
 src/cli.c              |   5 +-
 src/cli.h              |   3 +-
 src/main.c             |  31 +++++
 src/pretty_print_ast.c | 286 +++++++++++++++++++++++++++++++++++++++++
 src/pretty_print_ast.h |  25 ++++
 6 files changed, 352 insertions(+), 2 deletions(-)
 create mode 100644 src/pretty_print_ast.c
 create mode 100644 src/pretty_print_ast.h

diff --git a/docs/manpages/olang.md b/docs/manpages/olang.md
index 8ed6726..fbca5c3 100644
--- a/docs/manpages/olang.md
+++ b/docs/manpages/olang.md
@@ -11,6 +11,7 @@ olang - O Programming Language compiler
 **olang**
     source_file
     [**----dump-tokens**]
+    [**----dump-ast**]
     [**--o** ___output_file___ [**----save-temps**] [**----arch** ___arch___] [**----sysroot** ___dir___]]
 
 # DESCRIPTION
@@ -23,6 +24,9 @@ contains utilities to help the language development.
 **----dump-tokens**
 :   Display lexical tokens given a soruce.0 code.
 
+**----dump-ast**
+:   Display AST tree to stdout right after syntax analyzes
+
 **--o** ___file___
 :   Compile program into a binary file
 
diff --git a/src/cli.c b/src/cli.c
index a113bbb..fa73b60 100644
--- a/src/cli.c
+++ b/src/cli.c
@@ -44,6 +44,8 @@ cli_parse_args(int argc, char **argv)
     while (arg != NULL) {
         if (strcmp(arg, "--dump-tokens") == 0) {
             opts.options |= CLI_OPT_DUMP_TOKENS;
+        } else if (strcmp(arg, "--dump-ast") == 0) {
+            opts.options |= CLI_OPT_DUMP_AST;
         } else if (strcmp(arg, "--save-temps") == 0) {
             opts.options |= CLI_OPT_SAVE_TEMPS;
         } else if (strcmp(arg, "-o") == 0) {
@@ -60,7 +62,7 @@ cli_parse_args(int argc, char **argv)
         arg = cli_args_shift(&args);
     }
 
-    if (opts.options & CLI_OPT_OUTPUT || opts.options & CLI_OPT_DUMP_TOKENS) {
+    if (opts.options & CLI_OPT_OUTPUT || opts.options & CLI_OPT_DUMP_TOKENS || opts.options & CLI_OPT_DUMP_AST) {
         return opts;
     }
 
@@ -139,6 +141,7 @@ cli_print_usage(FILE *stream, char *compiler_path)
             "Usage: %s [options] file...\n"
             "Options:\n"
             "  --dump-tokens    Display lexer token stream\n"
+            "  --dump-ast       Display ast tree to stdout\n"
             "  --arch <arch>    Binary arch: default to x86_64 (x86_64 | aarch64)\n"
             "  --sysroot <dir>  System root dir where the GNU Assembler and GNU Linker are located: default to '/'\n"
             "  -o <file>        Compile program into a binary file\n"
diff --git a/src/cli.h b/src/cli.h
index b517b2c..3f4c3a9 100644
--- a/src/cli.h
+++ b/src/cli.h
@@ -42,7 +42,8 @@ typedef enum
     CLI_OPT_OUTPUT = 1 << 1,
     CLI_OPT_SAVE_TEMPS = 1 << 2,
     CLI_OPT_ARCH = 1 << 3,
-    CLI_OPT_SYSROOT = 1 << 4
+    CLI_OPT_SYSROOT = 1 << 4,
+    CLI_OPT_DUMP_AST = 1 << 5
 } cli_opt_t;
 
 cli_opts_t
diff --git a/src/main.c b/src/main.c
index ff0aaa8..70e7d3f 100644
--- a/src/main.c
+++ b/src/main.c
@@ -27,6 +27,7 @@
 #include "codegen_linux_x86_64.h"
 #include "lexer.h"
 #include "parser.h"
+#include "pretty_print_ast.h"
 #include "string_view.h"
 
 // TODO: find a better solution to define the arena capacity
@@ -35,6 +36,9 @@
 void
 handle_dump_tokens(cli_opts_t *opts);
 
+void
+handle_dump_ast(cli_opts_t *opts);
+
 void
 handle_codegen_linux(cli_opts_t *opts);
 
@@ -54,6 +58,11 @@ main(int argc, char **argv)
         return EXIT_SUCCESS;
     }
 
+    if (opts.options & CLI_OPT_DUMP_AST) {
+        handle_dump_ast(&opts);
+        return EXIT_SUCCESS;
+    }
+
     if (opts.options & CLI_OPT_OUTPUT) {
         handle_codegen_linux(&opts);
         return EXIT_SUCCESS;
@@ -92,6 +101,28 @@ handle_dump_tokens(cli_opts_t *opts)
     arena_free(&arena);
 }
 
+void
+handle_dump_ast(cli_opts_t *opts)
+{
+    if (opts->file_path == NULL) {
+        cli_print_usage(stderr, opts->compiler_path);
+        exit(EXIT_FAILURE);
+    }
+
+    arena_t arena = arena_new(ARENA_CAPACITY);
+    lexer_t lexer = { 0 };
+    parser_t parser = { 0 };
+
+    string_view_t file_content = read_entire_file(opts->file_path, &arena);
+
+    lexer_init(&lexer, file_content);
+    parser_init(&parser, &lexer, &arena, opts->file_path);
+
+    ast_node_t *ast = parser_parse_program(&parser);
+
+    pretty_print_ast(ast);
+}
+
 void
 handle_codegen_linux(cli_opts_t *opts)
 {
diff --git a/src/pretty_print_ast.c b/src/pretty_print_ast.c
new file mode 100644
index 0000000..e950796
--- /dev/null
+++ b/src/pretty_print_ast.c
@@ -0,0 +1,286 @@
+/*
+ * Copyright (C) 2024 olang maintainers
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+#include "pretty_print_ast.h"
+#include "arena.h"
+#include "list.h"
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#define ANSI_COLOR_MAGENTA "\x1b[35m"
+#define ANSI_COLOR_RESET "\x1b[0m"
+#define PP_IS_BIT_SET(data, index) ((data)&1 << index)
+
+typedef struct pretty_print_node
+{
+    char *name;
+    list_t *children;
+} pretty_print_node_t;
+
+static bool
+stdout_supports_color()
+{
+    struct stat st;
+    fstat(STDOUT_FILENO, &st);
+    return S_ISCHR(st.st_mode);
+}
+
+static void
+pretty_print_print_ident(uint64_t *prefix, size_t level, bool lst_children)
+{
+    assert(level < 64);
+
+    bool support_color = stdout_supports_color();
+
+    if (support_color) {
+        printf(ANSI_COLOR_MAGENTA);
+    }
+
+    for (size_t i = 0; i < level; ++i) {
+
+        if (!PP_IS_BIT_SET(*prefix, i)) {
+            printf("  ");
+            continue;
+        }
+
+        bool last_index = i + 1 == level;
+
+        if (!last_index) {
+            printf("| ");
+        } else if (lst_children) {
+            printf("`-");
+        } else {
+            printf("|-");
+        }
+    }
+
+    if (support_color) {
+        printf(ANSI_COLOR_RESET);
+    }
+}
+
+static void
+pretty_print_tree(pretty_print_node_t *node, uint64_t *prefix, size_t level, bool lst_children)
+{
+    pretty_print_print_ident(prefix, level, lst_children);
+
+    list_t *list = node->children;
+    if (list != NULL)
+        (*prefix) |= 1 << level;
+    if (lst_children)
+        (*prefix) ^= 1 << (level - 1);
+
+    printf("%s\n", node->name);
+
+    size_t size = list_size(list);
+    for (size_t i = 0; i < size; ++i) {
+        pretty_print_node_t *it = (pretty_print_node_t *)list_get(list, i)->value;
+        pretty_print_tree(it, prefix, level + 1, i + 1 == size);
+    }
+}
+
+static pretty_print_node_t *
+pretty_print_node_new(arena_t *arena)
+{
+    pretty_print_node_t *node = (pretty_print_node_t *)arena_alloc(arena, sizeof(pretty_print_node_t));
+    node->children = (list_t *)arena_alloc(arena, sizeof(list_t));
+    list_init(node->children, arena);
+    return node;
+}
+
+static pretty_print_node_t *
+ast_node_to_pretty_print_node(ast_node_t *ast, arena_t *arena)
+{
+    switch (ast->kind) {
+        case AST_NODE_PROGRAM: {
+            pretty_print_node_t *node = pretty_print_node_new(arena);
+            node->name = "Translation_Unit";
+
+            pretty_print_node_t *fn_node = ast_node_to_pretty_print_node(ast->data.as_program.fn, arena);
+            list_append(node->children, fn_node);
+            return node;
+        }
+        case AST_NODE_FN_DEF: {
+            pretty_print_node_t *node = pretty_print_node_new(arena);
+            ast_fn_definition_t fn_def = ast->data.as_fn_def;
+
+            char name[256];
+            sprintf(name,
+                    "Function_Definition <name:" SV_FMT "> <return:%d>",
+                    SV_ARG(fn_def.identifier),
+                    fn_def.return_type);
+            node->name = (char *)arena_alloc(arena, sizeof(char) * (strlen(name) + 1));
+            strcpy(node->name, name);
+
+            pretty_print_node_t *block = ast_node_to_pretty_print_node(fn_def.block, arena);
+            list_append(node->children, block);
+            return node;
+        }
+        case AST_NODE_BLOCK: {
+            pretty_print_node_t *node = pretty_print_node_new(arena);
+            ast_block_t block = ast->data.as_block;
+
+            node->name = "Block";
+
+            size_t block_nodes_size = list_size(block.nodes);
+            for (size_t i = 0; i < block_nodes_size; ++i) {
+                ast_node_t *ast_node = (ast_node_t *)list_get(block.nodes, i)->value;
+                pretty_print_node_t *child = ast_node_to_pretty_print_node(ast_node, arena);
+                list_append(node->children, child);
+            }
+            return node;
+        }
+        case AST_NODE_RETURN_STMT: {
+            pretty_print_node_t *node = pretty_print_node_new(arena);
+            ast_return_stmt_t return_stmt = ast->data.as_return_stmt;
+
+            node->name = "Return_Statement";
+
+            pretty_print_node_t *child = ast_node_to_pretty_print_node(return_stmt.data, arena);
+            list_append(node->children, child);
+
+            return node;
+        }
+        case AST_NODE_LITERAL: {
+            pretty_print_node_t *node = pretty_print_node_new(arena);
+            ast_literal_t literal = ast->data.as_literal;
+
+            char name[256];
+            switch (literal.kind) {
+                case AST_LITERAL_U32: {
+                    sprintf(name, "Literal <kind:u32> <value:%d>", literal.value.as_u32);
+                    node->name = (char *)arena_alloc(arena, sizeof(char) * (strlen(name) + 1));
+                    strcpy(node->name, name);
+                    break;
+                }
+                default:
+                    assert(0 && "literal not implemented");
+            }
+
+            return node;
+        }
+        case AST_NODE_BINARY_OP: {
+            pretty_print_node_t *node = pretty_print_node_new(arena);
+            ast_binary_op_t binop = ast->data.as_bin_op;
+
+            switch (binop.kind) {
+                case AST_BINOP_ADDITION: {
+                    node->name = "Binary_Operation (+)";
+                    break;
+                }
+                case AST_BINOP_SUBTRACTION: {
+                    node->name = "Binary_Operation (-)";
+                    break;
+                }
+                case AST_BINOP_MULTIPLICATION: {
+                    node->name = "Binary_Operation (*)";
+                    break;
+                }
+                case AST_BINOP_DIVISION: {
+                    node->name = "Binary_Operation (/)";
+                    break;
+                }
+                case AST_BINOP_REMINDER: {
+                    node->name = "Binary_Operation (%)";
+                    break;
+                }
+                case AST_BINOP_BITWISE_LSHIFT: {
+                    node->name = "Binary_Operation (<<)";
+                    break;
+                }
+                case AST_BINOP_BITWISE_RSHIFT: {
+                    node->name = "Binary_Operation (>>)";
+                    break;
+                }
+                case AST_BINOP_BITWISE_XOR: {
+                    node->name = "Binary_Operation (^)";
+                    break;
+                }
+                case AST_BINOP_BITWISE_AND: {
+                    node->name = "Binary_Operation (&)";
+                    break;
+                }
+                case AST_BINOP_BITWISE_OR: {
+                    node->name = "Binary_Operation (|)";
+                    break;
+                }
+                case AST_BINOP_CMP_LT: {
+                    node->name = "Binary_Operation (<)";
+                    break;
+                }
+                case AST_BINOP_CMP_GT: {
+                    node->name = "Binary_Operation (>)";
+                    break;
+                }
+                case AST_BINOP_CMP_LEQ: {
+                    node->name = "Binary_Operation (<=)";
+                    break;
+                }
+                case AST_BINOP_CMP_GEQ: {
+                    node->name = "Binary_Operation (>=)";
+                    break;
+                }
+                case AST_BINOP_CMP_EQ: {
+                    node->name = "Binary_Operation (==)";
+                    break;
+                }
+                case AST_BINOP_CMP_NEQ: {
+                    node->name = "Binary_Operation (!=)";
+                    break;
+                }
+                case AST_BINOP_LOGICAL_AND: {
+                    node->name = "Binary_Operation (&&)";
+                    break;
+                }
+                case AST_BINOP_LOGICAL_OR: {
+                    node->name = "Binary_Operation (|)";
+                    break;
+                }
+                default:
+                    assert(false && "binop not implemented");
+            }
+
+            pretty_print_node_t *lhs = ast_node_to_pretty_print_node(binop.lhs, arena);
+            pretty_print_node_t *rhs = ast_node_to_pretty_print_node(binop.rhs, arena);
+
+            list_append(node->children, lhs);
+            list_append(node->children, rhs);
+
+            return node;
+        }
+        default: {
+            printf("node kind = '%d' not implmented\n", ast->kind);
+            assert(false);
+        }
+    }
+    return NULL;
+}
+
+void
+pretty_print_ast(ast_node_t *ast)
+{
+    arena_t arena = arena_new(8 * 1024);
+    pretty_print_node_t *root = ast_node_to_pretty_print_node(ast, &arena);
+    uint64_t prefix = 0;
+
+    pretty_print_tree(root, &prefix, 0, true);
+
+    arena_free(&arena);
+}
diff --git a/src/pretty_print_ast.h b/src/pretty_print_ast.h
new file mode 100644
index 0000000..e0b4c11
--- /dev/null
+++ b/src/pretty_print_ast.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2024 olang maintainers
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+#ifndef PRETTY_PRINT_AST
+#define PRETTY_PRINT_AST
+
+#include "ast.h"
+
+void
+pretty_print_ast(ast_node_t *ast);
+
+#endif /* PRETTY_PRINT_AST */
-- 
2.44.0


  reply	other threads:[~2024-03-25 21:36 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-25 22:36 [PATCH olang v1 0/2] Introduce CLI option for improved AST tree visualization Johnny Richard
2024-03-25 22:36 ` Johnny Richard [this message]
2024-03-25 22:36 ` [PATCH olang v1 2/2] cli: remove code duplication Johnny Richard
2024-03-25 21:37   ` [olang/patches/.build.yml] build failed builds.sr.ht
2024-03-26  2:32     ` Carlos Maniero
2024-03-26  2:35       ` Carlos Maniero
2024-03-26  2:30   ` [PATCH olang v1 2/2] cli: remove code duplication Carlos Maniero

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240325223715.459442-2-johnny@johnnyrichard.com \
    --to=johnny@johnnyrichard.com \
    --cc=~johnnyrichard/olang-devel@lists.sr.ht \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.johnnyrichard.com/olang.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox