* [PATCH olang v1 1/3] be: create linux-x86_64 gas asm codegen
2024-03-04 19:23 [PATCH olang v1 0/3] implement assembly linux x86_64 compiler Johnny Richard
@ 2024-03-04 19:23 ` Johnny Richard
2024-03-04 19:23 ` [PATCH olang v1 2/3] string_view: add function to create from cstr Johnny Richard
` (2 subsequent siblings)
3 siblings, 0 replies; 10+ messages in thread
From: Johnny Richard @ 2024-03-04 19:23 UTC (permalink / raw)
To: ~johnnyrichard/olang-devel; +Cc: Johnny Richard
This patch creates basic functions to generate GAS assembly for
examples/main_exit.0.
Signed-off-by: Johnny Richard <johnny@johnnyrichard.com>
---
src/codegen_linux_x86_64.c | 81 ++++++++++++++++++++++++++++++++++++++
src/codegen_linux_x86_64.h | 25 ++++++++++++
2 files changed, 106 insertions(+)
create mode 100644 src/codegen_linux_x86_64.c
create mode 100644 src/codegen_linux_x86_64.h
diff --git a/src/codegen_linux_x86_64.c b/src/codegen_linux_x86_64.c
new file mode 100644
index 0000000..3d4b17e
--- /dev/null
+++ b/src/codegen_linux_x86_64.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2024 olang maintainers
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "codegen_linux_x86_64.h"
+#include "list.h"
+
+#define SYS_exit (60)
+
+static void
+codegen_linux_x86_64_emit_start_entrypoint(FILE *out);
+
+static void
+codegen_linux_x86_64_emit_function(FILE *out, ast_fn_definition_t *fn);
+
+void
+codegen_linux_x86_64_emit_program(FILE *out, ast_node_t *prog)
+{
+ codegen_linux_x86_64_emit_start_entrypoint(out);
+
+ assert(prog->kind == AST_NODE_FN_DEF);
+ ast_fn_definition_t fn = prog->data.as_fn_def;
+
+ assert(string_view_eq_to_cstr(fn.identifier, "main"));
+ codegen_linux_x86_64_emit_function(out, &fn);
+}
+
+static void
+codegen_linux_x86_64_emit_start_entrypoint(FILE *out)
+{
+ fprintf(out, ".text\n");
+ fprintf(out, ".globl _start\n\n");
+
+ fprintf(out, "_start:\n");
+ fprintf(out, " call main\n");
+ fprintf(out, " mov %%eax, %%edi\n");
+ fprintf(out, " mov $%d, %%eax\n", SYS_exit);
+ fprintf(out, " syscall\n");
+}
+
+static void
+codegen_linux_x86_64_emit_function(FILE *out, ast_fn_definition_t *fn)
+{
+ ast_node_t *block_node = fn->block;
+ assert(block_node->kind == AST_NODE_BLOCK);
+ ast_block_t block = block_node->data.as_block;
+
+ assert(list_size(block.nodes) == 1);
+
+ list_item_t *nodes_item = list_get(block.nodes, 0);
+ ast_node_t *return_node = nodes_item->value;
+ assert(return_node->kind == AST_NODE_RETURN_STMT);
+ ast_return_stmt_t return_stmt = return_node->data.as_return_stmt;
+
+ ast_node_t *literal_node = return_stmt.data;
+ assert(literal_node->kind == AST_NODE_LITERAL);
+ ast_literal_t literal_u32 = literal_node->data.as_literal;
+
+ assert(literal_u32.kind == AST_LITERAL_U32);
+ uint32_t exit_code = literal_u32.value.as_u32;
+
+ fprintf(out, "" SV_FMT ":\n", SV_ARG(fn->identifier));
+ fprintf(out, " mov $%d, %%eax\n", exit_code);
+ fprintf(out, " ret\n");
+}
diff --git a/src/codegen_linux_x86_64.h b/src/codegen_linux_x86_64.h
new file mode 100644
index 0000000..2050c91
--- /dev/null
+++ b/src/codegen_linux_x86_64.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2024 olang maintainers
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+#ifndef CODEGEN_X86_64_H
+#define CODEGEN_X86_64_H
+
+#include "ast.h"
+
+void
+codegen_linux_x86_64_emit_program(FILE *out, ast_node_t *prog);
+
+#endif /* CODEGEN_X86_64_H */
--
2.44.0
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH olang v1 3/3] cli: add compilation -o option with --save-temps
2024-03-04 19:23 [PATCH olang v1 0/3] implement assembly linux x86_64 compiler Johnny Richard
2024-03-04 19:23 ` [PATCH olang v1 1/3] be: create linux-x86_64 gas asm codegen Johnny Richard
2024-03-04 19:23 ` [PATCH olang v1 2/3] string_view: add function to create from cstr Johnny Richard
@ 2024-03-04 19:23 ` Johnny Richard
2024-03-04 18:33 ` [olang/patches/.build.yml] build failed builds.sr.ht
2024-03-05 2:02 ` [PATCH olang v1 3/3] cli: add compilation -o option with --save-temps Carlos Maniero
2024-03-05 8:53 ` [PATCH olang v1 0/3] implement assembly linux x86_64 compiler Johnny Richard
3 siblings, 2 replies; 10+ messages in thread
From: Johnny Richard @ 2024-03-04 19:23 UTC (permalink / raw)
To: ~johnnyrichard/olang-devel; +Cc: Johnny Richard
In order to compile the program, we are introduction the option -o to
the compiler.
This implementation is very similar to the gcc one. Since the program
is made of a single file, no need to over complicate the compilation
with multiple files.
The option --save-temps is used to keep files used to create the binary
file (.a and .o). By default this option is disabled.
WARNING
-------
This implementation relies on "as" (GNU Assembler) and the "ld" (GNU
linker) command. Make sure you have it install before trying to compile
any program.
Signed-off-by: Johnny Richard <johnny@johnnyrichard.com>
---
docs/manpages/0c.md | 13 +++-
src/main.c | 180 +++++++++++++++++++++++++++++++++++---------
2 files changed, 157 insertions(+), 36 deletions(-)
diff --git a/docs/manpages/0c.md b/docs/manpages/0c.md
index 87a56df..e3d3cfc 100644
--- a/docs/manpages/0c.md
+++ b/docs/manpages/0c.md
@@ -4,11 +4,14 @@
# NAME
-0c - zero langague compiler
+0c - zero language compiler
# SYNOPSIS
-**0c** **----dump-tokens** source.0
+**0c**
+ source_file
+ [**----dump-tokens**]
+ [**--o** output_file [**----save-temps**]]
# DESCRIPTION
@@ -19,3 +22,9 @@ contains utilities to help the language development.
**----dump-tokens**
: Display lexical tokens given a soruce.0 code.
+
+**--o <file>**
+: Compile program into a binary file
+
+**----save-temps**
+: Keep temp files used to compile program
diff --git a/src/main.c b/src/main.c
index 2b2f12a..b27715a 100644
--- a/src/main.c
+++ b/src/main.c
@@ -22,7 +22,9 @@
#include <string.h>
#include "arena.h"
+#include "codegen_linux_x86_64.h"
#include "lexer.h"
+#include "parser.h"
#include "string_view.h"
// TODO: find a better solution to define the arena capacity
@@ -37,20 +39,38 @@ typedef struct cli_args
char *
cli_args_shift(cli_args_t *args);
+typedef enum
+{
+ CLI_OPT_DUMP_TOKENS = 1 << 1,
+ CLI_OPT_OUTPUT = 1 << 2,
+ CLI_OPT_SAVE_TEMPS = 1 << 3
+} cli_opt;
+
typedef struct cli_opts
{
- bool dump_tokens;
- char *file_path;
+ uint32_t options;
+ string_view_t prog;
+ string_view_t file_path;
+ string_view_t output_bin;
} cli_opts_t;
void
-print_usage(FILE *stream, char *prog);
+print_usage(FILE *stream, string_view_t prog);
+
+void
+handle_dump_tokens(cli_opts_t *opts);
+
+void
+handle_codegen_linux_x86_64(cli_opts_t *opts);
static void
print_token(char *file_path, token_t *token);
string_view_t
-read_entire_file(char *file_path, arena_t *arena);
+read_entire_file(string_view_t file_path, arena_t *arena);
+
+void
+cli_opts_parse_output(cli_opts_t *opts, cli_args_t *args);
int
main(int argc, char **argv)
@@ -58,28 +78,58 @@ main(int argc, char **argv)
cli_args_t args = { .argc = argc, .argv = argv };
cli_opts_t opts = { 0 };
- char *prog = cli_args_shift(&args);
+ opts.prog = string_view_from_cstr(cli_args_shift(&args));
- if (argc != 3) {
- print_usage(stderr, prog);
- return EXIT_FAILURE;
- }
-
- for (char *arg = cli_args_shift(&args); arg != NULL; arg = cli_args_shift(&args)) {
+ char *arg = cli_args_shift(&args);
+ while (arg != NULL) {
if (strcmp(arg, "--dump-tokens") == 0) {
- opts.dump_tokens = true;
+ opts.options |= CLI_OPT_DUMP_TOKENS;
+ arg = cli_args_shift(&args);
+ } else if (strcmp(arg, "--save-temps") == 0) {
+ opts.options |= CLI_OPT_SAVE_TEMPS;
+ arg = cli_args_shift(&args);
+ } else if (strcmp(arg, "-o") == 0) {
+ cli_opts_parse_output(&opts, &args);
+ arg = cli_args_shift(&args);
} else {
- opts.file_path = arg;
+ opts.file_path = string_view_from_cstr(arg);
+ arg = cli_args_shift(&args);
}
}
- if (!opts.dump_tokens) {
- print_usage(stderr, prog);
- return EXIT_FAILURE;
+ if (opts.options & CLI_OPT_OUTPUT) {
+ handle_codegen_linux_x86_64(&opts);
+ return EXIT_SUCCESS;
+ }
+
+ if (opts.options & CLI_OPT_DUMP_TOKENS) {
+ handle_dump_tokens(&opts);
+ return EXIT_SUCCESS;
+ }
+
+ print_usage(stderr, opts.prog);
+ return EXIT_FAILURE;
+}
+
+char *
+cli_args_shift(cli_args_t *args)
+{
+ if (args->argc == 0)
+ return NULL;
+ --(args->argc);
+ return *(args->argv)++;
+}
+
+void
+handle_dump_tokens(cli_opts_t *opts)
+{
+ if (opts->file_path.chars == NULL) {
+ print_usage(stderr, opts->prog);
+ exit(EXIT_FAILURE);
}
arena_t arena = arena_new(ARENA_CAPACITY);
- string_view_t file_content = read_entire_file(opts.file_path, &arena);
+ string_view_t file_content = read_entire_file(opts->file_path, &arena);
lexer_t lexer = { 0 };
lexer_init(&lexer, file_content);
@@ -87,38 +137,82 @@ main(int argc, char **argv)
token_t token = { 0 };
lexer_next_token(&lexer, &token);
while (token.kind != TOKEN_EOF) {
- print_token(opts.file_path, &token);
+ print_token(opts->file_path.chars, &token);
lexer_next_token(&lexer, &token);
}
- print_token(opts.file_path, &token);
+ print_token(opts->file_path.chars, &token);
- free(file_content.chars);
-
- return EXIT_SUCCESS;
+ arena_free(&arena);
}
-char *
-cli_args_shift(cli_args_t *args)
+void
+handle_codegen_linux_x86_64(cli_opts_t *opts)
{
- if (args->argc == 0)
- return NULL;
- --(args->argc);
- return *(args->argv)++;
+ if (opts->file_path.chars == NULL) {
+ print_usage(stderr, opts->prog);
+ exit(EXIT_FAILURE);
+ }
+
+ arena_t arena = arena_new(ARENA_CAPACITY);
+ lexer_t lexer = { 0 };
+ parser_t parser = { 0 };
+
+ string_view_t file_content = read_entire_file(opts->file_path, &arena);
+ lexer_init(&lexer, file_content);
+ parser_init(&parser, &lexer, &arena, opts->file_path.chars);
+
+ ast_node_t *ast = parser_parse_fn_definition(&parser);
+
+ string_view_t asm_ext = string_view_from_cstr(".s");
+ char asm_file[opts->output_bin.size + asm_ext.size + 1];
+ memcpy(asm_file, opts->output_bin.chars, opts->output_bin.size);
+ memcpy(asm_file + opts->output_bin.size, asm_ext.chars, asm_ext.size);
+ asm_file[opts->output_bin.size + asm_ext.size] = 0;
+
+ FILE *out = fopen(asm_file, "w");
+ assert(out);
+ codegen_linux_x86_64_emit_program(out, ast);
+ fclose(out);
+
+ char command[512];
+ sprintf(command, "as %s -o " SV_FMT ".o", asm_file, SV_ARG(opts->output_bin));
+ system(command);
+
+ sprintf(command, "ld " SV_FMT ".o -o " SV_FMT "", SV_ARG(opts->output_bin), SV_ARG(opts->output_bin));
+ system(command);
+
+ if (!(opts->options & CLI_OPT_SAVE_TEMPS)) {
+ char output_file[256];
+
+ sprintf(output_file, "" SV_FMT ".s", SV_ARG(opts->output_bin));
+ remove(output_file);
+
+ sprintf(output_file, "" SV_FMT ".o", SV_ARG(opts->output_bin));
+ remove(output_file);
+ }
+
+ arena_free(&arena);
}
void
-print_usage(FILE *stream, char *prog)
+print_usage(FILE *stream, string_view_t prog)
{
- fprintf(stream, "usage: %s <source.0> --dump-tokens\n", prog);
+ fprintf(stream,
+ "Usage: " SV_FMT " [options] file...\n"
+ "Options:\n"
+ " --dump-tokens\t\tDisplay lexer token stream\n"
+ " -o <file>\t\tCompile program into a binary file\n"
+ " --save-temps\t\tKeep temp files used to compile program\n",
+ SV_ARG(prog));
}
string_view_t
-read_entire_file(char *file_path, arena_t *arena)
+read_entire_file(string_view_t file_path, arena_t *arena)
{
- FILE *stream = fopen(file_path, "rb");
+ FILE *stream = fopen(file_path.chars, "rb");
if (stream == NULL) {
- fprintf(stderr, "Could not open file %s: %s\n", file_path, strerror(errno));
+ fprintf(stderr, "error: could not open file " SV_FMT ": %s\n", SV_ARG(file_path), strerror(errno));
exit(EXIT_FAILURE);
}
@@ -133,7 +227,7 @@ read_entire_file(char *file_path, arena_t *arena)
file_content.chars = (char *)arena_alloc(arena, (size_t)file_content.size);
if (file_content.chars == NULL) {
- fprintf(stderr, "Could not read file %s: %s\n", file_path, strerror(errno));
+ fprintf(stderr, "Could not read file " SV_FMT ": %s\n", SV_ARG(file_path), strerror(errno));
exit(EXIT_FAILURE);
}
@@ -143,6 +237,24 @@ read_entire_file(char *file_path, arena_t *arena)
return file_content;
}
+void
+cli_opts_parse_output(cli_opts_t *opts, cli_args_t *args)
+{
+ assert(opts && "opts is required");
+ assert(opts && "args is required");
+
+ char *output_bin = cli_args_shift(args);
+
+ if (output_bin == NULL) {
+ fprintf(stderr, "error: missing filename after '-o'\n");
+ print_usage(stderr, opts->prog);
+ exit(EXIT_FAILURE);
+ }
+
+ opts->options |= CLI_OPT_OUTPUT;
+ opts->output_bin = string_view_from_cstr(output_bin);
+}
+
static void
print_token(char *file_path, token_t *token)
{
--
2.44.0
^ permalink raw reply [flat|nested] 10+ messages in thread