/* * Copyright (C) 2024 olang maintainers * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include #include #include "ast.h" #include "lexer.h" #include "parser.h" static bool skip_expected_token(parser_t *parser, token_kind_t expected_kind); static bool expected_next_token(parser_t *parser, token_t *token, token_kind_t kind); static bool expected_token(token_t *token, token_kind_t kind); static bool parser_parse_type(parser_t *parser, string_view_t *type); static ast_node_t * parser_parse_block(parser_t *parser); static ast_node_t * parser_parse_return_stmt(parser_t *parser); static ast_node_t * parser_parse_if_stmt(parser_t *parser); static ast_node_t * parser_parse_var_def(parser_t *parser); static ast_node_t * parser_parse_fn_definition(parser_t *parser); static list_t * parser_parse_fn_args(parser_t *parser); static list_t * parser_parse_fn_params(parser_t *parser); static ast_node_t * parser_parse_expr(parser_t *parser); static ast_node_t * parser_parse_factor(parser_t *parser); static void skip_line_feeds(lexer_t *lexer); void parser_init(parser_t *parser, lexer_t *lexer, arena_t *arena) { assert(parser && "parser is required"); assert(lexer && "lexer is required"); parser->lexer = lexer; parser->arena = arena; } ast_node_t * parser_parse_translation_unit(parser_t *parser) { token_t token; ast_node_t *translation_unit_node = ast_new_translation_unit(parser->arena); skip_line_feeds(parser->lexer); lexer_peek_next(parser->lexer, &token); while (token.kind != TOKEN_EOF) { ast_node_t *fn = parser_parse_fn_definition(parser); if (fn == NULL) { return NULL; } list_append(translation_unit_node->as_translation_unit.decls, fn); skip_line_feeds(parser->lexer); lexer_peek_next(parser->lexer, &token); } return translation_unit_node; } static ast_binary_op_kind_t token_kind_to_binary_op_kind(token_kind_t kind) { switch (kind) { case TOKEN_PLUS: return AST_BINOP_ADDITION; case TOKEN_DASH: return AST_BINOP_SUBTRACTION; case TOKEN_SLASH: return AST_BINOP_DIVISION; case TOKEN_STAR: return AST_BINOP_MULTIPLICATION; case TOKEN_PERCENT: return AST_BINOP_REMINDER; case TOKEN_BITWISE_LSHIFT: return AST_BINOP_BITWISE_LSHIFT; case TOKEN_BITWISE_RSHIFT: return AST_BINOP_BITWISE_RSHIFT; case TOKEN_CIRCUMFLEX: return AST_BINOP_BITWISE_XOR; case TOKEN_AND: return AST_BINOP_BITWISE_AND; case TOKEN_PIPE: return AST_BINOP_BITWISE_OR; case TOKEN_LT: return AST_BINOP_CMP_LT; case TOKEN_GT: return AST_BINOP_CMP_GT; case TOKEN_CMP_LEQ: return AST_BINOP_CMP_LEQ; case TOKEN_CMP_GEQ: return AST_BINOP_CMP_GEQ; case TOKEN_CMP_EQ: return AST_BINOP_CMP_EQ; case TOKEN_CMP_NEQ: return AST_BINOP_CMP_NEQ; case TOKEN_LOGICAL_AND: return AST_BINOP_LOGICAL_AND; case TOKEN_LOGICAL_OR: return AST_BINOP_LOGICAL_OR; default: { fprintf(stderr, "error: token kind (%s) not compatible with binary op kind\n", token_kind_to_cstr(kind)); assert(false); } } } typedef enum { BINOP_MIN_PREC, BINOP_LOGICAL_OR_PREC, BINOP_LOGICAL_AND_PREC, BINOP_BITWISE_OR_PREC, BINOP_BITWISE_XOR_PREC, BINOP_BITWISE_AND_PREC, BINOP_CMP_EQUALITY_PREC, BINOP_CMP_RELATIONAL_PREC, BINOP_BITWISE_SHIFT_PREC, BINOP_ADDITIVE_PREC, BINOP_MULTIPLICATIVE_PREC, } binary_op_precedence_t; static binary_op_precedence_t get_binary_op_precedence(token_kind_t kind) { switch (kind) { case TOKEN_PLUS: case TOKEN_DASH: return BINOP_ADDITIVE_PREC; case TOKEN_SLASH: case TOKEN_STAR: case TOKEN_PERCENT: return BINOP_MULTIPLICATIVE_PREC; case TOKEN_BITWISE_LSHIFT: case TOKEN_BITWISE_RSHIFT: return BINOP_BITWISE_SHIFT_PREC; case TOKEN_LT: case TOKEN_GT: case TOKEN_CMP_LEQ: case TOKEN_CMP_GEQ: return BINOP_CMP_RELATIONAL_PREC; case TOKEN_CMP_EQ: case TOKEN_CMP_NEQ: return BINOP_CMP_EQUALITY_PREC; case TOKEN_AND: return BINOP_BITWISE_AND_PREC; case TOKEN_CIRCUMFLEX: return BINOP_BITWISE_XOR_PREC; case TOKEN_PIPE: return BINOP_BITWISE_OR_PREC; case TOKEN_LOGICAL_AND: return BINOP_LOGICAL_AND_PREC; case TOKEN_LOGICAL_OR: return BINOP_LOGICAL_OR_PREC; default: assert(false); } } static ast_node_t * parser_parse_expr_1(parser_t *parser, ast_node_t *lhs, size_t prev_precedence) { token_t lookahead_token; lexer_peek_next(parser->lexer, &lookahead_token); while (token_kind_is_binary_op(lookahead_token.kind) && get_binary_op_precedence(lookahead_token.kind) >= prev_precedence) { token_t token_op; lexer_next_token(parser->lexer, &token_op); ast_node_t *rhs = parser_parse_factor(parser); if (rhs == NULL) { return NULL; } lexer_peek_next(parser->lexer, &lookahead_token); while (token_kind_is_binary_op(lookahead_token.kind) && get_binary_op_precedence(lookahead_token.kind) > get_binary_op_precedence(token_op.kind)) { rhs = parser_parse_expr_1(parser, rhs, get_binary_op_precedence(token_op.kind)); lexer_peek_next(parser->lexer, &lookahead_token); } lhs = ast_new_node_bin_op(parser->arena, token_kind_to_binary_op_kind(token_op.kind), lhs, rhs); if (lhs == NULL) { return NULL; } } return lhs; } static ast_node_t * parser_parse_expr(parser_t *parser) { ast_node_t *lhs = parser_parse_factor(parser); if (lhs == NULL) { return NULL; } return parser_parse_expr_1(parser, lhs, BINOP_MIN_PREC); } static ast_node_t * parser_parse_factor(parser_t *parser) { token_t token; lexer_next_token(parser->lexer, &token); switch (token.kind) { case TOKEN_NUMBER: return ast_new_node_literal_u32(parser->arena, string_view_to_u32(token.value)); case TOKEN_ID: { string_view_t id = token.value; lexer_peek_next(parser->lexer, &token); if (token.kind == TOKEN_OPAREN) { list_t *args = parser_parse_fn_args(parser); return ast_new_node_fn_call(parser->arena, id, args); } return ast_new_node_ref(parser->arena, id); } case TOKEN_OPAREN: { ast_node_t *expr = parser_parse_expr(parser); if (expr == NULL) { return NULL; } if (!skip_expected_token(parser, TOKEN_CPAREN)) { return NULL; } return expr; } default: { fprintf(stderr, "error: parse_factor: unsupported or invalid token (%s)\n", token_kind_to_cstr(token.kind)); assert(false); } } } static list_t * parser_parse_fn_args(parser_t *parser) { if (!skip_expected_token(parser, TOKEN_OPAREN)) { return NULL; } list_t *args = arena_alloc(parser->arena, sizeof(list_t)); if (args == NULL) { fprintf(stderr, "[FATAL] Out of memory: parser_parse_fn_args: %s\n", strerror(errno)); exit(EXIT_FAILURE); } list_init(args, parser->arena); skip_line_feeds(parser->lexer); token_t token; lexer_peek_next(parser->lexer, &token); bool is_not_first_arg = false; while (token.kind != TOKEN_CPAREN && token.kind != TOKEN_EOF) { if (is_not_first_arg && expected_token(&token, TOKEN_COMMA)) { lexer_next_token(parser->lexer, &token); } ast_node_t *expr = parser_parse_expr(parser); list_append(args, expr); skip_line_feeds(parser->lexer); lexer_peek_next(parser->lexer, &token); is_not_first_arg = true; } if (!skip_expected_token(parser, TOKEN_CPAREN)) { return NULL; } return args; } static list_t * parser_parse_fn_params(parser_t *parser) { if (!skip_expected_token(parser, TOKEN_OPAREN)) { return NULL; } list_t *params = arena_alloc(parser->arena, sizeof(list_t)); if (params == NULL) { fprintf(stderr, "[FATAL] Out of memory: parser_parse_fn_params: %s\n", strerror(errno)); exit(EXIT_FAILURE); } list_init(params, parser->arena); skip_line_feeds(parser->lexer); token_t token; lexer_next_token(parser->lexer, &token); bool is_not_first_param = false; while (token.kind != TOKEN_CPAREN && token.kind != TOKEN_EOF) { if (is_not_first_param && expected_token(&token, TOKEN_COMMA)) { lexer_next_token(parser->lexer, &token); } if (!expected_token(&token, TOKEN_ID)) { return NULL; } string_view_t type_id; parser_parse_type(parser, &type_id); ast_fn_param_t *param = ast_new_fn_param(parser->arena, token.value, type_id); list_append(params, param); skip_line_feeds(parser->lexer); lexer_next_token(parser->lexer, &token); is_not_first_param = true; } if (!expected_token(&token, TOKEN_CPAREN)) { return NULL; } return params; } ast_node_t * parser_parse_fn_definition(parser_t *parser) { if (!skip_expected_token(parser, TOKEN_FN)) { return NULL; } skip_line_feeds(parser->lexer); token_t fn_name_token; if (!expected_next_token(parser, &fn_name_token, TOKEN_ID)) { return NULL; } skip_line_feeds(parser->lexer); list_t *params = parser_parse_fn_params(parser); if (params == NULL) { return NULL; } string_view_t fn_return_type; if (!parser_parse_type(parser, &fn_return_type)) { return NULL; } skip_line_feeds(parser->lexer); ast_node_t *block = parser_parse_block(parser); if (block == NULL) { return NULL; } return ast_new_node_fn_def(parser->arena, fn_name_token.value, params, fn_return_type, block); } static bool parser_parse_type(parser_t *parser, string_view_t *type) { skip_line_feeds(parser->lexer); if (!skip_expected_token(parser, TOKEN_COLON)) { return false; } skip_line_feeds(parser->lexer); token_t token; if (!expected_next_token(parser, &token, TOKEN_ID)) { return false; } *type = token.value; return true; } static ast_node_t * parser_parse_block(parser_t *parser) { if (!skip_expected_token(parser, TOKEN_OCURLY)) { return NULL; } skip_line_feeds(parser->lexer); ast_node_t *node_block = ast_new_node_block(parser->arena); if (node_block == NULL) { return NULL; } token_t next_token; StartLoop: lexer_peek_next(parser->lexer, &next_token); ast_node_t *node = NULL; switch (next_token.kind) { case TOKEN_RETURN: { node = parser_parse_return_stmt(parser); break; } case TOKEN_IF: { node = parser_parse_if_stmt(parser); break; } case TOKEN_VAR: { node = parser_parse_var_def(parser); break; } case TOKEN_CCURLY: { goto EndLoop; } default: { // FIXME: write a better error message goto EndLoop; } } if (node == NULL) { return NULL; } list_append(node_block->as_block.nodes, node); goto StartLoop; EndLoop: skip_line_feeds(parser->lexer); if (!skip_expected_token(parser, TOKEN_CCURLY)) { return NULL; } return node_block; } static ast_node_t * parser_parse_return_stmt(parser_t *parser) { if (!skip_expected_token(parser, TOKEN_RETURN)) { return NULL; } ast_node_t *expr = parser_parse_expr(parser); if (expr == NULL) { return NULL; } ast_node_t *node_return_stmt = ast_new_node_return_stmt(parser->arena, expr); assert(node_return_stmt); if (!skip_expected_token(parser, TOKEN_LF)) { return NULL; } skip_line_feeds(parser->lexer); return node_return_stmt; } static ast_node_t * parser_parse_if_stmt(parser_t *parser) { if (!skip_expected_token(parser, TOKEN_IF)) { return NULL; } ast_node_t *cond = parser_parse_expr(parser); if (cond == NULL) { return NULL; } ast_node_t *then = parser_parse_block(parser); if (then == NULL) { return NULL; } ast_node_t *_else = NULL; token_t next_token; lexer_next_token(parser->lexer, &next_token); // FIXME: We are not allowing line feed right after if block statement when // the else branch is present. We also noticed that if has the same // problem which will be addressed later. if (next_token.kind == TOKEN_ELSE) { _else = parser_parse_block(parser); if (_else == NULL) { return NULL; } } else if (!expected_token(&next_token, TOKEN_LF)) { return NULL; } ast_node_t *node_if_stmt = ast_new_node_if_stmt(parser->arena, cond, then, _else); assert(node_if_stmt); skip_line_feeds(parser->lexer); return node_if_stmt; } static ast_node_t * parser_parse_var_def(parser_t *parser) { if (!skip_expected_token(parser, TOKEN_VAR)) { return NULL; } token_t id_token; if (!expected_next_token(parser, &id_token, TOKEN_ID)) { return NULL; } string_view_t var_type; if (!parser_parse_type(parser, &var_type)) { return NULL; } // FIXME: assignment must be optional according to the spec if (!skip_expected_token(parser, TOKEN_EQ)) { return NULL; } ast_node_t *expr = parser_parse_expr(parser); if (expr == NULL) { return NULL; } ast_node_t *var_node = ast_new_node_var_def(parser->arena, id_token.value, var_type, expr); skip_line_feeds(parser->lexer); return var_node; } static bool skip_expected_token(parser_t *parser, token_kind_t expected_kind) { token_t token; return expected_next_token(parser, &token, expected_kind); } static bool expected_next_token(parser_t *parser, token_t *token, token_kind_t expected_kind) { lexer_next_token(parser->lexer, token); return expected_token(token, expected_kind); } static bool expected_token(token_t *token, token_kind_t expected_kind) { if (token->kind != expected_kind) { fprintf(stderr, "%s:%lu:%lu: syntax error: got '" SV_FMT "' token but expect '%s'\n", token->loc.src.filepath, token_loc_to_lineno(token->loc), token_loc_to_colno(token->loc), SV_ARG(token->value), token_kind_to_cstr(expected_kind)); fprintf(stderr, SV_FMT "\n", SV_ARG(token_loc_to_line(token->loc))); fprintf(stderr, "%*s\n", (int)token_loc_to_colno(token->loc), "^"); exit(EXIT_FAILURE); } return true; } static void skip_line_feeds(lexer_t *lexer) { token_t token; lexer_peek_next(lexer, &token); while (token.kind == TOKEN_LF) { lexer_next_token(lexer, &token); lexer_peek_next(lexer, &token); } }