public inbox for ~johnnyrichard/
 help / color / mirror / code / Atom feed
From: Carlos Maniero <>
To: ~johnnyrichard/
Cc: Carlos Maniero <>
Subject: [PATCH olang] docs: add script to generate man pages from docstring
Date: Mon, 28 Oct 2024 02:06:29 +0000 (UTC)	[thread overview]
Message-ID: <> (raw)

There is no initial intention to make these manuals public; they are only
meant to support the core developers.

Signed-off-by: Carlos Maniero <>
The Makefile was inspired by test/olc/Makefile and is supporting

If you have manpath in your system you can use the following

echo "MANDATORY_MANPATH /the_location_to_/olang/docs/man/" >> ~/.manpath

 Makefile              |   7 +
 scripts/gen-docstring | 704 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 711 insertions(+)
 create mode 100755 scripts/gen-docstring

diff --git a/Makefile b/Makefile
index 58526ac..2d021d2 100644
--- a/Makefile
+++ b/Makefile
@@ -20,6 +20,7 @@ BUILDDIR := build
 SRCS := $(wildcard $(SRCDIR)/*.c)
 HEADERS := $(wildcard $(SRCDIR)/*.h)
+DEV_MAN_PAGES := $(patsubst %,, $(HEADERS))
 OBJS := $(patsubst $(SRCDIR)/%.c, $(BUILDDIR)/%.o, $(SRCS))
 .PHONY: all
@@ -115,6 +116,12 @@ docs:
 	$(MAKE) -C docs dist
+dev-man-docs: $(DEV_MAN_PAGES)
 $(BUILDDIR)/%.o: $(SRCDIR)/%.c
 	@$(CC) $(CFLAGS) -c $< -o $@
 	@printf 'CC\t%s\n' '$@'
+	@./scripts/gen-docstring $< `basename $<` docs/man/
diff --git a/scripts/gen-docstring b/scripts/gen-docstring
new file mode 100755
index 0000000..ec3afcb
--- /dev/null
+++ b/scripts/gen-docstring
@@ -0,0 +1,704 @@
+# Copyright (C) 2024 Carlos Maniero<>
+# Copyright (C) 2024 Johnny Richard<>
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <>.
+# TODO: Create a man page for the text below:
+"""
+gen-docstring - Parses docstrings into man pages.
+    ./scripts/gen-docstring file_name include_name man_output_dir
+    - It works for macro definitions, typedef struct, typedef union, functions
+    - It may not work with definitions that use macros
+    - It does not work well with inner structs/unions
+        /**
+         * This is the man page title
+         */
+        typedef struct my_struct
+        {
+        } my_struct_t;
+        This will generate a man page for my_struct_t.
+        All sections must start with a capital letter and must be followed by
+        a line of dashes.
+        /**
+         * This is the man page title
+         *
+         * This is my description
+         * .B it supports groff syntax
+         *
+         * This is my section
+         * ------------------
+         *
+         * This is my description
+         * .B it supports groff syntax
+         */
+         All the content placed right after the man page title belongs to the
+         description section if no section is defined.
+    GROUPS:
+        You may want to group many docstrings into a single man page. To do
+        so, use group_name(3), where 3 is the man level.
+        /**
+         * my_group(3) - This is the man page title
+         *
+         * Description of the first item
+         */
+        ...
+        /**
+         * my_group(3) - This text will be ignored
+         *
+         * Description of the second item
+         */
+        ...
+        The script will generate a single file named after the group_name; the
+        other declarations will be linked to that file.
+        The title of the declaration whose name matches the group_name
+        will be used as the man page title.
+"""
+import os
+import sys
+import re
+from enum import Enum
+from collections import namedtuple
+# Command-line context: source file, name shown in the #include line and
+# the directory the man pages are written to.
+Ctx = namedtuple('Ctx', 'filename include_name man_output_dir')
+# Assigned in the __main__ block from sys.argv.
+ctx = None
+
+# A lexed token: rule name, matched text and (start, end) offsets.
+Token = namedtuple('Token', 'kind value location')
+
+# The rules are joined into one alternation, so they are tried in the order
+# listed: a rule must come before any shorter rule it overlaps with
+# (DOCSTR before BDOCSTR, ARROW before DASH, HEX before DECIMAL, ...).
+token_rules = [
+    docstr_rule    := r'(?P<DOCSTR>/\*\*.*?\*/)',
+    bcomment_rule  := r'(?P<BDOCSTR>/\*.*?\*/)',
+    comment_rule   := r'(?P<COMMENT>//[^\n]*)',
+    keyword_rule   := r'(?P<KEYWORD>typedef|struct|union|enum)[\s]',
+    marco_rule     := r'(?P<MACRO>#[a-zA-Z_][a-zA-Z0-9_]*)',
+    # Either a plain character or a backslash escape, repeated. This accepts
+    # strings ending in an escape (e.g. "\n") and avoids nested quantifiers.
+    string_rule    := r'(?P<STRING>"(?:[^"\\]|\\.)*")',
+    hex_rule       := r'(?P<HEX>0[xX][0-9a-fA-F]+)',
+    # Octal digits are 0-7.
+    octal_rule     := r'(?P<OCTAL>0[oO]?[0-7]+)',
+    binary_rule    := r'(?P<BINARY>0[bB]?[0-1]+)',
+    decimal_rule   := r'(?P<DECIMAL>\d+)',
+    char_rule      := r'(?P<CHAR>\'[\w ]\')',
+    comma_rule     := r'(?P<COMMA>,)',
+    dot_rule       := r'(?P<DOT>\.)',
+    arrow_rule     := r'(?P<ARROW>->)',
+    lshift_rule    := r'(?P<LSHIFT><<)',
+    rshift_rule    := r'(?P<RSHIFT>>>)',
+    semicolon_rule := r'(?P<SEMICOLON>;)',
+    eq_rule        := r'(?P<EQ>=)',
+    star_rule      := r'(?P<STAR>\*)',
+    plus_rule      := r'(?P<PLUS>\+)',
+    slash_rule     := r'(?P<SLASH>/)',
+    and_rule       := r'(?P<AND>&)',
+    pipe_rule      := r'(?P<PIPE>\|)',
+    tilde_rule     := r'(?P<TILDE>~)',
+    bang_rule      := r'(?P<BANG>!)',
+    dash_rule      := r'(?P<DASH>-)',
+    lbrace_rule    := r'(?P<LBRACE>{)',
+    rbrace_rule    := r'(?P<RBRACE>})',
+    lparen_rule    := r'(?P<LPAREN>\()',
+    rparen_rule    := r'(?P<RPAREN>\))',
+    # No '-' in the class: "a->b" must lex as IDENT ARROW IDENT.
+    ident_rule     := r'(?P<IDENT>[a-zA-Z_][a-zA-Z0-9_]*)',
+    ws_rule        := r'(?P<WS>\s+)',
+]
+
+# DOTALL lets the comment rules span multiple lines.
+tokenizer_pattern = re.compile('|'.join(token_rules), re.DOTALL)
+def tokenize(code):
+    """Split C source text into a flat list of Token tuples.
+
+    Whitespace is dropped. A character that no rule matches is emitted as a
+    one-character 'UNKNOWN' token, so the scan always advances.
+
+    code: the full text of the file to lex.
+    Returns a list of Token(kind, value, location), where location is the
+    (start, end) span of the match in `code`.
+    """
+    pos = 0
+    tokens = []
+    while pos < len(code):
+        match = tokenizer_pattern.match(code, pos)
+        if match is None:
+            tokens.append(Token('UNKNOWN', code[pos], (pos, pos + 1)))
+            pos += 1
+            continue
+        token_kind = match.lastgroup
+        if token_kind != 'WS':
+            # Use the named group, not the whole match: the KEYWORD rule also
+            # consumes one trailing whitespace character, and callers compare
+            # the value against bare words such as 'typedef'.
+            tokens.append(Token(token_kind, match.group(token_kind), match.span()))
+        pos = match.end()
+    return tokens
+# A section title: any line starting with a capital letter. The caller also
+# requires the following line to start with dashes.
+re_section_name = r'[A-Z].*$'
+# A man-style reference such as "name(3)" inside running text.
+re_group = r'(([a-zA-Z_]\w+)\((.*?)\))'
+# First docstring line of a grouped entry: "group_name(N) - title".
+re_group_title = r'^(?P<group_name>([a-zA-Z_]\w+))\((?P<man_level>\d)\)( - (?P<title>.*))?'
+
+
+class NodeKind(Enum):
+    """Kind of declaration a docstring is attached to.
+
+    The members are exactly the ones this script refers to; the values are
+    only compared for equality.
+    """
+    FUNCTION = 1
+    TYPEDEF_STRUCT = 2
+    TYPEDEF_UNION = 3
+    TYPEDEF_ENUM = 4
+    MACRO = 5
+
+
+# A struct/union member or function parameter.
+Field = namedtuple('Field', 'name type')
+# An enumerator; value is None when no explicit "= value" is given.
+EnumField = namedtuple('EnumField', 'name value')
+class Section:
+    """A man page section: a title, its text and optional subsections."""
+
+    def __init__(self, name, contents):
+        # name: the section title; contents: the raw text of the section.
+        self.name = name
+        self.contents = contents
+        # Filled by DocStringFile.get_sections() when docstrings are grouped.
+        self.subsections = []
+class DocString:
+    """A docstring comment together with the code node it documents.
+
+    comment_lines: the comment body as produced by extract_comment(); line 0
+        is the title and the body is read from line 2 onwards.
+    code_node: the node built by get_code_node() for the declaration that
+        follows the comment.
+    """
+
+    def __init__(self, comment_lines, code_node):
+        self.comment_lines = comment_lines
+        self.code_node = code_node
+
+    def _group_match(self):
+        """Match the title line against "group_name(N) - title", or None."""
+        if not self.comment_lines:
+            return None
+        return re.match(re_group_title, self.comment_lines[0])
+
+    def get_name(self):
+        """Return the name of the documented declaration."""
+        return self.code_node.get_name()
+
+    def get_group_name(self):
+        """Return the group name, or the declaration name when not grouped."""
+        match = self._group_match()
+        if match:
+            return match.group('group_name')
+        return self.get_name()
+
+    def is_group(self):
+        """Return a truthy match object when the title names a group."""
+        return self._group_match()
+
+    def get_man_level(self):
+        """Return the man level from the group title, defaulting to 3."""
+        match = self._group_match()
+        if match:
+            return match.group('man_level')
+        return 3
+
+    def is_entry_doc(self):
+        """Tell whether this declaration is the one the group is named after."""
+        return self.get_group_name() == self.get_name()
+
+    def get_title(self):
+        """Return the page title (the text after " - " for grouped entries)."""
+        match = self._group_match()
+        if match:
+            return match.group('title')
+        return self.comment_lines[0] if self.comment_lines else ''
+
+    def get_description(self):
+        """Join the body lines up to the first line starting with '='."""
+        description = ""
+        for line in self.comment_lines[2:]:
+            if line.startswith('='):
+                break
+            elif len(description) > 0 and description[-1] != '\n':
+                description += ' '
+            description += line + '\n'
+        return description.strip()
+
+    def get_sections(self):
+        """Split the comment body into a list of Section objects.
+
+        A section starts at a line matching re_section_name whose next line
+        starts with dashes. Text before the first such header goes into an
+        implicit 'DESCRIPTION' section.
+        """
+        sections = []
+        section_name = None
+        section_lines = []
+        lines = self.comment_lines[2:]
+        index = 0
+        while index < len(lines):
+            line = lines[index]
+            next_line = lines[index + 1] if index + 1 < len(lines) else None
+            if re.match(re_section_name, line) and next_line and re.match('-+', next_line):
+                if section_name:
+                    sections.append(self._make_section(section_name, section_lines))
+                section_name = line.strip().upper()
+                section_lines = []
+                # Skip the dashes line as well.
+                index += 1
+            elif index == 0:
+                section_name = 'DESCRIPTION'
+                section_lines.append(line)
+            elif section_name:
+                section_lines.append(line)
+            index += 1
+        if section_name:
+            sections.append(self._make_section(section_name, section_lines))
+        return sections
+
+    @staticmethod
+    def _make_section(name, lines):
+        """Build a Section, trimming only surrounding blank lines.
+
+        Trimming newlines (rather than slicing off one character at each end)
+        keeps the first character of a description that has no leading blank
+        line, and treats the last section like every other one.
+        """
+        return Section(name, '\n'.join(lines).strip('\n'))
+class DocStringFile:
+    """One man page to generate, built from one or more DocString objects.
+
+    docstrings[0] supplies the page name, man level, title and description.
+    """
+
+    def __init__(self, docstrings):
+        self.docstrings = docstrings
+
+    def get_name(self):
+        """Return the page name (the group name of the first docstring)."""
+        return self.docstrings[0].get_group_name()
+
+    def get_man_level(self):
+        """Return the man level of the first docstring."""
+        return self.docstrings[0].get_man_level()
+
+    def get_group_name(self):
+        """Return the comma-separated names of all documented declarations."""
+        return ", ".join(docstring.get_name() for docstring in self.docstrings)
+
+    def is_group(self):
+        """Tell whether the page documents more than one declaration."""
+        return len(self.docstrings) > 1
+
+    def get_title(self):
+        return self.docstrings[0].get_title()
+
+    def get_description(self):
+        return self.docstrings[0].get_description()
+
+    def get_filepath(self):
+        """Return <man_output_dir>/man<level>/<name>.<level>."""
+        man_level = self.get_man_level()
+        filename = f'{self.get_name()}.{man_level}'
+        return os.path.join(ctx.man_output_dir, f'man{man_level}', filename)
+
+    def get_links(self):
+        """Yield (link_path, target) for every non-entry declaration.
+
+        link_path is the file to create for the declaration; target is the
+        page path relative to the man root, as written after ".so".
+        """
+        man_level = self.get_man_level()
+        to = os.path.join(f'man{man_level}', f'{self.get_name()}.{man_level}')
+        for docstring in self.docstrings:
+            if docstring.is_entry_doc():
+                continue
+            filename = f'{docstring.get_name()}.{man_level}'
+            yield (os.path.join(ctx.man_output_dir, f'man{man_level}', filename), to)
+
+    def get_sections(self):
+        """Return the page sections.
+
+        For a single docstring these are its own sections. For a group, the
+        sections of all docstrings are merged by title: each distinct title
+        becomes an empty parent section, and every docstring's section is
+        attached to it as a subsection renamed after its declaration.
+        """
+        if not self.is_group():
+            return self.docstrings[0].get_sections()
+        sections = []
+        for docstring in self.docstrings:
+            for section in docstring.get_sections():
+                parent_section = next((s for s in sections if s.name == section.name), None)
+                if not parent_section:
+                    parent_section = Section(section.name, '')
+                    sections.append(parent_section)
+                parent_section.subsections.append(section)
+                # Renamed only after the parent lookup, which needs the title.
+                section.name = docstring.get_name()
+        return sections
+class MacroNode:
+    """A preprocessor directive (e.g. #define) that follows a docstring.
+
+    tokens: token list starting at the MACRO token.
+    file_contents: full source text, used to recover the directive verbatim.
+    """
+
+    def __init__(self, tokens, file_contents):
+        self.tokens = tokens
+        self.kind = NodeKind.MACRO
+        self.file_contents = file_contents
+
+    def get_name(self):
+        """Return the value of the token right after the directive."""
+        return self.tokens[1].value
+
+    def get_contents(self):
+        """Return the directive text, including backslash-continued lines.
+
+        The text runs from the directive's first character up to the last
+        character before the first newline that is not preceded by a
+        backslash, or to the end of the file if there is no such newline.
+        """
+        text = self.file_contents
+        start = self.tokens[0].location[0]
+        end = start
+        # Bounded by the text length so a macro on the final line of a file
+        # without a trailing newline does not raise IndexError.
+        while end + 1 < len(text):
+            if text[end + 1] == '\n' and text[end] != '\\':
+                break
+            end += 1
+        return text[start:end + 1]
+class TypedefNode:
+    # A "typedef struct/union { ... } name;" declaration, read from the token
+    # list that starts at the 'typedef' keyword.
+    def __init__(self, tokens, kind):
+        # tokens: tokens from the declaration start onwards.
+        # kind: the NodeKind member passed in by get_code_node().
+        self.tokens = tokens
+        self.kind = kind
+    def get_name(self):
+        # Return the value of the first token after the brace that closes the
+        # body. Raises Exception when the tokens run out first.
+        tokens = self.tokens
+        # FIXME: support typedef without brackets
+        # Drop everything up to the opening brace; no break is used, so the
+        # else branch always runs and drops the brace itself.
+        while len(tokens) > 0 and tokens[0].value != '{':
+            tokens = tokens[1:]
+        else:
+            tokens = tokens[1:]
+        # Track nesting so inner struct/union bodies are skipped as well.
+        open_brackets = 1
+        while len(tokens) > 0 and open_brackets != 0:
+            if tokens[0].value == '{':
+                open_brackets += 1
+            elif tokens[0].value == '}':
+                open_brackets -= 1
+            tokens = tokens[1:]
+        if len(tokens) == 0:
+            raise Exception("could not find the typedef name")
+        return tokens[0].value
+    def get_declaration(self):
+        # Return the token values before the opening brace, joined by spaces.
+        final_index = 0
+        tokens = self.tokens
+        while tokens[0].value != '{':
+            tokens = tokens[1:]
+            final_index += 1
+        return " ".join([token.value for token in self.tokens[0:final_index]])
+    def get_fields(self):
+        # Yield a Field(name, type) for each ';'-terminated member of the body.
+        tokens = self.tokens
+        # Move past the opening brace (the else branch always runs).
+        while tokens[0].value != '{':
+            tokens = tokens[1:]
+        else:
+            tokens = tokens[1:]
+        while tokens[0].value != '}':
+            #FIXME: support inner declarations properly
+            #       It is working but with a poor representation
+            # Find the ';' ending this member, ignoring any ';' found inside
+            # a nested brace pair.
+            end_index = 1
+            level = 0
+            while tokens[end_index].value != ';' or level > 0:
+                if tokens[end_index].value == '{':
+                    level += 1
+                elif tokens[end_index].value == '}':
+                    level -= 1
+                end_index += 1
+            # The token before ';' is the name; everything before it is the type.
+            token_name = tokens[end_index - 1].value
+            token_type = " ".join([token.value for token in tokens[0:end_index - 1]])
+            yield Field(token_name, token_type)
+            # Continue after the ';'.
+            tokens = tokens[end_index + 1:]
+class TypedefEnumNode(TypedefNode):
+    """A "typedef enum { ... } name;" declaration."""
+
+    def tokens_to_field(self, value_tokens):
+        """Build an EnumField from the token values of one enumerator.
+
+        value_tokens is [name] or [name, '=', value tokens...]; the value
+        tokens are joined by spaces, and value is None when there are none.
+        """
+        if len(value_tokens) > 2:
+            return EnumField(value_tokens[0], " ".join(value_tokens[2:]))
+        return EnumField(value_tokens[0], None)
+
+    def get_fields(self):
+        """Yield an EnumField for every enumerator in the enum body."""
+        tokens = self.tokens
+        while tokens[0].value != '{':
+            tokens = tokens[1:]
+        tokens = tokens[1:]
+        value_tokens = []
+        for token in tokens:
+            # Compare the token's value: a Token tuple never equals a str, so
+            # testing the token itself would miss the closing brace and lose
+            # the last enumerator.
+            if token.value in (',', '}'):
+                # Nothing is pending between a trailing comma and '}'.
+                if value_tokens:
+                    yield self.tokens_to_field(value_tokens)
+                value_tokens = []
+                if token.value == '}':
+                    return
+            else:
+                value_tokens.append(token.value)
+        # The tokens ran out before a closing brace: emit what was collected.
+        if value_tokens:
+            yield self.tokens_to_field(value_tokens)
+class FunctionNode:
+    """A function declaration; used for anything not a macro or typedef."""
+
+    def __init__(self, tokens):
+        self.tokens = tokens
+        self.kind = NodeKind.FUNCTION
+
+    def get_ret_type(self):
+        """Return the tokens before the function name, joined by spaces."""
+        end_index = 0
+        while self.tokens[end_index].value != '(':
+            end_index += 1
+        # The token right before '(' is the name, so it is left out.
+        return " ".join([token.value for token in self.tokens[0:end_index - 1]])
+
+    def get_name(self):
+        """Return the value of the token right before the first '('."""
+        tokens = self.tokens
+        while len(tokens) > 1 and tokens[1].value != '(':
+            tokens = tokens[1:]
+        return tokens[0].value
+
+    def get_fields(self):
+        """Yield a Field(name, type) for every parameter.
+
+        Yields nothing for an empty parameter list "()".
+        """
+        tokens = self.tokens
+        while tokens[0].value != '(':
+            tokens = tokens[1:]
+        tokens = tokens[1:]
+        # With "()" the scan below starts one token past ')' and would run
+        # into whatever follows the declaration.
+        if tokens[0].value == ')':
+            return
+        while True:
+            end_index = 1
+            while tokens[end_index].value != ',' and tokens[end_index].value != ')':
+                end_index += 1
+            # The last token of the parameter is its name; the rest is the type.
+            token_name = tokens[end_index - 1].value
+            # FIXME: use file contents to get this range
+            token_type = " ".join([token.value for token in tokens[0:end_index - 1]])
+            yield Field(token_name, token_type)
+            if tokens[end_index].value == ')':
+                break
+            tokens = tokens[end_index + 1:]
+def get_code_node(tokens, file):
+    """Build the node for the declaration that starts at tokens[0].
+
+    tokens: the tokens following a docstring.
+    file: the full source text, only needed by MacroNode.
+    A MACRO token gives a MacroNode, "typedef union/struct/enum" gives the
+    matching typedef node, and anything else is treated as a function.
+    """
+    first = tokens[0]
+    if first.kind == 'MACRO':
+        return MacroNode(tokens, file)
+    # FIXME: allows structures without typedef
+    if first.value == 'typedef':
+        aggregate = tokens[1].value
+        if aggregate == 'union':
+            return TypedefNode(tokens, NodeKind.TYPEDEF_UNION)
+        if aggregate == 'struct':
+            return TypedefNode(tokens, NodeKind.TYPEDEF_STRUCT)
+        if aggregate == 'enum':
+            return TypedefEnumNode(tokens, NodeKind.TYPEDEF_ENUM)
+    return FunctionNode(tokens)
+def group_docstring_into_files(docstrings):
+    """Distribute docstrings over DocStringFile objects, one per man page.
+
+    Entry docstrings (declaration name equal to the group name) are handled
+    first, so the file for a group already exists when its other members are
+    visited. A grouped docstring joins the file named after its group; every
+    other docstring gets a file of its own.
+    """
+    ordered = sorted(docstrings, key=lambda d: d.is_entry_doc(), reverse=True)
+    files = []
+    for docstring in ordered:
+        target = None
+        for candidate in files:
+            if candidate.get_name() == docstring.get_group_name():
+                target = candidate
+                break
+        if docstring.is_group() and target:
+            target.docstrings.append(docstring)
+        else:
+            files.append(DocStringFile([docstring]))
+    return files
+def extract_comment(comment):
+    """Return the body lines of a /** ... */ comment.
+
+    The first line (the opening "/**") is skipped and reading stops at the
+    first line that starts with "*/" once stripped. Each remaining line is
+    stripped and loses its first two characters (the "* " prefix).
+    """
+    body = []
+    for raw_line in comment.splitlines()[1:]:
+        stripped = raw_line.strip()
+        if stripped.startswith("*/"):
+            break
+        body.append(stripped[2:])
+    return body
+def extract_docstring(tokens, lines):
+    """Collect every documented declaration and group them into man pages.
+
+    tokens: the token list of the whole file.
+    lines: the full source text, forwarded to get_code_node().
+    Returns the result of group_docstring_into_files().
+    """
+    docstrings = []
+    # Walk by index rather than re-slicing the list on every token.
+    for index, token in enumerate(tokens):
+        if token.kind != 'DOCSTR':
+            continue
+        following = tokens[index + 1:]
+        if not following:
+            # A docstring that is the last token documents nothing.
+            continue
+        docstrings.append(
+            DocString(
+                extract_comment(token.value),
+                get_code_node(following, lines)
+            )
+        )
+    return group_docstring_into_files(docstrings)
+def man_print_fn_synopsis(docstring):
+    """Return the groff lines of a function prototype, one parameter per line.
+
+    Parameter types are passed as the bold argument of .BI and names as the
+    italic one; parameters after the first are indented to the column that
+    follows the opening parenthesis.
+    """
+    node = docstring.code_node
+    fields = list(node.get_fields())
+    paren = f"{node.get_ret_type()} {node.get_name()}("
+    groff_lines = [".nf"]
+    if not fields:
+        # Without parameters the loop below would print no prototype at all.
+        groff_lines.append(f'.B "{paren});"')
+    padding = " " * len(paren)
+    for index, field in enumerate(fields):
+        post = ");" if index == len(fields) - 1 else ","
+        groff_lines.append(f".BI \"{paren}{field.type} \" {field.name} {post}")
+        paren = padding
+    groff_lines.append(".fi")
+    return groff_lines
+def man_print_typedef(docstring):
+    """Return the groff lines of a struct/union typedef synopsis.
+
+    The declaration head, braces and closing name are emitted with .B. A
+    member with a type uses .BI with the type first and the name second; a
+    member without a type is emitted with .B.
+    """
+    node = docstring.code_node
+    groff_lines = []
+    groff_lines.append(f'.B "{node.get_declaration()}"')
+    groff_lines.append('.br')
+    groff_lines.append('.B "{"')
+    for field in node.get_fields():
+        groff_lines.append('.br')
+        if field.type:
+            groff_lines.append(f'.BI "    {field.type} " "{field.name}";')
+        else:
+            groff_lines.append(f'.B "    {field.name};"')
+    groff_lines.append('.br')
+    groff_lines.append(f'.B "}} {node.get_name()};"')
+    return groff_lines
+def man_print_enum_synopisis(docstring):
+    """Return the groff lines of an enum typedef synopsis.
+
+    An enumerator with an explicit value uses .BR with the name first and
+    "= value" second; one without a value is emitted with .B.
+    """
+    node = docstring.code_node
+    groff_lines = []
+    groff_lines.append(f'.B "{node.get_declaration()}"')
+    groff_lines.append('.br')
+    groff_lines.append('.B "{"')
+    for field in node.get_fields():
+        groff_lines.append('.br')
+        if field.value:
+            groff_lines.append(f'.BR "    {field.name} " "= {field.value}",')
+        else:
+            groff_lines.append(f'.B "    {field.name},"')
+    groff_lines.append('.br')
+    groff_lines.append(f'.B "}} {node.get_name()};"')
+    return groff_lines
+def man_print_macro(docstring):
+    """Return the groff lines showing a macro definition verbatim.
+
+    The first line is emitted with .B and continuation lines follow as-is,
+    all inside a no-fill block so the line breaks are kept.
+    """
+    # Backslashes are doubled so groff does not read the line-continuation
+    # backslashes as the start of an escape sequence.
+    contents = docstring.code_node.get_contents().replace('\\', '\\\\').splitlines()
+    if not contents:
+        return []
+    # .nf/.fi are the no-fill requests already used for function synopses.
+    # The request and its argument must be separate list items with a space
+    # between them, otherwise the output is one fused, invalid request line.
+    return ['.nf', '.B ' + contents[0]] + contents[1:] + ['.fi']
+def ascii_to_groff(text):
+    """Convert the light markup used in docstrings into groff lines.
+
+    <code>/</code> at the start of a line become .EX/.EE, <b> and <i> become
+    font escapes, and "name(N)" references get a bold name. A line starting
+    with "@name: description" becomes a .TP entry; following lines that start
+    with a space are appended to its description.
+    """
+    text = re.sub(r'^ *<code>', '.EX', text, flags=re.M)
+    text = re.sub(r'^ *</code>', '.EE', text, flags=re.M)
+    text = re.sub(r'<b>(.*?)</b>', r'\\fB\1\\fR', text, flags=re.M)
+    text = re.sub(re_group, r'\\fB\2\\fR(\3)', text, flags=re.M)
+    text = re.sub(r'<i>(.*?)</i>', r'\\fI\1\\fR', text, flags=re.M)
+    groff_lines = []
+    ascii_lines = text.splitlines()
+    line_index = 0
+    while line_index < len(ascii_lines):
+        ascii_line = ascii_lines[line_index]
+        if ascii_line.startswith('@'):
+            field_name, _, description = ascii_line.partition(':')
+            groff_lines.append('.TP')
+            groff_lines.append(f'.I {field_name[1:]}')
+            description = description.strip()
+            # startswith() instead of indexing [0]: the next line may be
+            # empty, which ends the entry rather than raising IndexError.
+            while line_index + 1 < len(ascii_lines) and ascii_lines[line_index + 1].startswith(' '):
+                description += ' ' + ascii_lines[line_index + 1].strip()
+                line_index += 1
+            groff_lines.append(description)
+        else:
+            groff_lines.append(ascii_line)
+        line_index += 1
+    return groff_lines
+def generate_docs():
+    """Generate the man pages for every docstring found in ctx.filename.
+
+    For each page the groff source is written to its file path, creating the
+    directory if needed, and each link returned by get_links() is written as
+    a one-line ".so" file pointing at the page. Every written path is
+    printed, prefixed with "MAN".
+    """
+    with open(ctx.filename) as file:
+        file_contents = file.read()
+    for doc_file in extract_docstring(tokenize(file_contents), file_contents):
+        groff_lines = []
+        groff_lines.append(f".TH {doc_file.get_name()} {doc_file.get_man_level()} {doc_file.get_name()} \"\" \"Olang Hacker's manual\"")
+        groff_lines.append(".SH NAME")
+        groff_lines.append(f"{doc_file.get_group_name()} \\- {doc_file.get_title()}")
+        groff_lines.append(".SH SYNOPSIS")
+        groff_lines.append(f".B #include <{ctx.include_name}>")
+        groff_lines.append(".P")
+        for index, docstring in enumerate(doc_file.docstrings):
+            # Separate consecutive declarations with a paragraph break.
+            if index:
+                groff_lines.append('.P')
+            node = docstring.code_node
+            if node.kind == NodeKind.FUNCTION:
+                groff_lines += man_print_fn_synopsis(docstring)
+            elif node.kind in [NodeKind.TYPEDEF_STRUCT, NodeKind.TYPEDEF_UNION]:
+                groff_lines += man_print_typedef(docstring)
+            elif node.kind == NodeKind.MACRO:
+                groff_lines += man_print_macro(docstring)
+            elif node.kind == NodeKind.TYPEDEF_ENUM:
+                groff_lines += man_print_enum_synopisis(docstring)
+        for section in doc_file.get_sections():
+            groff_lines.append(f'.SH {section.name}')
+            groff_lines += ascii_to_groff(section.contents)
+            for subsection in section.subsections:
+                groff_lines.append(f'.SS {subsection.name}')
+                groff_lines += ascii_to_groff(subsection.contents)
+        filepath = doc_file.get_filepath()
+        print(f'MAN\t{filepath}')
+        os.makedirs(os.path.dirname(filepath), exist_ok=True)
+        # "with" closes the output even if the write fails.
+        with open(filepath, "w") as output:
+            output.write("\n".join(groff_lines))
+        for (link, to) in doc_file.get_links():
+            print(f'MAN\t{link}')
+            os.makedirs(os.path.dirname(link), exist_ok=True)
+            with open(link, "w") as output:
+                output.write(f".so {to}")
+if __name__ == "__main__":
+    # Expects exactly: file include_name man_output_dir
+    if len(sys.argv) != 4:
+        print(f'USAGE:\n\t{sys.argv[0]} file include_name man_output_dir')
+        # sys.exit is always available; the bare exit() helper is installed
+        # by the site module and is meant for interactive use.
+        sys.exit(1)
+    ctx = Ctx(*sys.argv[1:])
+    generate_docs()

base-commit: f87fb371a0105a458be07bd3f269bb45da913d16

             reply	other threads:[~2024-10-28  2:06 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-28  2:06 Carlos Maniero [this message]
2024-10-28  2:07 ` [olang/patches/.build.yml] build success

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \ \ \
    --cc=~johnnyrichard/ \

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox