# Copyright (C) 2024 Carlos Maniero
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import sys
import re

# Matches one <pre> block to highlight. It is applied with re.DOTALL, and the
# replacement callback reads three named groups from the match:
#   lang  - language key used to select the lexer rules
#   class - extra CSS classes to carry over (optional, may be None)
#   code  - the block's text content
#
# NOTE(review): the HTML-like parts of this literal (tag/attribute names and
# the three group names) were missing from the reviewed copy. The group names
# are the ones the callback reads; the attribute layout below (`class` before
# `data-lang`) is a best-effort reconstruction - confirm against the pages
# this tool is run on, or restore the literal from version control.
re_hl_block = (
    r'<pre(?: class="(?P<class>[^"]*)")? data-lang="(?P<lang>[^"]*)".*?>'
    r'(?P<code>.*?)<\/pre>'
)

# Token rules per language. Each rule is (pattern, css_class): patterns are
# matched against the start of the remaining line, the text captured by the
# named group `value` is what gets consumed, and css_class is the tag applied
# to it (None = emit the text untagged). Rules are tried top to bottom, so
# order matters. The 'default' rules are appended after a language's own rules
# by format_code_line.
#
# NOTE(review): the group name was missing from every pattern in the reviewed
# copy (bare `(?P`, which `re` rejects). `value` is the name that
# format_code_line reads via match.group('value').
lexers = {
    'default': [
        (r'^(?P<value> +)', None),
        (r'^(?P<value>\d+)', 'num'),
        (r'^(?P<value>\w+)', 'ident'),
        (r'^(?P<value>\(|\)|{|})', 'brac'),
        # `&\w+;` comes first so an HTML entity is one symbol, not '&' + ident.
        (r'^(?P<value>&\w+;|\^|\~|!|@|\#|\$|%|&|\*|-|\+|\.|=|,|:|;|\||\\|/|\?)', 'symbol'),
        # Catch-all: anything no earlier rule recognises is emitted untagged.
        (r'^(?P<value>.+)', None),
    ],
    'olang': [
        (r'^(?P<value>#.*)', 'comment'),
        # Only the name is captured; the '(' stays in the line for the next
        # rule. This rule precedes 'key', so a keyword written directly before
        # '(' is tagged fn-name.
        (r'^(?P<value>\w+) *?\(', 'fn-name'),
        # Trailing \b keeps identifiers such as `iffy` or `constant` from
        # being split into a keyword plus a remainder.
        (r'^(?P<value>fn|if|else|return|while|var|const|extern)\b', 'key'),
        (r'^\b(?P<value>u8|u16|u32|u64)\b', 'type'),
    ],
    'c': [
        (r'^(?P<value>#.*)', 'macro'),
        (r'^(?P<value>".*?")', 'str'),
        (r'^(?P<value>//.*)', 'comment'),
        (r'^(?P<value>\w+) *?\(', 'fn-name'),
        (r'^(?P<value>fn|if|else|return|while|var|const|extern)\b', 'key'),
        (r'^\b(?P<value>int|long|float)\b', 'type'),
    ],
    'bash': [
        # An option is the dashes plus the whole following word (and one
        # trailing space). The previous lazy `.+?` stopped after a single
        # character, so `--help` was split into `--h` + `elp`.
        (r'^(?P<value>-+\S+ ?)', 'opt'),
        # '-' is placed last so it is a literal; written as `\.-/` it formed a
        # range that covered only '.' and '/', splitting hyphenated names.
        (r'^(?P<value>[A-z0\d\./-]+)', 'ident'),
    ],
    'output': [
        # Program output is never tokenised: the whole line is emitted as-is.
        (r'(?P<value>.*)', None),
    ],
    'ebnf': [
        (r'^(?P<value>.+?::=)', 'fn-name'),
        (r'^(?P<value>\|)', 'fn-name'),
        (r'^(?P<value>\?|\*)', 'key'),
        (r'^(?P<value>\(\*.*?\*\))', 'comment'),
        (r'^(?P<value>\'.*?\')', 'str'),
    ],
}

def format_code_line(line, lang):
    """Return *line* with its tokens wrapped in highlighting markup.

    The rules registered for *lang* in ``lexers`` are tried first, followed by
    the ``'default'`` rules; an unknown *lang* uses the default rules only.
    The line is consumed left to right: the first rule that matches the
    remaining text decides the next token, and the text captured by its
    ``value`` group is removed from the front of the line.

    Tokens whose rule has a class name are emitted as
    ``<span class="NAME">TOKEN</span>``; tokens whose rule has ``None`` are
    emitted unchanged. The token text is not escaped.

    NOTE(review): the span markup is a reconstruction. In the reviewed copy
    the tagged and untagged branches produced identical output and the class
    name was never used - confirm the exact markup against the stylesheet.
    """
    rules = lexers.get(lang, []) + lexers['default']
    pieces = []

    while line:
        for regex, name in rules:
            match = re.match(regex, line)
            if not match:
                continue
            value = match.group('value')
            if not value:
                # A zero-width token would never shorten the line; let the
                # next rule have a go instead of spinning forever.
                continue
            line = line[len(value):]
            pieces.append(f'<span class="{name}">{value}</span>' if name else value)
            break
        else:
            # No rule consumed anything: emit the remainder verbatim so the
            # loop always terminates.
            pieces.append(line)
            line = ''

    return ''.join(pieces)

def format_pre(match):
    """Build the highlighted replacement for one matched code block.

    *match* must provide the named groups ``lang``, ``class`` and ``code``.
    Trailing blank lines of the code are dropped, every remaining line is
    highlighted with ``format_code_line``, and the result is wrapped in a
    ``<pre>`` carrying the classes ``code-hl``, ``lang-<lang>`` and whatever
    the ``class`` group captured (if anything).

    NOTE(review): the ``<pre>`` wrapper is a reconstruction. In the reviewed
    copy the returned literal had lost its markup and ``css_classes`` was
    computed but never used - confirm the exact output markup.
    """
    lang = match.group('lang')
    css_class = match.group('class')
    css_classes = f'code-hl lang-{lang} {css_class or ""}'.strip()
    raw_lines = match.group('code').split('\n')

    # The emptiness check keeps an empty or all-blank block from raising
    # IndexError once every line has been removed.
    while raw_lines and raw_lines[-1].strip() == '':
        raw_lines.pop()

    code = "\n".join(format_code_line(line, lang) for line in raw_lines)

    return f'<pre class="{css_classes}">{code}</pre>'


def codehl(code):
    """Return *code* with every block matched by ``re_hl_block`` highlighted."""
    # DOTALL lets the pattern's `.` span the newlines inside a code block.
    return re.sub(re_hl_block, format_pre, code, flags=re.DOTALL)


if __name__ == '__main__':
    # Each file named on the command line is rewritten in place.
    for file_name in sys.argv[1:]:
        print(f"HL\t{file_name}")
        with open(file_name, 'r+') as file:
            content = codehl(file.read())
            file.seek(0)
            file.write(content)
            # The new content may be shorter than the old; cut the leftover.
            file.truncate()