1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# Copyright (C) 2024 Carlos Maniero <carlos@maniero.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import sys
import re
# Pattern locating one highlightable block: a `<!--syntax:LANG ...-->` marker
# comment followed (lazily) by a `<pre class="verbatim">...</pre>` element.
# Named groups: `lang`, the optional `class` text after the language, and
# `code`, the contents of the <pre> element.
re_hl_block = (
    r'<!--syntax:(?P<lang>\w+)'   # marker opener and language name
    r'( (?P<class>.*?))?'         # optional: one space, then extra text
    r'-->.*?'                     # marker end, then a lazy skip to the <pre>
    r'<pre class="verbatim">'
    r'(?P<code>.*?)'              # lazily captured block contents
    r'<\/pre>'
)
# Tokenizer rules, keyed by language name.
#
# Each rule is a `(regex, css_name)` pair. The regex is tried at the start of
# the not-yet-consumed part of a line and must expose a named group `value`
# holding the text to consume. `css_name` becomes the `code-<css_name>` span
# class; `None` means the text is emitted without a span.
#
# Rules are tried in order and the first match wins. The 'default' rules are
# appended after the language-specific ones by `format_code_line`.
lexers = {
    'default': [
        (r'^(?P<value> +)', None),
        (r'^(?P<value>\d+)', 'num'),
        (r'^(?P<value>\w+)', 'ident'),
        (r'^(?P<value>\(|\)|{|})', 'brac'),
        # `&\w+;` keeps an HTML entity such as `&lt;` together as one symbol.
        (r'^(?P<value>&\w+;|\^|\~|!|@|\#|\$|%|&|\*|-|\+|\.|=|,|:|;|\||\\|/|\?)', 'symbol'),
        # Catch-all: whatever no other rule recognised, up to end of line.
        (r'^(?P<value>.+)', None),
    ],
    'olang': [
        (r'^(?P<value>#.*)', 'comment'),
        # An identifier directly followed by `(`; only the name is consumed.
        (r'^(?P<value>\w+) *?\(', 'fn-name'),
        # The trailing \b stops a keyword from matching as the prefix of a
        # longer identifier (e.g. `var` inside `variable`).
        (r'^(?P<value>fn|if|else|return|while|var|const|extern)\b', 'key'),
        (r'^\b(?P<value>u8|u16|u32|u64)\b', 'type'),
    ],
    'c': [
        (r'^(?P<value>#.*)', 'macro'),
        (r'^(?P<value>".*?")', 'str'),
        (r'^(?P<value>//.*)', 'comment'),
        (r'^(?P<value>\w+) *?\(', 'fn-name'),
        # Same whole-word guard as the olang keyword rule.
        (r'^(?P<value>fn|if|else|return|while|var|const|extern)\b', 'key'),
        (r'^\b(?P<value>int|long|float)\b', 'type'),
    ],
    'bash': [
        # NOTE(review): this matches the dashes, exactly one more character
        # and an optional space, so `--help` is split after `--h`. Confirm
        # whether the whole option was meant to be captured.
        (r'^(?P<value>-+.+? ?)', 'opt'),
        # NOTE(review): `A-z` also spans `[ \ ] ^ _` and the backtick, and
        # `\.-/` is a range covering only `.` and `/` (not `-`). Confirm the
        # intended character set.
        (r'^(?P<value>[A-z0\d\.-/]+)', 'ident'),
    ],
    'output': [
        # Program output is passed through untouched.
        (r'(?P<value>.*)', None),
    ],
    'ebnf': [
        (r'^(?P<value>.+?::=)', 'fn-name'),
        (r'^(?P<value>\|)', 'fn-name'),
        (r'^(?P<value>\?|\*)', 'key'),
        (r'^(?P<value>\(\*.*?\*\))', 'comment'),
        (r'^(?P<value>\'.*?\')', 'str'),
    ],
}
def format_code_line(line, lang):
    """Tokenize one line of code and return it as highlighted HTML.

    The line is consumed from left to right. At each step the rules for
    `lang` (if any) followed by the 'default' rules are tried in order; the
    first rule that matches decides how many characters are consumed (the
    length of its `value` group) and which span class they get.

    Args:
        line: A single line of code, without its trailing newline.
        lang: Key into `lexers`; unknown languages use only the default rules.

    Returns:
        The tokens, each wrapped in `<span class="code-NAME">` when its rule
        has a name, all enclosed in one `<span class="code-line">`.
    """
    # Language-specific rules take precedence over the shared fallback rules.
    rules = lexers.get(lang, []) + lexers['default']
    pieces = []
    while line:
        for regex, name in rules:
            match = re.match(regex, line)
            # A rule that consumes nothing cannot make progress; accepting it
            # would loop forever, so try the next rule instead.
            if not match or not match.group('value'):
                continue
            value = match.group('value')
            line = line[len(value):]
            if name:
                pieces.append(f'<span class="code-{name}">{value}</span>')
            else:
                pieces.append(value)
            break
        else:
            # No rule matched (e.g. the line starts with a newline, which `.`
            # does not match): emit one character verbatim so the loop ends.
            pieces.append(line[0])
            line = line[1:]
    return f'<span class="code-line">{"".join(pieces)}</span>'
def format_pre(match):
    """Build the highlighted `<pre>` element for one matched code block.

    Args:
        match: A regex match exposing the named groups `lang`, `class`
            (may be None or empty) and `code`.

    Returns:
        A `<pre class="code-hl lang-LANG [CLASS]">` element whose content is
        the code with trailing blank lines removed and every remaining line
        passed through `format_code_line`, joined by newlines. A block with
        no non-blank lines yields an empty `<pre>` element.
    """
    lang = match.group('lang')
    css_class = match.group('class')
    # strip() removes the trailing space left when there is no extra class.
    css_classes = f'code-hl lang-{lang} {css_class or ""}'.strip()
    raw_lines = match.group('code').split('\n')
    # Drop trailing blank lines. The emptiness check stops the loop once the
    # list is exhausted instead of raising IndexError on an all-blank block.
    while raw_lines and raw_lines[-1].strip() == '':
        raw_lines.pop()
    code = "\n".join(format_code_line(line, lang) for line in raw_lines)
    return f'<pre class="{css_classes}">{code}</pre>'
def codehl(code):
    """Return `code` with every block matched by `re_hl_block` highlighted.

    Each match is replaced by the string `format_pre` returns for it; text
    outside the matches is left unchanged.
    """
    # DOTALL lets `.` cross newlines, so one match can span several lines.
    block_pattern = re.compile(re_hl_block, flags=re.DOTALL)
    return block_pattern.sub(format_pre, code)
if __name__ == '__main__':
    # Every command-line argument is a file to highlight in place.
    targets = sys.argv[1:]
    for file_name in targets:
        print(f"HL\t{file_name}")
        with open(file_name, 'r+') as handle:
            original = handle.read()
            highlighted = codehl(original)
            # Rewind and overwrite, then cut off whatever is left of the old
            # content beyond the end of the new one.
            handle.seek(0)
            handle.write(highlighted)
            handle.truncate()
|