# Shortcode tags whose body is emitted as a fenced code block, with the tag
# itself used as the fence's info string.
_CODE_FENCE_TAGS = frozenset({
    'c', 'cpp', 'python', 'csharp', 'java', 'javascript', 'typescript',
    'bash', 'html', 'css', 'php', 'sql', 'json', 'xml', 'yaml', 'markdown',
    'text', 'plaintext',
})

# [tag params]body[/tag]; the closing tag must repeat the opening tag name.
_SHORTCODE_RE = re.compile(
    r'\[([a-zA-Z0-9]+)([^\]]*)\](.+?)\[\/\1\]', re.DOTALL)

# Shortest run of text enclosed by two triple-backtick fences.
_CODE_BLOCK_RE = re.compile(r'```(.+?)```', re.DOTALL)

# Whitespace-only lines at the very start of a code block body.
_LEADING_BLANK_LINES_RE = re.compile(r'\A(?:[ \t]*\r?\n)+')


def fix_short_code_content(tag, params, content):
    """Return the Markdown replacement for one shortcode, or None.

    Args:
        tag: Shortcode name, e.g. ``'python'`` or ``'latex'``.
        params: Text between the tag name and the closing ``]`` of the
            opening tag. Currently unused; kept for interface stability.
        content: Text between the opening and closing tags.

    Returns:
        The replacement string for a recognised tag, or None when the tag is
        not recognised and the shortcode should be left untouched.
    """
    if tag in _CODE_FENCE_TAGS:
        return f"```{tag}\n{content}\n```"
    if tag == 'latex':
        # Backslashes are escaped explicitly: "\(" in a non-raw string is an
        # invalid escape sequence and triggers a warning on recent Python.
        return f"\\({content}\\)"
    if tag == 'graphviz':
        return f"::: graphviz\n{content}\n:::\n"
    if tag in ('abcjs', 'abc'):
        return f"::: abc\n{content}\n:::\n"
    return None


def fix_short_code(content):
    """Rewrite every recognised ``[tag]...[/tag]`` shortcode in *content*.

    Shortcodes whose tag fix_short_code_content() does not recognise are
    copied through verbatim, including anything nested inside them.

    Args:
        content: Text possibly containing shortcodes.

    Returns:
        The text with recognised shortcodes replaced.
    """
    def _replace(match):
        fixed = fix_short_code_content(
            match.group(1), match.group(2), match.group(3))
        return match.group(0) if fixed is None else fixed

    # A callable replacement is inserted literally (no backslash processing).
    return _SHORTCODE_RE.sub(_replace, content)


def fix_nesting_code_blocks(content):
    """Collapse runs of consecutive fence lines down to the last one.

    A line starting with three backticks is dropped when the line right
    after it also starts with three backticks, which removes the outer fence
    of a code block that was wrapped twice.

    Args:
        content: Text to process.

    Returns:
        The remaining lines joined with ``'\\n'`` (no trailing newline).
    """
    lines = content.splitlines()
    kept = []
    for index, line in enumerate(lines):
        followed_by_fence = (
            index + 1 < len(lines) and lines[index + 1].startswith('```'))
        if line.startswith('```') and followed_by_fence:
            continue
        kept.append(line)
    return '\n'.join(kept)


def trim_code_blocks(content):
    """Normalise whitespace in and around fenced code blocks.

    Each block is re-emitted as a blank line, the opening fence with its info
    string, the code, the closing fence and another blank line. Text between
    blocks is stripped of surrounding whitespace. Inside a block, leading
    blank lines and trailing whitespace are removed, but the indentation of
    the first code line is preserved so indented snippets stay aligned.

    Args:
        content: Text possibly containing fenced code blocks.

    Returns:
        The normalised text.
    """
    pieces = []
    pos = 0
    for match in _CODE_BLOCK_RE.finditer(content):
        pieces.append(content[pos:match.start()].strip())
        # Text up to the first newline/space is the info string (language).
        parts = re.split(r'[\n\r ]', match.group(1), maxsplit=1)
        if len(parts) == 2:
            info, body = parts
        else:
            # No separator at all (e.g. "```x```"): treat the text as code
            # with an empty info string instead of failing on parts[1].
            info, body = '', parts[0]
        body = _LEADING_BLANK_LINES_RE.sub('', body).rstrip()
        pieces.append('\n\n```' + info.strip() + '\n' + body + '\n```\n\n')
        pos = match.end()
    pieces.append(content[pos:].strip())
    return ''.join(pieces)
in items: content = content + '\n' + '\n\n'.join(ref_output) + '\n' + content = fix_short_code(content) content = fix_math(content) + content = fix_nesting_code_blocks(content) + content = trim_code_blocks(content) + if not content.endswith('\n'): + content = content + '\n' frontmatter = f"---\nlayout: {post_type}\nid: {post_id}\ntitle: \"{title}\"\ncreator: \"{creator}\"\ndate: {date}\ncategories:\n{categories}\ntags:\n{tags}\ndraft: {'true' if status == 'draft' else 'false'}\npublished: {'true' if status == 'publish' else 'false'}\n---\n\n"