add shortcode converter and code block trimmer

This commit is contained in:
dousha 2025-01-06 15:08:45 +08:00
parent 238f7a3657
commit 514bf7ac93

57
main.py
View File

@ -69,6 +69,58 @@ def fix_math(content):
return out + content[pos:]
def fix_short_code_content(tag, params, content):
    """Return the Markdown replacement for one shortcode, or None.

    Args:
        tag: Shortcode name, e.g. ``'python'`` or ``'latex'``.
        params: Raw attribute text that followed the tag name. It is accepted
            for interface compatibility but is not used by any branch.
        content: Text found between the opening and closing shortcode tags.

    Returns:
        The converted text, or ``None`` when ``tag`` is not a recognised
        shortcode (the caller then keeps the original text untouched).
    """
    # Tags that map directly onto a fenced code block labelled with the tag.
    code_tags = {
        'c', 'cpp', 'python', 'csharp', 'java', 'javascript', 'typescript',
        'bash', 'html', 'css', 'php', 'sql', 'json', 'xml', 'yaml',
        'markdown', 'text', 'plaintext',
    }
    if tag in code_tags:
        return f"```{tag}\n{content}\n```"
    if tag == 'latex':
        # Backslashes are escaped explicitly: a bare "\(" is an invalid escape
        # sequence and triggers a SyntaxWarning on recent Python versions.
        return f"\\({content}\\)"
    if tag == 'graphviz':
        return f"::: graphviz\n{content}\n:::\n"
    if tag in ('abcjs', 'abc'):
        # Both spellings are emitted as the same "abc" container block.
        return f"::: abc\n{content}\n:::\n"
    return None
def fix_short_code(content):
    """Replace ``[tag ...]body[/tag]`` shortcodes in ``content``.

    Every non-overlapping shortcode is passed to ``fix_short_code_content``.
    When that function returns ``None`` the shortcode is left exactly as it
    was written; otherwise its return value replaces the whole shortcode.

    Args:
        content: Text that may contain shortcodes.

    Returns:
        ``content`` with the recognised shortcodes converted.
    """
    # The negative lookahead pins the tag name to the full alphanumeric run.
    # Without it the engine may backtrack inside the name, so "[cpp]...[/c]"
    # would be read as tag "c" with params "pp" and be converted by mistake.
    shortcode_regex = re.compile(
        r'\[([a-zA-Z0-9]+)(?![a-zA-Z0-9])([^\]]*)\](.+?)\[\/\1\]',
        re.DOTALL,
    )

    def convert(m):
        fixed_content = fix_short_code_content(m.group(1), m.group(2), m.group(3))
        # Unknown tags are kept verbatim.
        return m.group(0) if fixed_content is None else fixed_content

    # A callable replacement is inserted literally (no backslash processing),
    # and sub() builds the result in one pass instead of repeated string
    # concatenation.
    return shortcode_regex.sub(convert, content)
def fix_nesting_code_blocks(content):
    """Collapse runs of consecutive code-fence lines down to the last one.

    A line starting with three backticks is dropped whenever the line right
    after it also starts with three backticks. All other lines are kept.
    The result is re-joined with ``'\\n'``, so a trailing newline in
    ``content`` is not preserved.
    """
    fence = '```'
    rows = content.splitlines()
    # Pair each line with its successor; the final line is paired with None.
    successors = rows[1:] + [None]
    kept = [
        row
        for row, following in zip(rows, successors)
        if not (
            row.startswith(fence)
            and following is not None
            and following.startswith(fence)
        )
    ]
    return '\n'.join(kept)
def trim_code_blocks(content):
    """Normalise whitespace in and around triple-backtick code blocks.

    Each block is rewritten as a blank line, the opening fence with its
    language tag, the stripped body, the closing fence, and a blank line.
    Text between blocks, and the leading and trailing text, is stripped of
    surrounding whitespace.

    Args:
        content: Text that may contain fenced code blocks.

    Returns:
        The text with every fenced block normalised.
    """
    codeblock_regex = re.compile(r'```(.+?)```', re.DOTALL)
    pieces = []
    pos = 0
    for m in codeblock_regex.finditer(content):
        pieces.append(content[pos:m.start()].strip())
        # Everything up to the first space or line break is the language tag.
        parts = re.split(r'[\n\r ]', m.group(1), maxsplit=1)
        if len(parts) == 2:
            lang, body = parts
        else:
            # No separator at all (e.g. inline "```x```"): treat the text as
            # the body with no language, instead of failing on parts[1].
            lang, body = '', parts[0]
        pieces.append('\n\n```' + lang.strip() + '\n' + body.strip() + '\n```\n\n')
        pos = m.end()
    pieces.append(content[pos:].strip())
    return ''.join(pieces)
mkdirp('output')
with open(in_file, 'r', encoding='utf-8') as f:
@ -149,7 +201,12 @@ for item in items:
content = content + '\n' + '\n\n'.join(ref_output) + '\n'
content = fix_short_code(content)
content = fix_math(content)
content = fix_nesting_code_blocks(content)
content = trim_code_blocks(content)
if not content.endswith('\n'):
content = content + '\n'
frontmatter = f"---\nlayout: {post_type}\nid: {post_id}\ntitle: \"{title}\"\ncreator: \"{creator}\"\ndate: {date}\ncategories:\n{categories}\ntags:\n{tags}\ndraft: {'true' if status == 'draft' else 'false'}\npublished: {'true' if status == 'publish' else 'false'}\n---\n\n"