add shortcode converter and code blocks trimmer

2025-01-06 15:08:45 +08:00 · 2025-01-06 15:08:45 +08:00 · 514bf7ac93
commit 514bf7ac93
parent 238f7a3657
1 changed files with 57 additions and 0 deletions
--- a/main.py
+++ b/main.py
@ -69,6 +69,58 @@ def fix_math(content):
 	return out + content[pos:]
 # Shortcode tags that map one-to-one onto a fenced code block of the same language.
 _CODE_FENCE_TAGS = frozenset({
 	'c', 'cpp', 'python', 'csharp', 'java', 'javascript', 'typescript',
 	'bash', 'html', 'css', 'php', 'sql', 'json', 'xml', 'yaml',
 	'markdown', 'text', 'plaintext',
 })
 def fix_short_code_content(tag, params, content):
 	"""Return the Markdown replacement for one shortcode, or None if the tag is not handled.
 	tag is the shortcode name, content is the text between the opening and
 	closing tags. params (the raw attribute text of the opening tag) is
 	accepted for the caller's convenience but is not used.
 	Language tags become a fenced code block, 'latex' becomes inline math
 	wrapped in backslash-parenthesis delimiters, and 'graphviz' / 'abcjs' /
 	'abc' become ':::' container blocks.
 	"""
 	if tag in _CODE_FENCE_TAGS:
 		return f"```{tag}\n{content}\n```"
 	if tag == 'latex':
 		# The backslashes are doubled on purpose: a bare "\(" is an invalid
 		# escape sequence and triggers a SyntaxWarning on recent Pythons.
 		# The emitted text is unchanged: \( ... \)
 		return f"\\({content}\\)"
 	if tag == 'graphviz':
 		return f"::: graphviz\n{content}\n:::\n"
 	if tag in ('abcjs', 'abc'):
 		# Both spellings are normalised to the same 'abc' container.
 		return f"::: abc\n{content}\n:::\n"
 	return None
 def fix_short_code(content):
 	"""Replace recognised [tag ...]body[/tag] shortcodes in content with Markdown.
 	Each match is handed to fix_short_code_content(); when that returns
 	None the tag is unknown and its text is left as it was. Returns the
 	rewritten string.
 	"""
 	shortcode_regex = re.compile(r'\[([a-zA-Z0-9]+)([^\]]*)\](.+?)\[\/\1\]', re.DOTALL)
 	pos = 0
 	pieces = []
 	while m := shortcode_regex.search(content, pos):
 		pieces.append(content[pos:m.start()])
 		fixed_content = fix_short_code_content(m.group(1), m.group(2), m.group(3))
 		if fixed_content is None:
 			# Unknown tag: copy only its opening tag verbatim and resume
 			# scanning right after it, so that recognised shortcodes nested
 			# inside the unknown one are still converted. Skipping the whole
 			# match would silently leave them untouched. The body and the
 			# closing tag are emitted unchanged by later iterations / the tail.
 			pieces.append(content[m.start():m.start(3)])
 			pos = m.start(3)
 		else:
 			pieces.append(fixed_content)
 			pos = m.end()
 	pieces.append(content[pos:])
 	# Join once at the end instead of growing a string inside the loop.
 	return ''.join(pieces)
 def fix_nesting_code_blocks(content):
 	"""Drop every fence line that is immediately followed by another fence line.
 	A fence line is any line starting with three backticks. Of a run of
 	consecutive fence lines only the last one survives. The result is the
 	remaining lines joined with '\\n' (no trailing newline is added).
 	"""
 	fence = '```'
 	rows = content.splitlines()
 	# Pair each line with its successor; the final line has none.
 	successors = rows[1:] + [None]
 	kept = [
 		row
 		for row, following in zip(rows, successors)
 		if not (
 			row.startswith(fence)
 			and following is not None
 			and following.startswith(fence)
 		)
 	]
 	return '\n'.join(kept)
 def trim_code_blocks(content):
 	"""Normalise the whitespace in and around every triple-backtick code block.
 	Text between blocks is stripped, and each block is re-emitted as a blank
 	line, the opening fence with its language tag, the stripped body, the
 	closing fence and another blank line. The text up to the first space or
 	newline after the opening fence is taken as the language tag. Returns
 	the rewritten string.
 	"""
 	codeblock_regex = re.compile(r'```(.+?)```', re.DOTALL)
 	pos = 0
 	pieces = []
 	while m := codeblock_regex.search(content, pos):
 		pieces.append(content[pos:m.start()].strip())
 		parts = re.split(r'[\n\r ]', m.group(1), 1)
 		if len(parts) == 2:
 			lang, body = parts
 		else:
 			# No space or newline inside the fences (e.g. ```code```): there
 			# is nothing to split a language tag from, so treat the text as
 			# the body. Indexing parts[1] here would raise IndexError.
 			lang, body = '', parts[0]
 		pieces.append('\n\n```' + lang.strip() + '\n' + body.strip() + '\n```\n\n')
 		pos = m.end()
 	pieces.append(content[pos:].strip())
 	return ''.join(pieces)
 mkdirp('output')
 with open(in_file, 'r', encoding='utf-8') as f:
@ -149,7 +201,12 @@ for item in items:
 		content = content + '\n' + '\n\n'.join(ref_output) + '\n'
 	content = fix_short_code(content)
 	content = fix_math(content)
 	content = fix_nesting_code_blocks(content)
 	content = trim_code_blocks(content)
 	if not content.endswith('\n'):
 		content = content + '\n'
 	frontmatter = f"---\nlayout: {post_type}\nid: {post_id}\ntitle: \"{title}\"\ncreator: \"{creator}\"\ndate: {date}\ncategories:\n{categories}\ntags:\n{tags}\ndraft: {'true' if status == 'draft' else 'false'}\npublished: {'true' if status == 'publish' else 'false'}\n---\n\n"