preserve syntax highlighter language settings
This commit is contained in:
parent
1f2dde537c
commit
b7a5a8dce2
3
.idea/misc.xml
generated
3
.idea/misc.xml
generated
@ -1,4 +1,7 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project version="4">
|
<project version="4">
|
||||||
|
<component name="Black">
|
||||||
|
<option name="sdkName" value="wp-migrator" />
|
||||||
|
</component>
|
||||||
<component name="ProjectRootManager" version="2" project-jdk-name="wp-migrator" project-jdk-type="Python SDK" />
|
<component name="ProjectRootManager" version="2" project-jdk-name="wp-migrator" project-jdk-type="Python SDK" />
|
||||||
</project>
|
</project>
|
39
main.py
39
main.py
@ -6,9 +6,11 @@ from slugify import slugify
|
|||||||
from markdownify import MarkdownConverter
|
from markdownify import MarkdownConverter
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
in_file = 'wpexport20241219.xml'
|
in_file = 'wpexport20241219.xml'
|
||||||
|
|
||||||
|
|
||||||
class IframeConverter(MarkdownConverter):
|
class IframeConverter(MarkdownConverter):
|
||||||
def convert_iframe(self, el, text, convert_as_inline):
|
def convert_iframe(self, el, text, convert_as_inline):
|
||||||
src = el['src']
|
src = el['src']
|
||||||
@ -32,6 +34,10 @@ def md(html, **options):
|
|||||||
return IframeConverter(**options).convert(html)
|
return IframeConverter(**options).convert(html)
|
||||||
|
|
||||||
|
|
||||||
|
def code_parser(el):
    """Return the language name to use for a converted code element.

    Used as the ``code_language_callback`` passed to ``md``. The language
    is read from the element's ``data-language`` attribute; ``'text'`` is
    returned when the attribute is missing or empty.

    Args:
        el: A parsed element exposing ``has_attr(name)`` and ``el[name]``
            (presumably a BeautifulSoup tag -- confirm against the
            converter that invokes this callback).

    Returns:
        The language name as a string, or ``'text'`` as the fallback.
    """
    if not el.has_attr('data-language'):
        return 'text'
    language = el['data-language']
    # The attribute value may be a plain string or a list of tokens,
    # depending on how the parser treats the attribute. Indexing a plain
    # string with [0] would keep only its first character, so only unwrap
    # real sequences.
    if isinstance(language, (list, tuple)):
        language = language[0] if language else ''
    # An empty attribute value carries no language; use the same fallback
    # as a missing attribute instead of raising or emitting an empty name.
    return language or 'text'
|
||||||
|
|
||||||
|
|
||||||
mkdirp('output')
|
mkdirp('output')
|
||||||
|
|
||||||
with open(in_file, 'r', encoding='utf-8') as f:
|
with open(in_file, 'r', encoding='utf-8') as f:
|
||||||
@ -59,13 +65,33 @@ for item in items:
|
|||||||
tags = item.findAll('category', {'domain': 'post_tag'})
|
tags = item.findAll('category', {'domain': 'post_tag'})
|
||||||
tags = '\n'.join(list(map(lambda x: ' - ' + x.contents[0], tags)))
|
tags = '\n'.join(list(map(lambda x: ' - ' + x.contents[0], tags)))
|
||||||
categories = item.findAll('category', {'domain': 'category'})
|
categories = item.findAll('category', {'domain': 'category'})
|
||||||
categories = '\n'.join(list(map(lambda x: ' - ' + x.contents[0], categories)))
|
categories = '\n'.join(
|
||||||
|
list(map(lambda x: ' - ' + x.contents[0], categories)))
|
||||||
content = item.find('content:encoded', recursive=False).contents[0]
|
content = item.find('content:encoded', recursive=False).contents[0]
|
||||||
content = re.sub(r'<!-- \/?wp:.+?\s*-->', '', content)
|
content = re.sub(r'<!-- /?wp:((?!syntaxhighlighter).)*\s*-->', '', content)
|
||||||
content = re.sub(r'<!--more-->', '<p class="more">:::more:::</p>', content)
|
content = re.sub(r'<!--more-->', '<p class="more">:::more:::</p>', content)
|
||||||
content = '\n'.join([s.strip() for s in content.splitlines() if s.strip()])
|
lines = [s for s in content.splitlines()]
|
||||||
content = md(content).strip() + '\n'
|
for i in range(len(lines)):
|
||||||
frontmatter = f"---\nlayout: {post_type}\nid: {post_id}\ntitle: \"{title}\"\ncreator: \"{creator}\"\ndate: {date}\ncategories:\n{categories}\ntags:\n{tags}\ndraft: {'yes' if status == 'draft' else 'no'}\npublished: {'yes' if status == 'publish' else 'no'}\n---\n\n"
|
line = lines[i]
|
||||||
|
if line.startswith('<!-- wp:syntaxhighlighter/code'):
|
||||||
|
possibleParams = line[30:-4].strip()
|
||||||
|
if possibleParams == '':
|
||||||
|
continue
|
||||||
|
params = json.loads(possibleParams)
|
||||||
|
nextline = lines[i + 1]
|
||||||
|
if 'language' in params:
|
||||||
|
nextline = nextline[
|
||||||
|
:4] + f" data-language={params['language']} " + nextline[
|
||||||
|
4:]
|
||||||
|
if 'highlightLines' in params:
|
||||||
|
nextline = nextline[
|
||||||
|
:4] + f" data-highlight={params['highlightLines']} " + nextline[
|
||||||
|
4:]
|
||||||
|
lines[i + 1] = nextline
|
||||||
|
|
||||||
|
content = '\n'.join(lines)
|
||||||
|
content = md(content, code_language_callback=code_parser).strip() + '\n'
|
||||||
|
frontmatter = f"---\nlayout: {post_type}\nid: {post_id}\ntitle: \"{title}\"\ncreator: \"{creator}\"\ndate: {date}\ncategories:\n{categories}\ntags:\n{tags}\ndraft: {'true' if status == 'draft' else 'false'}\npublished: {'true' if status == 'publish' else 'false'}\n---\n\n"
|
||||||
|
|
||||||
title_slug = slugify(title)
|
title_slug = slugify(title)
|
||||||
if len(title_slug) < 1:
|
if len(title_slug) < 1:
|
||||||
@ -77,7 +103,8 @@ for item in items:
|
|||||||
filename_disambiguation = 1
|
filename_disambiguation = 1
|
||||||
|
|
||||||
while os.path.exists(filename_candidate):
|
while os.path.exists(filename_candidate):
|
||||||
filename_candidate = "output/" + title_slug + "_(" + str(filename_disambiguation) + ").md"
|
filename_candidate = "output/" + title_slug + "_(" + str(
|
||||||
|
filename_disambiguation) + ").md"
|
||||||
filename_disambiguation = filename_disambiguation + 1
|
filename_disambiguation = filename_disambiguation + 1
|
||||||
|
|
||||||
with open(filename_candidate, 'w', encoding='utf-8') as f:
|
with open(filename_candidate, 'w', encoding='utf-8') as f:
|
||||||
|
Loading…
Reference in New Issue
Block a user