also preserves iframes

This commit is contained in:
dousha 2024-12-28 16:35:36 +08:00
parent f06d83658e
commit 1f2dde537c

21
main.py
View File

@ -3,16 +3,35 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from slugify import slugify from slugify import slugify
from markdownify import markdownify as md from markdownify import MarkdownConverter
import os import os
import re import re
in_file = 'wpexport20241219.xml' in_file = 'wpexport20241219.xml'
class IframeConverter(MarkdownConverter):
    """Markdown converter that keeps ``<iframe>`` embeds instead of dropping them.

    Each iframe is rewritten as a ``:::``-fenced block whose first line names
    the embed kind (``youtube``, ``steam`` or the generic ``iframe``) followed
    by the identifier or URL taken from the iframe's ``src`` attribute.
    NOTE(review): the ``:::`` block syntax is presumably consumed by whatever
    renders the generated Markdown -- confirm against that renderer.
    """

    def convert_iframe(self, el, text, convert_as_inline):
        """Return the ``:::`` block for one ``<iframe>`` element.

        Args:
            el: The iframe element; only its ``src`` attribute is read.
            text: Already-converted inner text of the element (unused).
            convert_as_inline: Inline-context flag passed by the converter
                (unused).

        Returns:
            ``"::: youtube <code>\\n:::\\n"`` or ``"::: steam <code>\\n:::\\n"``
            when the URL matches the respective pattern,
            ``"::: iframe <src>\\n:::\\n"`` for any other URL, and an empty
            string when the element has no ``src`` at all.
        """
        src = el.get('src')
        if not src:
            # No URL to preserve (e.g. an iframe that only carries srcdoc);
            # emitting nothing is better than aborting the whole conversion.
            return ''
        if 'youtube' in src:
            match = re.search(r'embed/([^?]+)', src)
            # Only embed-style URLs carry the code in this position; anything
            # else falls through to the generic directive below.
            if match:
                return f"::: youtube {match.group(1)}\n:::\n"
        if 'steam' in src:
            match = re.search(r'\/([0-9]+)', src)
            if match:
                return f"::: steam {match.group(1)}\n:::\n"
        return f"::: iframe {src}\n:::\n"
def mkdirp(path):
    """Create directory ``path`` (and missing parents) if it does not exist.

    Does nothing when something already exists at ``path``. Other errors from
    ``os.makedirs`` (for example permission problems) propagate unchanged.
    """
    # Attempt the creation directly instead of checking os.path.exists()
    # first: the check-then-create sequence can fail if the directory appears
    # between the two calls.
    try:
        os.makedirs(path)
    except FileExistsError:
        pass
def md(html, **options):
    """Convert an HTML string to Markdown using ``IframeConverter``.

    Keyword arguments are forwarded unchanged to the ``IframeConverter``
    constructor; the converter's ``convert`` result is returned as is.
    """
    converter = IframeConverter(**options)
    markdown = converter.convert(html)
    return markdown
mkdirp('output') mkdirp('output')
with open(in_file, 'r', encoding='utf-8') as f: with open(in_file, 'r', encoding='utf-8') as f: