also preserves iframes

This commit is contained in:
dousha 2024-12-28 16:35:36 +08:00
parent f06d83658e
commit 1f2dde537c

21
main.py
View File

@ -3,16 +3,35 @@
from bs4 import BeautifulSoup
from slugify import slugify
from markdownify import markdownify as md
from markdownify import MarkdownConverter
import os
import re
# Input XML file; it is opened for reading (UTF-8) further down in this script.
in_file = 'wpexport20241219.xml'
class IframeConverter(MarkdownConverter):
    """Markdown converter that keeps ``<iframe>`` embeds as ``:::`` blocks.

    YouTube and Steam embeds get dedicated ``::: youtube <id>`` and
    ``::: steam <id>`` blocks; every other iframe is emitted as
    ``::: iframe <src>``.
    """

    def convert_iframe(self, el, text, convert_as_inline):
        """Render an ``<iframe>`` element as a ``:::`` container block.

        Args:
            el: The iframe element; its ``src`` attribute is read via
                ``el.get`` (assumes a BeautifulSoup-style tag -- confirm
                against the markdownify version in use).
            text: Already-converted inner content of the element.
            convert_as_inline: Supplied by the base converter; unused here.

        Returns:
            The block markup for the embed, or ``text`` unchanged when the
            iframe carries no ``src`` attribute.
        """
        src = el.get('src')
        if not src:
            # No URL to embed (e.g. a srcdoc-only iframe): keep whatever
            # inner text was converted instead of raising KeyError.
            return text

        if 'youtube' in src:
            match = re.search(r'embed/([^?]+)', src)
            if match:
                return f"::: youtube {match.group(1)}\n:::\n"
        elif 'steam' in src:
            match = re.search(r'\/([0-9]+)', src)
            if match:
                return f"::: steam {match.group(1)}\n:::\n"

        # Unknown provider, or a known provider whose URL did not contain an
        # extractable id: preserve the raw source rather than crashing.
        return f"::: iframe {src}\n:::\n"
def mkdirp(path):
    """Create directory ``path`` and any missing parents (like ``mkdir -p``).

    Calling it for a directory that already exists is a no-op. Uses
    ``exist_ok=True`` instead of a separate existence check so there is no
    window between checking and creating.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)
def md(html, **options):
    """Convert ``html`` to Markdown using ``IframeConverter``.

    Any keyword ``options`` are forwarded unchanged to the converter's
    constructor.
    """
    converter = IframeConverter(**options)
    return converter.convert(html)
# Ensure the 'output' directory exists before the input file is processed.
mkdirp('output')
with open(in_file, 'r', encoding='utf-8') as f: