From 1f2dde537c3e714742acbc9ed5ea5857e11fde4a Mon Sep 17 00:00:00 2001
From: dousha
Date: Sat, 28 Dec 2024 16:35:36 +0800
Subject: [PATCH] also preserves iframes

---
 main.py | 38 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/main.py b/main.py
index c040be0..dc8296f 100644
--- a/main.py
+++ b/main.py
@@ -3,16 +3,52 @@
 
 from bs4 import BeautifulSoup
 from slugify import slugify
-from markdownify import markdownify as md
+from markdownify import MarkdownConverter
 import os
 import re
 in_file = 'wpexport20241219.xml'
 
 
+class IframeConverter(MarkdownConverter):
+    """Markdown converter that keeps <iframe> embeds as ``:::`` blocks."""
+
+    def convert_iframe(self, el, text, convert_as_inline):
+        """Convert an <iframe> element into a ``:::`` container block.
+
+        YouTube embeds become ``::: youtube <id>``, Steam embeds become
+        ``::: steam <id>``; any other source (or a YouTube/Steam URL whose
+        id cannot be extracted) is kept as ``::: iframe <src>``.
+        """
+        src = el.get('src')
+        if not src:
+            # No source URL to preserve; fall back to the converted children.
+            return text
+
+        if 'youtube' in src:
+            kind, match = 'youtube', re.search(r'embed/([^?]+)', src)
+        elif 'steam' in src:
+            kind, match = 'steam', re.search(r'/([0-9]+)', src)
+        else:
+            kind, match = 'iframe', None
+
+        # re.search() returns None for URLs of an unexpected shape; keep the
+        # raw iframe instead of crashing the whole export on one odd embed.
+        if match is None:
+            return f"::: iframe {src}\n:::\n"
+
+        return f"::: {kind} {match.group(1)}\n:::\n"
+
+
 def mkdirp(path):
     if not os.path.exists(path):
         os.makedirs(path)
 
+
+def md(html, **options):
+    """Convert *html* to Markdown using IframeConverter with *options*."""
+    return IframeConverter(**options).convert(html)
+
+
 mkdirp('output')
 
 with open(in_file, 'r', encoding='utf-8') as f: