#!/usr/bin/env python3
# encoding: utf-8
"""Convert a WordPress XML export into one Markdown file per post or page.

Every ``<item>`` whose ``wp:post_type`` is ``post`` or ``page`` is written to
``output/<YYYY>/<MM>/<title-slug>.md``: a YAML-style front matter block
followed by the item body converted from HTML to Markdown.
"""

import os
import re

from bs4 import BeautifulSoup
from markdownify import markdownify as md
from slugify import slugify

in_file = 'wpexport20241219.xml'

OUTPUT_ROOT = 'output'
EXPORTED_TYPES = ('page', 'post')

# NOTE(review): both regex patterns were empty strings in the copy of this
# script that was available (they look like HTML comments that got stripped
# in transit). An empty pattern makes the first substitution a no-op and makes
# the second one insert the marker between every character, so they are
# reconstructed here on a best-guess basis -- confirm against the real export:
#   * BLOCK_COMMENT_RE: presumably the block-editor delimiters
#     (``<!-- wp:paragraph -->`` / ``<!-- /wp:paragraph -->``).
#   * MORE_TAG_RE: presumably the WordPress "read more" tag.
BLOCK_COMMENT_RE = re.compile(r'<!--\s*/?wp:.*?-->', re.DOTALL)
MORE_TAG_RE = re.compile(r'<!--more-->')
MORE_MARKER = '\n\n:::more:::\n\n'


def mkdirp(path):
    """Create directory *path* (and any missing parents) if it does not exist."""
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path, exist_ok=True)


def _node_text(node):
    """Return the first child of *node* as a plain ``str``.

    Returns ``''`` when *node* is ``None`` or has no children, so a missing or
    empty XML element no longer aborts the whole conversion.
    """
    if node is None or not node.contents:
        return ''
    return str(node.contents[0])


def _child_text(item, name):
    """Return the text of the direct child element *name* of *item*."""
    return _node_text(item.find(name, recursive=False))


def _category_names(item, domain):
    """Return the non-empty ``<category domain=...>`` names found in *item*."""
    nodes = item.find_all('category', {'domain': domain})
    names = (_node_text(node) for node in nodes)
    return [name for name in names if name]


def _yaml_escape(value):
    """Escape *value* for use inside a double-quoted front matter scalar."""
    # Backslashes first, otherwise the ones added for quotes get doubled.
    return value.replace('\\', '\\\\').replace('"', '\\"')


def _yaml_list(values):
    """Render *values* as one `` - value`` line per entry."""
    return '\n'.join(' - ' + value for value in values)


def _front_matter(post_type, post_id, title, creator, date, status,
                  categories, tags):
    """Build the front matter block, including the trailing blank line."""
    lines = [
        '---',
        f'layout: {post_type}',
        f'id: {post_id}',
        f'title: "{_yaml_escape(title)}"',
        f'creator: "{_yaml_escape(creator)}"',
        f'date: {date}',
        'categories:',
        _yaml_list(categories),
        'tags:',
        _yaml_list(tags),
        f"draft: {'yes' if status == 'draft' else 'no'}",
        f"published: {'yes' if status == 'publish' else 'no'}",
        '---',
    ]
    return '\n'.join(lines) + '\n\n'


def _html_to_markdown(html):
    """Clean the exported HTML body and convert it to Markdown."""
    html = BLOCK_COMMENT_RE.sub('', html)
    html = MORE_TAG_RE.sub(MORE_MARKER, html)
    # Trim every line and drop the blank ones before handing over to
    # markdownify.
    stripped = (line.strip() for line in html.splitlines())
    html = '\n'.join(line for line in stripped if line)
    return md(html).strip() + '\n'


def _unique_path(folder, slug):
    """Return a not-yet-existing ``.md`` path for *slug* inside *folder*.

    Collisions get a ``_(n)`` suffix and stay in *folder*; the previous
    fallback rebuilt the path from ``output/`` and lost the year/month part.
    """
    candidate = os.path.join(folder, slug + '.md')
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(folder, f'{slug}_({counter}).md')
        counter += 1
    return candidate


def _export_item(item):
    """Write one export ``<item>`` to disk; skip anything but posts and pages."""
    post_type = _child_text(item, 'wp:post_type')
    if post_type not in EXPORTED_TYPES:
        return

    title = _child_text(item, 'title')
    date = _child_text(item, 'wp:post_date')

    front_matter = _front_matter(
        post_type=post_type,
        post_id=_child_text(item, 'wp:post_id'),
        title=title,
        creator=_child_text(item, 'dc:creator'),
        date=date,
        status=_child_text(item, 'wp:status'),
        categories=_category_names(item, 'category'),
        tags=_category_names(item, 'post_tag'),
    )
    body = _html_to_markdown(_child_text(item, 'content:encoded'))

    # slugify() may return an empty string (e.g. for an empty title).
    slug = slugify(title) or 'untitled'

    # The folder is cut from fixed positions of the date string:
    # characters 0-3 and 5-6 (year and month of a "YYYY-MM-..." value).
    folder = os.path.join(OUTPUT_ROOT, date[:4], date[5:7])
    mkdirp(folder)

    with open(_unique_path(folder, slug), 'w', encoding='utf-8') as f:
        f.write(front_matter)
        f.write(body)


def main():
    """Read the export named by ``in_file`` and convert every post and page."""
    mkdirp(OUTPUT_ROOT)

    with open(in_file, 'r', encoding='utf-8') as f:
        raw_content = f.read()

    soup = BeautifulSoup(raw_content, 'xml')
    items = soup.find_all('item')
    print(f"{len(items)} items found")

    for item in items:
        _export_item(item)


if __name__ == '__main__':
    main()