init
This commit is contained in:
commit
20f77f4339
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
/venv/
|
||||
/output/
|
||||
|
8
.idea/.gitignore
generated
vendored
Normal file
8
.idea/.gitignore
generated
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
# 默认忽略的文件
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# 基于编辑器的 HTTP 客户端请求
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
4
.idea/misc.xml
generated
Normal file
4
.idea/misc.xml
generated
Normal file
@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="wp-migrator" project-jdk-type="Python SDK" />
|
||||
</project>
|
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/wp-migrator.iml" filepath="$PROJECT_DIR$/.idea/wp-migrator.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
6
.idea/vcs.xml
generated
Normal file
6
.idea/vcs.xml
generated
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
10
.idea/wp-migrator.iml
generated
Normal file
10
.idea/wp-migrator.iml
generated
Normal file
@ -0,0 +1,10 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="wp-migrator" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
62
main.py
Normal file
62
main.py
Normal file
@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env python3
|
||||
# encoding: utf-8
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from slugify import slugify
|
||||
from markdownify import markdownify as md
|
||||
import os
|
||||
import re
|
||||
|
||||
# Path of the WordPress export XML to convert, and the directory that
# receives one Markdown file per post/page.
in_file = 'wpexport20241219.xml'
out_dir = 'output'

# Matches WordPress block delimiter comments such as "<!-- wp:paragraph -->"
# and "<!-- /wp:paragraph -->". Compiled once instead of once per item.
WP_BLOCK_COMMENT = re.compile(r'<!-- \/?wp:.+?\s*-->')


def child_text(item, name, default=''):
    """Return the first content node of the direct child *name* as a string.

    Returns *default* when the child element is missing or empty, so that an
    untitled post or an empty body does not abort the whole run.
    """
    node = item.find(name, recursive=False)
    if node is None or not node.contents:
        return default
    return str(node.contents[0])


def category_lines(item, domain):
    """Return the ``<category domain=...>`` names of *item* as YAML list lines.

    Each name becomes a line of the form `` - name``; empty category elements
    are skipped. The result is an empty string when there are none.
    """
    nodes = item.find_all('category', {'domain': domain})
    return '\n'.join(' - ' + str(node.contents[0]) for node in nodes if node.contents)


def yaml_quote(value):
    """Return *value* as a YAML double-quoted scalar.

    Backslashes and double quotes are escaped; without this a title such as
    ``Say "hi"`` would terminate the scalar early and break the front matter.
    """
    escaped = str(value).replace('\\', '\\\\').replace('"', '\\"')
    return f'"{escaped}"'


def strip_wp_markup(html):
    """Clean the raw post HTML before it is converted to Markdown.

    Removes the WordPress block delimiter comments, turns the ``<!--more-->``
    marker into a paragraph holding ``:::more:::``, strips every line and
    drops the blank ones.
    """
    html = WP_BLOCK_COMMENT.sub('', html)
    html = html.replace('<!--more-->', '<p class="more">:::more:::</p>')
    stripped = (line.strip() for line in html.splitlines())
    return '\n'.join(line for line in stripped if line)


def build_frontmatter(post_type, title, creator, date, categories, tags, status):
    """Return the YAML front matter block, followed by one blank line.

    *categories* and *tags* are the pre-rendered list lines produced by
    ``category_lines``. ``draft`` is ``yes`` only when *status* is ``draft``.
    """
    draft = 'yes' if status == 'draft' else 'no'
    return (
        "---\n"
        f"layout: {post_type}\n"
        f"title: {yaml_quote(title)}\n"
        f"creator: {yaml_quote(creator)}\n"
        f"date: {date}\n"
        f"categories:\n{categories}\n"
        f"tags:\n{tags}\n"
        f"draft: {draft}\n"
        "---\n\n"
    )


def unique_output_path(directory, slug):
    """Return a path for ``slug.md`` in *directory* that does not exist yet.

    When ``slug.md`` is taken, ``slug_(1).md``, ``slug_(2).md``, ... are tried
    in order, so posts sharing a title do not overwrite each other.
    """
    candidate = os.path.join(directory, slug + '.md')
    attempt = 1
    while os.path.exists(candidate):
        candidate = os.path.join(directory, f"{slug}_({attempt}).md")
        attempt += 1
    return candidate


def main():
    """Convert every post and page of ``in_file`` into a Markdown file.

    Items whose ``wp:post_type`` is missing, empty, or anything other than
    ``post``/``page`` are skipped. Each remaining item is written to
    ``out_dir`` as front matter followed by the Markdown-converted body.
    """
    os.makedirs(out_dir, exist_ok=True)

    with open(in_file, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'xml')

    items = soup.find_all('item')
    print(f"{len(items)} items found")

    for item in items:
        post_type = child_text(item, 'wp:post_type')
        if post_type not in ('page', 'post'):
            continue

        title = child_text(item, 'title')
        frontmatter = build_frontmatter(
            post_type,
            title,
            child_text(item, 'dc:creator'),
            child_text(item, 'wp:post_date'),
            category_lines(item, 'category'),
            category_lines(item, 'post_tag'),
            child_text(item, 'wp:status'),
        )
        content = md(strip_wp_markup(child_text(item, 'content:encoded'))).strip()

        # slugify() can return an empty string (e.g. for an empty title).
        title_slug = slugify(title) or 'untitled'

        with open(unique_output_path(out_dir, title_slug), 'w', encoding='utf-8') as f:
            f.write(frontmatter)
            f.write(content)


if __name__ == '__main__':
    main()
|
||||
|
8
requirements.txt
Normal file
8
requirements.txt
Normal file
@ -0,0 +1,8 @@
|
||||
beautifulsoup4==4.12.3
|
||||
lxml==5.3.0
|
||||
markdownify==0.14.1
|
||||
python-slugify==8.0.4
|
||||
six==1.17.0
|
||||
soupsieve==2.6
|
||||
text-unidecode==1.3
|
||||
|
Loading…
Reference in New Issue
Block a user