Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import traceback

import html2text as ht
import pymysql.cursors
# Connection settings for the WordPress MySQL database to export from.
host = 'localhost'
port = 3306
username = 'root'
password = 'root'
database = 'wordpress'

# Exported Markdown files are written to "posts" under the current working
# directory (the directory the script is launched from).
work_path = os.path.join(os.getcwd(), "posts")

# Hexo front matter followed by the post body.
# Filled with a (title, date, content) tuple via %-formatting.
hexo_md_head = """---
title: %s
category: auto_export
comments: false
comment: false
date: %s
updated:
tags:
permalink:
thumbnail:
toc:
top:
mathJax:
---\n\n
%s
"""

# Select every top-level post, newest first.
sql_all = """SELECT `ID`,`post_date_gmt`,`post_title`,`post_status`,`post_modified_gmt`,`post_type`,`post_content`
FROM `wp_posts`
WHERE `post_parent`=0
AND `post_type`='post'
ORDER BY `post_date_gmt` DESC
"""

# Select the newest revision of the post whose ID is bound to %s.
# `post_type`='revision' is required: attachments are also children of the
# post with `post_status`='inherit', and without the filter a freshly uploaded
# image could replace the post's title and content.
# LIMIT 1 because only the first row is ever read.
sql_post_newest = """SELECT `ID`,`post_date_gmt`,`post_title`,`post_status`,`post_modified_gmt`,`post_type`,`post_content`
FROM `wp_posts`
WHERE `post_parent`=%s
AND `post_status`='inherit'
AND `post_type`='revision'
ORDER BY `post_date_gmt` DESC
LIMIT 1
"""

# Characters that are illegal or awkward in file names, mapped to a safe
# substitute (None deletes the character).
_FILENAME_TABLE = str.maketrans({
    " ": "-",
    "/": None,
    "|": None,
    "\\": None,
    "\"": "'",
    ":": None,
    "*": None,
    "?": None,
    "#": None,
    "<": "(",
    ">": ")",
    "【": "[",
    "】": "]",
})


def build_post_filename(date, title):
    """Return the Markdown file name for a post.

    The name is ``<first 10 chars of str(date)>-<sanitized title>.md``; for a
    ``datetime`` value the prefix is therefore ``YYYY-MM-DD``.
    """
    return str(date)[0:10] + "-" + str(title).translate(_FILENAME_TABLE) + ".md"


def render_post(title, date, content):
    """Return the full Markdown document: Hexo front matter plus *content*."""
    return hexo_md_head % (title, date, content)


def log_export_failure(post_id, title):
    """Report a failed export on stdout and append it to ``./error.log``.

    Must be called from inside an ``except`` block: the active traceback is
    written to the log so the "see the log" hint actually leads somewhere.
    """
    err_post_info = "文章导出失败: ID=%d, title=《%s》 " % (post_id, title)
    print(err_post_info + "请查看日志")
    log_path = "." + os.path.sep + "error.log"
    with open(log_path, mode='a', encoding='utf-8') as export_log:
        export_log.write(err_post_info + "\n" + traceback.format_exc() + "\n\n")


def main():
    """Export every WordPress post to a Hexo Markdown file under ``work_path``.

    For each top-level post the newest revision (if any) supplies the title
    and content; the HTML is converted to Markdown with html2text. A post that
    cannot be written is logged and skipped so the remaining posts still get
    exported.
    """
    connection = pymysql.connect(host=host,
                                 port=port,
                                 user=username,
                                 password=password,
                                 database=database)
    try:
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(work_path, exist_ok=True)
        print("文件将会导出到脚本同目录下的'posts'文件夹中")

        html_worker = ht.HTML2Text()
        html_worker.bypass_tables = False

        # The context manager closes the cursor; no explicit close() needed.
        with connection.cursor() as cursor:
            cursor.execute(sql_all)
            results = cursor.fetchall()
            print("共查询到 %d 篇文章,即将开始导出..." % len(results))

            for i, row in enumerate(results):
                post_id = row[0]
                date = row[1]
                title = row[2]
                content = row[6]
                print("正在导出第 %d 篇文章: ID=%d, date=%s, title=《%s》" % (i + 1, post_id, date, title))

                # Let the driver bind the ID instead of formatting it into
                # the SQL text.
                cursor.execute(sql_post_newest, (post_id,))
                new_post = cursor.fetchone()
                if new_post:
                    title = new_post[2]
                    content = new_post[6]
                content = html_worker.handle(content)

                try:
                    post_path = os.path.join(work_path, build_post_filename(date, title))
                    with open(post_path, mode='w', encoding="utf-8") as post_file:
                        post_file.write(render_post(title, date, content))
                except Exception:
                    # Best effort per post: record the failure and continue.
                    # KeyboardInterrupt/SystemExit are deliberately not caught.
                    log_export_failure(post_id, title)
    finally:
        connection.close()


if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment