Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from dataclasses import dataclass, field
- from datetime import datetime
- from typing import List
- import os
- import hashlib
- import yaml
- import markdown
def compute_sha1(file_path: str) -> str:
    """Return the hexadecimal SHA-1 digest of the file at ``file_path``.

    The file is opened in binary mode and consumed in 8192-byte chunks, so
    its content is never held in memory all at once.
    """
    digest = hashlib.sha1()
    with open(file_path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()
@dataclass
class FileMetadata:
    """Metadata record describing a single file.

    Both date fields default to the current local time, formatted as
    ``YYYY-MM-DD HH:MM:SS`` and evaluated when the instance is created.
    """

    # Creation date, as a formatted string.
    create_date: str = field(
        default_factory=lambda: datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    )
    # Last-edit date, as a formatted string.
    edit_date: str = field(
        default_factory=lambda: datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    )
    # SHA-1 hex digest of the file.
    sha1sum: str = ""
    # Path of the file.
    filepath: str = ""
    # List of tags attached to the file.
    tags: List[str] = field(default_factory=list)
    # Title of the file.
    title: str = ""

    def to_dict(self):
        """Return the fields as a plain dict, e.g. for JSON serialization.

        The values are the instance's own attribute objects (not copies).
        """
        keys = (
            "create_date",
            "edit_date",
            "sha1sum",
            "filepath",
            "tags",
            "title",
        )
        return {key: getattr(self, key) for key in keys}
def get_file_metadata(file_path: str, tags: List[str]) -> FileMetadata:
    """Collect metadata for a Markdown file.

    Args:
        file_path: Path to the file; its extension must be ``.md``
            (case-insensitive).
        tags: Tags to attach to the record. The list is copied, so the
            returned record does not share it with the caller or with other
            records built from the same list.

    Returns:
        A ``FileMetadata`` holding the file's ctime and mtime (formatted as
        ``YYYY-MM-DD HH:MM``), its SHA-1 digest, its path, the tags, and the
        title taken from the first line when that line starts with ``"# "``
        (empty string otherwise).

    Raises:
        ValueError: If the file extension is not allowed.
        OSError: If the file cannot be stat'ed or read.
    """
    # Reject anything that is not a Markdown file before touching the disk.
    _, file_extension = os.path.splitext(file_path)
    allowed_extensions = ['.md']  # list of permitted extensions
    if file_extension.lower() not in allowed_extensions:
        raise ValueError(f"文件扩展名 '{file_extension}' 不被允许。请使用以下扩展名之一: {', '.join(allowed_extensions)}")

    # NOTE: os.path.getctime is the creation time on Windows but the time of
    # the last metadata change on Unix-like systems.
    timestamp_format = "%Y-%m-%d %H:%M"
    create_time = datetime.fromtimestamp(os.path.getctime(file_path)).strftime(timestamp_format)
    edit_time = datetime.fromtimestamp(os.path.getmtime(file_path)).strftime(timestamp_format)

    sha1sum = compute_sha1(file_path)

    # Read only the first line to look for a level-1 Markdown heading.
    # 'utf-8-sig' strips a leading byte-order mark if present, which would
    # otherwise hide the "# " prefix; files without a BOM decode unchanged.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        first_line = f.readline().strip()
    title = first_line[2:].strip() if first_line.startswith("# ") else ""

    return FileMetadata(
        create_date=create_time,
        edit_date=edit_time,
        sha1sum=sha1sum,
        filepath=file_path,
        # Copy so records never alias the caller's (possibly shared) list.
        tags=list(tags) if tags is not None else [],
        title=title,
    )
def process_files_in_directory(directory: str, tags: List[str]) -> List[FileMetadata]:
    """Build metadata for every regular file directly inside ``directory``.

    Sub-directories are not descended into. Files that ``get_file_metadata``
    rejects or cannot read are skipped, so the scan is best-effort.

    Args:
        directory: Directory to scan.
        tags: Tags passed through to ``get_file_metadata`` for each file.

    Returns:
        A list of metadata records, one per file that was processed
        successfully. The list is empty when ``directory`` is not a
        directory (a message is printed in that case).
    """
    metadata_list = []
    if not os.path.isdir(directory):
        print(f"{directory} 不是一个有效的目录。")
        return metadata_list

    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        if not os.path.isfile(file_path):  # only regular files are processed
            continue
        try:
            file_metadata = get_file_metadata(file_path, tags)
        except (ValueError, OSError):
            # ValueError: disallowed extension or undecodable content
            # (UnicodeDecodeError is a ValueError); OSError: unreadable file.
            # Anything else (KeyboardInterrupt, programming errors) is no
            # longer swallowed.
            continue
        metadata_list.append(file_metadata)
    return metadata_list
- # yaml
- # read config
- # see blog_example.yaml
def read_config(config_file: str) -> dict:
    """Load a YAML configuration file and return its parsed content.

    Args:
        config_file: Path to the YAML file.

    Returns:
        The parsed document. An empty file yields an empty dict rather than
        ``None`` so the result matches the declared return type.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform locale,
    # which would mis-read non-ASCII configuration values on some systems.
    with open(config_file, 'r', encoding='utf-8') as file:
        config = yaml.safe_load(file)
    # safe_load returns None for an empty document.
    return {} if config is None else config
- # markdown
def convert_md_to_html(md_file_path: str, output_html_path: str):
    """Render a Markdown file to HTML and write the result to disk.

    Args:
        md_file_path: Path of the input file; must end in ``.md``
            (case-insensitive).
        output_html_path: Path the generated HTML is written to.

    Raises:
        ValueError: If ``md_file_path`` does not have a ``.md`` extension.
    """
    # Guard: only Markdown files are accepted.
    extension = os.path.splitext(md_file_path)[1]
    if extension.lower() != '.md':
        raise ValueError("提供的文件不是一个 Markdown 文件。请确保文件扩展名为 .md")

    # Load the Markdown source.
    with open(md_file_path, 'r', encoding='utf-8') as source:
        md_text = source.read()

    # Convert, then persist the HTML output.
    rendered = markdown.markdown(md_text)
    with open(output_html_path, 'w', encoding='utf-8') as target:
        target.write(rendered)

    print(f"已将 Markdown 文件 '{md_file_path}' 转换为 HTML 并保存为 '{output_html_path}'")
if __name__ == "__main__":
    # Usage example: convert a sample Markdown file to HTML.
    md_file_path = 'example.md'  # input Markdown file path
    output_html_path = 'output.html'  # output HTML file path
    convert_md_to_html(
        md_file_path=md_file_path,
        output_html_path=output_html_path,
    )
Advertisement
Add Comment
Please, Sign In to add comment