Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""
ChatGPT Conversation Exporter - JSON to Markdown
---------------------------------------------------
- Reads the `conversations.json` file.
- Extracts metadata (title, timestamps, model used, etc.).
- Organizes messages in chronological order.
- Saves each conversation as a formatted .md file.

Instructions:
- Extract conversations.json from the ChatGPT export .zip file.
- Run the script in the same directory:
      python export_conversations.py   (or: python3 export_conversations.py)
- Markdown files are saved in the folder `exported_chats`.
"""
import json
import os
import re
from collections import deque
from datetime import datetime, timezone
# Load the conversations.json file.
# The parsed JSON is kept in the module-level name `conversations`, which the
# export loop further down iterates over.
file_path = "conversations.json"  # Ensure this is the correct path
with open(file_path, "r", encoding="utf-8") as file:
    conversations = json.load(file)
# Characters replaced with "_" when building a file name: the punctuation
# < > : " / \ | ? * plus the ASCII control characters (0x00-0x1F).
_INVALID_FILENAME_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')


def clean_filename(title):
    """Return *title* turned into a string that is safe to use as a file name.

    Every invalid character is replaced with "_". Surrounding whitespace and
    trailing dots are dropped, because a name ending in a dot or a space is
    not reliably preserved on Windows.

    Args:
        title: The conversation title. ``None`` and non-string values are
            accepted; non-strings are converted with ``str()``.

    Returns:
        The cleaned name (without extension), or ``"Untitled"`` when nothing
        usable is left (``None``, empty, or whitespace/dots only).
    """
    if title is None:
        return "Untitled"
    if not isinstance(title, str):
        title = str(title)

    cleaned = _INVALID_FILENAME_CHARS.sub("_", title).strip().rstrip(". ")
    return cleaned or "Untitled"
def format_timestamp(timestamp):
    """Convert a UNIX timestamp to a readable UTC string.

    Args:
        timestamp: Seconds since the epoch as ``int`` or ``float``. Any other
            value (including ``None`` and ``bool``) is treated as missing.

    Returns:
        The time formatted as ``"YYYY-MM-DD HH:MM:SS UTC"``, or
        ``"Unknown Time"`` when the value is missing or cannot be represented
        as a date (NaN, infinity, out-of-range numbers).
    """
    # bool is a subclass of int, but True/False are never real timestamps.
    if isinstance(timestamp, bool) or not isinstance(timestamp, (int, float)):
        return "Unknown Time"

    try:
        # Timezone-aware conversion; datetime.utcfromtimestamp() is deprecated
        # since Python 3.12.
        moment = datetime.fromtimestamp(timestamp, tz=timezone.utc)
    except (OverflowError, OSError, ValueError):
        # Raised for NaN/inf and for values outside the platform's date range.
        return "Unknown Time"

    return moment.strftime("%Y-%m-%d %H:%M:%S UTC")
# Matches every character in the Unicode Private Use Area of the Basic
# Multilingual Plane (U+E000 through U+F8FF).
_PRIVATE_USE_CHARS = re.compile(r"[\uE000-\uF8FF]")


def sanitize_text(text):
    """Remove hidden Private Use Area characters from *text*.

    Only code points U+E000-U+F8FF are deleted; all other characters,
    including whitespace, newlines and Markdown syntax, are left untouched so
    the formatting of the message is not broken.

    Args:
        text: The string to clean.

    Returns:
        A copy of *text* without Private Use Area characters.
    """
    return _PRIVATE_USE_CHARS.sub("", text)
def _build_metadata_block(convo, title):
    """Return the Markdown header block holding the conversation's metadata."""
    header_lines = [
        f"# {title}",
        "---",
        f"Title: {title}",
        f"Conversation ID: {convo.get('id', 'Unknown ID')}",
        f"Created: {format_timestamp(convo.get('create_time'))}",
        f"Last Updated: {format_timestamp(convo.get('update_time'))}",
        f"Model: {convo.get('model', 'Unknown Model')}",
    ]
    custom_gpt = convo.get("gpt_title")  # Custom GPT name if used
    if custom_gpt:
        header_lines.append(f"Custom GPT: {custom_gpt}")
    return "\n".join(header_lines) + "\n---\n\n"


def _extract_message_text(message):
    """Return the sanitized text of one message, or "" if it has no text parts."""
    # `or` fallbacks: a key that is present with a null value would otherwise
    # bypass the .get() default and raise AttributeError/TypeError.
    content = message.get("content") or {}
    parts = content.get("parts") or []
    # Only plain values are rendered; structured (dict/list) parts are skipped.
    return "\n".join(
        sanitize_text(str(part))
        for part in parts
        if isinstance(part, (str, int, float))
    ).strip()


def _format_message(role, formatted_time, content):
    """Return the Markdown for one message, or None for roles that are not exported."""
    if role == "user":
        # The user's message is rendered as a block quote.
        quoted = "\n".join(f"> {line}" for line in content.split("\n"))
        return f"> **User ({formatted_time}):**\n>\n{quoted}\n\n"
    if role == "assistant":
        return f"**Assistant ({formatted_time}):** {content}\n\n"
    return None


def convert_conversation_to_markdown(convo):
    """Convert a single conversation dict to Markdown.

    The output starts with a metadata block (title, id, timestamps, model and
    optional custom GPT name), followed by every non-empty user and assistant
    message, and ends with a separator line.

    Messages are collected by walking the ``mapping`` tree breadth-first from
    the first node that has no parent, following each node's ``children``.
    NOTE: because the walk is level by level, a conversation with several
    branches has the messages of sibling branches emitted next to each other.

    Args:
        convo: One conversation object (a dict) from the export.

    Returns:
        A ``(filename, markdown_text)`` tuple, where *filename* is the
        cleaned title plus ``".md"``.
    """
    # `or` instead of a .get() default: the key may exist with a null value.
    title = convo.get("title") or "Untitled Conversation"
    filename = clean_filename(title) + ".md"  # Clean title for a safe filename

    chunks = [_build_metadata_block(convo, title)]

    mapping = convo.get("mapping") or {}
    # Find the root node (the first one without a parent).
    root_id = next(
        (node_id for node_id, node in mapping.items() if node.get("parent") is None),
        None,
    )

    if root_id is not None:
        pending = deque([root_id])
        visited = set()  # guards against endless loops on a cyclic mapping
        while pending:
            node_id = pending.popleft()
            if node_id in visited:
                continue
            visited.add(node_id)

            node = mapping.get(node_id) or {}
            message = node.get("message") or {}
            if message:
                content = _extract_message_text(message)
                if content:  # Avoid empty messages
                    role = (message.get("author") or {}).get("role", "unknown")
                    formatted_time = format_timestamp(message.get("create_time"))
                    rendered = _format_message(role, formatted_time, content)
                    if rendered is not None:
                        chunks.append(rendered)

            # Queue the children so they are processed in their listed order.
            pending.extend(node.get("children") or [])

    chunks.append("**────────────**\n\n")  # Separator at the end of the conversation
    return filename, "".join(chunks)
# Create an output folder for Markdown files
output_folder = "exported_chats"
os.makedirs(output_folder, exist_ok=True)

# File names already written during this run, case-folded so that names that
# differ only in letter case are treated as the same file.
used_filenames = set()

# Process each conversation and save it as a separate Markdown file
for convo in conversations:
    try:
        filename, markdown_content = convert_conversation_to_markdown(convo)

        # The file name is derived from the title, so two conversations with
        # the same title would overwrite each other. Append " (2)", " (3)", ...
        # before the extension until the name is unused.
        if filename.endswith(".md"):
            stem, suffix = filename[:-3], ".md"
        else:
            stem, suffix = filename, ""
        unique_name = filename
        copy_number = 1
        while unique_name.casefold() in used_filenames:
            copy_number += 1
            unique_name = f"{stem} ({copy_number}){suffix}"
        used_filenames.add(unique_name.casefold())

        md_file_path = os.path.join(output_folder, unique_name)
        with open(md_file_path, "w", encoding="utf-8") as md_file:
            md_file.write(markdown_content)
        print(f" Saved: {md_file_path}")
    except Exception as e:
        # Top-level boundary: report the failure and continue with the next
        # conversation instead of aborting the whole export.
        print(f" Error processing conversation '{convo.get('title', 'Untitled')}': {e}")

print("\n All conversations have been processed!")
Advertisement
Add Comment
Please, Sign In to add comment