# ChatGPT Conversation Exporter - JSON to Markdown

"""
ChatGPT Conversation Exporter - JSON to Markdown
---------------------------------------------------

- Reads the `conversations.json` file.
- Extracts metadata (title, timestamps, model used, etc.).
- Organizes messages in chronological order.
- Saves each conversation as a formatted .md file.

Instructions:
- Extract conversations.json from the ChatGPT Export .zip file
- Run the script in the same directory:
    python export_conversations.py    (or: python3 export_conversations.py)
- Markdown files are saved in the `exported_chats` folder.

"""
import json
import os
import re
from collections import deque
from datetime import datetime, timezone

# Load the conversations.json file.
# The script is meant to be run from the directory that contains the export,
# so a missing or malformed file is reported with a short message instead of
# a raw traceback.
file_path = "conversations.json"  # Ensure this is the correct path
try:
    with open(file_path, "r", encoding="utf-8") as file:
        conversations = json.load(file)
except FileNotFoundError:
    raise SystemExit(
        f"Could not find '{file_path}'. Extract it from the ChatGPT export "
        "and run this script from the same directory."
    ) from None
except json.JSONDecodeError as exc:
    raise SystemExit(f"'{file_path}' is not valid JSON: {exc}") from None

# The export loop below iterates over `conversations` and calls `.get` on
# every item, so anything other than a list of objects cannot be processed.
if not isinstance(conversations, list):
    raise SystemExit(
        f"'{file_path}' must contain a JSON array of conversations, "
        f"found {type(conversations).__name__}."
    )

def clean_filename(title):
    """Return ``title`` made safe for use as a file name (without extension).

    The characters ``< > : " / \\ | ? *`` and ASCII control characters
    (newlines, tabs, NUL, ...) are each replaced with ``_``.

    Args:
        title: The conversation title. ``None`` is treated as an empty title
            and any other non-string value is converted with ``str()``.

    Returns:
        The sanitized name, or ``"Untitled Conversation"`` when nothing but
        whitespace would remain (which would otherwise produce a bare
        ``.md`` file once the extension is appended).
    """
    if title is None:
        title = ""
    elif not isinstance(title, str):
        title = str(title)

    # \x00-\x1f covers control characters, which are as unusable in file
    # names as the punctuation listed before them.
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", title)
    return cleaned if cleaned.strip() else "Untitled Conversation"

def format_timestamp(timestamp):
    """Format a UNIX timestamp as ``YYYY-MM-DD HH:MM:SS UTC``.

    Args:
        timestamp: Seconds since the epoch as ``int`` or ``float``. Any other
            type (including ``None``) is treated as missing.

    Returns:
        The formatted UTC time, or ``"Unknown Time"`` when the value is
        missing or cannot be represented as a date (NaN, infinity, or a
        value outside the range the platform supports).
    """
    if not isinstance(timestamp, (int, float)):
        return "Unknown Time"

    try:
        # Timezone-aware replacement for datetime.utcfromtimestamp(), which
        # is deprecated as of Python 3.12.
        moment = datetime.fromtimestamp(timestamp, tz=timezone.utc)
    except (ValueError, OverflowError, OSError):
        return "Unknown Time"

    return moment.strftime("%Y-%m-%d %H:%M:%S UTC")

# Character class covering the Private Use Area code points U+E000-U+F8FF.
_PRIVATE_USE_CHARS = re.compile(r"[\uE000-\uF8FF]")


def sanitize_text(text):
    """Return ``text`` with every Private Use Area character removed.

    Only code points in the range U+E000-U+F8FF are dropped; all other
    characters, including whitespace and Markdown markup, are left as they
    are so the formatting of the message is not disturbed.
    """
    return _PRIVATE_USE_CHARS.sub("", text)

def _iter_messages(mapping):
    """Yield the non-empty ``message`` dict of each node reachable from the root.

    The root is the first node in ``mapping`` whose ``parent`` is ``None``.
    Nodes are visited first-in, first-out, following each node's
    ``children`` list.

    NOTE(review): when a node has several children, this order emits the
    branches level by level rather than one branch after another; confirm
    that this is the intended reading order for branched conversations.
    """
    root_id = next(
        (
            node_id
            for node_id, node in mapping.items()
            if isinstance(node, dict) and node.get("parent") is None
        ),
        None,
    )
    if root_id is None:
        return

    pending = deque([root_id])
    visited = set()  # guards against child references that form a cycle
    while pending:
        node_id = pending.popleft()
        if node_id in visited:
            continue
        visited.add(node_id)

        # `or {}` / `or []`: a JSON null comes back as None, which
        # dict.get()'s default argument does not replace.
        node = mapping.get(node_id) or {}
        message = node.get("message")
        if message:
            yield message
        pending.extend(node.get("children") or [])


def _render_message(message):
    """Return the Markdown for one message, or ``""`` if nothing is emitted.

    Only messages whose author role is ``user`` or ``assistant`` and that
    contain text after sanitizing are rendered.
    """
    role = (message.get("author") or {}).get("role", "unknown")
    formatted_time = format_timestamp(message.get("create_time"))
    parts = (message.get("content") or {}).get("parts") or []

    # Non-text parts (dicts, lists, ...) are skipped; text parts are
    # sanitized without touching their formatting.
    content = "\n".join(
        sanitize_text(str(part))
        for part in parts
        if isinstance(part, (str, int, float))
    ).strip()

    if not content:  # Avoid empty messages
        return ""
    if role == "user":
        # The user's message is rendered as a block quote.
        quoted = "\n".join(f"> {line}" for line in content.split("\n"))
        return f"> **User ({formatted_time}):**\n>\n{quoted}\n\n"
    if role == "assistant":
        return f"**Assistant ({formatted_time}):** {content}\n\n"
    return ""


def convert_conversation_to_markdown(convo):
    """Convert one exported conversation into a Markdown document.

    Args:
        convo: A conversation dict. The keys read are ``title``, ``id``,
            ``create_time``, ``update_time``, ``model``, ``gpt_title`` and
            ``mapping``; missing or null values fall back to placeholders.

    Returns:
        A ``(filename, markdown)`` tuple: the sanitized title with an
        ``.md`` extension, and the document text made of a metadata header,
        the rendered messages and a closing separator.
    """
    # `or` instead of a .get() default so that an explicit null also falls
    # back to the placeholder.
    title = str(convo.get("title") or "Untitled Conversation")
    filename = clean_filename(title) + ".md"  # Clean title for a safe filename

    # Extract metadata
    conversation_id = convo.get("id") or "Unknown ID"
    created_time = format_timestamp(convo.get("create_time"))
    updated_time = format_timestamp(convo.get("update_time"))
    model = convo.get("model") or "Unknown Model"
    custom_gpt = convo.get("gpt_title")  # Custom GPT name if used

    # Build the metadata block at the top of the file
    header = [
        f"# {title}",
        "",
        "---",
        f"Title: {title}",
        f"Conversation ID: {conversation_id}",
        f"Created: {created_time}",
        f"Last Updated: {updated_time}",
        f"Model: {model}",
    ]
    if custom_gpt:
        header.append(f"Custom GPT: {custom_gpt}")
    header.append("---")

    chunks = ["\n".join(header) + "\n\n"]

    # Append every rendered message in traversal order
    mapping = convo.get("mapping") or {}
    chunks.extend(_render_message(message) for message in _iter_messages(mapping))

    chunks.append("**────────────**\n\n")  # Separator at the end of the conversation

    return filename, "".join(chunks)

# Create an output folder for Markdown files
output_folder = "exported_chats"
os.makedirs(output_folder, exist_ok=True)

# File names already written during this run, compared case-insensitively so
# that titles differing only in letter case do not collide on
# case-insensitive file systems.
used_filenames = set()

# Process each conversation and save it as a separate Markdown file
for convo in conversations:
    try:
        filename, markdown_content = convert_conversation_to_markdown(convo)

        # Several conversations can share a title; add a numeric suffix
        # instead of silently overwriting the earlier export.
        stem, extension = os.path.splitext(filename)
        unique_name = filename
        suffix = 2
        while unique_name.casefold() in used_filenames:
            unique_name = f"{stem} ({suffix}){extension}"
            suffix += 1
        used_filenames.add(unique_name.casefold())

        md_file_path = os.path.join(output_folder, unique_name)

        with open(md_file_path, "w", encoding="utf-8") as md_file:
            md_file.write(markdown_content)

        print(f" Saved: {md_file_path}")
    except Exception as e:
        # Top-level boundary: report the failure and continue with the next
        # conversation. The title lookup is guarded so that an entry that is
        # not a dict cannot make the error handler itself fail.
        failed_title = convo.get("title", "Untitled") if isinstance(convo, dict) else "Untitled"
        print(f" Error processing conversation '{failed_title}': {e}")

print("\n All conversations have been processed!")