# Render YouTube Livechat as an HTML Webpage MkII

import json
import os
import html
import re

"""
This script renders YouTube live chat data from '.live_chat.json' files, generated by yt-dlp, into an interactive HTML webpage.
It extracts chat messages, author details, and timestamps from the JSON files and presents them in a visually structured format,
including a header with a "FakeTube" logo, video URL, and a link to the Python script. The script processes all '.live_chat.json'
files in a specified directory, generating a corresponding HTML file for each to replay the live chat in a browser-friendly format.

**Requirements:**
- Python libraries: 'os', 'json', 'html', and 're' (all part of the standard library; nothing needs to be installed).
- '.live_chat.json' files created by yt-dlp, containing live chat data from YouTube videos, using these arguments: --skip-download --write-subs live_chat --match-filter is_live

**Process:**
1. Prompts the user to input the full directory path containing the '.live_chat.json' files.
2. Scans the directory for all files ending in '.live_chat.json'.
3. For each JSON file:
   - Extracts the YouTube video ID from the filename using a regular expression.
   - Reads the file line by line, parsing JSON objects to extract chat messages, author names, profile pictures, and timestamps.
   - Formats messages, supporting both text and emoji rendering.
   - Sorts messages by timestamp for chronological display.
   - Generates an HTML file with:
     - A header featuring the "FakeTube" logo, video URL (clickable and copyable), and a script link.
     - A scrollable chat container displaying messages with profile pictures, authors, timestamps, and content.
     - A footer box showing the original JSON filename.
4. Saves each HTML file in the same directory with the base filename and a '.html' extension.

**Usage:**
Run the script and enter the full path to the directory containing '.live_chat.json' files. The script will process each file
and generate an HTML file for every valid JSON file found. Open the resulting HTML files in a web browser to view the
rendered live chat replay.

**Customization:**
- Modify the HTML and CSS in the `generate_html` function to alter the webpage's appearance (e.g., change colors, adjust layout and chatbox height/width,
  or modify the scrollbar style).
- Update the `extract_video_id` function to handle different filename conventions if needed.
- Replace the pastebin script link in the header (in `generate_html`) with a custom URL pointing to your script's source.
"""


def ms_to_time(ms):
    """Convert a millisecond offset to an ``HH:MM:SS`` string.

    Args:
        ms: Offset in milliseconds. Anything ``int()`` accepts works
            (an int, a float, or a numeric string).

    Returns:
        The zero-padded time string. Hours are not wrapped at 24, so long
        streams yield values such as ``"100:00:00"``. Negative offsets of
        at least one second are rendered with a leading ``-``. Values that
        cannot be converted to an integer (``None``, non-numeric strings,
        infinities, ...) yield ``"00:00:00"``.
    """
    try:
        total_ms = int(ms)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers None and other non-numeric objects, which int()
        # rejects with a different exception than a malformed string.
        return "00:00:00"

    # Work on the magnitude: floor division on a negative number would
    # otherwise produce nonsense such as "-1:59:59" for -1000 ms.
    total_seconds = abs(total_ms) // 1000
    sign = "-" if total_ms < 0 and total_seconds > 0 else ""

    minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{sign}{hours:02d}:{minutes:02d}:{seconds:02d}"

def get_directory_path():
    """Ask on stdin for a directory until an existing one is entered.

    Returns:
        The entered path (surrounding whitespace stripped) once
        ``os.path.isdir`` accepts it.
    """
    prompt = "Please enter the full directory path containing 'live_chat.json' files: "
    candidate = input(prompt).strip()
    while not os.path.isdir(candidate):
        print(f"Error: '{candidate}' is not a directory. Please try again.")
        candidate = input(prompt).strip()
    return candidate

def extract_video_id(filename):
    """Pull the 11-character YouTube video ID out of a live-chat filename.

    The ID is expected inside the last ``[...]`` pair that precedes the
    final ``.live_chat.json`` in the name.

    Returns:
        The video ID, or ``None`` when the name does not match that layout
        or the bracketed text is not a well-formed ID.
    """
    try:
        suffix_pos = filename.rfind('.live_chat.json')
        if suffix_pos == -1:
            return None

        # Last opening bracket before the suffix.
        open_pos = filename.rfind('[', 0, suffix_pos)
        if open_pos == -1:
            return None

        # First closing bracket after it, still before the suffix.
        close_pos = filename.find(']', open_pos, suffix_pos)
        if close_pos == -1:
            return None

        candidate = filename[open_pos + 1:close_pos]
        looks_valid = len(candidate) == 11 and re.match(r'^[a-zA-Z0-9_-]{11}$', candidate)
        return candidate if looks_valid else None
    except Exception:
        # Any unexpected input (e.g. a non-string) is treated as "no ID".
        return None

def _render_message_runs(runs):
    """Render a chat message's ``runs`` list as an HTML fragment.

    Text runs are HTML-escaped because they are viewer-supplied; emoji runs
    become ``<img>`` tags. Runs that are malformed are skipped so that one
    bad run does not discard the rest of the file.
    """
    parts = []
    for run in runs or []:
        if not isinstance(run, dict):
            continue
        if 'text' in run:
            parts.append(html.escape(str(run['text'])))
        elif 'emoji' in run:
            try:
                emoji_url = run['emoji']['image']['thumbnails'][0]['url']
            except (KeyError, IndexError, TypeError):
                continue  # Emoji without a usable image; nothing to show.
            parts.append(
                f'<img src="{html.escape(str(emoji_url))}" alt="emoji" '
                f'style="width: 20px; height: 20px; vertical-align: middle;">'
            )
    return ''.join(parts)


def _pick_author_photo(thumbnails):
    """Return the URL of the widest thumbnail, or a placeholder image URL."""
    photo_url = ""
    if thumbnails:
        # max() keeps the first of equally wide entries, like a stable
        # descending sort would.
        widest = max(thumbnails, key=lambda thumb: thumb.get('width', 0))
        photo_url = widest.get('url', '')
    return photo_url or "https://via.placeholder.com/32?text="


def process_json_file(json_file_path):
    """Read a line-delimited live-chat JSON file and collect its text messages.

    Each line is parsed as one JSON event. Lines that are not valid JSON, or
    that do not decode to a JSON object, are skipped. Only
    ``liveChatTextMessageRenderer`` items are collected.

    Args:
        json_file_path: Path of the ``.live_chat.json`` file to read.

    Returns:
        A list of dicts in file order, or ``None`` if the file could not be
        processed (the error is printed). Each dict has:

        - ``message_html``: ready-to-embed HTML fragment (text already
          escaped, emoji rendered as ``<img>`` tags).
        - ``author``: raw author name (plain text, not escaped).
        - ``photo``: raw profile-picture URL (not escaped).
        - ``timestamp``: the event's offset in milliseconds as found in the
          file (``'0'`` when absent).
        - ``timestamp_formatted``: that offset formatted by ``ms_to_time``.
    """
    messages = []
    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            for line in f:
                try:
                    event = json.loads(line)
                except json.JSONDecodeError:
                    continue  # Skip invalid lines
                if not isinstance(event, dict):
                    continue  # Valid JSON, but not an event object.

                replay_action = event.get('replayChatItemAction') or {}

                # The offset is read from the top level of the event first.
                # NOTE(review): replay-style downloads appear to nest it
                # inside 'replayChatItemAction' instead, so fall back to
                # that location before defaulting to '0' — confirm against
                # the yt-dlp live-chat output format.
                video_offset_time_msec = event.get('videoOffsetTimeMsec')
                if video_offset_time_msec is None:
                    video_offset_time_msec = replay_action.get('videoOffsetTimeMsec')
                if video_offset_time_msec is None:
                    video_offset_time_msec = '0'

                for action in replay_action.get('actions', []):
                    item = action.get('addChatItemAction', {}).get('item', {})
                    renderer = item.get('liveChatTextMessageRenderer')
                    if not renderer:
                        continue

                    message_runs = renderer.get('message', {}).get('runs', [])
                    thumbnails = renderer.get('authorPhoto', {}).get('thumbnails', [])

                    messages.append({
                        'message_html': _render_message_runs(message_runs),
                        'author': renderer.get('authorName', {}).get('simpleText', 'Unknown'),
                        'photo': _pick_author_photo(thumbnails),
                        'timestamp': video_offset_time_msec,
                        'timestamp_formatted': ms_to_time(video_offset_time_msec),
                    })
    except Exception as e:
        # Deliberate catch-all: an unreadable or unexpectedly shaped file is
        # reported and skipped rather than aborting the whole batch.
        print(f"Error processing '{json_file_path}': {e}")
        return None
    return messages

def generate_html(messages, base_filename, video_url, output_dir):
    """Render chat messages into a standalone HTML page and write it to disk.

    Args:
        messages: Iterable of dicts with the keys ``photo``, ``author``,
            ``timestamp_formatted`` and ``message_html``. ``message_html``
            is inserted verbatim (it is expected to already be safe HTML);
            the other three values are HTML-escaped here.
        base_filename: Name used for the page title, the footer box and the
            output file (``<base_filename>.html``).
        video_url: URL shown in the header; also copied to the clipboard on
            click and opened by the "Open in New Tab" button.
        output_dir: Directory in which the HTML file is created.

    Raises:
        OSError: If the output file cannot be written.
    """
    escaped_filename = html.escape(base_filename)
    # The URL appears in two contexts: as visible text, and as a JavaScript
    # string literal inside an HTML attribute. For the latter it is encoded
    # as a JS/JSON string first and then HTML-escaped for the attribute.
    url_text = html.escape(str(video_url))
    url_js = html.escape(json.dumps(str(video_url)))

    page_head = f'''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>YouTube Live Chat Replay - {escaped_filename}</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            background-color: #f0f0f0;
            margin: 0;
            padding: 0;
        }}
        .header {{
            width: 100%;
            display: flex;
            justify-content: space-between;
            align-items: center;
            padding: 10px;
            background: #fff;
            border-bottom: 1px solid #ddd;
            box-sizing: border-box;
        }}
        .logo {{
            font-size: 24px;
        }}
        .logo span.black {{
            color: black;
        }}
        .logo span.red {{
            color: white;
            background: #cc0000;
            padding: 0 5px;
            border-radius: 5px;
        }}
        .subtitle {{
            font-size: 12px;
        }}
        .url-bar {{
            text-align: center;
            border: 1px solid #ddd;
            border-radius: 20px;
            padding: 5px 10px;
            background: #fff;
        }}
        .url-bar button {{
            border-radius: 10px;
            border: 1px solid;
        }}
        .url-bar button:hover {{
            background: #e0e0e0;
            transition: background 0.2s ease;
        }}
        .script-link {{
            border: 1px solid;
            border-radius: 20px;
            padding: 5px 10px;
            text-decoration: none;
            color: black;
            background: #f0f0f0;
            transition: background 0.2s ease;
            font-size: 14px
        }}
        .script-link:hover {{
            background: #e0e0e0;
        }}
        #chat-container {{
            max-width: 800px;
            height: 730px;
            overflow-y: auto;
            background-color: #fff;
            border: 1px solid #aaa;
            border-radius: 5px;
            padding: 10px;
            margin: 20px auto;
            box-shadow: 0 2px 5px rgba(0,0,0,0.1);
        }}
        #chat-container::-webkit-scrollbar {{
            width: 20px;
        }}
        #chat-container::-webkit-scrollbar-track {{
            background: #f1f1f1;
            border: 1px solid #ccc;
        }}
        #chat-container::-webkit-scrollbar-thumb {{
            background: #888;
            border-radius: 5px;
        }}
        #chat-container::-webkit-scrollbar-thumb:hover {{
            background: #555;
        }}
        .chat-message {{
            display: flex;
            align-items: flex-start;
            margin-bottom: 15px;
        }}
        .chat-message img.profile-pic {{
            width: 32px;
            height: 32px;
            border-radius: 50%;
            margin-right: 10px;
        }}
        .message-content {{
            flex: 1;
        }}
        .author {{
            font-weight: bold;
            color: #333;
            margin-right: 10px;
        }}
        .timestamp {{
            color: #888;
            font-size: 0.9em;
            margin-right: 10px;
        }}
        .message-text {{
            margin: 5px 0 0 0;
            color: #555;
            word-wrap: break-word;
        }}
        .filename-box {{
            margin: 20px auto;
            padding: 10px;
            border: 1px solid #ccc;
            border-radius: 5px;
            background-color: #f9f9f9;
            text-align: center;
            max-width: 800px;
        }}
    </style>
</head>
<body>
    <div class="header">
        <div>
            <div class="logo"><span class="black">Fake</span><span class="red">Tube</span></div>
            <div class="subtitle">Rendered with <a href="https://github.com/yt-dlp/yt-dlp" target="_blank">YT-DLP</a> live_chat.json files</div>
        </div>
        <div class="url-bar">
            <span onclick="navigator.clipboard.writeText({url_js})">{url_text}</span>
            <button onclick="window.open({url_js}, '_blank')">Open in New Tab</button>
        </div>
        <a href="https://pastebin.com/4mLyLHN8" target="_blank" class="script-link">Open Python Script in New Tab</a>
    </div>
    <div id="chat-container">
'''

    # Collect the page in pieces and join once at the end instead of growing
    # one string with += for every message.
    parts = [page_head]

    for msg in messages:
        # Author names and photo URLs come from the chat data, so they are
        # escaped before being placed in element content / attributes.
        photo = html.escape(str(msg['photo']))
        author = html.escape(str(msg['author']))
        timestamp = html.escape(str(msg['timestamp_formatted']))
        parts.append(f'''
        <div class="chat-message">
            <a href="{photo}" target="_blank"><img src="{photo}" alt="N/A" class="profile-pic"></a>
            <div class="message-content">
                <span class="author">{author}</span>
                <span class="timestamp">{timestamp}</span>
                <p class="message-text">{msg['message_html']}</p>
            </div>
        </div>
        ''')

    # Close the chat container and add the filename box.
    parts.append(f'''
    </div>
    <div class="filename-box">
        <p>{escaped_filename}</p>
    </div>
</body>
</html>
''')

    html_content = ''.join(parts)

    # Write to HTML file in the output directory
    html_filename = base_filename + '.html'
    output_file = os.path.join(output_dir, html_filename)
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)
    print(f"Chat has been rendered to '{os.path.abspath(output_file)}'.")

def _timestamp_sort_key(message):
    """Return a message's offset as an int; unparseable offsets sort as 0."""
    try:
        return int(message['timestamp'])
    except (KeyError, TypeError, ValueError):
        # Same fallback as the formatted timestamp: a bad offset counts as 0
        # instead of aborting the whole run.
        return 0


def main():
    """Convert every '.live_chat.json' file in a user-chosen directory to HTML.

    Prompts for the directory, then for each matching file derives the video
    URL from the filename, loads and chronologically sorts the messages, and
    writes the HTML page next to the source file. Files that cannot be read
    or written are reported and skipped; the remaining files are still
    processed.
    """
    dir_path = get_directory_path()
    # Sorted so the processing order is the same on every run; os.listdir
    # returns entries in arbitrary order.
    json_files = sorted(
        name for name in os.listdir(dir_path)
        if name.endswith('.live_chat.json') and os.path.isfile(os.path.join(dir_path, name))
    )
    if not json_files:
        print("No '.live_chat.json' files found in the directory.")
        return
    print(f"Found {len(json_files)} '.live_chat.json' files to process.")

    for filename in json_files:
        json_file_path = os.path.join(dir_path, filename)
        print(f"Processing '{filename}'...")

        video_id = extract_video_id(filename)
        video_url = f"https://www.youtube.com/watch?v={video_id}" if video_id else "N/A"

        messages = process_json_file(json_file_path)
        if messages is None:
            print(f"Skipping '{filename}' due to errors.")
            continue

        messages.sort(key=_timestamp_sort_key)
        base_filename = os.path.splitext(filename)[0]
        try:
            generate_html(messages, base_filename, video_url, dir_path)
        except OSError as e:
            # A write failure for one file should not stop the batch.
            print(f"Error writing HTML for '{filename}': {e}")

# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()