"""Default script for data analysis."""

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import json
from datetime import datetime
import os
from pathlib import Path

def _time_alive_to_seconds(time_str):
    """Convert a ``timeAlive`` value such as ``'2m 30s'``, ``'3m'`` or ``'45s'`` to seconds.

    Missing values (``None``/NaN) are returned as NaN. Text that cannot be
    parsed as a number still raises ``ValueError``.
    """
    # Missing entries must stay missing instead of being parsed as text.
    if pd.api.types.is_scalar(time_str) and pd.isna(time_str):
        return float('nan')

    text = str(time_str).strip()
    if 'm' not in text:
        return float(text.replace('s', '').strip())

    parts = text.split('m')
    minutes = int(parts[0])
    seconds_text = parts[1].replace('s', '').strip()
    if not seconds_text:
        # Whole minutes only, e.g. '3m' -- there is no seconds part to parse.
        return minutes * 60
    try:
        seconds = int(seconds_text)
    except ValueError:
        # Fractional seconds, e.g. '1m 2.5s'.
        seconds = float(seconds_text)
    return minutes * 60 + seconds


def process_games_data(data, source_name=None):
    """Process a single player's game data into a DataFrame.

    Parameters:
    data : dict
        Parsed JSON export. ``data['games']`` holds the per-game records;
        ``data['metadata']['playerName']`` is only read when ``source_name``
        is empty.
    source_name : str or None
        Label stored in the ``dataSource`` column for every row.

    Returns:
    pandas.DataFrame
        One row per game, with ``timestamp`` converted to datetime, the score
        and kill columns converted to numbers (unparseable values become NaN)
        and ``timeAlive`` converted to seconds.
    """
    df = pd.DataFrame(data['games'])

    # Add data source (player) name
    df['dataSource'] = source_name or data['metadata']['playerName']

    # Convert timestamp to datetime
    df['timestamp'] = pd.to_datetime(df['timestamp'])

    # Convert numeric columns
    numeric_columns = ['score', 'currentKills', 'bestKillStreak', 'bestScore', 'topPosition']
    for col in numeric_columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Convert timeAlive to seconds
    df['timeAlive'] = df['timeAlive'].apply(_time_alive_to_seconds)

    return df

def _load_games(json_paths):
    """Load every file in *json_paths* and return the processed frames as a list."""
    frames = []
    for player_name, file_path in json_paths.items():
        # Best-effort loading: a file that cannot be read or parsed is
        # reported and skipped so the remaining files are still analyzed.
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
            frames.append(process_games_data(data, player_name))
            print(f"Successfully loaded data for {player_name}")
        except Exception as e:
            print(f"Error loading data for {player_name}: {str(e)}")
    return frames


def _plot_figures(all_data, death_counts):
    """Build the five analysis figures; showing them is left to the caller."""
    plt.style.use('seaborn-v0_8')
    fig_size = (12, 6)

    # 1. Alias Performance Analysis
    plt.figure(figsize=fig_size)
    alias_stats = all_data.groupby(['dataSource', 'playerName'])['score'].agg(['mean', 'count']).reset_index()
    sns.scatterplot(data=alias_stats, x='mean', y='count', hue='dataSource',
                    size='count', sizes=(100, 1000), alpha=0.6)
    for _, row in alias_stats.iterrows():
        plt.annotate(row['playerName'], (row['mean'], row['count']))
    plt.title('Alias Performance: Average Score vs Games Played')
    plt.xlabel('Average Score')
    plt.ylabel('Number of Games')
    plt.tight_layout()

    # 2. Score Distribution by Alias
    plt.figure(figsize=fig_size)
    sns.boxplot(data=all_data, x='playerName', y='score', hue='dataSource')
    plt.title('Score Distribution by Alias')
    plt.xticks(rotation=45)
    plt.tight_layout()

    # 3. Time Analysis
    plt.figure(figsize=fig_size)
    with_hour = all_data.assign(hour=all_data['timestamp'].dt.hour)
    hourly_performance = with_hour.groupby(['dataSource', 'hour'])['score'].mean().reset_index()
    sns.lineplot(data=hourly_performance, x='hour', y='score', hue='dataSource')
    plt.title('Average Score by Hour of Day')
    plt.tight_layout()

    # 4. Death Type Analysis
    # DataFrame.plot opens a figure of its own unless it is handed an axes,
    # which would leave the figure created here blank; pass ``ax`` explicitly.
    death_fig, death_ax = plt.subplots(figsize=fig_size)
    death_counts.plot(kind='bar', stacked=True, ax=death_ax)
    death_ax.set_title('Death Type Distribution by Player')
    death_ax.tick_params(axis='x', rotation=45)
    death_fig.tight_layout()

    # 5. Kill Performance
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
    sns.boxplot(data=all_data, x='dataSource', y='currentKills', ax=ax1)
    ax1.set_title('Kills Distribution by Player')
    ax1.tick_params(axis='x', rotation=45)

    sns.boxplot(data=all_data, x='dataSource', y='bestKillStreak', ax=ax2)
    ax2.set_title('Best Kill Streak Distribution by Player')
    ax2.tick_params(axis='x', rotation=45)
    fig.tight_layout()


def _print_statistics(all_data, death_counts):
    """Print the per-player, per-alias, top-game and death-type tables."""
    # The same aggregation is reported per player and per alias.
    aggregations = {
        'score': ['count', 'mean', 'max'],
        'timeAlive': 'mean',
        'currentKills': ['mean', 'sum'],
        'bestKillStreak': 'max',
        'topPosition': 'min'  # min because lower position is better
    }

    print("\n=== Detailed Statistics ===\n")

    # Overall player statistics
    print("Player Statistics:")
    print(all_data.groupby('dataSource').agg(aggregations).round(2))

    # Alias statistics
    print("\nAlias Statistics:")
    print(all_data.groupby(['dataSource', 'playerName']).agg(aggregations).round(2))

    # Top performances
    print("\nTop 5 Individual Performances:")
    top_games = all_data.nlargest(5, 'score')[
        ['dataSource', 'playerName', 'timestamp', 'score', 'currentKills', 'timeAlive']
    ]
    print(top_games)

    # Death type analysis
    print("\nDeath Type Distribution:")
    print(death_counts)


def analyze_powerline_data(json_paths=None):
    """
    Analyze Powerline.io game data from multiple JSON files

    Loads each file, draws five figures (alias performance, score
    distribution, score by hour, death types, kill performance), prints
    summary tables and finally shows the figures with ``plt.show()``.
    Files that fail to load are reported and skipped. Returns None.

    Parameters:
    json_paths : dict or None
        Dictionary of {name: path} pairs. If None, will look for JSON files in current directory
    """
    # If no paths provided, look for JSON files in current directory
    if json_paths is None:
        json_paths = {f.stem: f for f in Path('.').glob('*.json')}

    if not json_paths:
        print("No JSON files found!")
        return

    # Load and combine all data. Concatenating once avoids re-copying the
    # accumulated rows for every additional file.
    frames = _load_games(json_paths)
    all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    if all_data.empty:
        print("No data was loaded. Please check your file paths.")
        return

    # Shared by the death-type figure and the printed death-type table.
    death_counts = all_data.groupby(['dataSource', 'deathType']).size().unstack(fill_value=0)

    _plot_figures(all_data, death_counts)
    _print_statistics(all_data, death_counts)

    plt.show()

if __name__ == "__main__":
    # Map a display name to the JSON export it should be read from. Add more
    # entries to compare against other users' data; renaming the export to
    # something short makes its path easier to type out.
    analyze_powerline_data(
        {'your default name': r'C:\Users\youruser\yourpath\yourdata.json'}
    )