Advertisement
Guest User

Default Script for data analysis

a guest
Feb 6th, 2025
20
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.04 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. import seaborn as sns
  4. import matplotlib.pyplot as plt
  5. import json
  6. from datetime import datetime
  7. import os
  8. from pathlib import Path
  9.  
  def process_games_data(data, source_name=None):
      """Build a cleaned DataFrame from one player's exported game data.

      Parameters:
          data : dict
              Parsed JSON export. Must contain a 'games' entry (one record per
              game) and, when ``source_name`` is not given, a
              'metadata' -> 'playerName' entry.
          source_name : str or None
              Label stored in the 'dataSource' column. When falsy, the
              player name from the export's metadata is used instead.

      Returns:
          pandas.DataFrame with 'timestamp' parsed to datetimes, the score /
          kill / position columns converted to numbers (unparseable values
          become NaN) and 'timeAlive' expressed in seconds as floats
          (unparseable values become NaN).

      Raises:
          KeyError: if an expected key or column is missing from ``data``.
      """
      df = pd.DataFrame(data['games'])

      # Add data source (player) name
      df['dataSource'] = source_name or data['metadata']['playerName']

      # Convert timestamp to datetime
      df['timestamp'] = pd.to_datetime(df['timestamp'])

      # Convert numeric columns; bad cells become NaN instead of raising
      numeric_columns = ['score', 'currentKills', 'bestKillStreak', 'bestScore', 'topPosition']
      for col in numeric_columns:
          df[col] = pd.to_numeric(df[col], errors='coerce')

      def convert_time_to_seconds(time_str):
          """Convert '1m 30s', '2m', '45s' or a bare number to float seconds."""
          text = str(time_str).strip().lower()
          minutes_part, marker, seconds_part = text.partition('m')
          if not marker:
              # No minute marker: the whole value is a seconds figure.
              minutes_part, seconds_part = '', text
          minutes_part = minutes_part.strip()
          seconds_part = seconds_part.replace('s', '').strip()
          try:
              # Either part may be absent ("2m", "45s"); treat it as zero.
              minutes = float(minutes_part) if minutes_part else 0.0
              seconds = float(seconds_part) if seconds_part else 0.0
          except ValueError:
              # Mirror errors='coerce' above: one malformed duration must not
              # abort processing of the whole file.
              return float('nan')
          return minutes * 60 + seconds

      # Convert timeAlive to seconds
      df['timeAlive'] = df['timeAlive'].apply(convert_time_to_seconds)

      return df
  39.  
  def _load_player_frames(json_paths):
      """Load every JSON file and return the DataFrames that parsed successfully."""
      frames = []
      for player_name, file_path in json_paths.items():
          # Best effort: a broken or missing file is reported and skipped so
          # the remaining players can still be analysed.
          try:
              with open(file_path, 'r', encoding='utf-8') as file:
                  data = json.load(file)
              frames.append(process_games_data(data, player_name))
              print(f"Successfully loaded data for {player_name}")
          except Exception as e:
              print(f"Error loading data for {player_name}: {e}")
      return frames


  def _apply_plot_style():
      """Select the seaborn matplotlib style under whichever name is installed."""
      # 'seaborn-v0_8' only exists from matplotlib 3.6 on; older releases ship
      # the same sheet as 'seaborn'. With neither, keep the default style.
      for style_name in ('seaborn-v0_8', 'seaborn'):
          if style_name in plt.style.available:
              plt.style.use(style_name)
              return


  def _plot_figures(all_data, death_counts, fig_size):
      """Create the five analysis figures (they are shown later by plt.show)."""
      # 1. Alias Performance Analysis
      plt.figure(figsize=fig_size)
      alias_stats = all_data.groupby(['dataSource', 'playerName'])['score'].agg(['mean', 'count']).reset_index()
      sns.scatterplot(data=alias_stats, x='mean', y='count', hue='dataSource',
                      size='count', sizes=(100, 1000), alpha=0.6)
      for _, row in alias_stats.iterrows():
          plt.annotate(row['playerName'], (row['mean'], row['count']))
      plt.title('Alias Performance: Average Score vs Games Played')
      plt.xlabel('Average Score')
      plt.ylabel('Number of Games')
      plt.tight_layout()

      # 2. Score Distribution by Alias
      plt.figure(figsize=fig_size)
      sns.boxplot(data=all_data, x='playerName', y='score', hue='dataSource')
      plt.title('Score Distribution by Alias')
      plt.xticks(rotation=45)
      plt.tight_layout()

      # 3. Time Analysis
      plt.figure(figsize=fig_size)
      all_data['hour'] = all_data['timestamp'].dt.hour
      hourly_performance = all_data.groupby(['dataSource', 'hour'])['score'].mean().reset_index()
      sns.lineplot(data=hourly_performance, x='hour', y='score', hue='dataSource')
      plt.title('Average Score by Hour of Day')
      plt.tight_layout()

      # 4. Death Type Analysis
      # DataFrame.plot opens a figure of its own unless it is handed an axes,
      # which would leave a blank window behind; draw on an explicit axes.
      _, death_ax = plt.subplots(figsize=fig_size)
      death_counts.plot(kind='bar', stacked=True, ax=death_ax)
      plt.title('Death Type Distribution by Player')
      plt.xticks(rotation=45)
      plt.tight_layout()

      # 5. Kill Performance
      _, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
      sns.boxplot(data=all_data, x='dataSource', y='currentKills', ax=ax1)
      ax1.set_title('Kills Distribution by Player')
      ax1.tick_params(axis='x', rotation=45)

      sns.boxplot(data=all_data, x='dataSource', y='bestKillStreak', ax=ax2)
      ax2.set_title('Best Kill Streak Distribution by Player')
      ax2.tick_params(axis='x', rotation=45)
      plt.tight_layout()


  def _print_statistics(all_data, death_counts):
      """Print per-player and per-alias summaries, top games and death types."""
      stat_spec = {
          'score': ['count', 'mean', 'max'],
          'timeAlive': 'mean',
          'currentKills': ['mean', 'sum'],
          'bestKillStreak': 'max',
          'topPosition': 'min',  # min because lower position is better
      }

      print("\n=== Detailed Statistics ===\n")

      # Overall player statistics
      print("Player Statistics:")
      print(all_data.groupby('dataSource').agg(stat_spec).round(2))

      # Alias statistics
      print("\nAlias Statistics:")
      print(all_data.groupby(['dataSource', 'playerName']).agg(stat_spec).round(2))

      # Top performances
      print("\nTop 5 Individual Performances:")
      top_games = all_data.nlargest(5, 'score')[
          ['dataSource', 'playerName', 'timestamp', 'score', 'currentKills', 'timeAlive']
      ]
      print(top_games)

      # Death type analysis
      print("\nDeath Type Distribution:")
      print(death_counts)


  def analyze_powerline_data(json_paths=None):
      """
      Analyze Powerline.io game data from multiple JSON files

      Loads every file, combines the games into one table, draws five
      comparison figures, prints summary statistics and finally shows the
      figures. Files that fail to load are reported and skipped.

      Parameters:
          json_paths : dict or None
              Dictionary of {name: path} pairs. If None, will look for JSON
              files in current directory and use each file's stem as its name.

      Returns:
          None. Results are printed and plotted.
      """
      # If no paths provided, look for JSON files in current directory
      if json_paths is None:
          json_paths = {f.stem: f for f in Path('.').glob('*.json')}

      if not json_paths:
          print("No JSON files found!")
          return

      # Load everything first and concatenate once: growing a DataFrame with
      # concat inside the loop copies all previous rows on every iteration.
      frames = _load_player_frames(json_paths)
      all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

      if all_data.empty:
          print("No data was loaded. Please check your file paths.")
          return

      # Set up plotting style
      _apply_plot_style()
      fig_size = (12, 6)

      # Shared by the stacked bar chart and the printed table.
      death_counts = all_data.groupby(['dataSource', 'deathType']).size().unstack(fill_value=0)

      _plot_figures(all_data, death_counts, fig_size)
      _print_statistics(all_data, death_counts)

      plt.show()
  161.  
  if __name__ == "__main__":
      # Map a display name to each exported JSON file. Add more entries to
      # compare against other users' data; renaming the JSON file to something
      # short makes the path easier to type.
      json_paths = {}
      json_paths['your default name'] = r'C:\Users\youruser\yourpath\yourdata.json'
      analyze_powerline_data(json_paths)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement