genericPaster

Automod ana

Jul 23rd, 2025
362
0
6 days
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 15.64 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. """
  3.    Moderation Log Analyzer
  4.    
  5.    Analyzes Reddit moderation logs to track automod effectiveness
  6.    and identify when human moderators are undoing automod actions.
  7. """
  8.  
  9. import json
  10. import re
  11. from datetime import datetime
  12. from collections import defaultdict, Counter
  13. from dataclasses import dataclass
  14. from typing import List, Dict, Tuple, Optional
  15.  
  16. @dataclass
  17. class ModAction:
  18.     """    Represents a single moderation action from the log.
  19.    
  20.    """
  21.     time: str
  22.     moderator: str
  23.     action_type: str
  24.     action: str
  25.     content: str
  26.     username: str
  27.     parsed_time: datetime
  28.  
  29. class ModerationAnalyzer:
  30.     """    Analyzes moderation logs to track automod effectiveness and
  31.           human moderator interventions.
  32.    
  33.    """
  34.    
  35.     def __init__(self, json_file_path: str):
  36.         """        Initialize analyzer with moderation log data.
  37.        
  38.        Args:
  39.            json_file_path: Path to the JSON moderation log file
  40.        """
  41.         self.actions = []
  42.         self.load_data(json_file_path)
  43.    
  44.     def load_data(self, json_file_path: str) -> None:
  45.         """        Load and parse moderation log data from JSON file.
  46.        
  47.        """
  48.         with open(json_file_path, 'r', encoding='utf-8') as f:
  49.             data = json.load(f)
  50.        
  51.         for entry in data.get('JSONdata', []):
  52.             action = self.parse_action(entry)
  53.             if action:
  54.                 self.actions.append(action)
  55.        
  56.         # Sort by time for chronological analysis
  57.         self.actions.sort(key=lambda x: x.parsed_time)
  58.    
  59.     def parse_action(self, entry: Dict) -> Optional[ModAction]:
  60.         """        Parse a single log entry into a ModAction object.
  61.        
  62.        Extracts username from content using multiple patterns to handle
  63.        various formats in the mod log.
  64.        """
  65.         try:
  66.             content = entry.get('Content', '')
  67.            
  68.             # Extract username using multiple patterns
  69.             username = self.extract_username(content)
  70.             if username == 'unknown':
  71.                 return None
  72.            
  73.             # Parse time
  74.             time_str = entry.get('Time', '')
  75.             parsed_time = self.parse_time(time_str)
  76.            
  77.             return ModAction(
  78.                 time=time_str,
  79.                 moderator=entry.get('Moderator', ''),
  80.                 action_type=entry.get('Type', ''),
  81.                 action=entry.get('Action', ''),
  82.                 content=content,
  83.                 username=username,
  84.                 parsed_time=parsed_time
  85.             )
  86.         except Exception as e:
  87.             print(f"Error parsing entry: {e}")
  88.             return None
  89.    
  90.     def extract_username(self, content: str) -> str:
  91.         """        Extract username from content using various patterns.
  92.        
  93.        Handles different formats and edge cases like system actions
  94.        and bot-generated content.
  95.        """
  96.         # Handle empty or very short content
  97.         if not content or len(content.strip()) < 3:
  98.             return 'system'
  99.        
  100.         # Try different username patterns
  101.         patterns = [
  102.             r'^u/(\S+)'  # Captures non-whitespace characters after u/
  103.         ]
  104.         for pattern in patterns:
  105.             match = re.search(pattern, content, re.IGNORECASE)
  106.             if match:
  107.                 username = match.group(1)
  108.                 username = username.rstrip('.,!?:;')
  109.                 if len(username) >= 2:
  110.                     return username
  111.         return 'unknown'
  112.    
  113.     def parse_time(self, time_str: str) -> datetime:
  114.         """        Parse time string into datetime object.
  115.        
  116.        """
  117.         try:
  118.             return datetime.strptime(time_str, '%I:%M %p %b %d, %Y')
  119.         except:
  120.             try:
  121.                 return datetime.strptime(time_str, '%H:%M %p %b %d, %Y')
  122.             except:
  123.                 return datetime.now()  # Fallback
  124.    
  125.     def identify_undos(self) -> List[Tuple[ModAction, ModAction]]:
  126.  
  127.         undos = []
  128.        
  129.         # Group all actions by username only
  130.         user_actions = defaultdict(list)
  131.         for action in self.actions:
  132.             user_actions[action.username].append(action)
  133.        
  134.         # Look for AutoMod → Human undo patterns for each user
  135.         for username, actions in user_actions.items():
  136.             # Sort actions by time for this user
  137.             actions.sort(key=lambda x: x.parsed_time)
  138.            
  139.             # Find all AutoMod → Human action pairs
  140.             for i, automod_action in enumerate(actions):
  141.                 if 'AutoModerator' not in automod_action.moderator:
  142.                     continue
  143.                    
  144.                 # Look for subsequent human actions on same user
  145.                 for j in range(i + 1, len(actions)):
  146.                     human_action = actions[j]
  147.                    
  148.                     # Skip if it's another AutoMod action
  149.                     if 'AutoModerator' in human_action.moderator:
  150.                         continue
  151.                    
  152.                     # Check if this human action undoes the AutoMod action
  153.                     if self.is_undo_pair(automod_action, human_action):
  154.                         undos.append((automod_action, human_action))
  155.                         break  # Only match first undo for this AutoMod action
  156.        
  157.         return undos
  158.    
  159.     def is_undo_pair(self, action1: ModAction, action2: ModAction) -> bool:
  160.         """        Determine if two actions constitute an undo pair.
  161.        
  162.        For username-based correlation, we look for:
  163.        - AutoMod removes/spams → Human approves/unspams
  164.        - Any opposing moderation actions on the same user
  165.        """
  166.         # Define opposing action patterns
  167.         removal_actions = ['Remove', 'Spam']
  168.         approval_actions = ['Approve', 'Unspam']
  169.        
  170.         action1_is_removal = any(word in action1.action for word in removal_actions)
  171.         action1_is_approval = any(word in action1.action for word in approval_actions)
  172.        
  173.         action2_is_removal = any(word in action2.action for word in removal_actions)
  174.         action2_is_approval = any(word in action2.action for word in approval_actions)
  175.        
  176.         # Check if action2 undoes action1
  177.         if action1_is_removal and action2_is_approval:
  178.             return True
  179.        
  180.         # Also check reverse case (though less common in AutoMod context)
  181.         if action1_is_approval and action2_is_removal:
  182.             return True
  183.        
  184.         return False
  185.    
  186.     def generate_summary_report(self) -> str:
  187.         """        Generate comprehensive summary report of moderation activity.
  188.        
  189.        """
  190.         report = []
  191.         report.append("=" * 60)
  192.         report.append("MODERATION LOG ANALYSIS SUMMARY")
  193.         report.append("=" * 60)
  194.        
  195.         # Basic statistics
  196.         total_actions = len(self.actions)
  197.         automod_actions = len([a for a in self.actions if 'AutoModerator' in a.moderator])
  198.         human_actions = total_actions - automod_actions
  199.        
  200.         report.append(f"\nBASIC STATISTICS:")
  201.         report.append(f"Total Actions: {total_actions}")
  202.         report.append(f"AutoModerator Actions: {automod_actions} ({automod_actions/total_actions*100:.1f}%)")
  203.         report.append(f"Human Moderator Actions: {human_actions} ({human_actions/total_actions*100:.1f}%)")
  204.        
  205.         # Action type breakdown
  206.         action_counts = Counter(action.action for action in self.actions)
  207.         report.append(f"\nACTION BREAKDOWN:")
  208.         for action, count in action_counts.most_common():
  209.             report.append(f"  {action}: {count}")
  210.        
  211.         # Moderator activity
  212.         mod_counts = Counter(action.moderator for action in self.actions)
  213.         report.append(f"\nMODERATOR ACTIVITY:")
  214.         for mod, count in mod_counts.most_common():
  215.             report.append(f"  {mod}: {count}")
  216.        
  217.         # Undo analysis
  218.         undos = self.identify_undos()
  219.         report.append(f"\nUNDO ANALYSIS:")
  220.         report.append(f"Total Undo Actions Found: {len(undos)}")
  221.        
  222.         if undos:
  223.             # Categorize undos
  224.             automod_undone = [u for u in undos if 'AutoModerator' in u[0].moderator]
  225.             human_undone = [u for u in undos if 'AutoModerator' not in u[0].moderator]
  226.            
  227.             report.append(f"AutoModerator Actions Undone by Humans: {len(automod_undone)}")
  228.             report.append(f"Human Actions Undone: {len(human_undone)}")
  229.            
  230.             if automod_actions > 0:
  231.                 undo_rate = len(automod_undone) / automod_actions * 100
  232.                 report.append(f"AutoMod Undo Rate: {undo_rate:.2f}%")
  233.        
  234.         return "\n".join(report)
  235.  
  236.     def get_undoable_actions(self, actions: List[ModAction]) -> List[ModAction]:
  237.         """        Filter actions to only those that could potentially be undone.
  238.        
  239.        Only removal/spam actions can be undone by approval/unspam actions.
  240.        """
  241.         removal_actions = ['Remove', 'Spam']
  242.         return [action for action in actions
  243.                 if any(word in action.action for word in removal_actions)]
  244.    
  245.     def generate_effectiveness_report(self) -> str:
  246.         """        Generate report on automod effectiveness for the experiment.
  247.        
  248.        Separates analysis by content type (Comments vs Posts/Other).
  249.        Only counts undoable actions in effectiveness calculations.
  250.        """
  251.         undos = self.identify_undos()
  252.         all_automod_actions = [a for a in self.actions if 'AutoModerator' in a.moderator]
  253.        
  254.         # Only count actions that could potentially be undone
  255.         automod_actions = self.get_undoable_actions(all_automod_actions)
  256.         automod_undone = [u for u in undos if 'AutoModerator' in u[0].moderator]
  257.        
  258.         report = []
  259.         report.append("=" * 60)
  260.         report.append("AUTOMOD EFFECTIVENESS ANALYSIS")
  261.         report.append("=" * 60)
  262.        
  263.         if not automod_actions:
  264.             if all_automod_actions:
  265.                 report.append("AutoModerator performed actions, but none were undoable removal/spam actions.")
  266.                 report.append(f"Total AutoMod actions: {len(all_automod_actions)} (sticky, distinguish, etc.)")
  267.             else:
  268.                 report.append("No AutoModerator actions found in log.")
  269.             return "\n".join(report)
  270.        
  271.         # Show both totals for transparency
  272.         report.append(f"AutoMod total actions: {len(all_automod_actions)}")
  273.         report.append(f"AutoMod undoable actions (Remove/Spam): {len(automod_actions)}")
  274.        
  275.         # Separate actions by content type
  276.         comment_actions = [a for a in automod_actions if a.action_type == 'Comments']
  277.         post_actions = [a for a in automod_actions if a.action_type != 'Comments']
  278.        
  279.         comment_undone = [u for u in automod_undone if u[0].action_type == 'Comments']
  280.         post_undone = [u for u in automod_undone if u[0].action_type != 'Comments']
  281.        
  282.         # Overall effectiveness (only for undoable actions)
  283.         total_automod = len(automod_actions)
  284.         undone_count = len(automod_undone)
  285.         effectiveness = ((total_automod - undone_count) / total_automod) * 100 if total_automod > 0 else 0
  286.        
  287.         report.append(f"\nOVERALL EFFECTIVENESS (Undoable Actions Only):")
  288.         report.append(f"Total AutoMod Undoable Actions: {total_automod}")
  289.         report.append(f"Actions Undone by Humans: {undone_count}")
  290.         report.append(f"Effectiveness Rate: {effectiveness:.1f}%")
  291.         report.append(f"Undo Rate: {(undone_count/total_automod)*100:.1f}%")
  292.        
  293.         # Comments-specific analysis
  294.         if comment_actions:
  295.             comment_total = len(comment_actions)
  296.             comment_undone_count = len(comment_undone)
  297.             comment_effectiveness = ((comment_total - comment_undone_count) / comment_total) * 100
  298.            
  299.             report.append(f"\nCOMMENT MODERATION EFFECTIVENESS:")
  300.             report.append(f"Total AutoMod Comment Actions: {comment_total}")
  301.             report.append(f"Comment Actions Undone: {comment_undone_count}")
  302.             report.append(f"Comment Effectiveness Rate: {comment_effectiveness:.1f}%")
  303.             report.append(f"Comment Undo Rate: {(comment_undone_count/comment_total)*100:.1f}%")
  304.            
  305.             # Break down comment actions by type
  306.             comment_by_action = defaultdict(int)
  307.             comment_undos_by_action = defaultdict(int)
  308.            
  309.             for action in comment_actions:
  310.                 comment_by_action[action.action] += 1
  311.            
  312.             for original, _ in comment_undone:
  313.                 comment_undos_by_action[original.action] += 1
  314.            
  315.             if comment_by_action:
  316.                 report.append(f"\n  Comment Actions Breakdown:")
  317.                 for action_type in comment_by_action:
  318.                     total = comment_by_action[action_type]
  319.                     undone = comment_undos_by_action[action_type]
  320.                     rate = ((total - undone) / total) * 100 if total > 0 else 0
  321.                    
  322.                     report.append(f"    {action_type}:")
  323.                     report.append(f"      Total: {total}, Undone: {undone}, Effectiveness: {rate:.1f}%")
  324.        
  325.         # Posts-specific analysis  
  326.         if post_actions:
  327.             post_total = len(post_actions)
  328.             post_undone_count = len(post_undone)
  329.             post_effectiveness = ((post_total - post_undone_count) / post_total) * 100
  330.            
  331.             report.append(f"\nPOST MODERATION EFFECTIVENESS:")
  332.             report.append(f"Total AutoMod Post Actions: {post_total}")
  333.             report.append(f"Post Actions Undone: {post_undone_count}")
  334.             report.append(f"Post Effectiveness Rate: {post_effectiveness:.1f}%")
  335.             report.append(f"Post Undo Rate: {(post_undone_count/post_total)*100:.1f}%")
  336.            
  337.             # Break down post actions by type
  338.             post_by_action = defaultdict(int)
  339.             post_undos_by_action = defaultdict(int)
  340.            
  341.             for action in post_actions:
  342.                 post_by_action[action.action] += 1
  343.            
  344.             for original, _ in post_undone:
  345.                 post_undos_by_action[original.action] += 1
  346.            
  347.             if post_by_action:
  348.                 report.append(f"\n  Post Actions Breakdown:")
  349.                 for action_type in post_by_action:
  350.                     total = post_by_action[action_type]
  351.                     undone = post_undos_by_action[action_type]
  352.                     rate = ((total - undone) / total) * 100 if total > 0 else 0
  353.                    
  354.                     report.append(f"    {action_type}:")
  355.                     report.append(f"      Total: {total}, Undone: {undone}, Effectiveness: {rate:.1f}%")
  356.                
  357.         return "\n".join(report)
  358.  
  359. def main():
  360.     """    Main function to run the analysis.
  361.    
  362.    """
  363.     import sys
  364.    
  365.     if len(sys.argv) != 2:
  366.         print("Usage: python mod_analyzer.py <json_file>")
  367.         sys.exit(1)
  368.    
  369.     json_file = sys.argv[1]
  370.    
  371.     try:
  372.         analyzer = ModerationAnalyzer(json_file)
  373.        
  374.         print(analyzer.generate_summary_report())
  375.         print("\n\n")
  376.         print(analyzer.generate_effectiveness_report())
  377.         print("\n\n")
  378.        
  379.     except FileNotFoundError:
  380.         print(f"Error: File '{json_file}' not found.")
  381.     except json.JSONDecodeError:
  382.         print(f"Error: Invalid JSON in file '{json_file}'.")
  383.     except Exception as e:
  384.         print(f"Error analyzing file: {e}")
  385.  
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment