Advertisement
Guest User

Rockland County Analysis Script

a guest
Jun 11th, 2025
50
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.13 KB | None | 0 0
import io

import numpy as np
import pandas as pd


  5. def create_df_from_text(csv_text):
  6.     """Creates a pandas DataFrame from a block of CSV text."""
  7.     return pd.read_csv(io.StringIO(csv_text))
  8.  
  9. def process_election_data(df, race_name):
  10.     """
  11.    Loads, cleans, and processes an election results DataFrame.
  12.    - Aggregates votes for candidates across different party lines.
  13.    - Identifies the main Democratic and Republican candidates.
  14.    - Pivots the data to have one row per reporting unit with columns for major party votes.
  15.    """
  16.     # Filter out summary rows like 'United States Senator'
  17.     df = df[~df['Reporting Unit'].str.contains('United States Senator', case=False, na=False)]
  18.     # Filter out rows with 0 votes for both major candidates, like Ramapo 100 & 107
  19.     df = df[~df['Candidate'].isin(['Adjudicated Write-Ins', 'Chase Oliver', 'Claudia De La Cruz', 'Cornel West', 'Jill Stein', 'Peter Sonski'])]
  20.  
  21.     # Aggregate votes for each candidate within each reporting unit
  22.     candidate_totals = df.groupby(['Reporting Unit', 'Candidate'])['Votes'].sum().reset_index()
  23.  
  24.     # Dynamically identify the main party candidates
  25.     try:
  26.         rep_candidate_name = df[df['Party'] == 'Republican']['Candidate'].unique()[0]
  27.         dem_candidate_name = df[df['Party'] == 'Democratic']['Candidate'].unique()[0]
  28.     except IndexError:
  29.         print(f"Error: Could not identify a unique Republican or Democratic candidate for {race_name}.")
  30.         return None
  31.  
  32.     # Pivot the table to make candidates columns
  33.     pivoted = candidate_totals.pivot_table(
  34.         index='Reporting Unit', columns='Candidate', values='Votes', fill_value=0
  35.     ).reset_index()
  36.  
  37.     # Ensure the major candidate columns exist
  38.     if rep_candidate_name not in pivoted.columns: pivoted[rep_candidate_name] = 0
  39.     if dem_candidate_name not in pivoted.columns: pivoted[dem_candidate_name] = 0
  40.  
  41.     processed_df = pivoted[['Reporting Unit', rep_candidate_name, dem_candidate_name]]
  42.     processed_df.columns = ['Reporting Unit', f'Rep_Votes_{race_name}', f'Dem_Votes_{race_name}']
  43.     return processed_df
  44.  
  45. def run_anomaly_analysis(pres_df, sen_df):
  46.     """
  47.    Performs a series of tests to find implausible Republican overperformance.
  48.    """
  49.     # --- 1. Data Preparation ---
  50.     pres_data = process_election_data(pres_df, 'President')
  51.     sen_data = process_election_data(sen_df, 'Senator')
  52.  
  53.     if pres_data is None or sen_data is None:
  54.         return
  55.  
  56.     results = pd.merge(pres_data, sen_data, on='Reporting Unit')
  57.  
  58.     # Filter out precincts with very low total vote counts to avoid noise
  59.     results = results[(results['Rep_Votes_President'] + results['Dem_Votes_President']) >= 20]
  60.    
  61.     # --- 2. Calculate Core Metrics ---
  62.     P_R = results['Rep_Votes_President']
  63.     P_D = results['Dem_Votes_President']
  64.     S_R = results['Rep_Votes_Senator']
  65.     S_D = results['Dem_Votes_Senator']
  66.    
  67.     # Republican Overperformance (the primary metric of interest)
  68.     results['Rep_Overperformance_Votes'] = P_R - S_R
  69.     # Use np.divide to prevent division by zero errors
  70.     results['Rep_Overperformance_Pct'] = np.divide(P_R - S_R, P_R, out=np.zeros_like(P_R, dtype=float), where=P_R!=0) * 100
  71.  
  72.     # Margin calculations for the "Flip" test
  73.     total_pres = P_R + P_D
  74.     total_sen = S_R + S_D
  75.     results['Pres_Margin_Rep_Pct'] = np.divide(P_R - P_D, total_pres, out=np.zeros_like(P_R, dtype=float), where=total_pres!=0) * 100
  76.     results['Sen_Margin_Rep_Pct'] = np.divide(S_R - S_D, total_sen, out=np.zeros_like(S_R, dtype=float), where=total_sen!=0) * 100
  77.     results['Margin_Swing_pp'] = results['Pres_Margin_Rep_Pct'] - results['Sen_Margin_Rep_Pct']
  78.  
  79.     # Partisan Skew test
  80.     results['Pres_Rep_Share'] = np.divide(P_R, total_pres, out=np.zeros_like(P_R, dtype=float), where=total_pres!=0) * 100
  81.  
  82.     # --- 3. Display Results ---
  83.     pd.set_option('display.width', 1000)
  84.    
  85.     print("\n" + "="*80)
  86.     print("Test 1: Top 15 Precincts with the Highest Republican Presidential Overperformance")
  87.     print("This measures the % of Republican presidential voters who did not vote for the Rep. senator.")
  88.     print("="*80)
  89.     overperformers = results.sort_values(by='Rep_Overperformance_Pct', ascending=False).head(15)
  90.     print(overperformers[[
  91.         'Reporting Unit', 'Rep_Votes_President', 'Dem_Votes_President', 'Rep_Votes_Senator', 'Dem_Votes_Senator', 'Rep_Overperformance_Votes', 'Rep_Overperformance_Pct'
  92.     ]].to_string(index=False, formatters={
  93.         'Rep_Overperformance_Pct': '{:,.1f}%'.format
  94.     }))
  95.  
  96.     print("\n" + "="*80)
  97.     print("Test 2: Top 15 Precincts with the Largest Vote Margin 'Flips'")
  98.     print("This measures the swing in the Republican margin between the Pres. and Sen. races.")
  99.     print("="*80)
  100.     flippers = results.sort_values(by='Margin_Swing_pp', ascending=False).head(15)
  101.     print(flippers[[
  102.         'Reporting Unit', 'Pres_Margin_Rep_Pct', 'Sen_Margin_Rep_Pct', 'Margin_Swing_pp'
  103.     ]].to_string(index=False, formatters={
  104.         'Pres_Margin_Rep_Pct': '{:+.1f}%'.format,
  105.         'Sen_Margin_Rep_Pct': '{:+.1f}%'.format,
  106.         'Margin_Swing_pp': '{:,.1f} pp'.format,
  107.     }))
  108.  
  109.     print("\n" + "="*80)
  110.     print("Test 3: Precincts with Extreme Partisan Skew (>95% for Republican Pres. Candidate)")
  111.     print("="*80)
  112.     skewed = results[results['Pres_Rep_Share'] > 95].sort_values(by='Pres_Rep_Share', ascending=False)
  113.     print(skewed[[
  114.         'Reporting Unit', 'Rep_Votes_President', 'Dem_Votes_President', 'Pres_Rep_Share'
  115.     ]].to_string(index=False, formatters={'Pres_Rep_Share': '{:,.1f}%'.format}))
  116.  
  117.  
  118. if __name__ == '__main__':
  119.     # --- Data Provided by User ---
  120.     president_results_file = "raw_results/election_results_president.csv"
  121.     with open(president_results_file, "r") as f:
  122.         presidential_text = f.read()
  123.    
  124.     senate_results_file = "raw_results/election_results_senator.csv"
  125.     with open(senate_results_file, "r") as f:
  126.         senate_text = f.read()
  127.    
  128.     # Create DataFrames
  129.     pres_df = create_df_from_text(presidential_text.strip())
  130.     sen_df = create_df_from_text(senate_text.strip())
  131.    
  132.     # Run the analysis
  133.     run_anomaly_analysis(pres_df, sen_df)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement