Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # coding: utf-8
- # In[18]:
- import pandas as pd
- import numpy as np
- from IPython.core.interactiveshell import InteractiveShell
- InteractiveShell.ast_node_interactivity = "all"
- # In[19]:
def index_prev_next(ix, *, min_index=0, max_index=None):
    """Return the given integer row labels plus their immediate neighbours.

    For every label ``i`` in *ix* the result contains ``i - 1``, ``i`` and
    ``i + 1``.  The result is sorted ascending and contains each label at
    most once, so adjacent input labels (e.g. ``[4, 5]``) no longer yield
    repeated entries that would duplicate rows when used for selection.

    Parameters
    ----------
    ix : iterable of int
        Row labels of interest (e.g. ``DataFrame.index`` of a filtered frame).
    min_index : int or None, keyword-only, default 0
        Smallest label allowed in the result; ``None`` disables the check.
        The default drops the ``-1`` produced for label ``0``.
    max_index : int or None, keyword-only, default None
        Largest label allowed in the result; ``None`` disables the check.
        Pass ``len(frame) - 1`` to avoid a label past the end of the frame.

    Returns
    -------
    list of int
        Sorted, de-duplicated labels.
    """
    wanted = set()
    for i in ix:
        wanted.update((i - 1, i, i + 1))
    if min_index is not None:
        wanted = {i for i in wanted if i >= min_index}
    if max_index is not None:
        wanted = {i for i in wanted if i <= max_index}
    return sorted(wanted)
# ## Data Prep
# In[20]:
# Load the time-series CSV.  Columns used below: state, timestamp, votes,
# vote_share_rep, vote_share_dem.
path = "../data_files/Election/"
csv_file = "nyt_ts.csv"
csv_path = path + csv_file
df_nyt_ts = pd.read_csv(csv_path)

# In[21]:
df_nyt_ts['timestamp'] = pd.to_datetime(df_nyt_ts['timestamp'])

# In[22]:
# Keep only rows that report at least one vote.  The explicit .copy() makes
# the filtered frame independent of the original, so the column assignments
# below neither raise SettingWithCopyWarning nor depend on view/copy semantics.
df_nyt_ts = df_nyt_ts.query("votes > 0").copy()

# In[23]:
# Per-party vote counts derived as total votes times the reported share.
df_nyt_ts['votes_rep'] = df_nyt_ts['votes'] * df_nyt_ts['vote_share_rep']
df_nyt_ts['votes_dem'] = df_nyt_ts['votes'] * df_nyt_ts['vote_share_dem']

# In[24]:
# Order rows chronologically within each state and renumber them 0..n-1 so
# that label +/- 1 addresses the neighbouring row.
df_nyt_ts = df_nyt_ts.sort_values(['state', 'timestamp']).reset_index(drop=True)
# In[25]:
# Row-to-row change of each vote column, computed separately per state; the
# first row of every state has no predecessor and therefore gets NaN.
vote_cols = ['votes', 'votes_rep', 'votes_dem']
vote_diff_cols = [name + "_diff" for name in vote_cols]
per_state_votes = df_nyt_ts.groupby(['state'])[vote_cols]
df_nyt_ts[vote_diff_cols] = per_state_votes.diff()

# In[26]:
# Share of each update's vote change attributed to each party
# (party change divided by total change).
for party in ('rep', 'dem'):
    df_nyt_ts['compare_' + party] = df_nyt_ts.eval(
        "votes_" + party + "_diff / votes_diff"
    )
# ## Biased Vote Dump
# In[27]:
# Columns written to every exported spreadsheet.
cols = (
    ['state', 'timestamp']
    + ['vote_share_rep', 'vote_share_dem']
    + ['votes', 'votes_rep', 'votes_dem']
    + ['votes_diff', 'votes_rep_diff', 'votes_dem_diff']
)

# In[28]:
# An update counts as a "dump" when it adds more than this many votes ...
threshold_dump = 100000
# ... and a party's share of that update is below this fraction.
threshold_compare = 0.1
# ### Unfavorable for Trump
# In[30]:
# Flag updates that add more than `threshold_dump` votes while the `rep`
# share of the added votes is below `threshold_compare`.
df = df_nyt_ts.query("(votes_diff > @threshold_dump) & (compare_rep < @threshold_compare)")
# Flagged rows plus the row directly before and after each, for context.
# The frame is sorted by state then timestamp, so the following row can
# belong to the next state.
ix_present = index_prev_next(df.index)
# Select by membership instead of .loc[list]: .loc raises KeyError when a
# neighbour label falls outside the frame, and repeats rows when the label
# list contains duplicates.
df_xlsx = df_nyt_ts.loc[df_nyt_ts.index.isin(ix_present), cols + ['compare_rep']]
df_xlsx  # bare expression: displayed when run as a notebook cell
df_xlsx.to_excel("vote_dump_against_trump.xlsx")
# ### Unfavorable for Biden
# In[31]:
# Flag updates that add more than `threshold_dump` votes while the `dem`
# share of the added votes is below `threshold_compare`.
df = df_nyt_ts.query("(votes_diff > @threshold_dump) & (compare_dem < @threshold_compare)")
# Flagged rows plus the row directly before and after each, for context.
# The frame is sorted by state then timestamp, so the following row can
# belong to the next state.
ix_present = index_prev_next(df.index)
# Export the column the filter was applied to (compare_dem); the previous
# version exported compare_rep here, a copy-paste slip from the section above.
# Membership selection avoids the KeyError .loc[list] raises for labels
# outside the frame and keeps each row only once.
df_xlsx = df_nyt_ts.loc[df_nyt_ts.index.isin(ix_present), cols + ['compare_dem']]
df_xlsx  # bare expression: displayed when run as a notebook cell
df_xlsx.to_excel("vote_dump_against_biden.xlsx")
# ## Vote Switch
# In[32]:
# Flag updates where the `rep` change is exactly the negative of the `dem`
# change and is non-zero.  This is an exact floating-point comparison; the
# next cell applies a tolerance instead.
flt = (df_nyt_ts.votes_rep_diff == -df_nyt_ts.votes_dem_diff)
flt2 = (df_nyt_ts.votes_rep_diff != 0)
df = df_nyt_ts[flt & flt2].copy()
# Flagged rows plus the row directly before and after each, for context.
ix_present = index_prev_next(df.index)
# Select by membership: reindex() on the label list would emit an all-NaN
# row for any neighbour label outside the frame and repeat rows when
# adjacent rows are both flagged.
df_xlsx = df_nyt_ts.loc[df_nyt_ts.index.isin(ix_present), cols]
df_xlsx  # bare expression: displayed when run as a notebook cell
df_xlsx.to_excel("vote_switch.xlsx")
# In[33]:
# Same idea as the exact vote switch, but tolerant: the `rep` and `dem`
# changes must cancel to within `threshold_switch` votes, be non-zero, and
# have opposite signs.
threshold_switch = 20
flt = (df_nyt_ts.votes_rep_diff + df_nyt_ts.votes_dem_diff).abs() < threshold_switch
flt2 = (df_nyt_ts.votes_rep_diff != 0)
# A negative product means the two changes have opposite signs.
flt3 = (df_nyt_ts.votes_rep_diff * df_nyt_ts.votes_dem_diff) < 0
df = df_nyt_ts[flt & flt2 & flt3].copy()
# Flagged rows plus the row directly before and after each, for context.
ix_present = index_prev_next(df.index)
# Select by membership: reindex() on the label list would emit an all-NaN
# row for any neighbour label outside the frame and repeat rows when
# adjacent rows are both flagged.
df_xlsx = df_nyt_ts.loc[df_nyt_ts.index.isin(ix_present), cols]
df_xlsx  # bare expression: displayed when run as a notebook cell
df_xlsx.to_excel("vote_switch_within_20.xlsx")
# ## Votes Decrease
# In[36]:
# Flag updates where the total vote count drops by more than 100,000
# compared with the previous row of the same state.
thresh_vote_dec = -10**5
ix_diff_neg = df_nyt_ts.query("(votes_diff < @thresh_vote_dec)").index
# Flagged rows plus the row directly before and after each, for context.
ix_present = index_prev_next(ix_diff_neg)
# Select by membership instead of .loc[list]: .loc raises KeyError when a
# neighbour label falls outside the frame, and repeats rows when the label
# list contains duplicates.
df_xlsx = df_nyt_ts.loc[df_nyt_ts.index.isin(ix_present), cols]
df_xlsx  # bare expression: displayed when run as a notebook cell
df_xlsx.to_excel("vote_decrease.xlsx")
RAW Paste Data