Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # US Presidential Elections 2020
- #
- # Folder structure:
- # - src/ : this notebook.
- # - data/ : output folder for CSV with clean data.
- # - data/json/ : output folder for JSON files.
- # - data/backup/ : output folder for timestamped JSON files, for backup.
- # - output/ : charts generated.
import glob
import io
import json
import os
import re
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd
# State lookup table, embedded so the script stays self-contained.
# Columns:
#   state      - slug used in the NYT results URL and as the JSON file name
#   state_name - display name used in the clean CSV and in chart titles
#   timezone   - IANA zone used to convert UTC timestamps to local time
# Every state gets a single zone, including those that span two (e.g.
# Kentucky, Tennessee). America/Boise is a Mountain Time zone.
states = pd.DataFrame(
    [
        ["alabama", "Alabama", "America/Chicago"],
        ["alaska", "Alaska", "America/Anchorage"],
        # Arizona does not observe daylight saving time.
        ["arizona", "Arizona", "America/Phoenix"],
        ["arkansas", "Arkansas", "America/Chicago"],
        ["california", "California", "America/Los_Angeles"],
        ["colorado", "Colorado", "America/Boise"],
        ["connecticut", "Connecticut", "America/New_York"],
        ["delaware", "Delaware", "America/New_York"],
        ["district-of-columbia", "District of Columbia", "America/New_York"],
        ["florida", "Florida", "America/New_York"],
        ["georgia", "Georgia", "America/New_York"],
        # Hawaii was missing from the original table.
        ["hawaii", "Hawaii", "Pacific/Honolulu"],
        ["idaho", "Idaho", "America/Boise"],
        ["illinois", "Illinois", "America/Chicago"],
        ["indiana", "Indiana", "America/New_York"],
        ["iowa", "Iowa", "America/Chicago"],
        ["new-jersey", "New Jersey", "America/New_York"],
        ["kansas", "Kansas", "America/Chicago"],
        ["kentucky", "Kentucky", "America/New_York"],
        ["louisiana", "Louisiana", "America/Chicago"],
        ["maine", "Maine", "America/New_York"],
        ["maryland", "Maryland", "America/New_York"],
        ["massachusetts", "Massachusetts", "America/New_York"],
        # New Mexico is on Mountain Time (was listed as Eastern).
        ["new-mexico", "New Mexico", "America/Denver"],
        ["michigan", "Michigan", "America/New_York"],
        ["minnesota", "Minnesota", "America/Chicago"],
        # Mississippi is on Central Time (was listed as Eastern).
        ["mississippi", "Mississippi", "America/Chicago"],
        ["missouri", "Missouri", "America/Chicago"],
        ["montana", "Montana", "America/Boise"],
        ["nebraska", "Nebraska", "America/Chicago"],
        # Nevada is on Pacific Time (was listed as Eastern).
        ["nevada", "Nevada", "America/Los_Angeles"],
        ["new-hampshire", "New Hampshire", "America/New_York"],
        ["new-york", "New York", "America/New_York"],
        ["north-carolina", "North Carolina", "America/New_York"],
        ["north-dakota", "North Dakota", "America/Chicago"],
        ["ohio", "Ohio", "America/New_York"],
        ["oklahoma", "Oklahoma", "America/Chicago"],
        ["oregon", "Oregon", "America/Los_Angeles"],
        ["pennsylvania", "Pennsylvania", "America/New_York"],
        ["rhode-island", "Rhode Island", "America/New_York"],
        ["south-carolina", "South Carolina", "America/New_York"],
        ["south-dakota", "South Dakota", "America/Chicago"],
        ["tennessee", "Tennessee", "America/New_York"],
        ["texas", "Texas", "America/Chicago"],
        ["utah", "Utah", "America/Boise"],
        ["vermont", "Vermont", "America/New_York"],
        ["virginia", "Virginia", "America/New_York"],
        ["washington", "Washington", "America/Los_Angeles"],
        ["west-virginia", "West Virginia", "America/New_York"],
        ["wisconsin", "Wisconsin", "America/Chicago"],
        ["wyoming", "Wyoming", "America/Boise"],
    ],
    columns=['state', 'state_name', 'timezone'],
)
# Download NYT datasets to local folder
# Each state's feed is written twice: a working copy under data/json/ and a
# timestamped copy under data/backup/ that preserves this particular run.
json_dir = "../data/json/"
backup_dir = "../data/backup/"
# Create the output folders up front so the first write does not fail when
# they do not exist yet.
os.makedirs(json_dir, exist_ok=True)
os.makedirs(backup_dir, exist_ok=True)
# One timestamp for the whole run, so all backup files of a run share a suffix.
s = datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
for state in states['state']:
    print("Downloading", state, "...")
    t = pd.read_json(
        "https://static01.nyt.com/elections-assets/2020/data/api/2020-11-03/race-page/"
        + state
        + "/president.json"
    )
    t.to_json(json_dir + state + ".json")
    t.to_json(backup_dir + state + "_backup_" + s + ".json")
    # NOTE(review): indentation was lost in the pasted source; this message
    # is assumed to be printed once per state.
    print("... DONE!")
# Load and prep data
def read_votes(json_dir="../data/json"):
    """Load every state's saved JSON feed into one flat DataFrame.

    Args:
        json_dir: folder holding one ``<state>.json`` file per state.
            Defaults to the folder the download step writes to.

    Returns:
        A DataFrame with columns ``state, timestamp, votes, eevp, trumpd,
        bidenj``: one row per entry of the first race's ``timeseries`` in
        each file. ``state`` is the file name without its extension. The
        frame is empty (columns only) when the folder has no JSON files.

    Raises:
        KeyError / IndexError: if a file lacks the expected
            ``data -> races[0] -> timeseries`` structure.
    """
    lines = ["state,timestamp,votes,eevp,trumpd,bidenj"]
    # os.path keeps the lookup working on any OS; the previous hard-coded
    # backslash pattern matched nothing outside Windows. Sorting makes the
    # row order independent of the directory listing order.
    for fname in sorted(glob.glob(os.path.join(json_dir, "*.json"))):
        with open(fname, encoding="utf8") as f:
            x = json.load(f)
        state = os.path.splitext(os.path.basename(fname))[0]
        for point in x["data"]["races"][0]["timeseries"]:
            shares = point["vote_shares"]
            lines.append(
                f'{state},{point["timestamp"]},{point["votes"]},{point["eevp"]},'
                f'{shares["trumpd"]},{shares["bidenj"]}'
            )
    # Round-trip through CSV text so pandas infers the column types exactly as
    # before; join once instead of growing a string row by row.
    csv_content = "\r\n".join(lines) + "\r\n"
    return pd.read_csv(io.StringIO(csv_content))
print("Processing data...")
votes = read_votes()
# Keep only rows that report at least one vote. The explicit copy makes the
# result an independent frame, so adding a column below does not write into a
# slice of the original (which pandas flags with SettingWithCopyWarning).
votes = votes[votes['votes'] > 0].copy()
votes['timestamp_utc'] = pd.to_datetime(votes['timestamp'], utc=True)
# Attach state_name and timezone; rows whose slug is not in the states table
# are dropped by the default inner join.
votes = votes.merge(states, on='state')
def f(row):
    """Convert one row's UTC timestamp to the local time of that row's state.

    Args:
        row: a row providing 'state' (a slug present in the module-level
            ``states`` table) and 'timestamp_utc' (a tz-aware timestamp).

    Returns:
        The timestamp converted to the zone listed for that state.
    """
    matching = states[states['state'] == row['state']]
    zone = matching.timezone.values[0]
    return row['timestamp_utc'].tz_convert(zone)
# Local time of each update, using the zone of the row's state.
votes['timestamp_local'] = votes.apply(f, axis=1)
# eevp arrives as a percentage; store it as a 0-1 fraction like the shares.
votes['eevp'] = votes['eevp'] / 100
# Absolute votes per candidate = total votes reported * candidate share.
votes['trumpd_votes'] = votes['votes'] * votes['trumpd']
votes['bidenj_votes'] = votes['votes'] * votes['bidenj']

# Put each state's rows in chronological order BEFORE computing lags, so the
# per-batch deltas and the batch numbers below refer to the same sequence even
# when a feed's timeseries is not already in timestamp order.
votes = votes.sort_values(by=['state_name', 'timestamp_utc'])

# Previous totals within each state; the first row of a state has none.
lagged = votes.groupby('state')[['votes', 'trumpd_votes', 'bidenj_votes']].shift(1)
votes['votes_lag'] = lagged['votes']
votes['trumpd_votes_lag'] = lagged['trumpd_votes']
votes['bidenj_votes_lag'] = lagged['bidenj_votes']
# A missing lag becomes 0, so a state's first delta equals its first total.
# NOTE: this fills every NaN in the frame, not only the lag columns.
votes = votes.fillna(0)

# Votes added by each update relative to the previous one.
votes['votes_add'] = votes['votes'] - votes['votes_lag']
votes['trumpd_votes_add'] = votes['trumpd_votes'] - votes['trumpd_votes_lag']
votes['bidenj_votes_add'] = votes['bidenj_votes'] - votes['bidenj_votes_lag']

# Drop the slug and the candidate lag helpers (votes_lag is kept in the
# output), then expose the display name under the 'state' column.
votes = votes.drop(columns=['state', 'trumpd_votes_lag', 'bidenj_votes_lag'])
votes = votes.rename(columns={"state_name": "state"})
# 1-based sequence number of each update within its state.
votes['batch_number'] = votes.groupby(['state']).cumcount() + 1

# Save prep data as CSV
print("Saving clean data file...")
votes.to_csv('../data/clean_data.csv', index=False)
# Create charts
# One PNG per state with three stacked panels sharing the batch number as x:
# votes added per batch, running totals, and shares / fraction counted.
print("Generating charts...")
for state in votes['state'].unique():
    t = votes[votes['state'] == state]
    fig, axs = plt.subplots(3, 1, figsize=(20, 10), dpi=300)
    fig.suptitle(state, fontsize=12)

    # Panel 1: votes added to each candidate by each batch.
    axs[0].bar(t['batch_number'], t['trumpd_votes_add'], label="Trump", color='red', alpha=.5)
    axs[0].bar(t['batch_number'], t['bidenj_votes_add'], label="Biden", color='blue', alpha=.5)
    axs[0].grid(color='gray', alpha=.1, linestyle='--', linewidth=1)
    axs[0].set_ylabel('Votes Added')
    axs[0].legend(loc="lower right")

    # Panel 2: running vote totals.
    axs[1].plot(t['batch_number'], t['trumpd_votes'], label="Trump", color='red', alpha=.7)
    axs[1].plot(t['batch_number'], t['bidenj_votes'], label="Biden", color='blue', alpha=.7)
    axs[1].grid(color='gray', alpha=.1, linestyle='--', linewidth=1)
    axs[1].set_ylabel('Votes Total')
    axs[1].legend(loc="lower right")

    # Panel 3: fraction of votes counted and each candidate's share.
    axs[2].plot(t['batch_number'], t['eevp'], label="% votes counted", color="gray", alpha=.3)
    axs[2].plot(t['batch_number'], t['trumpd'], label="Trump's share", color="red", alpha=.7)
    axs[2].plot(t['batch_number'], t['bidenj'], label="Biden's share", color="blue", alpha=.7)
    axs[2].grid(color='gray', alpha=.1, linestyle='--', linewidth=1)
    axs[2].set_ylim([0, 1])
    axs[2].set_ylabel('%')
    axs[2].legend(loc="lower right")
    axs[2].set_xlabel('Drops')

    # Peak = the batch with the largest absolute gap between the votes added
    # to the two candidates. argmax picks the first such batch, so everything
    # below is a scalar even when several batches tie.
    trump_add = t['trumpd_votes_add'].to_numpy()
    biden_add = t['bidenj_votes_add'].to_numpy()
    peak_pos = abs(biden_add - trump_add).argmax()
    peak_trump = float(trump_add[peak_pos])
    peak_biden = float(biden_add[peak_pos])
    peak_batch = int(t['batch_number'].iloc[peak_pos])
    peak_datetime = t['timestamp_local'].iloc[peak_pos]

    peak_max = max(peak_trump, peak_biden)
    peak_min = min(peak_trump, peak_biden)
    peak_max_whom = "Biden" if peak_biden > peak_trump else "Trump"
    peak_min_whom = "Biden" if peak_biden < peak_trump else "Trump"
    peak_label = (
        f"{peak_datetime}\n"
        f"{round(peak_max)} votes for {peak_max_whom}\n"
        f"{round(peak_min)} votes for {peak_min_whom}"
    )

    # Label the peak on the totals panel and mark it on panels 2 and 3.
    axs[1].annotate(
        peak_label,
        xy=(peak_batch, 1),
        horizontalalignment='center',
        verticalalignment='bottom',
        fontsize=8,
    )
    axs[1].axvline(x=peak_batch, color='green', alpha=.3)
    axs[2].axvline(x=peak_batch, color='green', alpha=.3)

    fig.tight_layout()
    fig.savefig('../output/chart_' + state + '.png', facecolor='white', transparent=False)
    # Release the figure; otherwise every state's figure stays open in pyplot
    # until the script exits.
    plt.close(fig)
print("DONE!")
Advertisement
Add Comment
Please, Sign In to add comment