Guest User

US Presidential Elections 2020 charts

a guest
Nov 22nd, 2020
374
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.86 KB | None | 0 0
  1. # US Presidential Elections 2020
  2. #
  3. # Folder structure:
  4. # - src/         : this notebook.
  5. # - data/        : output folder for CSV with clean data.
  6. # - data/json/   : output folder for JSON files.
  7. # - data/backup/ : output folder for timestamped JSON files, for backup.
  8. # - output/      : charts generated.
  9.  
  10.  
import glob
import io
import json
import re
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
  18.  
  19.  
  20. # Just because I want to make this script self-contained
  21.  
  22. states = pd.DataFrame(
  23.     [
  24.         ["alabama", "Alabama", "America/Chicago"],
  25.         ["alaska", "Alaska", "America/Anchorage"],
  26.         ["arizona", "Arizona", "America/Boise"],
  27.         ["arkansas", "Arkansas", "America/Chicago"],
  28.         ["california", "California", "America/Los_Angeles"],
  29.         ["colorado", "Colorado", "America/Boise"],
  30.         ["connecticut", "Connecticut", "America/New_York"],
  31.         ["delaware", "Delaware", "America/New_York"],
  32.         ["district-of-columbia", "District of Columbia", "America/New_York"],
  33.         ["florida", "Florida", "America/New_York"],
  34.         ["georgia", "Georgia", "America/New_York"],
  35.         ["idaho", "Idaho", "America/Boise"],
  36.         ["illinois", "Illinois", "America/Chicago"],
  37.         ["indiana", "Indiana", "America/New_York"],
  38.         ["iowa", "Iowa", "America/Chicago"],
  39.         ["new-jersey", "New Jersey", "America/New_York"],
  40.         ["kansas", "Kansas", "America/Chicago"],
  41.         ["kentucky", "Kentucky", "America/New_York"],
  42.         ["louisiana", "Louisiana", "America/Chicago"],
  43.         ["maine", "Maine", "America/New_York"],
  44.         ["maryland", "Maryland", "America/New_York"],
  45.         ["massachusetts", "Massachusetts", "America/New_York"],
  46.         ["new-mexico", "New Mexico", "America/New_York"],
  47.         ["michigan", "Michigan", "America/New_York"],
  48.         ["minnesota", "Minnesota", "America/Chicago"],
  49.         ["mississippi", "Mississippi", "America/New_York"],
  50.         ["missouri", "Missouri", "America/Chicago"],
  51.         ["montana", "Montana", "America/Boise"],
  52.         ["nebraska", "Nebraska", "America/Chicago"],
  53.         ["nevada", "Nevada", "America/New_York"],
  54.         ["new-hampshire", "New Hampshire", "America/New_York"],
  55.         ["new-york", "New York", "America/New_York"],
  56.         ["north-carolina", "North Carolina", "America/New_York"],
  57.         ["north-dakota", "North Dakota", "America/Chicago"],
  58.         ["ohio", "Ohio", "America/New_York"],
  59.         ["oklahoma", "Oklahoma", "America/Chicago"],
  60.         ["oregon", "Oregon", "America/Los_Angeles"],
  61.         ["pennsylvania", "Pennsylvania", "America/New_York"],
  62.         ["rhode-island", "Rhode Island", "America/New_York"],
  63.         ["south-carolina", "South Carolina", "America/New_York"],
  64.         ["south-dakota", "South Dakota", "America/Chicago"],
  65.         ["tennessee", "Tennessee", "America/New_York"],
  66.         ["texas", "Texas", "America/Chicago"],
  67.         ["utah", "Utah", "America/Boise"],
  68.         ["vermont", "Vermont", "America/New_York"],
  69.         ["virginia", "Virginia", "America/New_York"],
  70.         ["washington", "Washington", "America/Los_Angeles"],
  71.         ["west-virginia", "West Virginia", "America/New_York"],
  72.         ["wisconsin", "Wisconsin", "America/Chicago"],
  73.         ["wyoming", "Wyoming", "America/Boise"]
  74.     ], columns=['state', 'state_name', 'timezone'])
  75.  
  76.  
  77. # Download NYT datasets to local folder
  78.  
  79. s = datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
  80. for state in states['state'].values.tolist():
  81.     print("Downloading", state, "...")
  82.     t = pd.read_json("https://static01.nyt.com/elections-assets/2020/data/api/2020-11-03/race-page/" + state + "/president.json")
  83.     t.to_json("../data/json/" + state + ".json")
  84.     t.to_json("../data/backup/" + state + "_backup_" + s + ".json")
  85. print("... DONE!")
  86.  
  87.  
  88. # Load and prep data
  89.  
  90. def read_votes():
  91.     csv_content = "state,timestamp,votes,eevp,trumpd,bidenj\r\n"
  92.     for fname in glob.glob("..\\data\\json\\" + "*.json"):
  93.         with open(fname, encoding="utf8") as f:
  94.             x = json.load(f)
  95.  
  96.         state = re.sub(r"..\\data\\json\\", "", fname)
  97.         state = re.sub(".json", "", state)
  98.  
  99.         xts = x["data"]["races"][0]["timeseries"]
  100.         for i in range(len(xts)):
  101.             csv_content = csv_content + f'{state},{xts[i]["timestamp"]},{xts[i]["votes"]},{xts[i]["eevp"]},{xts[i]["vote_shares"]["trumpd"]},{xts[i]["vote_shares"]["bidenj"]}\r\n'
  102.  
  103.     df = pd.read_csv(io.StringIO(csv_content))
  104.  
  105.     return df
  106.  
  107. print("Processing data...")
  108.  
  109. votes = read_votes()
  110.  
  111. votes = votes[votes['votes'] > 0]
  112.  
  113. votes['timestamp_utc'] = pd.to_datetime(votes['timestamp'], utc=True)
  114. votes = votes.merge(states, left_on='state', right_on='state')
  115.  
  116. def f(row):
  117.     return row['timestamp_utc'].tz_convert(states[states['state'] == row['state']].timezone.values[0])
  118. votes['timestamp_local'] = votes.apply(f, axis=1)
  119.  
  120. votes['eevp'] = votes['eevp'] / 100
  121.  
  122. votes['trumpd_votes'] = votes['votes'] * votes['trumpd']
  123. votes['bidenj_votes'] = votes['votes'] * votes['bidenj']
  124.  
  125. votes['votes_lag'] = votes.groupby(["state"])['votes'].shift(1)
  126. votes['trumpd_votes_lag'] = votes.groupby(["state"])['trumpd_votes'].shift(1)
  127. votes['bidenj_votes_lag'] = votes.groupby(["state"])['bidenj_votes'].shift(1)
  128.  
  129. votes = votes.fillna(0)
  130.  
  131. votes['votes_add'] = votes['votes'] - votes['votes_lag']
  132. votes['trumpd_votes_add'] = votes['trumpd_votes'] - votes['trumpd_votes_lag']
  133. votes['bidenj_votes_add'] = votes['bidenj_votes'] - votes['bidenj_votes_lag']
  134.  
  135. votes = votes.drop(columns=['state'])
  136. votes = votes.drop(columns=['trumpd_votes_lag'])
  137. votes = votes.drop(columns=['bidenj_votes_lag'])
  138. votes = votes.rename(columns={"state_name": "state"})
  139.  
  140. votes = votes.sort_values(by=['state', 'timestamp_utc'])
  141.  
  142. votes['batch_number'] = votes.groupby(['state']).cumcount()+1
  143.  
  144.  
  145. # Save prep data as CSV
  146.  
  147. print("Saving clean data file...")
  148.  
  149. votes.to_csv('../data/clean_data.csv', index=False)
  150.  
  151.  
  152. # Create charts
  153.  
  154. print("Generating charts...")
  155.  
  156. for state in votes['state'].unique():
  157.  
  158.     t = votes[votes['state'] == state]
  159.  
  160.     TIMEZONE = t['timezone'].unique()[0]
  161.  
  162.     fig, axs = plt.subplots(3, 1, figsize=(20, 10), dpi=300)
  163.     fig.suptitle(state, fontsize=12)
  164.  
  165.     axs[0].bar(t['batch_number'], t['trumpd_votes_add'], label="Trump", color='red', alpha=.5)
  166.     axs[0].bar(t['batch_number'], t['bidenj_votes_add'], label="Biden", color='blue', alpha=.5)
  167.     axs[0].grid(color='gray', alpha=.1, linestyle='--', linewidth=1)
  168.     axs[0].set_ylabel('Votes Added')
  169.     axs[0].legend(loc="lower right")
  170.  
  171.     axs[1].plot(t['batch_number'], t['trumpd_votes'], label="Trump", color='red', alpha=.7)
  172.     axs[1].plot(t['batch_number'], t['bidenj_votes'], label="Biden", color='blue', alpha=.7)
  173.     axs[1].grid(color='gray', alpha=.1, linestyle='--', linewidth=1)
  174.     axs[1].set_ylabel('Votes Total')
  175.     axs[1].legend(loc="lower right")
  176.  
  177.     axs[2].plot(t['batch_number'], t['eevp'], label="% votes counted", color="gray", alpha=.3)
  178.     axs[2].plot(t['batch_number'], t['trumpd'], label="Trump's share", color="red", alpha=.7)
  179.     axs[2].plot(t['batch_number'], t['bidenj'], label="Biden's share", color="blue", alpha=.7)
  180.     axs[2].grid(color='gray', alpha=.1, linestyle='--', linewidth=1)
  181.     axs[2].set_ylim([0, 1])
  182.     axs[2].set_ylabel('%')
  183.     axs[2].legend(loc="lower right")
  184.     axs[2].set_xlabel('Drops')
  185.  
  186.     peak = t[
  187.         abs(t['bidenj_votes_add'] - t['trumpd_votes_add']) == max(abs(t['bidenj_votes_add'] - t['trumpd_votes_add']))]
  188.     peak_datetime = peak['timestamp_local'].values[0]
  189.     peak_max = max(peak['trumpd_votes_add'].values[0], peak['bidenj_votes_add'].values[0])
  190.     peak_min = min(peak['trumpd_votes_add'].values[0], peak['bidenj_votes_add'].values[0])
  191.     peak_max_perc = peak_max / (peak['trumpd_votes_add'].values[0] + peak['bidenj_votes_add'].values[0]) * 100
  192.     peak_min_perc = peak_min / (peak['trumpd_votes_add'].values[0] + peak['bidenj_votes_add'].values[0]) * 100
  193.     peak_max_whom = "Biden" if peak['bidenj_votes_add'].values[0] > peak['trumpd_votes_add'].values[0] else "Trump"
  194.     peak_min_whom = "Biden" if peak['bidenj_votes_add'].values[0] < peak['trumpd_votes_add'].values[0] else "Trump"
  195.     peak_label = str(peak_datetime) + "\n" + str(round(peak_max)) + " votes for " + peak_max_whom + "\n" + str(
  196.         round(peak_min)) + " votes for " + peak_min_whom
  197.  
  198.     if len(peak) > 0:
  199.         axs[1].annotate(peak_label,
  200.                         xy=(peak['batch_number'].values, 1),
  201.                         horizontalalignment='center',
  202.                         verticalalignment='bottom',
  203.                         fontsize=8
  204.                         )
  205.         axs[1].axvline(x=peak['batch_number'].values, color='green', alpha=.3)
  206.         axs[2].axvline(x=peak['batch_number'].values, color='green', alpha=.3)
  207.  
  208.     fig.tight_layout()
  209.     # plt.show()
  210.  
  211.     plt.savefig('../output/chart_' + state + '.png', facecolor='white', transparent=False)
  212.  
  213. print("DONE!")
  214.  
Advertisement
Add Comment
Please, Sign In to add comment