SHOW:
|
|
- or go back to the newest paste.
import pprint

import pandas as pd
import requests
4 | ||
def collapse_results_by_party(results_by_candidate, candidates):
    """Sum per-candidate values into per-party totals.

    Args:
        results_by_candidate: Mapping of candidate key to a numeric value.
        candidates: Mapping of candidate key to a dict that has a
            ``'party'`` entry.

    Returns:
        A dict mapping each party label to the sum of the values of the
        candidates that belong to it. Empty input yields an empty dict.

    Raises:
        KeyError: If a key of ``results_by_candidate`` is missing from
            ``candidates``, or its entry has no ``'party'`` key.
    """
    results_by_party = {}
    # ``items()`` exists on Python 2 and 3; ``iteritems()`` is Python 2 only.
    for candidate, count in results_by_candidate.items():
        party = candidates[candidate]['party']
        results_by_party[party] = results_by_party.get(party, 0) + count

    return results_by_party
12 | ||
# State names to download, one request per entry. The list holds the 50
# states (the District of Columbia is not included), ordered by postal
# abbreviation rather than by full name.
states = [
    'Alaska', 'Alabama', 'Arkansas', 'Arizona', 'California', 'Colorado',
    'Connecticut', 'Delaware', 'Florida', 'Georgia',
    'Hawaii', 'Iowa', 'Idaho', 'Illinois', 'Indiana', 'Kansas', 'Kentucky',
    'Louisiana', 'Massachusetts', 'Maryland', 'Maine', 'Michigan',
    'Minnesota', 'Missouri', 'Mississippi', 'Montana', 'North Carolina',
    'North Dakota', 'Nebraska', 'New Hampshire', 'New Jersey', 'New Mexico',
    'Nevada', 'New York', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania',
    'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas',
    'Utah', 'Virginia', 'Vermont', 'Washington', 'Wisconsin',
    'West Virginia', 'Wyoming',
]
25 | ||
# Download the NYT 2020 presidential race-page JSON for every state. Results
# are keyed by the lower-cased, hyphenated state name that is also used in
# the request URL.
all_results = {}
for state in states:
    # The parenthesised single-argument form prints the same text on
    # Python 2 and Python 3; the bare ``print`` statement is Python 2 only.
    print('Downloading {}'.format(state))
    formatted_state = state.lower().replace(' ', '-')
    # Without a timeout a stalled connection would hang the script forever.
    response = requests.get(
        'https://static01.nyt.com/elections-assets/2020/data/api/2020-11-03/race-page/{}/president.json'.format(formatted_state),
        timeout=30
    )
    # Stop on an HTTP error here instead of failing later on a JSON decode
    # or missing-key error that hides the real cause.
    response.raise_for_status()
    state_results = response.json()
    all_results[formatted_state] = state_results
32 | ||
# Flatten every state's time series into one list of row dicts, one row per
# time-series data point.
records = []
# ``items()`` exists on Python 2 and 3; ``iteritems()`` is Python 2 only.
for state, state_results in all_results.items():
    # Only the first race of each state's payload is used.
    race = state_results['data']['races'][0]

    # Bucket every candidate into 'rep', 'dem' or 'trd' (anything else).
    # The label is written onto the candidate dict itself.
    for candidate in race['candidates']:
        if candidate['party_id'] == 'republican':
            candidate['party'] = 'rep'
        elif candidate['party_id'] == 'democrat':
            candidate['party'] = 'dem'
        else:
            candidate['party'] = 'trd'
    candidates = {candidate['candidate_key']: candidate for candidate in race['candidates']}

    for data_point in race['timeseries']:
        # Copy race-level fields onto each data point so that every row is
        # self-contained. The data point dict is modified in place.
        data_point['state'] = state
        data_point['expected_votes'] = race['tot_exp_vote']
        data_point['trump2016'] = race['trump2016']
        data_point['votes2012'] = race['votes2012']
        data_point['votes2016'] = race['votes2016']

        # Replace the per-candidate share mapping with three flat per-party
        # columns; a party with no candidates in this data point gets 0.
        vote_shares = collapse_results_by_party(data_point['vote_shares'], candidates)
        for party in ['rep', 'dem', 'trd']:
            data_point['vote_share_{}'.format(party)] = vote_shares.get(party, 0)

        data_point.pop('vote_shares')
        records.append(data_point)
59 | ||
# Build a DataFrame from the flattened rows and write it out as CSV.
# NOTE: ``to_csv`` does not create missing directories, so ``data/`` must
# already exist relative to the working directory.
time_series_df = pd.DataFrame.from_records(records)
time_series_df.to_csv('data/nyt_ts.csv', encoding='utf-8')