View difference between Paste ID: KXnLsVbW and NkYXrJEX
SHOW: | | - or go back to the newest paste.
1
import pandas as pd
2
import pprint
3
import requests
4
5
def collapse_results_by_party(results_by_candidate, candidates):
    """Sum per-candidate values into per-party totals.

    Args:
        results_by_candidate: Mapping of candidate key -> numeric value.
        candidates: Mapping of candidate key -> candidate dict; each
            candidate dict must carry a 'party' entry.

    Returns:
        A dict mapping each party found to the sum of the values of its
        candidates. Parties with no candidate in ``results_by_candidate``
        are absent from the result.

    Raises:
        KeyError: If a candidate key is missing from ``candidates`` or the
            candidate dict has no 'party' entry.
    """
    results_by_party = {}
    # .items() works on both Python 2 and 3; .iteritems() is Python-2-only.
    for candidate, count in results_by_candidate.items():
        party = candidates[candidate]['party']
        results_by_party[party] = results_by_party.get(party, 0) + count

    return results_by_party
12
13
# State names to download, in the order they are requested. Each name is
# later lower-cased and hyphenated to form the URL path segment.
states = [
    'Alaska', 'Alabama', 'Arkansas', 'Arizona', 'California', 'Colorado',
    'Connecticut', 'Delaware', 'Florida', 'Georgia',
    'Hawaii', 'Iowa', 'Idaho', 'Illinois', 'Indiana', 'Kansas', 'Kentucky',
    'Louisiana', 'Massachusetts', 'Maryland', 'Maine', 'Michigan',
    'Minnesota', 'Missouri', 'Mississippi', 'Montana', 'North Carolina',
    'North Dakota', 'Nebraska', 'New Hampshire', 'New Jersey', 'New Mexico',
    'Nevada', 'New York', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania',
    'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas',
    'Utah', 'Virginia', 'Vermont', 'Washington', 'Wisconsin',
    'West Virginia', 'Wyoming',
]
25
26
# Download the presidential race JSON for every state, keyed by the
# URL-formatted state name (lower-case, spaces replaced by hyphens).
all_results = {}
for state in states:
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('Downloading {}'.format(state))
    formatted_state = state.lower().replace(' ', '-')
    response = requests.get(
        'https://static01.nyt.com/elections-assets/2020/data/api/2020-11-03/race-page/{}/president.json'.format(formatted_state),
        # Without a timeout a stalled connection would block the script forever.
        timeout=30,
    )
    # Fail on an HTTP error status here rather than on a confusing JSON or
    # KeyError failure further down.
    response.raise_for_status()
    all_results[formatted_state] = response.json()
32
33
# Flatten every state's time series into one list of row dicts, then write
# the rows to CSV.
records = []

# Short party codes used in the output column names; any party_id not listed
# here is bucketed as 'trd'.
party_codes = {'republican': 'rep', 'democrat': 'dem'}

# .items() works on both Python 2 and 3; .iteritems() is Python-2-only.
# `state` here is the URL-formatted key used when filling all_results.
for state, state_results in all_results.items():
    # Only the first race in the payload is used.
    race = state_results['data']['races'][0]

    # Tag each candidate dict (in place) with its short party code.
    for candidate in race['candidates']:
        candidate['party'] = party_codes.get(candidate['party_id'], 'trd')
    candidates = {candidate['candidate_key']: candidate for candidate in race['candidates']}

    for data_point in race['timeseries']:
        # Copy race-level fields onto every time-series row. The rows are
        # mutated in place, so existing keys keep their original order.
        data_point['state'] = state
        data_point['expected_votes'] = race['tot_exp_vote']
        data_point['trump2016'] = race['trump2016']
        data_point['votes2012'] = race['votes2012']
        data_point['votes2016'] = race['votes2016']

        # Replace the per-candidate 'vote_shares' mapping with one flat
        # column per party code; a party with no candidates gets 0.
        vote_shares = collapse_results_by_party(data_point['vote_shares'], candidates)
        for party in ['rep', 'dem', 'trd']:
            data_point['vote_share_{}'.format(party)] = vote_shares.get(party, 0)

        data_point.pop('vote_shares')
        records.append(data_point)

time_series_df = pd.DataFrame.from_records(records)
# NOTE(review): the 'data' directory is assumed to exist already; confirm.
time_series_df.to_csv('data/nyt_ts.csv', encoding='utf-8')