Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(data.table)
- library(stringr)
# Directory that holds the 'openelections-data-ny' folder read by this script.
# Set the OE_PATH environment variable to point at a different location;
# when it is unset or empty, the original hard-coded path is used.
OE_PATH = Sys.getenv('OE_PATH', unset = NA)
if (is.na(OE_PATH) || !nzchar(OE_PATH)) {
  OE_PATH = '~/medsl/openelections'
}
# Case-insensitive regex test.
# `string %=% pattern` returns whatever str_detect() yields for `string`
# against `pattern` compiled with ignore_case = TRUE.
`%=%` = function(string, pattern) {
  matcher = stringr::regex(pattern, ignore_case = TRUE)
  str_detect(string, matcher)
}
# Case-insensitive regex removal.
# `string %-% pattern` returns `string` with every match of `pattern`
# (compiled with ignore_case = TRUE) deleted via str_remove_all().
`%-%` = function(string, pattern) {
  matcher = regex(pattern, ignore_case = TRUE)
  str_remove_all(string, matcher)
}
# Read and combine counties
# List the 2016 files whose names match the 2016-11-08 general-election
# pattern. The script expects exactly 62 matches and stops otherwise.
paths = list.files(
  path = file.path(OE_PATH, 'openelections-data-ny', '2016'),
  pattern = '20161108.*general__',
  full.names = TRUE
)
stopifnot(length(paths) == 62)
# Read each file into a data.table, discarding columns whose names start
# with 'V' followed by digits (presumably fread()'s auto-generated names for
# columns without a header).
ny_counties = lapply(paths, function(csv_path) {
  # Print the file path whenever fread() warns, so the warning can be traced
  county_dt = withCallingHandlers(
    fread(csv_path),
    warning = function(w) print(csv_path)
  )
  is_unnamed = str_detect(names(county_dt), '^V\\d+')
  county_dt[, names(county_dt)[!is_unnamed], with = FALSE]
})
# Label each table with the text found between 'general__' and '__precinct'
# in its file name, then stack all tables. Columns absent from a table are
# filled in (fill = TRUE) and the label is stored in the 'path' column.
names(ny_counties) = str_extract(basename(paths), '(?<=general__).*(?=__precinct)')
d = rbindlist(ny_counties, idcol = 'path', fill = TRUE)
# Reshape to long format: every column that is not an identifier becomes a
# (mode, votes) pair, with the former column name kept as a character `mode`.
# Rows whose value is NA are dropped.
d = melt(
  d,
  id.vars = c('county', 'precinct', 'office', 'district', 'candidate', 'party', 'path'),
  variable.name = 'mode',
  value.name = 'votes',
  variable.factor = FALSE,
  na.rm = TRUE
)
# Strip thousands separators (commas) from votes, then convert to integer
d[, votes := as.integer(votes %-% ',')]
# Drop Total, Total Outside NYC, Total NYC, Statewide Total, Public Counter
# NOTE(review): the two queries below only inspect these rows; nothing is
# removed from `d` here.
# Counties that report a 'Public Counter' pseudo-candidate:
unique(d[candidate %=% 'Public Counter', county])
# [1] "Bronx" "New York" "Queens" "Richmond"
# Distinct (county, candidate) pairs whose candidate name contains 'total':
unique(d[candidate %=% 'total', c('county', 'candidate'), with = FALSE])
# county candidate
# 1: Cayuga Total Special Votes
# 2: Cayuga Total Votes
# 3: Chemung Total
# 4: Chenango Total
# 5: Delaware Total
# 6: Erie Total
# 7: Franklin Total
# 8: Genesee Total
# 9: Jefferson Total
# 10: Lewis Total
# 11: Livingston Total
# 12: Monroe Total
# 13: Orange Total Votes Cast
# 14: Orleans Total
# 15: Putnam Total
# 16: Schoharie Total Special Votes
# 17: Schoharie Total Votes
# 18: Seneca Total
# 19: Steuben Total Enrolled Voters
# 20: Sullivan Total
# 21: Wyoming Total
# 22: Yates Total
# county candidate
# In Niagara, we have "Wilson/000/1", "Wilson/000/2", ... "Wilson/000"
# List the Niagara precinct names that contain exactly one '/':
unique(d[county == 'Niagara' & str_count(precinct, '\\/') == 1, precinct])
# [1] "Cambria/000" "City of Lockport/001" "City of Lockport/002"
# [4] "City of Lockport/003" "City of Lockport/004" "City of Lockport/005"
# [7] "Hartland/000" "Lewiston/000" "N Tonawanda/001"
# [10] "N Tonawanda/002" "N Tonawanda/003" "Newfane/000"
# [13] "Niagara Falls/003" "Niagara Falls/004" "Niagara Falls/005"
# [16] "Niagara Falls/006" "Pendleton/000" "Porter/000"
# [19] "Royalton/000" "Somerset/000" "Lockport/000"
# [22] "Niagara/000" "Wheatfield/000" "Wilson/000"
# Vote modes present per county, with notes on how they add up.
# NOTE(review): the Columbia and Herkimer notes quote mode names that do not
# appear in the recorded outputs beneath them -- re-run to confirm.
# In Columbia, mode "votes" = "AbsenteeAffidavit" + "ElectionDayVotes"
unique(d[county == 'Columbia', mode])
# [1] "votes" "election_day" "absentee"
# Same in Herkimer, but "votes" = "absentee_affidavit" + "polling_place"
unique(d[county == 'Herkimer', mode])
# [1] "votes" "election_day" "absentee"
# In Lewis and Seneca, "absentee" + "election_day" = "votes"
unique(d[county %in% c('Lewis', 'Seneca'), mode])
# [1] "votes" "election_day" "absentee"
# In St. Lawrence, mode "votes" = "machine_votes" + "absentee" + "affidavit" + "absentee_hc"
unique(d[county == 'St. Lawrence', mode])
# [1] "votes" "absentee" "machine_votes" "absentee_hc"
# [5] "affidavit"
# Candidate totals and cumulative precinct rows...
# Distinct (county, candidate) pairs whose candidate name contains 'total'
# (this repeats the candidate-level 'total' query run earlier in the script):
unique(d[candidate %=% 'total', c('county', 'candidate'), with = FALSE])
# county candidate
# 1: Cayuga Total Special Votes
# 2: Cayuga Total Votes
# 3: Chemung Total
# 4: Chenango Total
# 5: Delaware Total
# 6: Erie Total
# 7: Franklin Total
# 8: Genesee Total
# 9: Jefferson Total
# 10: Lewis Total
# 11: Livingston Total
# 12: Monroe Total
# 13: Orange Total Votes Cast
# 14: Orleans Total
# 15: Putnam Total
# 16: Schoharie Total Special Votes
# 17: Schoharie Total Votes
# 18: Seneca Total
# 19: Steuben Total Enrolled Voters
# 20: Sullivan Total
# 21: Wyoming Total
# 22: Yates Total
# county candidate
# Distinct (county, precinct) pairs whose precinct name contains
# 'cumulative' or 'total':
unique(d[precinct %=% 'cumulative|total', c('county', 'precinct'), with = FALSE])
# county precinct
# 1: Lewis TOTALS
# 2: Madison TOTAL
# 3: Montgomery Total
# 4: Nassau Total
# 5: Oneida Total
# 6: Onondaga Syracuse Total
# 7: Onondaga Onondaga Town Total
# 8: Onondaga Total
# 9: Ontario Total
# 10: Orleans TOTAL
# 11: Oswego Total
# 12: Otsego TOTAL
# 13: Rockland Total
# 14: Saratoga Total
# 15: Schuyler Cumulative
# 16: Schuyler Total
# 17: Seneca Cumulative
# 18: Seneca TOTAL
# 19: Tompkins Total
# 20: Ulster Total
# Counties that have rows with an empty precinct name:
unique(d[precinct == '', c('county', 'precinct'), with = FALSE])
# county precinct
# 1: Wayne
# 2: Wyoming
# Ballots cast...
# Counties that report a 'ballots cast' pseudo-candidate:
unique(d[candidate %=% 'ballots cast', county])
# [1] "Niagara" "Schenectady" "St. Lawrence"
Add Comment
Please, Sign In to add comment