Guest User

Untitled

a guest
Jun 19th, 2018
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.72 KB | None | 0 0
  1. library(data.table)
  2. library(stringr)
  3.  
  # Root directory holding the local OpenElections checkouts.
  # Set the OE_PATH environment variable to point somewhere else; when it is
  # unset or empty the original default location is used.
  OE_PATH = Sys.getenv('OE_PATH', unset = NA)
  if (is.na(OE_PATH) || !nzchar(OE_PATH)) {
    OE_PATH = '~/medsl/openelections'
  }
  5.  
  # Infix helper: case-insensitive regular-expression match.
  #   string  - character vector to search
  #   pattern - regular expression, compiled with ignore_case = TRUE
  # Returns whatever str_detect() returns for that input (one logical per
  # element of `string`).
  `%=%` = function(string, pattern) {
    case_blind = stringr::regex(pattern, ignore_case = TRUE)
    stringr::str_detect(string, case_blind)
  }
  9.  
  # Infix helper: delete every case-insensitive match of a pattern.
  #   string  - character vector to clean
  #   pattern - regular expression, compiled with ignore_case = TRUE
  # Returns `string` with all matches removed (via str_remove_all()).
  `%-%` = function(string, pattern) {
    case_blind = stringr::regex(pattern, ignore_case = TRUE)
    stringr::str_remove_all(string, case_blind)
  }
  13.  
  # Read and combine counties
  # Precinct files for the 2016-11-08 general election; exactly 62 files are
  # expected, anything else aborts the script.
  paths = list.files(file.path(OE_PATH, 'openelections-data-ny', '2016'),
                     '20161108.*general__', full.names = TRUE)
  stopifnot(length(paths) == 62)

  # Identifier taken from each file name: the text between "general__" and
  # "__precinct". A file name that does not follow that pattern yields NA,
  # which would silently become an NA id in the combined table, so fail fast.
  county_ids = str_extract(basename(paths), '(?<=general__).*(?=__precinct)')
  stopifnot(!anyNA(county_ids))

  ny_counties = lapply(paths, function(p) {
    # Print the file path whenever fread() warns so the warning can be traced
    # back to the file that triggered it.
    d = withCallingHandlers({ fread(p) }, warning = function(w) { print(p) })
    # Drop columns whose names start with V<digits> (the names fread assigns
    # to columns without a header).
    keep = names(d)[!str_detect(names(d), '^V\\d+')]
    d[, c(keep), with = FALSE]
  })
  ny_counties = setNames(ny_counties, county_ids)
  # Stack all files; columns missing from a file are filled with NA and the
  # list names end up in the 'path' column.
  d = rbindlist(ny_counties, fill = TRUE, idcol = 'path')
  26.  
  # Reshape to long form: every column that is not listed in id.vars becomes
  # a (mode, votes) pair, and rows whose value is NA are dropped.
  d = melt(
    d,
    id.vars = c('county', 'precinct', 'office', 'district', 'candidate',
                'party', 'path'),
    na.rm = TRUE,
    variable.factor = FALSE,
    variable.name = 'mode',
    value.name = 'votes'
  )
  30.  
  # Strip thousands separators (commas) from the vote counts, then store the
  # column as integer.
  d[, votes := as.integer(votes %-% ',')]
  34.  
  # Rows to drop: Total, Total Outside NYC, Total NYC, Statewide Total,
  # Public Counter. Counties that contain a "Public Counter" candidate row:
  unique(d[candidate %=% 'Public Counter', county])
  37. # [1] "Bronx" "New York" "Queens" "Richmond"
  38.  
  # Distinct (county, candidate) pairs whose candidate name contains "total"
  d[candidate %=% 'total', unique(.SD), .SDcols = c('county', 'candidate')]
  40. # county candidate
  41. # 1: Cayuga Total Special Votes
  42. # 2: Cayuga Total Votes
  43. # 3: Chemung Total
  44. # 4: Chenango Total
  45. # 5: Delaware Total
  46. # 6: Erie Total
  47. # 7: Franklin Total
  48. # 8: Genesee Total
  49. # 9: Jefferson Total
  50. # 10: Lewis Total
  51. # 11: Livingston Total
  52. # 12: Monroe Total
  53. # 13: Orange Total Votes Cast
  54. # 14: Orleans Total
  55. # 15: Putnam Total
  56. # 16: Schoharie Total Special Votes
  57. # 17: Schoharie Total Votes
  58. # 18: Seneca Total
  59. # 19: Steuben Total Enrolled Voters
  60. # 20: Sullivan Total
  61. # 21: Wyoming Total
  62. # 22: Yates Total
  63. # county candidate
  64.  
  # Niagara has precincts such as "Wilson/000/1", "Wilson/000/2", ... as well
  # as "Wilson/000"; list the ones that contain exactly one slash.
  unique(d[county == 'Niagara' & str_count(precinct, '\\/') == 1, precinct])
  67. # [1] "Cambria/000" "City of Lockport/001" "City of Lockport/002"
  68. # [4] "City of Lockport/003" "City of Lockport/004" "City of Lockport/005"
  69. # [7] "Hartland/000" "Lewiston/000" "N Tonawanda/001"
  70. # [10] "N Tonawanda/002" "N Tonawanda/003" "Newfane/000"
  71. # [13] "Niagara Falls/003" "Niagara Falls/004" "Niagara Falls/005"
  72. # [16] "Niagara Falls/006" "Pendleton/000" "Porter/000"
  73. # [19] "Royalton/000" "Somerset/000" "Lockport/000"
  74. # [22] "Niagara/000" "Wheatfield/000" "Wilson/000"
  75.  
  # Columbia: mode "votes" = "AbsenteeAffidavit" + "ElectionDayVotes".
  # Modes present for the county:
  unique(d[county == 'Columbia', mode])
  78. # [1] "votes" "election_day" "absentee"
  79.  
  # Herkimer is the same, except "votes" = "absentee_affidavit" +
  # "polling_place". Modes present for the county:
  unique(d[county == 'Herkimer', mode])
  82. # [1] "votes" "election_day" "absentee"
  83.  
  # Lewis and Seneca: "absentee" + "election_day" = "votes".
  # Modes present for the two counties:
  unique(d[county %in% c('Lewis', 'Seneca'), mode])
  86. # [1] "votes" "election_day" "absentee"
  87.  
  # St. Lawrence: mode "votes" = "machine_votes" + "absentee" + "affidavit" +
  # "absentee_hc". Modes present for the county:
  unique(d[county == 'St. Lawrence', mode])
  90. # [1] "votes" "absentee" "machine_votes" "absentee_hc"
  91. # [5] "affidavit"
  92.  
  # Candidate totals and cumulative precinct rows...
  # First, the distinct (county, candidate) pairs whose candidate name
  # contains "total":
  d[candidate %=% 'total', unique(.SD), .SDcols = c('county', 'candidate')]
  95. # county candidate
  96. # 1: Cayuga Total Special Votes
  97. # 2: Cayuga Total Votes
  98. # 3: Chemung Total
  99. # 4: Chenango Total
  100. # 5: Delaware Total
  101. # 6: Erie Total
  102. # 7: Franklin Total
  103. # 8: Genesee Total
  104. # 9: Jefferson Total
  105. # 10: Lewis Total
  106. # 11: Livingston Total
  107. # 12: Monroe Total
  108. # 13: Orange Total Votes Cast
  109. # 14: Orleans Total
  110. # 15: Putnam Total
  111. # 16: Schoharie Total Special Votes
  112. # 17: Schoharie Total Votes
  113. # 18: Seneca Total
  114. # 19: Steuben Total Enrolled Voters
  115. # 20: Sullivan Total
  116. # 21: Wyoming Total
  117. # 22: Yates Total
  118. # county candidate
  119.  
  # Distinct (county, precinct) pairs whose precinct name contains
  # "cumulative" or "total"
  d[precinct %=% 'cumulative|total', unique(.SD),
    .SDcols = c('county', 'precinct')]
  121. # county precinct
  122. # 1: Lewis TOTALS
  123. # 2: Madison TOTAL
  124. # 3: Montgomery Total
  125. # 4: Nassau Total
  126. # 5: Oneida Total
  127. # 6: Onondaga Syracuse Total
  128. # 7: Onondaga Onondaga Town Total
  129. # 8: Onondaga Total
  130. # 9: Ontario Total
  131. # 10: Orleans TOTAL
  132. # 11: Oswego Total
  133. # 12: Otsego TOTAL
  134. # 13: Rockland Total
  135. # 14: Saratoga Total
  136. # 15: Schuyler Cumulative
  137. # 16: Schuyler Total
  138. # 17: Seneca Cumulative
  139. # 18: Seneca TOTAL
  140. # 19: Tompkins Total
  141. # 20: Ulster Total
  142.  
  # Counties that have rows with an empty precinct name
  d[precinct == '', unique(.SD), .SDcols = c('county', 'precinct')]
  144. # county precinct
  145. # 1: Wayne
  146. # 2: Wyoming
  147.  
  # Ballots cast...
  # Counties that contain a "ballots cast" candidate row:
  unique(d[candidate %=% 'ballots cast', county])
  150. # [1] "Niagara" "Schenectady" "St. Lawrence"
Add Comment
Please, Sign In to add comment