Guest User

Untitled

a guest
May 22nd, 2018
100
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.21 KB | None | 0 0
  #!/usr/bin/env ruby

  # Script setup: picks the input CSV, loads the required libraries, checks
  # that the input file exists, and initialises the counters and the Sunlight
  # API key used by the rest of the script.
  #
  # Usage: the first command-line argument is the CSV to read; when it is
  # missing or empty, "all-names.csv" is used.

  filename = ARGV.first
  filename = "all-names.csv" if filename.nil? || filename.empty?

  begin
    require 'fileutils'
    require 'rubygems'
    require 'sunlight'
    require 'fastercsv'
  rescue LoadError
    # A failed require raises LoadError, which is a ScriptError rather than a
    # StandardError, so a bare `rescue` would never reach this message.
    puts "Couldn't load dependencies. Try running two commands and try again:\n\nsudo gem install fastercsv\nsudo gem install sunlight"
    exit 1
  end

  # File.exist? is the supported spelling; File.exists? was removed in Ruby 3.2.
  unless File.exist?(filename)
    puts "Couldn't locate #{filename}. Place it in the same directory as this script."
    exit 1
  end

  # Running tallies, incremented by legislator_for_name and printed in the
  # final summary.
  @@misses = 0
  @@duplicates = 0
  Sunlight::Base.api_key = 'sunlight9'
  28.  
  # Finds the one legislator matching a name such as
  # "HON. John Smith Jr. (TX)".
  #
  # The name is split into words and parsed from the right: an optional
  # "(STATE)", an optional suffix ("Jr", "Jr.", or a run of I's such as "III"),
  # then the last name. The first remaining word is used as the given name.
  #
  # Queries are tried in this order, stopping at the first one that returns
  # exactly one record:
  #   1. given name as :firstname
  #   2. given name as :nickname
  #   3. given name as :firstname with :in_office => 0
  #   4. given name as :nickname with :in_office => 0
  #   5. last name only, via unique_for
  #   6. the word before the last name joined to it (e.g. a two-word last
  #      name), via unique_for
  #
  # If any of queries 1-4 returns more than one record, the search stops,
  # @@duplicates is incremented and a message is printed. If nothing matches,
  # @@misses is incremented and a message is printed.
  #
  # name - the raw name string from the input file.
  #
  # Returns the matching legislator object, or nil when there is no unique match.
  def legislator_for_name(name)
    options = {}

    # Drop the "HON." prefix and split on whitespace. Stripping first, and
    # consuming every space after the prefix, keeps an empty leading piece
    # from being mistaken for the first name.
    pieces = name.to_s.strip.gsub(/^HON\.\s*/i, '').split(/\s+/)

    # might be a state in parentheses at the end
    options[:state] = pieces.pop.gsub(/[\(\)]/, '') if pieces.last =~ /^\([a-zA-Z]+\)$/

    # might be a suffix at the end
    options[:name_suffix] = "#{pieces.pop.gsub(/\./, '')}." if pieces.last =~ /^Jr\.?$/i
    options[:name_suffix] = pieces.pop if pieces.last =~ /^I+$/i

    # Nothing left to serve as a last name: report it instead of crashing on nil.
    if pieces.empty?
      @@misses += 1
      puts "I GIVE UP. Couldn't find a last name in: #{name.inspect}"
      return nil
    end

    options[:lastname] = pieces.pop.gsub(/,/, '')
    given_name = pieces.first

    # Each attempt says which key carries the given name and, when non-nil,
    # the :in_office value to add. Once added, :in_office stays in options.
    attempts = [[:firstname, nil], [:nickname, nil], [:firstname, 0], [:nickname, 0]]
    attempts.each do |name_key, in_office|
      options.delete(:firstname)
      options.delete(:nickname)
      options[:in_office] = in_office if in_office
      options[name_key] = given_name

      results = Sunlight::Legislator.all_where(options)
      return results.first if results.size == 1

      if results.size > 1
        @@duplicates += 1
        puts "Duplicates for options: #{options.inspect}"
        return nil
      end
    end

    # OK, we'll accept a result if it matches on last name only, but only if
    # there's a single result amongst both in and out of office legislators.
    options.delete(:nickname)
    options.delete(:firstname)
    legislator = unique_for(options)
    return legislator if legislator

    # Finally, try the combo last name. Skipped when no word is left, since
    # that would only prepend a blank to the last name already tried.
    unless pieces.empty?
      options[:lastname] = "#{pieces.pop} #{options[:lastname]}"
      legislator = unique_for(options)
      return legislator if legislator
    end

    @@misses += 1
    puts "I GIVE UP. Couldn't match on options: #{options.merge(:pieces => pieces).inspect}"
    nil
  end
  123.  
  124.  
  # Returns a legislator only when the given options match exactly one record
  # across the :in_office => 1 and :in_office => 0 queries combined; otherwise
  # returns nil. Used for the last-name-only checks.
  #
  # options - Hash of search options. It is not modified: each query runs on a
  #           copy with :in_office set.
  #
  # Returns the single matching legislator, or nil when there are zero or
  # several matches.
  def unique_for(options)
    in_results = Sunlight::Legislator.all_where(options.merge(:in_office => 1))
    # More than one in-office match can never be unique; skip the second query.
    return nil if in_results.size > 1

    out_results = Sunlight::Legislator.all_where(options.merge(:in_office => 0))
    return nil unless in_results.size + out_results.size == 1

    # Exactly one of the two lists holds the single record.
    in_results.first || out_results.first
  end
  143.  
  # Builds an upper-cased display name for a legislator, for example
  # "REP. ROBERT 'BOB' SMITH JR.".
  #
  # The nickname (in single quotes) and the name suffix are included only when
  # they are present and non-empty.
  #
  # legislator - object responding to title, firstname, nickname, lastname and
  #              name_suffix.
  #
  # Returns the formatted name as an upper-case String.
  def name_for(legislator)
    first = legislator.firstname.to_s
    first += " '#{legislator.nickname}'" unless legislator.nickname.to_s.empty?

    last = legislator.lastname.to_s
    last += " #{legislator.name_suffix}" unless legislator.name_suffix.to_s.empty?

    "#{legislator.title}. #{first} #{last}".upcase
  end
  151.  
  152.  
  # Main run: reads the input CSV, looks up every "HON." name, writes the
  # matches to bioguide_ids.csv and prints a summary.
  puts "Trying to match up names in #{filename}..."

  names = {}
  attempted = 0 # every "HON." row looked up, whether or not it matched
  FasterCSV.foreach(filename) do |row|
    name = row[0]

    # Members' names will always start with "HON."
    next unless name =~ /HON\./

    attempted += 1
    legislator = legislator_for_name(name)
    next unless legislator

    names[name] = {
      :bioguide_id => legislator.bioguide_id,
      :name_check => name_for(legislator),
      :in_office => legislator.in_office
    }
  end

  # rm_f removes any previous output and is a no-op when the file is absent.
  FileUtils.rm_f("bioguide_ids.csv")
  FasterCSV.open("bioguide_ids.csv", "w") do |csv|
    csv << ['bioguide_id', 'name', 'name_check', 'in_office']
    names.each do |name, values|
      csv << [values[:bioguide_id], name, values[:name_check], values[:in_office]]
    end
  end

  # Report against the number of names attempted; names only holds the
  # distinct names that matched, so it cannot serve as the "out of" total.
  puts ""
  puts "Out of #{attempted} names (#{names.size} matched):"
  puts "#{@@misses} attempts failed to match a legislator entirely."
  puts "#{@@duplicates} attempts matched too many legislators."
  puts ""
  puts "Wrote names and bioguide IDs out to bioguide_ids.csv."
Add Comment
Please, Sign In to add comment