Advertisement
Guest User

Auswertungsskript für Teil 1 BPT12.2-Antragsreihenfolgeumfrage

a guest
Nov 15th, 2012
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env ruby
  2. # Simple tool to get done with some kind of free-form vote ballots
  3. #
  4. # written for the Bundesparteitag 2012.2 Antragsreihenfolgeumfrage
  5. # of the popcorn-industry reviving Piratenpartei Deutschland.
  6. #
  7. # Copyright (c) 2012, Stephan Beyer <s-beyer at gmx.net>
  8. # All rights reserved.
  9. #
  10. # Redistribution and use in source and binary forms, with or without
  11. # modification, are permitted provided that the following conditions are met:
  12. #
  13. # 1. Redistributions of source code must retain the above copyright notice, this
  14. #    list of conditions and the following disclaimer.
  15. # 2. Redistributions in binary form must reproduce the above copyright notice,
  16. #    this list of conditions and the following disclaimer in the documentation
  17. #    and/or other materials provided with the distribution.
  18. #
  19. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  20. # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  21. # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22. # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
  23. # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  24. # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  25. # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  26. # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  28. # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. #
  30. # The views and conclusions contained in the software and documentation are those
  31. # of the author and should not be interpreted as representing official policies,
  32. # either expressed or implied, of the any project.
  33.  
  # When true, step 1 prints every submission that has unparsed trailing text
  # although fewer than 20 items were recognised, and waits for RETURN.
  DEBUG = true

  # Regex fragments (without the trailing number) for every spelling of a
  # motion prefix that showed up in the answers, grouped by motion category.
  # Typo fixes are part of these lists, so some entries look odd on purpose.
  programmantraege = [
    "PA:? ? ?",
    '\(\)PA',
    "PS", # typo fixes are here, too
    "Programma PA",
    "Programmantrag: ?P?A?",
    "Programmantrag: PS",
    "A",
    "Eure c&p-Vorlage:PA",
  ]
  positionspapiere = [
    "PO? ? ?",
    "Positionspapier:? P?",
  ]
  sonstige = [
    "X",
    "bX",
  ]
  # "\303\204" / "\303\244" are the UTF-8 bytes of the upper/lower case A
  # umlaut, "\314\210" is the combining diaeresis.
  satzungsantraege = [
    "S\303\204A ?",
    "SA",
    "S\303\244A",
    "SA\303\204",
    "S\303\204",
    "PS\303\204",
    "SA\314\210A",
    "Satzungantrag: S\303\204A",
  ]

  # One big alternation: any known prefix followed by a number.
  alle = (programmantraege + positionspapiere + sonstige + satzungsantraege).map { |x| x + "[0-9]+" }.join("|")
  # Separator noise tolerated between items: spaces, tabs, commas, semicolons.
  rauschen = '[ \t,;]*'
  # Up to 20 optional items, each in its own capture group (m[1]..m[20]).
  # Everything is optional, so whatever could not be parsed ends up in
  # MatchData#post_match.
  final = "^" + "#{rauschen}(#{alle})?" * 20 + rauschen

  regex = Regexp.new(final, Regexp::IGNORECASE)

  # Hand-transcribed ballots, keyed by the line number of the results file
  # (the first line is number 1). These replace automatic parsing entirely.
  manual_parse = {
    519 => ["P035","P035","P035","PA067","PA439"],
    632 => ["PA095","PA095","PA095"],
    960 => ["PA221","PA222","PA223","PA224","PA225","PA226","PA008","PA089","PA150","PA151","PA152","PA153","PA164","PA165","PA172","PA246","PA254","PA278","PA288","PA293"],
    962 => ["PA188","PA188","PA188","PA207","PA207","PA207","PA210","PA210","PA210"],
    1996 => ["PA129","PA103","PA616","PA060","PA200","PA250","PA305","P019","PA174","PA240","PA633","PA375","PA195","PA109","PA110","PA111","PA112","PA113","PA248","PA141"], # corrections only
    2097 => ["PA377","PA377","PA377"],
    2246 => ["PA600"], # corrections only
    2297 => ["PA095","PA049","PA444","PA094","PA092","PA129","PA054","PA403","PA035","PA185","PA006","PA030","PA501","PA298","PA124","PA036","PA083","PA197","S\303\204A030","PA197"], # corrections only
    2376 => ["PA101","PA062","PA246","PA511","PA518"],
    2558 => ["PA119","PA119","PA119","PA051","PA234","PA237","PA610","PA441","PA248","PA281","PA527","PA087","PA207","PA629","PA100","PA101","PA038","PA552"],
    2562 => ["S\303\204A015","PA003","PA066","PA122","X012","PA003"],
    2857 => ["PA457","PA425","PA001","PA186","PA092","PA092","S\303\204A005","PA535","PA055","PA635","PA048","PA403","S\303\204A008","PA395","PA053","PA053","PA220","PA542","PA169","PA635"],
    2904 => ["PA188","PA545","PA001","X014","PA174","PA379","PA637","PA174","PA455"], # corrections only
    2949 => ["PA014","PA034","PA032","PA007","PA006","PA112","PA075","PA078","PA096","PA158"], # corrections only
    3000 => ["PA012","PA079","PA087","PA001","PA299","PA059","PA098","PA131","PA545","PA541","PA457","PA092","PA399","PA240","PA466","PA140","PA150","PA192","PA001","PA240"], # corrections only
  }

  # Trailing text after the parsed items that is silently accepted instead of
  # being counted as "named more than 20 items".
  ignore_post_match = [
    "Pr",
    "P",
    "egal",
    "./.",
    "?",
    "gehts auch komplizierter?",
    "Ich favorisiere Keinen Antrag. Einige sind fraglich ob diese gestellt werden sollen, den sie verschwenden wertvolle Zeit.",
  ]

  # Normalisation rules [regex, replacement], applied in this order to every
  # parsed item: any spelling of a prefix becomes the canonical prefix and the
  # number is zero-padded to three digits (a fourth leading zero is dropped).
  # Generated per category so the three digit-width variants cannot drift
  # apart between categories.
  unify = [
    [programmantraege, "PA"],
    [positionspapiere, "P"],
    [sonstige, "X"],
    [satzungsantraege, "S\303\204A"],
  ].flat_map do |patterns, prefix|
    alternatives = patterns.join("|")
    [
      ["([0-9]{1})", "#{prefix}00\\2"],
      ["([0-9]{2})", "#{prefix}0\\2"],
      ["0?([0-9]{3})", "#{prefix}\\2"],
    ].map do |digits, replacement|
      [Regexp.new("^(#{alternatives})#{digits}$", Regexp::IGNORECASE), replacement]
    end
  end
  114.  
  # The actual code.
  #
  # 1. Read at most 20 items per voter into the array "ballots".
  #
  # Every line of the results file except the first (presumably the CSV
  # header) is one submission. Lines listed in manual_parse are taken from
  # that table as-is. For all other lines the sixth `","`-separated field is
  # matched against `regex`; the captured items are normalised with the
  # `unify` rules and stored as one ballot. Empty ballots are dropped.
  #
  # math_loser_count20 counts submissions that have trailing text which is
  # neither empty nor listed in ignore_post_match, i.e. people who named more
  # than 20 items. In DEBUG mode, submissions with such trailing text but
  # fewer than 20 parsed items are printed and the script waits for RETURN so
  # they can be inspected by hand.
  ballots = []
  math_loser_count20 = 0
  # Block form so the file handle is closed even when we exit early.
  File.open("results-survey71446.csv") do |resfile|
    resfile.each_line.with_index(1) do |l, i|
      STDERR.print("\r")
      next if i == 1
      STDERR.print("#{i} ")

      manual = manual_parse[i]
      if manual
        ballots.push(manual)
        STDERR.puts "manually parsed"
        next
      end

      # A line with fewer than six fields yields nil here; treat it like an
      # empty answer instead of crashing.
      input = l.split(/","/)[5]
      next if input.nil? || input.empty?

      m = regex.match(input)
      if m.nil?
        STDERR.puts("Can't parse input:")
        STDERR.puts(input)
        STDERR.puts("Exiting...")
        exit 1 # signal the failure to the caller
      end

      leftover = m.post_match
      unless leftover.empty? || ignore_post_match.member?(leftover)
        math_loser_count20 += 1
        if DEBUG && m[20].nil?
          p i
          p m
          p leftover
          STDIN.readline
        end
      end

      # Stop at the first group that did not match; for some other regex one
      # might want to skip such groups instead.
      tmp_ballot = m.captures.take_while { |token| !token.nil? }.map do |token|
        unify.inject(token) { |acc, (tmp_regex, tmp_to)| acc.gsub(tmp_regex, tmp_to) }
      end
      ballots.push(tmp_ballot) unless tmp_ballot.empty?
    end
  end
  166.  
  167.  
  # 2. Count votes in buckets.
  #
  # count_buckets maps each item to the number of times it was named over all
  # ballots, where a single ballot contributes at most 3 votes per item.
  # math_loser_count3 is the number of ballots that named at least one item
  # more than 3 times; each such ballot is counted once.
  #
  # The cap is applied to every over-voted item. Previously it was tied to the
  # "already counted this ballot" flag, so only the first over-voted item of a
  # ballot was capped and any further one was counted in full.
  count_buckets = Hash.new(0)
  math_loser_count3 = 0
  ballots.each do |ballot|
    ballot_count_buckets = Hash.new(0)
    ballot.each { |item| ballot_count_buckets[item] += 1 }

    math_loser_count3 += 1 if ballot_count_buckets.any? { |_item, count| count > 3 }

    ballot_count_buckets.each do |item, count|
      count_buckets[item] += [count, 3].min
    end
  end
  187.  
  # 3. Output.
  #
  # Prints the summary sentences followed by one line per item, highest vote
  # count first. Items with the same count are ordered by item id so that the
  # listing is reproducible between runs (a plain sort by count leaves the
  # order of ties unspecified).
  puts "Es gab bei dem Umfrageteil #{ballots.length} Abgaben."
  puts
  puts "Dabei gab es #{math_loser_count20} Abgaben, wo mehr als 20 Anträge benannt waren. Hier haben wir nur die ersten 20 gezählt."
  puts
  puts "Des Weiteren gab es #{math_loser_count3} Abgaben, wo gleiche Anträge mehr als 3 mal benannt waren. Auch hier haben wir jeweils nur 3 gezählt."
  puts
  puts "Ergebnisse (Antragsnummer und Anzahl Nennung):"
  count_buckets.sort_by { |item, count| [-count, item] }.each do |item, count|
    puts "# [[Antrag:Bundesparteitag_2012.2/Antragsportal/#{item}|#{item}]] &ndash; #{count}"
  end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement