Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env ruby
- # Simple tool to get done with some kind of free-form vote ballots
- #
- # written for the Bundesparteitag 2012.2 Antragsreihenfolgeumfrage
- # of the popcorn-industry reviving Piratenpartei Deutschland.
- #
- # Copyright (c) 2012, Stephan Beyer <s-beyer at gmx.net>
- # All rights reserved.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions are met:
- #
- # 1. Redistributions of source code must retain the above copyright notice, this
- # list of conditions and the following disclaimer.
- # 2. Redistributions in binary form must reproduce the above copyright notice,
- # this list of conditions and the following disclaimer in the documentation
- # and/or other materials provided with the distribution.
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- #
- # The views and conclusions contained in the software and documentation are those
- # of the author and should not be interpreted as representing official policies,
- # either expressed or implied, of the any project.
# Debug switch: when true, the parsing loop dumps every ballot that has
# unparsed trailing text but fewer than 20 recognized items and then waits
# for RETURN on STDIN before continuing.
DEBUG = true

# Regex configuration.
#
# Each list below holds regex fragments (not plain strings) for the prefixes
# voters typed in front of a motion number, one list per motion category.
# The order inside a list matters: the fragments end up in one alternation
# and earlier alternatives are tried first.
# The octal escapes are UTF-8 bytes: "\303\204" is A-umlaut, "\303\244" is
# a-umlaut and "\314\210" is the combining diaeresis.

# prefixes that are normalized to "PA" later on
programmantraege = [
  "PA:? ? ?",
  '\(\)PA',
  "PS", # typo fixes are here, too
  "Programma PA",
  "Programmantrag: ?P?A?",
  "Programmantrag: PS",
  "A",
  "Eure c&p-Vorlage:PA",
]

# prefixes that are normalized to "P" later on
positionspapiere = [
  "PO? ? ?",
  "Positionspapier:? P?",
]

# prefixes that are normalized to "X" later on
sonstige = [
  "X",
  "bX",
]

# prefixes that are normalized to "S\303\204A" later on
satzungsantraege = [
  "S\303\204A ?",
  "SA",
  "S\303\244A",
  "SA\303\204",
  "S\303\204",
  "PS\303\204",
  "SA\314\210A",
  "Satzungantrag: S\303\204A",
]

# one big alternation: any known prefix followed by at least one digit
alle = (programmantraege + positionspapiere + sonstige + satzungsantraege)
       .map { |prefix| "#{prefix}[0-9]+" }
       .join("|")

# separator noise allowed before, between and after items
rauschen = '[ \t,;]*'

# Up to 20 optional items, each in its own capture group (1..20), separated
# by noise. Every group is optional, so the pattern matches any input; text
# that could not be consumed ends up in MatchData#post_match.
final = "^" + "#{rauschen}(#{alle})?" * 20 + rauschen
regex = Regexp.new(final, Regexp::IGNORECASE)
# Hand-entered ballots, keyed by the 1-based line number of the survey file.
# When the parsing loop reaches one of these line numbers it uses the list
# below as the ballot and does not look at the line's text at all.
# Entries marked "corrections only" are kept as annotated by the author.
manual_parse = {
  519 => ["P035","P035","P035","PA067","PA439"],
  632 => ["PA095","PA095","PA095"],
  960 => ["PA221","PA222","PA223","PA224","PA225","PA226","PA008","PA089","PA150","PA151","PA152","PA153","PA164","PA165","PA172","PA246","PA254","PA278","PA288","PA293"],
  962 => ["PA188","PA188","PA188","PA207","PA207","PA207","PA210","PA210","PA210"],
  1996 => ["PA129","PA103","PA616","PA060","PA200","PA250","PA305","P019","PA174","PA240","PA633","PA375","PA195","PA109","PA110","PA111","PA112","PA113","PA248","PA141"], # corrections only
  2097 => ["PA377","PA377","PA377"],
  2246 => ["PA600"], # corrections only
  2297 => ["PA095","PA049","PA444","PA094","PA092","PA129","PA054","PA403","PA035","PA185","PA006","PA030","PA501","PA298","PA124","PA036","PA083","PA197","S\303\204A030","PA197"], # corrections only
  2376 => ["PA101","PA062","PA246","PA511","PA518"],
  2558 => ["PA119","PA119","PA119","PA051","PA234","PA237","PA610","PA441","PA248","PA281","PA527","PA087","PA207","PA629","PA100","PA101","PA038","PA552"],
  2562 => ["S\303\204A015","PA003","PA066","PA122","X012","PA003"],
  2857 => ["PA457","PA425","PA001","PA186","PA092","PA092","S\303\204A005","PA535","PA055","PA635","PA048","PA403","S\303\204A008","PA395","PA053","PA053","PA220","PA542","PA169","PA635"],
  2904 => ["PA188","PA545","PA001","X014","PA174","PA379","PA637","PA174","PA455"], # corrections only
  2949 => ["PA014","PA034","PA032","PA007","PA006","PA112","PA075","PA078","PA096","PA158"], # corrections only
  3000 => ["PA012","PA079","PA087","PA001","PA299","PA059","PA098","PA131","PA545","PA541","PA457","PA092","PA399","PA240","PA466","PA140","PA150","PA192","PA001","PA240"], # corrections only
}
# Trailing texts that may be left over after the item regex has matched
# without the ballot being counted as having unparsed extra content.
# The comparison is an exact string match against MatchData#post_match.
ignore_post_match = [
  "Pr",
  "P",
  "egal",
  "./.",
  "?",
  "gehts auch komplizierter?",
  "Ich favorisiere Keinen Antrag. Einige sind fraglich ob diese gestellt werden sollen, den sie verschwenden wertvolle Zeit.",
]
# Normalization rules, applied in order to every parsed item.
#
# Each rule is a [pattern, replacement] pair. For every category there are
# three rules that rewrite "<any accepted prefix><number>" to the canonical
# prefix followed by a zero-padded three-digit number:
#   1 digit  -> prefix + "00" + digit
#   2 digits -> prefix + "0"  + digits
#   3 digits -> prefix + digits (one extra leading zero is dropped)
# Group 1 is the typed prefix, group 2 the digits that are kept.
# The rules are generated from the category table so that the three digit
# variants cannot drift apart between categories.
unify = [
  [programmantraege, 'PA'],
  [positionspapiere, 'P'],
  [sonstige, 'X'],
  [satzungsantraege, "S\303\204A"],
].flat_map do |prefixes, canonical|
  alternation = prefixes.join("|")
  [
    [Regexp.new("^(#{alternation})([0-9]{1})$", Regexp::IGNORECASE), "#{canonical}00\\2"],
    [Regexp.new("^(#{alternation})([0-9]{2})$", Regexp::IGNORECASE), "#{canonical}0\\2"],
    [Regexp.new("^(#{alternation})0?([0-9]{3})$", Regexp::IGNORECASE), "#{canonical}\\2"],
  ]
end
# the actual code

# 1. read max 20 items per voter into array "ballots"
#
# Walks the survey file line by line and appends one array of normalized
# item names per voter to "ballots". Progress is written to STDERR.
# "math_loser_count20" counts ballots that still had non-ignorable text left
# after the item regex matched (the regex takes at most 20 items).
i = 0
ballots = []
math_loser_count20 = 0
# File.foreach closes the file once iteration ends.
File.foreach("results-survey71446.csv") do |l|
  STDERR.print("\r")
  i += 1
  next if i == 1 # the first line is never parsed -- presumably the CSV header

  STDERR.print("#{i} ")

  # hand-entered ballots replace automatic parsing for their line number
  manual = manual_parse[i]
  if manual
    ballots.push(manual)
    STDERR.puts "manually parsed"
    next
  end

  # the free-form answer is the sixth piece when splitting on `","`
  input = l.split(/","/)[5]
  if input.nil?
    # fewer than six pieces: calling empty? on nil would raise NoMethodError
    STDERR.puts("no answer field found, skipping line")
    next
  end
  next if input.empty?

  m = regex.match(input)
  if m.nil?
    STDERR.puts("Can't parse input:")
    STDERR.puts(input)
    STDERR.puts("Exiting...")
    exit 1 # report the failure through the exit status
  end

  # count people who voted for more than 20 items
  # (in Debug mode, print out information and wait for RETURN)
  leftover = m.post_match
  if !leftover.empty? && !ignore_post_match.member?(leftover)
    math_loser_count20 += 1
    # fewer than 20 items but text left over: show it for manual inspection
    if DEBUG && m[20].nil?
      p i
      p m
      p leftover
      STDIN.readline
    end
  end

  # Keep the captured items up to the first unmatched group and run every
  # normalization rule over each of them, in rule order.
  tmp_ballot = m.captures.take_while { |item| !item.nil? }.map do |item|
    unify.reduce(item) { |name, (pattern, replacement)| name.gsub(pattern, replacement) }
  end
  ballots.push(tmp_ballot) unless tmp_ballot.empty?
end
# 2. we count votes in buckets
#
# "count_buckets" maps an item name to its total number of votes. Within a
# single ballot an item counts at most 3 times. "math_loser_count3" is the
# number of ballots that named at least one item more than 3 times; such a
# ballot is counted once, no matter how many items exceed the limit.
# The cap is applied to every over-limit item, independently of whether the
# ballot has already been counted in "math_loser_count3".
count_buckets = Hash.new(0)
math_loser_count3 = 0
ballots.each do |ballot|
  # votes per item inside this one ballot
  ballot_count_buckets = Hash.new(0)
  ballot.each { |item| ballot_count_buckets[item] += 1 }

  math_loser_count3 += 1 if ballot_count_buckets.any? { |_item, count| count > 3 }

  ballot_count_buckets.each do |item, count|
    count_buckets[item] += [count, 3].min
  end
end
# 3. output
#
# Prints a German summary to STDOUT: the number of ballots, how many ballots
# had text left over beyond the parsed items, how many named an item more
# than 3 times, and then one line per item with its vote count, highest
# count first. The item lines look like wiki list markup with a link.
puts "Es gab bei dem Umfrageteil #{ballots.length} Abgaben."
puts
puts "Dabei gab es #{math_loser_count20} Abgaben, wo mehr als 20 Anträge benannt waren. Hier haben wir nur die ersten 20 gezählt."
puts
puts "Des Weiteren gab es #{math_loser_count3} Abgaben, wo gleiche Anträge mehr als 3 mal benannt waren. Auch hier haben wir jeweils nur 3 gezählt."
puts
puts "Ergebnisse (Antragsnummer und Anzahl Nennung):"
# sort ascending by count, then walk the result backwards
ranking = count_buckets.sort { |p1, p2| p1[1] <=> p2[1] }
ranking.reverse_each do |item, count|
  puts "# [[Antrag:Bundesparteitag_2012.2/Antragsportal/#{item}|#{item}]] – #{count}"
end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement