SHOW:
|
|
- or go back to the newest paste.
1 | #!/usr/bin/env ruby | |
2 | # Simple tool to get done with some kind of free-form vote ballots | |
3 | # | |
4 | # written for the Bundesparteitag 2012.2 Antragsreihenfolgeumfrage | |
5 | # of the popcorn-industry reviving Piratenpartei Deutschland. | |
6 | # | |
7 | # Copyright (c) 2012, Stephan Beyer <s-beyer at gmx.net> | |
8 | # All rights reserved. | |
9 | # | |
10 | # Redistribution and use in source and binary forms, with or without | |
11 | # modification, are permitted provided that the following conditions are met: | |
12 | # | |
13 | # 1. Redistributions of source code must retain the above copyright notice, this | |
14 | # list of conditions and the following disclaimer. | |
15 | # 2. Redistributions in binary form must reproduce the above copyright notice, | |
16 | # this list of conditions and the following disclaimer in the documentation | |
17 | # and/or other materials provided with the distribution. | |
18 | # | |
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
20 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
21 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | |
23 | # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |
24 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
25 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |
26 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
27 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
28 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 | # | |
30 | # The views and conclusions contained in the software and documentation are those | |
31 | # of the author and should not be interpreted as representing official policies, | |
32 | # either expressed or implied, of the any project. | |
33 | ||
# When true, every ballot that has unparsed trailing text but fewer than 20
# recognised items is dumped and the script waits for RETURN before going on.
DEBUG = true

# Regex configuration data.
#
# Each list holds the prefix spellings (regex fragments) that voters used for
# one category of motion. A run of digits is appended to every fragment below.

# Prefixes accepted for programme motions.
programmantraege = [
  "PA:? ? ?",
  '\(\)PA',
  "PS", # typo fixes are here, too
  "Programma PA",
  "Programmantrag: ?P?A?",
  "Programmantrag: PS",
  "A",
  "Eure c&p-Vorlage:PA",
]
# Prefixes accepted for position papers.
positionspapiere = [
  "PO? ? ?",
  "Positionspapier:? P?",
]
# Prefixes accepted for miscellaneous motions.
sonstige = [
  "X",
  "bX",
]
# Prefixes accepted for statute amendments. The octal escapes are the UTF-8
# bytes of "Ä" (\303\204), "ä" (\303\244) and the combining diaeresis (\314\210).
satzungsantraege = [
  "S\303\204A ?",
  "SA",
  "S\303\244A",
  "SA\303\204",
  "S\303\204",
  "PS\303\204",
  "SA\314\210A",
  "Satzungantrag: S\303\204A",
]

# One alternation over every known prefix, each followed by the motion number.
alle = (programmantraege + positionspapiere + sonstige + satzungsantraege)
       .map { |prefix| prefix + "[0-9]+" }
       .join("|")

# Separator noise allowed before, between and after items.
rauschen = '[ \t,;]*'

# Up to 20 optional items, each in its own capture group (1..20). Everything
# is optional, so the pattern always matches at the start of the input;
# whatever could not be consumed ends up in MatchData#post_match.
# \A anchors at the start of the string rather than at the start of any line.
final = "\\A" + "#{rauschen}(#{alle})?" * 20 + rauschen

regex = Regexp.new(final, Regexp::IGNORECASE)
72 | ||
# Hand-parsed ballots, keyed by the 1-based line number of the results file.
# A line listed here is taken from this table instead of being run through the
# regex. Entries marked "corrections only" are kept as in the original script.
# Line 1996 appears exactly once, with the corrected item "PA250".
manual_parse = {
  519 => ["P035","P035","P035","PA067","PA439"],
  632 => ["PA095","PA095","PA095"],
  960 => ["PA221","PA222","PA223","PA224","PA225","PA226","PA008","PA089","PA150","PA151","PA152","PA153","PA164","PA165","PA172","PA246","PA254","PA278","PA288","PA293"],
  962 => ["PA188","PA188","PA188","PA207","PA207","PA207","PA210","PA210","PA210"],
  1996 => ["PA129","PA103","PA616","PA060","PA200","PA250","PA305","P019","PA174","PA240","PA633","PA375","PA195","PA109","PA110","PA111","PA112","PA113","PA248","PA141"], # corrections only
  2097 => ["PA377","PA377","PA377"],
  2246 => ["PA600"], # corrections only
  2297 => ["PA095","PA049","PA444","PA094","PA092","PA129","PA054","PA403","PA035","PA185","PA006","PA030","PA501","PA298","PA124","PA036","PA083","PA197","S\303\204A030","PA197"], # corrections only
  2376 => ["PA101","PA062","PA246","PA511","PA518"],
  2558 => ["PA119","PA119","PA119","PA051","PA234","PA237","PA610","PA441","PA248","PA281","PA527","PA087","PA207","PA629","PA100","PA101","PA038","PA552"],
  2562 => ["S\303\204A015","PA003","PA066","PA122","X012","PA003"],
  2857 => ["PA457","PA425","PA001","PA186","PA092","PA092","S\303\204A005","PA535","PA055","PA635","PA048","PA403","S\303\204A008","PA395","PA053","PA053","PA220","PA542","PA169","PA635"],
  2904 => ["PA188","PA545","PA001","X014","PA174","PA379","PA637","PA174","PA455"], # corrections only
  2949 => ["PA014","PA034","PA032","PA007","PA006","PA112","PA075","PA078","PA096","PA158"], # corrections only
  3000 => ["PA012","PA079","PA087","PA001","PA299","PA059","PA098","PA131","PA545","PA541","PA457","PA092","PA399","PA240","PA466","PA140","PA150","PA192","PA001","PA240"], # corrections only
}

# Trailing text that may remain after the recognised items without the ballot
# being counted as having unparsed trailing input. Compared for exact equality
# with the unparsed remainder.
ignore_post_match = [
  "Pr",
  "P",
  "egal",
  "./.",
  "?",
  "gehts auch komplizierter?",
  "Ich favorisiere Keinen Antrag. Einige sind fraglich ob diese gestellt werden sollen, den sie verschwenden wertvolle Zeit.",
]
99 | ||
# Normalisation rules: ordered [pattern, replacement] pairs that rewrite any
# accepted spelling of an item to "<canonical prefix><three-digit number>".
# For each category, in this order:
#   * one digit     -> padded with "00"
#   * two digits    -> padded with "0"
#   * three digits  -> kept, an optional extra leading "0" is dropped
# Items with other digit counts match no rule and are left unchanged.
# \A and \z anchor the whole item, so only complete items are rewritten.
unify = [
  [programmantraege, "PA"],
  [positionspapiere, "P"],
  [sonstige, "X"],
  [satzungsantraege, "S\303\204A"],
].flat_map do |spellings, canonical|
  prefixes = spellings.join("|")
  [
    [Regexp.new("\\A(#{prefixes})([0-9]{1})\\z", Regexp::IGNORECASE), "#{canonical}00\\2"],
    [Regexp.new("\\A(#{prefixes})([0-9]{2})\\z", Regexp::IGNORECASE), "#{canonical}0\\2"],
    [Regexp.new("\\A(#{prefixes})0?([0-9]{3})\\z", Regexp::IGNORECASE), "#{canonical}\\2"],
  ]
end
114 | ||
# the actual code
# 1. read max 20 items per voter into array "ballots"
#
# For every line after the first one of the results file:
#   * a line number listed in manual_parse is taken from that table verbatim;
#   * otherwise the sixth '","'-separated field is matched against regex,
#     each captured item is normalised with the unify rules, and the resulting
#     list is stored unless it is empty.
# math_loser_count20 counts ballots whose unparsed remainder is neither empty
# nor listed in ignore_post_match.
# The script exits with status 1 when a line cannot be processed.
i = 0
ballots = []
math_loser_count20 = 0
# Block form of File.open: the handle is closed when the block is left,
# including when we bail out through exit.
File.open("results-survey71446.csv") do |resfile|
  resfile.each_line do |l|
    STDERR.print("\r")
    i += 1
    next if i == 1 # the first line is never a ballot (presumably the CSV header)
    STDERR.print("#{i} ")

    if manual_parse.key?(i)
      ballots.push(manual_parse[i])
      STDERR.puts "manually parsed"
      next
    end

    input = l.split(/","/)[5]
    # A line with fewer than six fields used to die with NoMethodError on nil;
    # report it explicitly instead.
    if input.nil?
      STDERR.puts("Line #{i} has no sixth field:")
      STDERR.puts(l)
      STDERR.puts("Exiting...")
      exit 1
    end
    next if input.empty?

    m = regex.match(input)
    if m.nil?
      STDERR.puts("Can't parse input:")
      STDERR.puts(input)
      STDERR.puts("Exiting...")
      exit 1 # non-zero status: this is a failure, not a normal end
    end

    leftover = m.post_match
    unless leftover.empty? || ignore_post_match.member?(leftover)
      # count people who voted for more than 20 items
      # (in Debug mode, print out information and wait for RETURN when the
      # leftover text is NOT explained by all 20 slots being used)
      math_loser_count20 += 1
      if DEBUG && m[20].nil?
        p i
        p m
        p leftover
        STDIN.readline
      end
    end

    # Captures are filled front to back; stop at the first unused slot
    # (for some other regex we might want to skip instead of stopping).
    tmp_ballot = m.captures.take_while { |item| !item.nil? }.map do |item|
      unify.inject(item) { |text, (pattern, replacement)| text.gsub(pattern, replacement) }
    end
    ballots.push(tmp_ballot) unless tmp_ballot.empty?
  end
end
166 | ||
167 | ||
# 2. we count votes in buckets
#
# count_buckets maps each item to its total number of mentions over all
# ballots, where a single ballot contributes at most 3 mentions per item.
# math_loser_count3 is the number of ballots (not items) on which at least one
# item was named more than 3 times.
#
# The cap is applied to EVERY over-named item of a ballot. Previously it sat
# inside the "count this ballot only once" guard, so only the first over-named
# item was capped and any further ones were counted in full.
count_buckets = Hash.new(0)
math_loser_count3 = 0
ballots.each do |ballot|
  # mentions per item within this one ballot
  ballot_count_buckets = Hash.new(0)
  ballot.each { |item| ballot_count_buckets[item] += 1 }

  math_loser_count3 += 1 if ballot_count_buckets.values.any? { |count| count > 3 }

  ballot_count_buckets.each do |item, count|
    count_buckets[item] += [count, 3].min
  end
end
187 | ||
# 3. output
#
# Prints the summary (number of ballots and the two over-voting counters) and
# then one wiki list line per item, highest count first.
puts <<EOS
Es gab bei dem Umfrageteil #{ballots.length} Abgaben.

Dabei gab es #{math_loser_count20} Abgaben, wo mehr als 20 Anträge benannt waren. Hier haben wir nur die ersten 20 gezählt.

Des Weiteren gab es #{math_loser_count3} Abgaben, wo gleiche Anträge mehr als 3 mal benannt waren. Auch hier haben wir jeweils nur 3 gezählt.

Ergebnisse (Antragsnummer und Anzahl Nennung):
EOS

# Sort ascending by count, then walk the result backwards.
ranking = count_buckets.sort { |left, right| left.last <=> right.last }
ranking.reverse_each do |item, count|
  puts "# [[Antrag:Bundesparteitag_2012.2/Antragsportal/#{item}|#{item}]] – #{count}"
end