View difference between Paste ID: zML3ddFv and D3crYwr2
SHOW: | | - or go back to the newest paste.
1
#!/usr/bin/env ruby
2
# Simple tool to get done with some kind of free-form vote ballots
3
#
4
# written for the Bundesparteitag 2012.2 Antragsreihenfolgeumfrage
5
# of the popcorn-industry reviving Piratenpartei Deutschland.
6
#
7
# Copyright (c) 2012, Stephan Beyer <s-beyer at gmx.net>
8
# All rights reserved.
9
#
10
# Redistribution and use in source and binary forms, with or without
11
# modification, are permitted provided that the following conditions are met:
12
#
13
# 1. Redistributions of source code must retain the above copyright notice, this
14
#    list of conditions and the following disclaimer.
15
# 2. Redistributions in binary form must reproduce the above copyright notice,
16
#    this list of conditions and the following disclaimer in the documentation
17
#    and/or other materials provided with the distribution.
18
#
19
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
#
30
# The views and conclusions contained in the software and documentation are those
31
# of the author and should not be interpreted as representing official policies,
32
# either expressed or implied, of any project.
33
34
# When true, the reading loop prints the line number, the match and the
# unparsed remainder of a suspicious ballot and waits for RETURN.
DEBUG = true

# Regex configuration data.
#
# Each list holds regex fragments for the prefixes voters typed for one kind
# of motion (typos included). A motion reference is one of these prefixes
# directly followed by digits.
programmantraege = [
  "PA:? ? ?",
  '\(\)PA',
  "PS", # typo ("Tippgischt") fixes are here, too
  "Programma PA",
  "Programmantrag: ?P?A?",
  "Programmantrag: PS",
  "A",
  "Eure c&p-Vorlage:PA",
]
positionspapiere = [
  "PO? ? ?",
  "Positionspapier:? P?",
]
sonstige = [
  "X",
  "bX",
]
# The octal escapes spell out the UTF-8 bytes of the umlaut variants
# (\303\204 and \303\244 precomposed, \314\210 as a combining diaeresis).
satzungsantraege = [
  "S\303\204A ?",
  "SA",
  "S\303\244A",
  "SA\303\204",
  "S\303\204",
  "PS\303\204",
  "SA\314\210A",
  "Satzungantrag: S\303\204A",
]

# One big alternation: any known prefix followed by at least one digit.
alle = (programmantraege + positionspapiere + sonstige + satzungsantraege)
  .map { |prefix| "#{prefix}[0-9]+" }
  .join("|")

# "Noise" that may separate two motion references.
rauschen = '[ \t,;]*'

# Up to 20 optional references, each preceded by noise. Every reference is its
# own capture group, so the match data has groups 1..20; whatever follows the
# match is available as post_match.
final = "^" + ("#{rauschen}(#{alle})?") * 20 + rauschen

regex = Regexp.new(final, Regexp::IGNORECASE)
72
73
# Hand-parsed ballots. The reading loop looks a line up here by its 1-based
# line number before trying the regex; if an entry exists it is used as the
# ballot verbatim (no regex parsing, no normalisation).
#
# The entry for line 1996 is the newer revision ("PA250"); the superseded
# revision had "PS250" in that position.
manual_parse = {
  519 => ["P035","P035","P035","PA067","PA439"],
  632 => ["PA095","PA095","PA095"],
  960 => ["PA221","PA222","PA223","PA224","PA225","PA226","PA008","PA089","PA150","PA151","PA152","PA153","PA164","PA165","PA172","PA246","PA254","PA278","PA288","PA293"],
  962 => ["PA188","PA188","PA188","PA207","PA207","PA207","PA210","PA210","PA210"],
  1996 => ["PA129","PA103","PA616","PA060","PA200","PA250","PA305","P019","PA174","PA240","PA633","PA375","PA195","PA109","PA110","PA111","PA112","PA113","PA248","PA141"], # corrections only
  2097 => ["PA377","PA377","PA377"],
  2246 => ["PA600"], # corrections only
  2297 => ["PA095","PA049","PA444","PA094","PA092","PA129","PA054","PA403","PA035","PA185","PA006","PA030","PA501","PA298","PA124","PA036","PA083","PA197","S\303\204A030","PA197"], # corrections only
  2376 => ["PA101","PA062","PA246","PA511","PA518"],
  2558 => ["PA119","PA119","PA119","PA051","PA234","PA237","PA610","PA441","PA248","PA281","PA527","PA087","PA207","PA629","PA100","PA101","PA038","PA552"],
  2562 => ["S\303\204A015","PA003","PA066","PA122","X012","PA003"],
  2857 => ["PA457","PA425","PA001","PA186","PA092","PA092","S\303\204A005","PA535","PA055","PA635","PA048","PA403","S\303\204A008","PA395","PA053","PA053","PA220","PA542","PA169","PA635"],
  2904 => ["PA188","PA545","PA001","X014","PA174","PA379","PA637","PA174","PA455"], # corrections only
  2949 => ["PA014","PA034","PA032","PA007","PA006","PA112","PA075","PA078","PA096","PA158"], # corrections only
  3000 => ["PA012","PA079","PA087","PA001","PA299","PA059","PA098","PA131","PA545","PA541","PA457","PA092","PA399","PA240","PA466","PA140","PA150","PA192","PA001","PA240"], # corrections only
}
90
# Leftover text (whatever follows the regex match) that is known to be
# harmless. A ballot whose remainder is exactly one of these strings is not
# counted as having named more than 20 items.
ignore_post_match = [
  "Pr",
  "P",
  "egal",
  "./.",
  "?",
  "gehts auch komplizierter?",
  "Ich favorisiere Keinen Antrag. Einige sind fraglich ob diese gestellt werden sollen, den sie verschwenden wertvolle Zeit.",
].freeze
99
100
# Normalisation rules as [pattern, replacement] pairs, applied in order to
# every parsed item. For each kind of motion, any accepted prefix spelling is
# replaced by the canonical prefix and the number is zero-padded to three
# digits (a fourth, leading zero is dropped):
#   1 digit  -> <canonical>00N
#   2 digits -> <canonical>0NN
#   3 digits -> <canonical>NNN
# The rules are generated from the table below instead of being written out
# twelve times; order and content match the hand-written list.
unify = [
  [programmantraege, 'PA'],
  [positionspapiere, 'P'],
  [sonstige,         'X'],
  [satzungsantraege, "S\303\204A"],
].flat_map do |prefixes, canonical|
  alternatives = prefixes.join("|")
  [
    [Regexp.new("^(#{alternatives})([0-9]{1})$", Regexp::IGNORECASE), "#{canonical}00\\2"],
    [Regexp.new("^(#{alternatives})([0-9]{2})$", Regexp::IGNORECASE), "#{canonical}0\\2"],
    [Regexp.new("^(#{alternatives})0?([0-9]{3})$", Regexp::IGNORECASE), "#{canonical}\\2"],
  ]
end
114
115
# the actual code
# 1. read max 20 items per voter into array "ballots"
#
# Walks the survey export line by line (i is the 1-based line number). Lines
# with an entry in manual_parse use that entry as the ballot. For all other
# lines the sixth '","'-separated field is matched against regex, and every
# captured item is normalised with the unify rules. math_loser_count20 counts
# ballots that leave unrecognised text after the match.
i = 0
ballots = []
math_loser_count20 = 0
# Block form of File.open: the handle is closed even when we exit early.
File.open("results-survey71446.csv") do |resfile|
  resfile.each_line do |l|
    STDERR.print("\r")
    i += 1
    next if i == 1 # the first line is skipped (presumably the CSV header row)
    STDERR.print("#{i} ")

    # hand-corrected ballots take precedence over regex parsing
    if manual_parse.key?(i)
      ballots.push(manual_parse[i])
      STDERR.puts "manually parsed"
      next
    end

    # Naive CSV handling: split on '","' and take the sixth field. Lines with
    # fewer than six fields yield nil and are skipped like empty answers.
    input = l.split(/","/)[5]
    next if input.nil? || input.empty?

    m = regex.match(input)
    if m.nil?
      STDERR.puts("Can't parse input:")
      STDERR.puts(input)
      STDERR.puts("Exiting...")
      exit(1) # non-zero status: this is a failure, not a normal end
    end

    # count people who voted for more than 20 items
    # (in Debug mode, print out information and wait for RETURN)
    leftover = m.post_match
    if !leftover.empty? && !ignore_post_match.member?(leftover)
      math_loser_count20 += 1
      # Fewer than 20 items were parsed, so the remainder is unrecognised
      # text rather than a 21st item: show it for manual inspection.
      if DEBUG && m[20].nil?
        p i
        p m
        p leftover
        STDIN.gets # unlike readline, does not raise when STDIN is at EOF
      end
    end

    # Capture groups are filled from the front, so stop at the first nil.
    tmp_ballot = m.captures.take_while { |item| !item.nil? }.map do |item|
      unify.inject(item) { |text, (tmp_regex, tmp_to)| text.gsub(tmp_regex, tmp_to) }
    end
    ballots.push(tmp_ballot) unless tmp_ballot.empty?
  end
end
166
167
168
# 2. we count votes in buckets
#
# count_buckets maps each item to its total number of mentions over all
# ballots. Within one ballot every item contributes at most 3 mentions.
# math_loser_count3 is the number of ballots that named at least one item
# more than 3 times; each such ballot is counted once.
#
# The cap is applied to every item of a ballot. Previously it was tied to the
# "already counted this ballot" flag, so only the first over-limit item of a
# ballot was capped and any further ones were counted in full.
count_buckets = Hash.new(0)
math_loser_count3 = 0
ballots.each do |ballot|
  ballot_count_buckets = Hash.new(0)
  ballot.each { |item| ballot_count_buckets[item] += 1 }

  math_loser_count3 += 1 if ballot_count_buckets.any? { |_item, count| count > 3 }

  ballot_count_buckets.each do |item, count|
    count_buckets[item] += [count, 3].min
  end
end
187
188
# 3. output
#
# Prints the summary figures followed by one wiki-markup list line per item,
# most-mentioned first. Items with the same count are ordered by their id so
# that the listing is reproducible between runs.
puts "Es gab bei dem Umfrageteil #{ballots.length} Abgaben."
puts
puts "Dabei gab es #{math_loser_count20} Abgaben, wo mehr als 20 Anträge benannt waren. Hier haben wir nur die ersten 20 gezählt."
puts
puts "Des Weiteren gab es #{math_loser_count3} Abgaben, wo gleiche Anträge mehr als 3 mal benannt waren. Auch hier haben wir jeweils nur 3 gezählt."
puts
puts "Ergebnisse (Antragsnummer und Anzahl Nennung):"
count_buckets.sort_by { |item, count| [-count, item] }.each do |item, count|
  puts "# [[Antrag:Bundesparteitag_2012.2/Antragsportal/#{item}|#{item}]] &ndash; #{count}"
end