Advertisement
Guest User

Untitled

a guest
Oct 27th, 2019
141
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.46 KB | None | 0 0
  1. #!/usr/bin/perl
  2. use strict;
  3. use warnings;
  4.  
  5. my $file1 = $ARGV[0] or die "No file1 specified";
  6. my $file2 = $ARGV[1] or die "No file2 specified";
  7. my $outdir = $ARGV[2] or die "no outdir specified";
  8. mkdir $outdir unless (-e $outdir);
  9.  
  10. my %scores = ();
  11. my %power = ();
  12. my %weird = ();
  13. my %teamCnt = ();
  14. my %volatile = ();
  15.  
  16. my %fixMapping = (
  17. 'Cal' => 'California',
  18. 'Miami (FL)' => 'Miami',
  19. 'W Michigan' => 'Western Michigan',
  20. 'E Michigan' => 'Eastern Michigan',
  21. 'UVA' => 'Virginia',
  22. 'FSU' => 'Florida State',
  23. 'Hawaii' => 'Hawai\'i',
  24. 'OSU' => 'Ohio State',
  25. 'UNC' => 'North Carolina',
  26. 'USF' => 'South Florida',
  27. 'FIU' => 'Florida International',
  28. 'FAU' => 'Florida Atlantic',
  29. 'UConn' => 'Connecticut',
  30. 'ECU' => 'Eastern Carolina'
  31. );
  32.  
  33. sub read_csv2($) {
  34. my $file = shift;
  35. open(DATA, '<', $file) or die "could not open $file";
  36. my $header = <DATA>;
  37. chomp $header;
  38. my ($homeTeam, $homeScore, $awayTeam, $awayScore) = (0, 0, 0, 0);
  39. my @fields = split(/,\s*/, $header);
  40. for my $i (0 .. $#fields) {
  41. if ($fields[$i] =~ /Home team/i) {
  42. $homeTeam = $i;
  43. }
  44. if ($fields[$i] =~ /Vis team/i) {
  45. $awayTeam = $i;
  46. }
  47. if ($fields[$i] =~ /score/i) {
  48. ($homeTeam > $awayTeam) ? $homeScore = $i : $awayScore = $i;
  49. }
  50. }
  51. my $i = 100000;
  52. while (my $line = <DATA>){
  53. chomp $line;
  54. $line =~ s/St,/State,/g;
  55. @fields = split(/,\s*/, $line);
  56. next if ($fields[$homeScore] eq '');
  57. $scores{$i}{'t1'} = (exists $fixMapping{$fields[$homeTeam]}) ? $fixMapping{$fields[$homeTeam]} : $fields[$homeTeam];
  58. $scores{$i}{'t1s'} = $fields[$homeScore];
  59. $teamCnt{$scores{$i}{'t1'}} = (exists $teamCnt{$scores{$i}{'t1'}}) ? $teamCnt{$scores{$i}{'t1'}} + 1 : 1;
  60.  
  61. $scores{$i}{'t2'} = (exists $fixMapping{$fields[$awayTeam]}) ? $fixMapping{$fields[$awayTeam]} : $fields[$awayTeam];
  62. $scores{$i}{'t2s'} = $fields[$awayScore];
  63. $teamCnt{$scores{$i}{'t2'}} = (exists $teamCnt{$scores{$i}{'t2'}}) ? $teamCnt{$scores{$i}{'t2'}} + 1 : 1;
  64.  
  65. $i++;
  66. }
  67. }
  68.  
  69.  
  70.  
  71. sub read_csv($) {
  72. my $file = shift;
  73. open(DATA, '<', $file) or die "could not open $file";
  74. my $header = <DATA>;
  75. chomp $header;
  76. my ($homeTeam, $homeScore, $awayTeam, $awayScore) = (0, 0, 0, 0);
  77. my @fields = split(/,\s*/, $header);
  78. for my $i (0 .. $#fields) {
  79. if ($fields[$i] =~ /home_team/i) {
  80. $homeTeam = $i;
  81. }
  82. if ($fields[$i] =~ /away_team/i) {
  83. $awayTeam = $i;
  84. }
  85. if ($fields[$i] =~ /home_points/i) {
  86. $homeScore = $i;
  87. }
  88. if ($fields[$i] =~ /away_points/i) {
  89. $awayScore = $i;
  90. }
  91. }
  92. my $i = 0;
  93. while (my $line = <DATA>){
  94. chomp $line;
  95. @fields = split(/,\s*/, $line);
  96. next if ($fields[$homeScore] eq '');
  97. $scores{$i}{'t1'} = $fields[$homeTeam];
  98. $scores{$i}{'t1s'} = $fields[$homeScore];
  99. $teamCnt{$fields[$homeTeam]} = (exists $teamCnt{$fields[$homeTeam]}) ? $teamCnt{$fields[$homeTeam]} + 1 : 1;
  100.  
  101. $scores{$i}{'t2'} = $fields[$awayTeam];
  102. $scores{$i}{'t2s'} = $fields[$awayScore];
  103. $teamCnt{$fields[$awayTeam]} = (exists $teamCnt{$fields[$awayTeam]}) ? $teamCnt{$fields[$awayTeam]} + 1 : 1;
  104.  
  105. $i++;
  106. }
  107.  
  108. #foreach my $tm (sort { $teamCnt{$b} <=> $teamCnt{$a} } (keys %teamCnt)){
  109. # DBG missing data issues
  110. #print "$teamCnt{$tm} $tm\n";
  111. #}
  112.  
  113. open(REMOVED, '>', "$outdir/removed.txt") or die "couldn't open removed.";
  114. # Remove teams who have too few connections to the dataset.
  115. my $removed;
  116. do {
  117. $removed = 0;
  118. foreach my $team (keys %teamCnt) {
  119. if ($teamCnt{$team} le 3) {
  120. if ($teamCnt{$team} ge 3) {
  121. print "$team\n";
  122. next;
  123. }
  124. foreach my $matchup (keys %scores) {
  125. #next unless (exists $scores{$matchup});
  126. if ($scores{$matchup}{'t1'} eq $team or $scores{$matchup}{'t2'} eq $team) {
  127. $teamCnt{$scores{$matchup}{'t2'}}--;
  128. $teamCnt{$scores{$matchup}{'t1'}}--;
  129. delete $scores{$matchup};
  130. $removed++;
  131. } }
  132. delete $teamCnt{$team} unless ($teamCnt{$team} ge 1);
  133. print REMOVED "Removed $team\n";
  134. }
  135. }
  136. } while ($removed gt 0);
  137. close(REMOVED);
  138.  
  139. #foreach my $tm (sort { $teamCnt{$b} <=> $teamCnt{$a} } (keys %teamCnt)){
  140. # DBG missing data issues
  141. #print "$teamCnt{$tm} $tm\n";
  142. #}
  143.  
  144. }
  145.  
  146. sub calculate_importance($$) {
  147. my ($powerDiff, $scoreDiff) = @_;
  148. if ($powerDiff lt 0) {
  149. # It's easier to think from the perspective of the higher ranked team.
  150. $powerDiff = 0 - $powerDiff;
  151. $scoreDiff = 0 - $scoreDiff;
  152. }
  153. # Close games and upsets get capped at weight 1.2 - NVM, no reason to.
  154. #if ($scoreDiff le -10) {
  155. # return 1.2;
  156. #}
  157. # by capping at this point value we ensure nobody will get fewer power for scoring more.
  158. if ($scoreDiff ge 55){
  159. return .55;
  160. }
  161. # Calculate importance of game which decreases as blowout status goes up.
  162. # 10 pt win = 1
  163. # 20 pt win = .9
  164. # 30 pt win = .8
  165. # 40 pt win = .7
  166. # 50 pt win = .6
  167. # 55+pt win = .55 - magical cutoff where higher victories would mean less points for winning.
  168. return (1 - (($scoreDiff - 10)/100));
  169. }
  170.  
  171. sub calculate_deltas($$$$){
  172. my ($t1Power, $t2Power, $scoreDiff, $velocity) = @_;
  173. # scoreDiff = t1 - t2, so t1 should go up if they win big
  174. my $t1Target = $t2Power + $scoreDiff;
  175. my $t2Target = $t1Power - $scoreDiff;
  176. #$velocity = $velocity* calculate_importance($t1Power - $t2Power, $scoreDiff);
  177. return ((($t1Target-$t1Power)*$velocity),(($t2Target-$t2Power)*$velocity));
  178. }
  179.  
  180. sub init_power() {
  181. foreach my $i (keys %scores) {
  182. $power{$scores{$i}{'t1'}} = 100;
  183. $power{$scores{$i}{'t2'}} = 100;
  184. }
  185. }
  186.  
  187. sub run_round($) {
  188. my ($velocity) = @_;
  189. my %new_power = %power;
  190. foreach my $i (keys %scores) {
  191. my $t1p = $power{$scores{$i}{'t1'}};
  192. my $t2p = $power{$scores{$i}{'t2'}};
  193. my $diff = $scores{$i}{'t1s'} - $scores{$i}{'t2s'};
  194. my ($d1, $d2) = calculate_deltas($t1p, $t2p, $diff, $velocity);
  195. $new_power{$scores{$i}{'t1'}} += $d1;
  196. $new_power{$scores{$i}{'t2'}} += $d2;
  197. }
  198. %power = %new_power;
  199. }
  200.  
  201. sub print_sorted_power(%){
  202. my %pwr = @_;
  203. my @sorted = sort {$pwr{$b} <=> $pwr{$a}} (keys %pwr);
  204. open (RESULTS, '>', "$outdir/results.txt") or die "Couldn't open results";
  205. foreach my $team (@sorted) {
  206. print RESULTS "$team: $pwr{$team}\n";
  207. }
  208. }
  209.  
  210.  
  211. read_csv2($file2);
  212. read_csv($file1); # also removes extra games with insufficient linkage.
  213. init_power();
  214. my $roundMax = 4000;
  215. my $velConst = 0.1;
  216. for (my $r = 0; $r < $roundMax; $r++) {
  217. # my $velocity = $velConst - $velConst*($r/$roundMax);
  218. # run_round($velocity);
  219. run_round($velConst);
  220. }
  221.  
  222.  
  223. print_sorted_power(%power);
  224.  
  225. foreach my $tm (keys %teamCnt) {
  226. $volatile{$tm} = 0;
  227. }
  228.  
  229. foreach my $i (keys %scores) {
  230. my $t1p = $power{$scores{$i}{'t1'}};
  231. my $t2p = $power{$scores{$i}{'t2'}};
  232. my $diff = ($scores{$i}{'t1s'} - $scores{$i}{'t2s'}) - ($t1p -$t2p) ;
  233. my $name = $scores{$i}{'t1'} . '-' . $scores{$i}{'t2'};
  234. $weird{$name} = $diff;
  235. $volatile{$scores{$i}{'t1'}} += abs($diff);
  236. $volatile{$scores{$i}{'t2'}} += abs($diff);
  237. }
  238.  
  239. open (WEIRD_GAMES, '>', "$outdir/weird_games.txt") or die "Couldn't open weird games";
  240. foreach my $wtf (sort {abs($weird{$b}) <=> abs($weird{$a})} (keys %weird)) {
  241. print WEIRD_GAMES "$wtf $weird{$wtf}\n";
  242. }
  243. close (WEIRD_GAMES);
  244.  
  245.  
  246. open (WEIRD_TEAMS, '>', "$outdir/weird_teams.txt") or die "Couldn't open weird teams";
  247. foreach my $v (sort {$volatile{$b} / $teamCnt{$b} <=> $volatile{$a} / $teamCnt{$a}} (keys %volatile)) {
  248. my $volAve = $volatile{$v} / $teamCnt{$v};
  249. print WEIRD_TEAMS "$v $volAve\n";
  250. }
  251. close(WEIRD_TEAMS);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement