Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/perl
- use strict;
- use warnings;
- my $file = $ARGV[0] or die "No file specified";
- my $outdir = $ARGV[1] or die "no outdir specified";
- mkdir $outdir unless (-e $outdir);
- my %scores = ();
- my %power = ();
- my %weird = ();
- my %teamCnt = ();
- my %volatile = ();
- sub read_csv($) {
- my $file = shift;
- open(DATA, '<', $file) or die "could not open $file";
- my $header = <DATA>;
- chomp $header;
- my ($homeTeam, $homeScore, $awayTeam, $awayScore) = (0, 0, 0, 0);
- my @fields = split(/,\s*/, $header);
- for my $i (0 .. $#fields) {
- if ($fields[$i] =~ /home_team/i) {
- $homeTeam = $i;
- }
- if ($fields[$i] =~ /away_team/i) {
- $awayTeam = $i;
- }
- if ($fields[$i] =~ /home_points/i) {
- $homeScore = $i;
- }
- if ($fields[$i] =~ /away_points/i) {
- $awayScore = $i;
- }
- }
- my $i = 0;
- while (my $line = <DATA>){
- chomp $line;
- @fields = split(/,\s*/, $line);
- next if ($fields[$homeScore] eq '');
- $scores{$i}{'t1'} = $fields[$homeTeam];
- $scores{$i}{'t1s'} = $fields[$homeScore];
- $teamCnt{$fields[$homeTeam]} = (exists $teamCnt{$fields[$homeTeam]}) ? $teamCnt{$fields[$homeTeam]} + 1 : 1;
- $scores{$i}{'t2'} = $fields[$awayTeam];
- $scores{$i}{'t2s'} = $fields[$awayScore];
- $teamCnt{$fields[$awayTeam]} = (exists $teamCnt{$fields[$awayTeam]}) ? $teamCnt{$fields[$awayTeam]} + 1 : 1;
- $i++;
- }
- #foreach my $tm (sort { $teamCnt{$b} <=> $teamCnt{$a} } (keys %teamCnt)){
- # DBG missing data issues
- #print "$teamCnt{$tm} $tm\n";
- #}
- open(REMOVED, '>', "$outdir/removed.txt") or die "couldn't open removed.";
- # Remove teams who have too few connections to the dataset.
- my $removed;
- do {
- $removed = 0;
- foreach my $team (keys %teamCnt) {
- if ($teamCnt{$team} le 3) {
- if ($teamCnt{$team} ge 3) {
- print "$team\n";
- next;
- }
- foreach my $matchup (keys %scores) {
- #next unless (exists $scores{$matchup});
- if ($scores{$matchup}{'t1'} eq $team or $scores{$matchup}{'t2'} eq $team) {
- $teamCnt{$scores{$matchup}{'t2'}}--;
- $teamCnt{$scores{$matchup}{'t1'}}--;
- delete $scores{$matchup};
- $removed++;
- } }
- delete $teamCnt{$team} unless ($teamCnt{$team} ge 1);
- print REMOVED "Removed $team\n";
- }
- }
- } while ($removed gt 0);
- close(REMOVED);
- #foreach my $tm (sort { $teamCnt{$b} <=> $teamCnt{$a} } (keys %teamCnt)){
- # DBG missing data issues
- #print "$teamCnt{$tm} $tm\n";
- #}
- }
- sub calculate_importance($$) {
- my ($powerDiff, $scoreDiff) = @_;
- if ($powerDiff lt 0) {
- # It's easier to think from the perspective of the higher ranked team.
- $powerDiff = 0 - $powerDiff;
- $scoreDiff = 0 - $scoreDiff;
- }
- # Close games and upsets get capped at weight 1.2 - NVM, no reason to.
- #if ($scoreDiff le -10) {
- # return 1.2;
- #}
- # by capping at this point value we ensure nobody will get fewer power for scoring more.
- if ($scoreDiff ge 55){
- return .55;
- }
- # Calculate importance of game which decreases as blowout status goes up.
- # 10 pt win = 1
- # 20 pt win = .9
- # 30 pt win = .8
- # 40 pt win = .7
- # 50 pt win = .6
- # 55+pt win = .55 - magical cutoff where higher victories would mean less points for winning.
- return (1 - (($scoreDiff - 10)/100));
- }
- sub calculate_deltas($$$$){
- my ($t1Power, $t2Power, $scoreDiff, $velocity) = @_;
- # scoreDiff = t1 - t2, so t1 should go up if they win big
- my $t1Target = $t2Power + $scoreDiff;
- my $t2Target = $t1Power - $scoreDiff;
- $velocity = $velocity* calculate_importance($t1Power - $t2Power, $scoreDiff);
- return ((($t1Target-$t1Power)*$velocity),(($t2Target-$t2Power)*$velocity));
- }
- sub init_power() {
- foreach my $i (keys %scores) {
- $power{$scores{$i}{'t1'}} = 100;
- $power{$scores{$i}{'t2'}} = 100;
- }
- }
- sub run_round($) {
- my ($velocity) = @_;
- my %new_power = %power;
- foreach my $i (keys %scores) {
- my $t1p = $power{$scores{$i}{'t1'}};
- my $t2p = $power{$scores{$i}{'t2'}};
- my $diff = $scores{$i}{'t1s'} - $scores{$i}{'t2s'};
- my ($d1, $d2) = calculate_deltas($t1p, $t2p, $diff, $velocity);
- $new_power{$scores{$i}{'t1'}} += $d1;
- $new_power{$scores{$i}{'t2'}} += $d2;
- }
- %power = %new_power;
- }
- sub print_sorted_power(%){
- my %pwr = @_;
- my @sorted = sort {$pwr{$b} <=> $pwr{$a}} (keys %pwr);
- open (RESULTS, '>', "$outdir/results.txt") or die "Couldn't open results";
- foreach my $team (@sorted) {
- print RESULTS "$team: $pwr{$team}\n";
- }
- }
- read_csv($file);
- init_power();
- my $roundMax = 4000;
- my $velConst = 0.1;
- for (my $r = 0; $r < $roundMax; $r++) {
- # my $velocity = $velConst - $velConst*($r/$roundMax);
- # run_round($velocity);
- run_round($velConst);
- }
- print_sorted_power(%power);
- foreach my $tm (keys %teamCnt) {
- $volatile{$tm} = 0;
- }
- foreach my $i (keys %scores) {
- my $t1p = $power{$scores{$i}{'t1'}};
- my $t2p = $power{$scores{$i}{'t2'}};
- my $diff = ($scores{$i}{'t1s'} - $scores{$i}{'t2s'}) - ($t1p -$t2p) ;
- my $name = $scores{$i}{'t1'} . '-' . $scores{$i}{'t2'};
- $weird{$name} = $diff;
- $volatile{$scores{$i}{'t1'}} += abs($diff);
- $volatile{$scores{$i}{'t2'}} += abs($diff);
- }
- open (WEIRD_GAMES, '>', "$outdir/weird_games.txt") or die "Couldn't open weird games";
- foreach my $wtf (sort {abs($weird{$b}) <=> abs($weird{$a})} (keys %weird)) {
- print WEIRD_GAMES "$wtf $weird{$wtf}\n";
- }
- close (WEIRD_GAMES);
- open (WEIRD_TEAMS, '>', "$outdir/weird_teams.txt") or die "Couldn't open weird teams";
- foreach my $v (sort {$volatile{$b} / $teamCnt{$b} <=> $volatile{$a} / $teamCnt{$a}} (keys %volatile)) {
- my $volAve = $volatile{$v} / $teamCnt{$v};
- print WEIRD_TEAMS "$v $volAve\n";
- }
- close(WEIRD_TEAMS);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement