Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ## AUTHOR: Sebastian Schwarz
- ## schwarz.sebastian [- at -] campus.lmu.de
- ### CL ROUND OF 16 DRAWINGS
- library(data.table)
- library(plyr)
- library(ggplot2)
# GROUP WINNERS (fst) AND RUNNERS-UP (snd), ONE ENTRY PER GROUP
fst <- c("MAN", "REA", "PSG", "BAY", "CHL", "BVB", "ATL", "BAR")
snd <- c("B04", "GAL", "PIR", "MAC", "S04", "FCA", "ZEN", "ACM")

# COUNTRIES REPRESENTED AMONG BOTH WINNERS AND RUNNERS-UP
ger <- c("BAY", "B04", "S04", "BVB")
eng <- c("MAN", "CHL", "MAC", "FCA")

# GROUPS (winner, runner-up)
# NOTE(review): assigning `F` masks R's built-in shorthand for FALSE for the
# rest of the session; spell out FALSE in any code that runs after this.
A <- c("MAN", "B04")
B <- c("REA", "GAL")
C <- c("PSG", "PIR")
D <- c("BAY", "MAC")
E <- c("CHL", "S04")
F <- c("BVB", "FCA")
G <- c("ATL", "ZEN")
H <- c("BAR", "ACM")

# ALL WINNER x RUNNER-UP PAIRS
par <- CJ(fst = fst, snd = snd)

# DROP PAIRS WHERE BOTH TEAMS BELONG TO THE SAME POOL
# (same country: ger, eng; same group: A..H)
clash <- Reduce(
  `|`,
  lapply(
    list(ger, eng, A, B, C, D, E, F, G, H),
    function(pool) par$fst %in% pool & par$snd %in% pool
  )
)
par <- par[!clash]
# FUNCTION FOR A RANDOM MATCHUP DRAW
#
# Simulates one complete draw from the admissible pairings stored in the
# global data.table `par` (columns `fst` and `snd`).
#
# The group winners are visited in random order. For each winner one opponent
# is picked uniformly at random among the runners-up that are still admissible
# for that winner and have not been assigned to an earlier winner.
#
# Returns a character matrix with one row per group winner and two columns
# (winner, runner-up). Row k always belongs to the k-th entry of
# unique(par$fst), regardless of the order in which winners were visited.
#
# Signals an error when a winner has no admissible runner-up left (dead end);
# callers are expected to catch it and discard the attempt.
draw <- function() {
  # hoisted: the list of winners does not change during a draw
  winners <- unique(par$fst)
  n_winners <- length(winners)

  # runners-up already assigned, indexed by winner position ("" = none yet)
  taken <- character(n_winners)
  result <- matrix(nrow = n_winners, ncol = 2)

  for (i in sample(seq_len(n_winners), n_winners, replace = FALSE)) {
    # all remaining admissible opponents for this winner; the country and
    # group restrictions are already encoded in `par`
    candidates <- par[fst == winners[i] & !(snd %in% taken), ]

    # Fail explicitly instead of relying on the zero-length assignment
    # error that an empty candidate set would otherwise trigger further down.
    if (nrow(candidates) == 0L) {
      stop(
        "draw reached a dead end: no admissible runner-up left for ",
        winners[i],
        call. = FALSE
      )
    }

    pick <- candidates[sample(nrow(candidates), 1), ]
    result[i, ] <- as.matrix(pick)
    # remember the runner-up so it cannot be drawn a second time
    taken[i] <- pick$snd
  }

  result
}
# A draw can fail: a group winner may still need an opponent when every
# runner-up that is admissible for it has already been assigned to an earlier
# winner. draw() then raises an error, which the simulation below replaces
# with an all-NA 8 x 2 matrix.
# library(doParallel)
# cl <- makeCluster(3)
# registerDoParallel(cl)
# 1,000,000 REPLICATIONS, STACKED INTO ONE DATA.TABLE (8 ROWS PER REPLICATION)
system.time(
  dt <- data.table(
    ldply(
      1:1000000,
      function(replication) {
        tryCatch(
          draw(),
          error = function(err) {
            matrix(nrow = 8, ncol = 2)
          }
        )
      }
    )
  )
)
- # dt$V3 <- 1
- # setnames(dt, c("V1","V2","V3"))
- # # GET COMPLETE MATRIX
- # sim.matches <- dt[, list(count=length(V3)), by=list(V1,V2)][!(is.na(V1))]
- # all.matches <- CJ(V1=fst, V2=snd)
- #
- # setkey(sim.matches, V1, V2)
- # setkey(all.matches, V1, V2)
- #
- # matches <- sim.matches[all.matches]
- # matches$count[is.na(matches$count)] <- 0
- #
- # # GET TOTAL SUCCESSFUL MATCHUP DRAWS
- # total <- max(matches[,list(count=sum(count)),by=V1]$count)
- #
- # # # PLOT
- # # ggplot(matches, aes(x=V2,y=V1, label=paste(round(count/total*100,1),"%",sep=""), fill=count/total)) +
- # # geom_tile() +
- # # geom_text(colour="white", size=6) +
- # # ylab("") +xlab("") +
- # # theme(legend.position="none")
- # #
- # # # SAVE DATA
- # # save(dt, file="data_uefa")
- # setwd("C:/Users/schwa_000/Desktop/uefa/")
- # files <- dir()
- #
- # files <- files[grep("UEFA_13_12", files)]
- #
- # load(files[1])
- # dt1 <- dt
- # load(files[2])
- # dt2 <- dt
- # load(files[3])
- # dt3 <- dt
- # load(files[4])
- # dt4 <- dt
- # load(files[5])
- # dt5 <- dt
- #
- # dt <- data.table(rbind(dt1,dt2,dt3,dt4,dt5))
# ADD ID VARIABLE TO DRAWS AND SET COL NAMES
# Every replication contributes exactly 8 consecutive rows (one per group
# winner), so the replication id is derived from the row count instead of
# repeating the replication count as a literal.
dt$id <- rep(seq_len(nrow(dt) %/% 8), each = 8)
setnames(dt, c("fst", "snd", "id"))

# RESHAPE THE DATA
# failed replications consist of NA rows only; drop them
dt <- dt[!(is.na(fst))]
# one "AAA-BBB" label per pairing
ddt <- dt[, list(id, match = paste(fst, snd, sep = "-"))]
# one comma-separated string of all eight pairings per replication
dddt <- ddt[, list(draws = paste(match, collapse = ",")), by = "id"]
# how often each complete draw occurred, rarest first
ddddt <- dddt[, list(count = .N), by = draws][order(count)]

# GET ALL UNIQUE DRAWS AND GET NEW ID
# ids run from 1 to however many distinct draws the simulation produced
all.draws <- data.table(draws = unique(ddddt$draws))
all.draws[, id := seq_len(.N)]
# GET CONDITIONAL PROBABILITIES
# g0_draw holds every distinct complete draw. Each following stage keeps only
# the draws of the previous stage that contain the next fixed pairing, in the
# order listed here.
g0_draw <- all.draws
stage_draws <- Reduce(
  function(remaining, pairing) {
    remaining[grepl(pairing, remaining$draws, fixed = TRUE)]
  },
  c(
    "BAR-MAC", "MAN-PIR", "ATL-ACM", "PSG-B04",
    "CHL-GAL", "REA-S04", "BVB-ZEN", "BAY-FCA"
  ),
  accumulate = TRUE,
  init = g0_draw
)
# stage_draws[[1]] is the unfiltered table, so stage k sits at position k + 1
g1_draw <- stage_draws[[2]]
g2_draw <- stage_draws[[3]]
g3_draw <- stage_draws[[4]]
g4_draw <- stage_draws[[5]]
g5_draw <- stage_draws[[6]]
g6_draw <- stage_draws[[7]]
g7_draw <- stage_draws[[8]]
g8_draw <- stage_draws[[9]]
# RESHAPE CONDITIONAL PROBABILITIES
# Split complete-draw strings back into their individual pairings.
#
# Each draw string consists of eight 7-character pairings ("AAA-BBB") joined
# by commas, so pairing k starts at character 8 * (k - 1) + 1.
#
#   draws - character vector of complete-draw strings
#
# Returns a character vector holding the first pairing of every draw, then
# the second pairing of every draw, and so on (length 8 * length(draws)).
split_pairings <- function(draws) {
  starts <- seq(1, 57, by = 8)
  unlist(lapply(starts, function(start) substr(draws, start, start + 6)))
}

# The column is addressed by its full name `draws`; `$draw` would only
# resolve through partial matching.
g0_draw_re <- split_pairings(g0_draw$draws)
g1_draw_re <- split_pairings(g1_draw$draws)
g2_draw_re <- split_pairings(g2_draw$draws)
g3_draw_re <- split_pairings(g3_draw$draws)
g4_draw_re <- split_pairings(g4_draw$draws)
g5_draw_re <- split_pairings(g5_draw$draws)
g6_draw_re <- split_pairings(g6_draw$draws)
g7_draw_re <- split_pairings(g7_draw$draws)
g8_draw_re <- split_pairings(g8_draw$draws)
# GET INTO MATRIX
# Full winner x runner-up grid, keyed by (fst, snd); used to expand the
# per-stage counts to all combinations.
all.matches.all <- CJ(fst = fst, snd = snd)
setnames(all.matches.all, c("fst", "snd"))
setkey(all.matches.all, fst, snd)

# COMPLETE MATCHES MATRIX
# Same grid with an "AAA-BBB" label per pairing, keyed by that label so that
# pairing strings can be joined back to their two teams.
all.matches <- CJ(fst = fst, snd = snd)
# name the columns explicitly so the label below does not depend on how CJ()
# names its output
setnames(all.matches, c("fst", "snd"))
all.matches[, draw := paste0(fst, "-", snd)]
setkey(all.matches, draw)
# MATCHUP MATRICES: BEFORE ANY DRAW AND AFTER EACH OF THE EIGHT DRAWS

# Keys required by the joins inside plot_stage(); re-setting an existing key
# is harmless.
setkey(all.matches, draw)
setkey(all.matches.all, fst, snd)

# Build the heat map of pairing frequencies for one stage of the draw.
#
#   pairings    - character vector of "AAA-BBB" pairings, one entry per
#                 pairing per remaining complete draw (a g*_draw_re vector)
#   n_draws     - number of complete draws still possible at this stage
#   stage_label - leading part of the plot title, e.g. "After First Draw"
#
# Reads the global tables `all.matches` (keyed by draw) and `all.matches.all`
# (keyed by fst, snd). Returns a ggplot object in which every tile shows the
# share of the remaining draws that contain that pairing.
plot_stage <- function(pairings, n_draws, stage_label) {
  observed <- data.table(draw = pairings, id = seq_along(pairings))
  setkey(observed, draw)

  # attach both teams to every observed pairing, then count per pairing
  counts <- all.matches[observed][, list(count = length(id)),
                                  by = list(fst, snd)]
  setkey(counts, fst, snd)

  # expand to the full grid; pairings that never occur get a count of 0
  matches <- counts[all.matches.all]
  matches$count[is.na(matches$count)] <- 0

  # precomputed as columns so the plot does not depend on how aes() looks up
  # function-local variables
  matches$share <- matches$count / n_draws
  matches$label <- paste0(round(matches$share * 100, 1), "%")

  noun <- if (n_draws == 1) "Possibility" else "Possibilities"

  ggplot(matches, aes(x = snd, y = fst, label = label, fill = share)) +
    geom_tile() +
    geom_text(colour = "white", size = 6) +
    ylab("group winners") + xlab("group runners-up") +
    theme(legend.position = "none") +
    ggtitle(paste0(stage_label, " - ", n_draws, " ", noun))
}

# One top-level call per stage so that each plot is auto-printed.
plot_stage(g0_draw_re, nrow(g0_draw), "Before Any Draw")
plot_stage(g1_draw_re, nrow(g1_draw), "After First Draw")
plot_stage(g2_draw_re, nrow(g2_draw), "After Second Draw")
plot_stage(g3_draw_re, nrow(g3_draw), "After Third Draw")
plot_stage(g4_draw_re, nrow(g4_draw), "After Fourth Draw")
plot_stage(g5_draw_re, nrow(g5_draw), "After Fifth Draw")
plot_stage(g6_draw_re, nrow(g6_draw), "After Sixth Draw")
plot_stage(g7_draw_re, nrow(g7_draw), "After Seventh Draw")
plot_stage(g8_draw_re, nrow(g8_draw), "After Eighth Draw")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement