Advertisement
Schw4rzR0tG0ld

One Draw Can Change Everything - UEFA CL Round of 16 Drawing

Dec 16th, 2013
381
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 13.62 KB | None | 0 0
  1. ## AUTHOR: Sebastian Schwarz
  2. ## schwarz.sebastian [- at -] campus.lmu.de
  3.  
  4. ### CL ROUND OF 16 DRAWINGS
  5.  
  6. library(data.table)
  7. library(plyr)
  8. library(ggplot2)
  9.  
  10. # GROUP FIRST AND SECOND
  11. fst <- c("MAN", "REA", "PSG", "BAY", "CHL", "BVB", "ATL", "BAR")
  12. snd <- c("B04","GAL", "PIR", "MAC", "S04", "FCA", "ZEN", "ACM")
  13.  
  14. # COUNTRIES WITH FIRST AND SECOND
  15. ger <- c("BAY", "B04", "S04", "BVB")
  16. eng <- c("MAN", "CHL", "MAC", "FCA")
  17.  
  18. # GROUPS
  19. A <- c("MAN","B04")
  20. B <- c("REA","GAL")
  21. C <- c("PSG","PIR")
  22. D <- c("BAY","MAC")
  23. E <- c("CHL","S04")
  24. F <- c("BVB","FCA")
  25. G <- c("ATL","ZEN")
  26. H <- c("BAR","ACM")
  27.  
  28. # ALL PAIRS
  29. par <- CJ(fst=fst, snd=snd)
  30.  
  31. # REMOVE ENG vs. ENG AND GER vs. GER AND WITHIN GROUP MATCHES
  32. par <- par[!(fst %in% ger & snd %in% ger) &
  33.              !(fst %in% eng & snd %in% eng) &
  34.              !(fst %in% A & snd %in% A) &
  35.              !(fst %in% B & snd %in% B) &
  36.              !(fst %in% C & snd %in% C) &
  37.              !(fst %in% D & snd %in% D) &
  38.              !(fst %in% E & snd %in% E) &
  39.              !(fst %in% F & snd %in% F) &
  40.              !(fst %in% G & snd %in% G) &
  41.              !(fst %in% H & snd %in% H) , ]
  42.  
  43.  
  44. # FUNCTION FOR A RANDOM MATCHUP DRAW
  45. draw <- function() {
  46.  
  47.   # initialize vector for previously selected group 2nds
  48.   sel <- character(8)
  49.   # initialize result matrix
  50.   result <- matrix(nrow=8,ncol=2)
  51.  
  52.   # make random draw
  53.   # random order group first teams and begin loop for selecting a group second for this team
  54.   for (i in sample(1:8,8, replace=FALSE)) {
  55.     # select all of all possible matchups for this team
  56.     # (previously selected group 2nd teams are not allowed, all other rules are already done within par)
  57.     man <- par[fst == unique(par$fst)[i] & !(snd %in% sel),]
  58.     # select one opponent randomly of the possible alternatives
  59.     pair <- man[sample(dim(man)[1], 1),]
  60.     # store pair in result matrix
  61.     result[i,] <- as.matrix(pair)
  62.     # store selected 2nd in sel vector (must not be selected a second time)
  63.     sel[i] <- pair$snd
  64.   }
  65.   return(result)
  66. }
  67.  
  68. # A draw can fail: a group winner may be left without an opponent because every runner-up it is allowed
  69. # to face has already been paired with a previously drawn team -> error handling in simulation
  70.  
  71.  
  72. # library(doParallel)
  73. # cl <- makeCluster(3)
  74. # registerDoParallel(cl)
  75.  
  76. # 100k REPLICATIONS AND STORE IN DT
  77. system.time(dt <- data.table(ldply(1:1000000,function(x){ tryCatch(draw(), error=function(x) {matrix(nrow=8,ncol=2)})})))
  78. # dt$V3 <- 1
  79. # setnames(dt, c("V1","V2","V3"))
  80.  
  81. # # GET COMPLETE MATRIX
  82. # sim.matches <- dt[, list(count=length(V3)), by=list(V1,V2)][!(is.na(V1))]
  83. # all.matches <- CJ(V1=fst, V2=snd)
  84. #
  85. # setkey(sim.matches, V1, V2)
  86. # setkey(all.matches, V1, V2)
  87. #
  88. # matches <- sim.matches[all.matches]
  89. # matches$count[is.na(matches$count)] <- 0
  90. #
  91. # # GET TOTAL SUCCESSFUL MATCHUP DRAWS
  92. # total <- max(matches[,list(count=sum(count)),by=V1]$count)
  93. #
  94. # # # PLOT
  95. # # ggplot(matches, aes(x=V2,y=V1, label=paste(round(count/total*100,1),"%",sep=""), fill=count/total)) +
  96. # #   geom_tile() +
  97. # #   geom_text(colour="white", size=6) +
  98. # #   ylab("") +xlab("") +
  99. # #   theme(legend.position="none")
  100. # #
  101. # # # SAVE DATA
  102. # # save(dt, file="data_uefa")
  103.  
  104. # setwd("C:/Users/schwa_000/Desktop/uefa/")
  105. # files <- dir()
  106. #
  107. # files <- files[grep("UEFA_13_12", files)]
  108. #
  109. # load(files[1])
  110. # dt1 <- dt
  111. # load(files[2])
  112. # dt2 <- dt
  113. # load(files[3])
  114. # dt3 <- dt
  115. # load(files[4])
  116. # dt4 <- dt
  117. # load(files[5])
  118. # dt5 <- dt
  119. #
  120. # dt <- data.table(rbind(dt1,dt2,dt3,dt4,dt5))
  121.  
  122. # ADD ID VARIABLE TO DRAWS AND SET COL NAMES
  123. dt$id <- unlist(lapply(1:1000000, function(x) {rep(x,8)}))
  124. setnames(dt, c("fst","snd","id"))
  125.  
  126. # RESHAPE THE DATA
  127. dt <- dt[!(is.na(fst))]
  128. ddt <- dt[, list(id, match = paste(fst,snd,sep="-"))]
  129. dddt <- ddt[, list(draws=paste(match,collapse=",")), by="id"]
  130. ddddt <- dddt[, list(count=length(id)), by=draws][order(count)]
  131.  
  132. # GET ALL UNIQUE DRAWS AND GET NEW ID
  133. all.draws <- data.table(draws=unique(ddddt$draws), id=seq(1,3497))
  134.  
  135. # GET CONDITIONAL PROBABILITIES
  136. g0_draw <- all.draws
  137. g1_draw <- all.draws[grep("BAR-MAC", draws),]
  138. g2_draw <- g1_draw[grep("MAN-PIR", draws),]
  139. g3_draw <- g2_draw[grep("ATL-ACM", draws),]
  140. g4_draw <- g3_draw[grep("PSG-B04", draws),]
  141. g5_draw <- g4_draw[grep("CHL-GAL", draws),]
  142. g6_draw <- g5_draw[grep("REA-S04", draws),]
  143. g7_draw <- g6_draw[grep("BVB-ZEN", draws),]
  144. g8_draw <- g7_draw[grep("BAY-FCA", draws),]
  145.  
  146. # RESHAPE CONDITIONAL PROBABILITIES (COPY PASTE \o/)
  147. g0_draw_re <- c(
  148.   substr(g0_draw$draw,1,7),
  149.   substr(g0_draw$draw,9,15),
  150.   substr(g0_draw$draw,17,23),
  151.   substr(g0_draw$draw,25,31),
  152.   substr(g0_draw$draw,33,39),
  153.   substr(g0_draw$draw,41,47),
  154.   substr(g0_draw$draw,49,55),
  155.   substr(g0_draw$draw,57,63))
  156.  
  157. g1_draw_re <- c(
  158. substr(g1_draw$draw,1,7),
  159. substr(g1_draw$draw,9,15),
  160. substr(g1_draw$draw,17,23),
  161. substr(g1_draw$draw,25,31),
  162. substr(g1_draw$draw,33,39),
  163. substr(g1_draw$draw,41,47),
  164. substr(g1_draw$draw,49,55),
  165. substr(g1_draw$draw,57,63))
  166.  
  167. g2_draw_re <- c(
  168.   substr(g2_draw$draw,1,7),
  169.   substr(g2_draw$draw,9,15),
  170.   substr(g2_draw$draw,17,23),
  171.   substr(g2_draw$draw,25,31),
  172.   substr(g2_draw$draw,33,39),
  173.   substr(g2_draw$draw,41,47),
  174.   substr(g2_draw$draw,49,55),
  175.   substr(g2_draw$draw,57,63))
  176.  
  177. g3_draw_re <- c(
  178.   substr(g3_draw$draw,1,7),
  179.   substr(g3_draw$draw,9,15),
  180.   substr(g3_draw$draw,17,23),
  181.   substr(g3_draw$draw,25,31),
  182.   substr(g3_draw$draw,33,39),
  183.   substr(g3_draw$draw,41,47),
  184.   substr(g3_draw$draw,49,55),
  185.   substr(g3_draw$draw,57,63))
  186.  
  187. g4_draw_re <- c(
  188.   substr(g4_draw$draw,1,7),
  189.   substr(g4_draw$draw,9,15),
  190.   substr(g4_draw$draw,17,23),
  191.   substr(g4_draw$draw,25,31),
  192.   substr(g4_draw$draw,33,39),
  193.   substr(g4_draw$draw,41,47),
  194.   substr(g4_draw$draw,49,55),
  195.   substr(g4_draw$draw,57,63))
  196.  
  197. g5_draw_re <- c(
  198.   substr(g5_draw$draw,1,7),
  199.   substr(g5_draw$draw,9,15),
  200.   substr(g5_draw$draw,17,23),
  201.   substr(g5_draw$draw,25,31),
  202.   substr(g5_draw$draw,33,39),
  203.   substr(g5_draw$draw,41,47),
  204.   substr(g5_draw$draw,49,55),
  205.   substr(g5_draw$draw,57,63))
  206.  
  207. g6_draw_re <- c(
  208.   substr(g6_draw$draw,1,7),
  209.   substr(g6_draw$draw,9,15),
  210.   substr(g6_draw$draw,17,23),
  211.   substr(g6_draw$draw,25,31),
  212.   substr(g6_draw$draw,33,39),
  213.   substr(g6_draw$draw,41,47),
  214.   substr(g6_draw$draw,49,55),
  215.   substr(g6_draw$draw,57,63))
  216.  
  217. g7_draw_re <- c(
  218.   substr(g7_draw$draw,1,7),
  219.   substr(g7_draw$draw,9,15),
  220.   substr(g7_draw$draw,17,23),
  221.   substr(g7_draw$draw,25,31),
  222.   substr(g7_draw$draw,33,39),
  223.   substr(g7_draw$draw,41,47),
  224.   substr(g7_draw$draw,49,55),
  225.   substr(g7_draw$draw,57,63))
  226.  
  227. g8_draw_re <- c(
  228.   substr(g8_draw$draw,1,7),
  229.   substr(g8_draw$draw,9,15),
  230.   substr(g8_draw$draw,17,23),
  231.   substr(g8_draw$draw,25,31),
  232.   substr(g8_draw$draw,33,39),
  233.   substr(g8_draw$draw,41,47),
  234.   substr(g8_draw$draw,49,55),
  235.   substr(g8_draw$draw,57,63))
  236.  
  237. # GET INTO MATRIX
  238. all.matches.all <- CJ(fst=fst, snd=snd)
  239. setnames(all.matches.all, c("fst","snd"))
  240. setkey(all.matches.all, fst, snd)
  241.  
  242.  
  243. # COMPLETE MATCHES MATRIX
  244. all.matches <- CJ(fst=fst, snd=snd)
  245. all.matches <- all.matches[,list(V1, V2, paste(V1,"-",V2,sep=""))]
  246. setnames(all.matches, c("fst","snd","draw"))
  247. setkey(all.matches, draw)
  248.  
  249.  
  250. # BEFORE ANY DRAW DRAW MATRIX
  251. g0_draw_re <- data.table(g0_draw_re, seq(1,length(g0_draw_re)))
  252. setnames(g0_draw_re, c("draw","id"))
  253. setkey(g0_draw_re, draw)
  254.  
  255. g0_draw_ma <- all.matches[g0_draw_re]
  256. g0_draw_ma <- g0_draw_ma[, list(count=length(id)), by=list(fst,snd)]
  257.  
  258. setkey(g0_draw_ma, fst, snd)
  259. setkey(all.matches.all, fst, snd)
  260.  
  261. matches <- g0_draw_ma[all.matches.all]
  262. matches$count[is.na(matches$count)] <- 0
  263.  
  264. ggplot(matches, aes(x=snd,y=fst, label=paste(round(count/dim(g0_draw)[1]*100,1),"%",sep=""), fill=count/dim(g0_draw)[1])) +
  265.   geom_tile() +
  266.   geom_text(colour="white", size=6) +
  267.   ylab("group winners") +xlab("group runners-up") +
  268.   theme(legend.position="none") + ggtitle("Before Any Draw - 3497 Possibilities")
  269.  
  270. # AFTER FRIST DRAW MATRIX
  271. g1_draw_re <- data.table(g1_draw_re, seq(1,length(g1_draw_re)))
  272. setnames(g1_draw_re, c("draw","id"))
  273. setkey(g1_draw_re, draw)
  274.  
  275. g1_draw_ma <- all.matches[g1_draw_re]
  276. g1_draw_ma <- g1_draw_ma[, list(count=length(id)), by=list(fst,snd)]
  277.  
  278. setkey(g1_draw_ma, fst, snd)
  279. setkey(all.matches.all, fst, snd)
  280.  
  281. matches <- g1_draw_ma[all.matches.all]
  282. matches$count[is.na(matches$count)] <- 0
  283.  
  284. ggplot(matches, aes(x=snd,y=fst, label=paste(round(count/dim(g1_draw)[1]*100,1),"%",sep=""), fill=count/dim(g1_draw)[1])) +
  285.   geom_tile() +
  286.   geom_text(colour="white", size=6) +
  287.   ylab("group winners") +xlab("group runners-up") +
  288.   theme(legend.position="none") + ggtitle("After First Draw - 605 Possibilities")
  289.  
  290.  
  291. # AFTER SECOND DRAW MATRIX
  292. g2_draw_re <- data.table(g2_draw_re, seq(1,length(g2_draw_re)))
  293. setnames(g2_draw_re, c("draw","id"))
  294. setkey(g2_draw_re, draw)
  295.  
  296. g2_draw_ma <- all.matches[g2_draw_re]
  297. g2_draw_ma <- g2_draw_ma[, list(count=length(id)), by=list(fst,snd)]
  298.  
  299. setkey(g2_draw_ma, fst, snd)
  300. setkey(all.matches.all, fst, snd)
  301.  
  302. matches <- g2_draw_ma[all.matches.all]
  303. matches$count[is.na(matches$count)] <- 0
  304.  
  305. ggplot(matches, aes(x=snd,y=fst, label=paste(round(count/dim(g2_draw)[1]*100,1),"%",sep=""), fill=count/dim(g2_draw)[1])) +
  306.   geom_tile() +
  307.   geom_text(colour="white", size=6) +
  308.   ylab("group winners") +xlab("group runners-up") +
  309.   theme(legend.position="none") + ggtitle("After Second Draw - 103 Possibilities")
  310.  
  311. # AFTER THIRD DRAW MATRIX
  312. g3_draw_re <- data.table(g3_draw_re, seq(1,length(g3_draw_re)))
  313. setnames(g3_draw_re, c("draw","id"))
  314. setkey(g3_draw_re, draw)
  315.  
  316. g3_draw_ma <- all.matches[g3_draw_re]
  317. g3_draw_ma <- g3_draw_ma[, list(count=length(id)), by=list(fst,snd)]
  318.  
  319. setkey(g3_draw_ma, fst, snd)
  320. setkey(all.matches.all, fst, snd)
  321.  
  322. matches <- g3_draw_ma[all.matches.all]
  323. matches$count[is.na(matches$count)] <- 0
  324.  
  325. ggplot(matches, aes(x=snd,y=fst, label=paste(round(count/dim(g3_draw)[1]*100,1),"%",sep=""), fill=count/dim(g3_draw)[1])) +
  326.   geom_tile() +
  327.   geom_text(colour="white", size=6) +
  328.   ylab("group winners") +xlab("group runners-up") +
  329.   theme(legend.position="none") + ggtitle("After Thrid Draw - 11 Possibilities")
  330.  
  331. # AFTER FOURTH DRAW MATRIX
  332. g4_draw_re <- data.table(g4_draw_re, seq(1,length(g4_draw_re)))
  333. setnames(g4_draw_re, c("draw","id"))
  334. setkey(g4_draw_re, draw)
  335.  
  336. g4_draw_ma <- all.matches[g4_draw_re]
  337. g4_draw_ma <- g4_draw_ma[, list(count=length(id)), by=list(fst,snd)]
  338.  
  339. setkey(g4_draw_ma, fst, snd)
  340. setkey(all.matches.all, fst, snd)
  341.  
  342. matches <- g4_draw_ma[all.matches.all]
  343. matches$count[is.na(matches$count)] <- 0
  344.  
  345. ggplot(matches, aes(x=snd,y=fst, label=paste(round(count/dim(g4_draw)[1]*100,1),"%",sep=""), fill=count/dim(g4_draw)[1])) +
  346.   geom_tile() +
  347.   geom_text(colour="white", size=6) +
  348.   ylab("group winners") +xlab("group runners-up") +
  349.   theme(legend.position="none") + ggtitle("After Fourth Draw - 2 Possibilities")
  350.  
  351. # AFTER FiFTH DRAW MATRIX
  352. g5_draw_re <- data.table(g5_draw_re, seq(1,length(g5_draw_re)))
  353. setnames(g5_draw_re, c("draw","id"))
  354. setkey(g5_draw_re, draw)
  355.  
  356. g5_draw_ma <- all.matches[g5_draw_re]
  357. g5_draw_ma <- g5_draw_ma[, list(count=length(id)), by=list(fst,snd)]
  358.  
  359. setkey(g5_draw_ma, fst, snd)
  360. setkey(all.matches.all, fst, snd)
  361.  
  362. matches <- g5_draw_ma[all.matches.all]
  363. matches$count[is.na(matches$count)] <- 0
  364.  
  365. ggplot(matches, aes(x=snd,y=fst, label=paste(round(count/dim(g5_draw)[1]*100,1),"%",sep=""), fill=count/dim(g5_draw)[1])) +
  366.   geom_tile() +
  367.   geom_text(colour="white", size=6) +
  368.   ylab("group winners") +xlab("group runners-up") +
  369.   theme(legend.position="none") + ggtitle("After Fifth Draw - 1 Possibility")
  370.  
  371. # AFTER SIXTH DRAW MATRIX
  372. g6_draw_re <- data.table(g6_draw_re, seq(1,length(g5_draw_re)))
  373. setnames(g6_draw_re, c("draw","id"))
  374. setkey(g6_draw_re, draw)
  375.  
  376. g6_draw_ma <- all.matches[g6_draw_re]
  377. g6_draw_ma <- g6_draw_ma[, list(count=length(id)), by=list(fst,snd)]
  378.  
  379. setkey(g6_draw_ma, fst, snd)
  380. setkey(all.matches.all, fst, snd)
  381.  
  382. matches <- g6_draw_ma[all.matches.all]
  383. matches$count[is.na(matches$count)] <- 0
  384.  
  385. ggplot(matches, aes(x=snd,y=fst, label=paste(round(count/dim(g6_draw)[1]*100,1),"%",sep=""), fill=count/dim(g6_draw)[1])) +
  386.   geom_tile() +
  387.   geom_text(colour="white", size=6) +
  388.   ylab("group winners") +xlab("group runners-up") +
  389.   theme(legend.position="none") + ggtitle("After Sixth Draw - 1 Possibility")
  390.  
  391. # AFTER SEVENTH DRAW MATRIX
  392. g7_draw_re <- data.table(g7_draw_re, seq(1,length(g7_draw_re)))
  393. setnames(g7_draw_re, c("draw","id"))
  394. setkey(g7_draw_re, draw)
  395.  
  396. g7_draw_ma <- all.matches[g7_draw_re]
  397. g7_draw_ma <- g7_draw_ma[, list(count=length(id)), by=list(fst,snd)]
  398.  
  399. setkey(g7_draw_ma, fst, snd)
  400. setkey(all.matches.all, fst, snd)
  401.  
  402. matches <- g7_draw_ma[all.matches.all]
  403. matches$count[is.na(matches$count)] <- 0
  404.  
  405. ggplot(matches, aes(x=snd,y=fst, label=paste(round(count/dim(g7_draw)[1]*100,1),"%",sep=""), fill=count/dim(g7_draw)[1])) +
  406.   geom_tile() +
  407.   geom_text(colour="white", size=6) +
  408.   ylab("group winners") +xlab("group runners-up") +
  409.   theme(legend.position="none") + ggtitle("After Seventh Draw - 1 Possibility")
  410.  
  411. # AFTER EIGHTH DRAW MATRIX
  412. g8_draw_re <- data.table(g8_draw_re, seq(1,length(g8_draw_re)))
  413. setnames(g8_draw_re, c("draw","id"))
  414. setkey(g8_draw_re, draw)
  415.  
  416. g8_draw_ma <- all.matches[g8_draw_re]
  417. g8_draw_ma <- g8_draw_ma[, list(count=length(id)), by=list(fst,snd)]
  418.  
  419. setkey(g8_draw_ma, fst, snd)
  420. setkey(all.matches.all, fst, snd)
  421.  
  422. matches <- g8_draw_ma[all.matches.all]
  423. matches$count[is.na(matches$count)] <- 0
  424.  
  425. ggplot(matches, aes(x=snd,y=fst, label=paste(round(count/dim(g8_draw)[1]*100,1),"%",sep=""), fill=count/dim(g8_draw)[1])) +
  426.   geom_tile() +
  427.   geom_text(colour="white", size=6) +
  428.   ylab("group winners") +xlab("group runners-up") +
  429.   theme(legend.position="none") + ggtitle("After Eighth Draw - 1 Possibility")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement