# UEFA CL Drawing Calculation 2014/15

## AUTHOR: Sebastian Schwarz (@Schw4rzR0tGold)
## LICENSE: MIT + Sebastian Schwarz

# Champions League Draw Calculation
library(data.table)
library(plyr)
library(magrittr)

# TEAM DATA
# Column layout of the team table: abbreviation, full club name, group letter,
# finishing position within the group (1 = winner, 2 = runner-up) and
# national association.
teams.colnames <- c("short", "name", "group", "position", "association")

# One row per qualified club. rbind() on these vectors produces a character
# matrix, so every column -- including position -- is stored as text.
teams <- data.table(rbind(
  c("ATL", "Atletico Madrid", "A", 1, "ESP"),
  c("JUV", "Juventus Turin", "A", 2, "ITA"),
  c("REA", "Real Madrid", "B", 1, "ESP"),
  c("BAS", "FC Basel", "B", 2, "SUI"),
  c("ASM", "AS Monaco", "C", 1, "FRA"),
  c("B04", "Bayer 04 Leverkusen", "C", 2, "GER"),
  c("BVB", "Borussia Dortmund", "D", 1, "GER"),
  c("FCA", "FC Arsenal", "D", 2, "ENG"),
  c("BAY", "FC Bayern München", "E", 1, "GER"),
  c("MAC", "Manchester City", "E", 2, "ENG"),
  c("BAR", "FC Barcelona", "F", 1, "ESP"),
  c("PSG", "Paris St. Germain", "F", 2, "FRA"),
  c("CHL", "FC Chelsea", "G", 1, "ENG"),
  c("S04", "FC Schalke 04", "G", 2, "GER"),
  c("POR", "FC Porto", "H", 1, "POR"),
  c("DON", "FC Shakthar Donetsk", "H", 2, "UKR")
))

# Replace the default V1..V5 column names by reference.
setnames(teams, teams.colnames)


# SPLIT INTO GROUP WINNERS AND RUNNERS-UP
# Each subset is a new table, so renaming its columns by reference does not
# touch `teams`. The suffix keeps the two sides apart once they are joined.
teams.first <- teams[position == 1, ]
setnames(teams.first, paste0(teams.colnames, ".first"))

teams.second <- teams[position == 2, ]
setnames(teams.second, paste0(teams.colnames, ".second"))

# ALL PAIRS
# Cross join to calculate all group winner vs. runner-up pairings
# (8 x 8 = 64). Both sides get a constant helper column k; joining on it
# matches every row of one table with every row of the other.
first.keyed <- teams.first[, c(k = 1, .SD)]
setkey(first.keyed, k)
second.keyed <- teams.second[, c(k = 1, .SD)]
teams.cj <- first.keyed[second.keyed, allow.cartesian = TRUE]
teams.cj[, k := NULL]  # helper column is no longer needed

# Apply the draw restrictions: no opponent from the same group and none from
# the same national association (49 pairs remain).
teams.cj <- teams.cj[group.first != group.second][
  association.first != association.second]

# Keep only the two team abbreviations.
teams.cj <- teams.cj[, .(short.first, short.second)]

# ALL POSSIBILITIES
# All group winners, in order of first appearance in the pair table.
short.firsts <- unique(teams.cj$short.first)
stopifnot(length(short.firsts) > 0)

# Per team: one table for each group winner holding its admissible opponents.
# The columns are suffixed with the winner's index (short.first.1,
# short.second.1, ...) and a constant key column k is added so the tables can
# be cross joined. The tables are collected in the list teams.cj.list, so the
# code works for any number of group winners instead of exactly eight.
teams.cj.list <- lapply(seq_along(short.firsts), function(idx) {
  first.team <- short.firsts[idx]
  per.team <- teams.cj[short.first == first.team, c(k = 1, .SD)]
  setnames(per.team, c("k", paste(colnames(teams.cj), idx, sep = ".")))
  setkeyv(per.team, "k")
  per.team
})

# Cross join: fold the per-team tables together from left to right, i.e.
# table1[table2][table3]... Every row of the result picks one opponent for
# each group winner; duplicates among the opponents are NOT yet excluded.
teams.cj.a <- Reduce(
  function(joined, per.team) joined[per.team, allow.cartesian = TRUE],
  teams.cj.list
)[, k := NULL]

# Remove impossible draws, i.e. all rows in which one runner-up is selected
# more than once. A row is kept only if every pair of short.second.* columns
# differs; the pairs are generated with combn() so the check covers however
# many opponent columns the cross join produced.
second.cols <- grep("^short\\.second\\.", colnames(teams.cj.a), value = TRUE)

all.distinct <- rep(TRUE, nrow(teams.cj.a))
if (length(second.cols) > 1) {
  for (col.pair in combn(second.cols, 2, simplify = FALSE)) {
    all.distinct <- all.distinct &
      teams.cj.a[[col.pair[1]]] != teams.cj.a[[col.pair[2]]]
  }
}

# Logical row subset: keeps the valid draws in their original order.
pairs.all <- teams.cj.a[all.distinct]

# Reformat: collapse every complete draw (one row) into a single string of
# the form "AAA-BBB,CCC-DDD,...". The columns come in consecutive
# (group winner, opponent) pairs, so odd columns are joined with the column
# to their right by "-" and the resulting fixtures are joined by ",".
pairs.all <- as.matrix(pairs.all)
stopifnot(ncol(pairs.all) %% 2 == 0)

# Indices of the group-winner columns: 1, 3, 5, ...
winner.cols <- seq_len(ncol(pairs.all) / 2) * 2 - 1

fixtures <- lapply(winner.cols, function(col) {
  paste(pairs.all[, col], pairs.all[, col + 1], sep = "-")
})

pairs.all <- do.call(paste, c(fixtures, sep = ","))

# Example: share of all valid draws that contain FC Bayern vs. FC Basel.
# NOTE(review): this treats every valid complete draw as equally likely; the
# real sequential draw procedure may weight them differently -- confirm
# before quoting the number as an exact probability.
sum(grepl("BAY-BAS", pairs.all)) / length(pairs.all)

# Save every draw as one row (with row numbers, without a header line).
write.table(pairs.all, "all_cl_draws.csv", sep = ";",
            row.names = TRUE, col.names = FALSE)

# yes it is overly verbose... but I think also very clear