Untitled

rm(list=ls())

#### CIPHER AND DECIPHER ----------------------------------------

## Create a cipher string from integer key nums
numCipherString <- function(num)
{
  letters <- sapply(LETTERS, utf8ToInt)
  letters <- suppressWarnings((letters + num - 65) %% 26 + 65)
  cipher <- sapply(letters, intToUtf8)
  return(paste(cipher, collapse = ""))
}

## Generate random cipher string
randomCipherString <- function()
{
  return(paste(sample(LETTERS, 26), collapse = ""))
}

## Create a vector that associates alphabet with cipher string
createCipher <- function(cipherString)
{
  cipherKey <- strsplit(toupper(cipherString), split = "")[[1]]

  if ( length(cipherKey) != 26 ) {
    stop("cipher string must have 26 characters")
  }

  cipher <- vector(mode = "numeric", length = 26+1)
  names(cipher) <- c(LETTERS, " ")
  cipher[LETTERS] <- cipherKey
  cipher[" "] <- " "
  return(cipher)
}

## Apply the cipher to a text
applyCipher <- function(text, cipherString)
{
  cipher <- createCipher(cipherString)

  # check is upper case alphabetic or whitespace
  text <- strsplit(toupper(text), split = "")[[1]]
  in.alphabet <- sapply(text, function(x) {isTRUE(grep("[A-Z]", x) == 1)})
  text[!in.alphabet] <- " "

  return(paste(as.character(cipher[text]), collapse = ""))
}

## Decipher text given a cipher string
applyDecipher <- function(text, cipherString)
{
  cipher <- createCipher(cipherString)
  decipher <- c(LETTERS, " ")
  names(decipher) <- cipher

  # check is upper case alphabetic or whitespace
  text <- strsplit(toupper(text), split = "")[[1]]
  in.alphabet <- sapply(text, function(x) {isTRUE(grep("[A-Z]", x) == 1)})
  text[!in.alphabet] <- " "

  return(paste(as.character(decipher[text]), collapse = ""))
}


### CODE BREAKING -------------------------------------

## COunt the appearance of each letter pair in the text (e.g. "TH" and "ES")
getLanguageCounts <- function(longtext.path)
{
  counts <- list()

  # read line by line
  con <- file(longtext.path, "r")
  while( TRUE ) {
    line <- readLines(con, n = 1)
    if ( length(line) == 0 ) break;

    # check is upper case alphabetic or whitespace
    line <- strsplit(toupper(line), split = "")[[1]]
    in.alphabet <- sapply(line, function(x) {isTRUE(grep("[A-Z]", x) == 1)})
    line[!in.alphabet] <- " "

    pairs <- paste(line[-length(line)],line[-1], sep = "")
    pairs <- table(pairs)

    old <- names(pairs) %in% names(counts)
    old.names <- names(pairs)[old]
    new.names <- names(pairs)[!old]

    counts[old.names] <- as.numeric(counts[old.names]) +
      as.numeric(pairs[old.names])
    counts[new.names] <- as.numeric(pairs[new.names])
  }
  close(con)

  return(counts)
}


## same as getLanguageCounts but for shorter text not in file
getCounts <- function(text)
{
  # check is upper case alphabetic or whitespace
  text <- strsplit(toupper(text), split = "")[[1]]
  in.alphabet <- sapply(text, function(x) {isTRUE(grep("[A-Z]", x) == 1)})
  text[!in.alphabet] <- " "

  pairs <- paste(text[-length(text)],text[-1], sep = "")

  return(as.list(table(pairs)))
}


scoreCipher <- function(text, cipherString, languageCounts )
{
  #cipher <- createCipher(cipher)
  decrypted <- applyDecipher(text, cipherString)
  counts <- getCounts(decrypted)
  score <- 0

  for (pair in intersect( names(counts), names(languageCounts) ) )
  {
    c <- as.numeric(counts[pair])
    l <- as.numeric(languageCounts[pair])
    score <- score + c*log(l)
  }

  return(score)
}


swapCipher <- function( cipherString )
{
  index <- sample(1:nchar(cipherString), 2)
  cipherVec <- strsplit(toupper(cipherString), split = "")[[1]]

  swap1 <- cipherVec[index[1]]
  cipherVec[index[1]] <- cipherVec[index[2]]
  cipherVec[index[2]] <- swap1

  return(paste(cipherVec, collapse = ""))
}

# Biased coin with probability p of heads
biasedCoin <- function(p)
{
  return(runif(1,0,1) < p)
}


crackMCMC <- function(n, text, languageCounts)
{
  currentCipher <- randomCipherString()
  memory <- list()
  bestCipher <- currentCipher
  score <- 0
  for ( i in 1:n )
  {
    memory[i] <- currentCipher
    nextCipher <- swapCipher( currentCipher )

    currentScore <- scoreCipher(text, currentCipher, languageCounts)
    nextScore <- scoreCipher(text, nextCipher, languageCounts)

    p.accept <- min(1, exp(nextScore - currentScore))

    if ( currentScore > score )
    {
       bestCipher <- currentCipher
    }
    if ( biasedCoin(p.accept) )
    {
      currentCipher <- nextCipher
    }
    if ( i %% 500 == 0 )
    {
      cat("iter ", i, ": ", substr(applyDecipher(text, currentCipher),1,99),"\n")
    }
  }

  return(list(best = bestCipher, memory = memory))
}


### TESTING ### -----------------------------------------------------
text <- paste("As Oliver gave this first proof",
              "of the free and proper action of his lungs",
              "the patchwork coverlet which was carelessly",
              "flung over the iron bedstead, rustled;",
              "the pale face of a young woman was raised",
              "feebly from the pillow; and a faint voice",
              "imperfectly articulated the words, Let me",
              "see the child, and die. The surgeon had been",
              "sitting with his face turned towards the fire:",
              "giving the palms of his hands a warm and a rub",
              "alternately. As the young woman spoke, he rose,",
              "and advancing to the bed's head, said, with more",
              "kindness than might have been expected of him: ")

## Use the text of any large book here, e.g. War and Peace.
languageCounts <- getLanguageCounts("warandpeace.txt")

cipher <- "XEBPROHYAUFTIDSJLKZMWVNGQC"

encrypted.text <- applyCipher(text, cipher)

cat("Text To Decode:", encrypted.text, "\n")
result = crackMCMC(10000, encrypted.text, languageCounts)
cat("\n", "Decoded Text:", applyDecipher(encrypted.text, result["best"]), "\n")
cat("MCMC KEY FOUND:", result["best"], "\n")
cat("    ACTUAL KEY:", cipher, "\n")