Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- rm(list=ls())
- #### CIPHER AND DECIPHER ----------------------------------------
- ## Create a cipher string from integer key nums
- numCipherString <- function(num)
- {
- letters <- sapply(LETTERS, utf8ToInt)
- letters <- suppressWarnings((letters + num - 65) %% 26 + 65)
- cipher <- sapply(letters, intToUtf8)
- return(paste(cipher, collapse = ""))
- }
- ## Generate random cipher string
- randomCipherString <- function()
- {
- return(paste(sample(LETTERS, 26), collapse = ""))
- }
- ## Create a vector that associates alphabet with cipher string
- createCipher <- function(cipherString)
- {
- cipherKey <- strsplit(toupper(cipherString), split = "")[[1]]
- if ( length(cipherKey) != 26 ) {
- stop("cipher string must have 26 characters")
- }
- cipher <- vector(mode = "numeric", length = 26+1)
- names(cipher) <- c(LETTERS, " ")
- cipher[LETTERS] <- cipherKey
- cipher[" "] <- " "
- return(cipher)
- }
- ## Apply the cipher to a text
- applyCipher <- function(text, cipherString)
- {
- cipher <- createCipher(cipherString)
- # check is upper case alphabetic or whitespace
- text <- strsplit(toupper(text), split = "")[[1]]
- in.alphabet <- sapply(text, function(x) {isTRUE(grep("[A-Z]", x) == 1)})
- text[!in.alphabet] <- " "
- return(paste(as.character(cipher[text]), collapse = ""))
- }
- ## Decipher text given a cipher string
- applyDecipher <- function(text, cipherString)
- {
- cipher <- createCipher(cipherString)
- decipher <- c(LETTERS, " ")
- names(decipher) <- cipher
- # check is upper case alphabetic or whitespace
- text <- strsplit(toupper(text), split = "")[[1]]
- in.alphabet <- sapply(text, function(x) {isTRUE(grep("[A-Z]", x) == 1)})
- text[!in.alphabet] <- " "
- return(paste(as.character(decipher[text]), collapse = ""))
- }
- ### CODE BREAKING -------------------------------------
- ## COunt the appearance of each letter pair in the text (e.g. "TH" and "ES")
- getLanguageCounts <- function(longtext.path)
- {
- counts <- list()
- # read line by line
- con <- file(longtext.path, "r")
- while( TRUE ) {
- line <- readLines(con, n = 1)
- if ( length(line) == 0 ) break;
- # check is upper case alphabetic or whitespace
- line <- strsplit(toupper(line), split = "")[[1]]
- in.alphabet <- sapply(line, function(x) {isTRUE(grep("[A-Z]", x) == 1)})
- line[!in.alphabet] <- " "
- pairs <- paste(line[-length(line)],line[-1], sep = "")
- pairs <- table(pairs)
- old <- names(pairs) %in% names(counts)
- old.names <- names(pairs)[old]
- new.names <- names(pairs)[!old]
- counts[old.names] <- as.numeric(counts[old.names]) +
- as.numeric(pairs[old.names])
- counts[new.names] <- as.numeric(pairs[new.names])
- }
- close(con)
- return(counts)
- }
- ## same as getLanguageCounts but for shorter text not in file
- getCounts <- function(text)
- {
- # check is upper case alphabetic or whitespace
- text <- strsplit(toupper(text), split = "")[[1]]
- in.alphabet <- sapply(text, function(x) {isTRUE(grep("[A-Z]", x) == 1)})
- text[!in.alphabet] <- " "
- pairs <- paste(text[-length(text)],text[-1], sep = "")
- return(as.list(table(pairs)))
- }
- scoreCipher <- function(text, cipherString, languageCounts )
- {
- #cipher <- createCipher(cipher)
- decrypted <- applyDecipher(text, cipherString)
- counts <- getCounts(decrypted)
- score <- 0
- for (pair in intersect( names(counts), names(languageCounts) ) )
- {
- c <- as.numeric(counts[pair])
- l <- as.numeric(languageCounts[pair])
- score <- score + c*log(l)
- }
- return(score)
- }
- swapCipher <- function( cipherString )
- {
- index <- sample(1:nchar(cipherString), 2)
- cipherVec <- strsplit(toupper(cipherString), split = "")[[1]]
- swap1 <- cipherVec[index[1]]
- cipherVec[index[1]] <- cipherVec[index[2]]
- cipherVec[index[2]] <- swap1
- return(paste(cipherVec, collapse = ""))
- }
- # Biased coin with probability p of heads
- biasedCoin <- function(p)
- {
- return(runif(1,0,1) < p)
- }
- crackMCMC <- function(n, text, languageCounts)
- {
- currentCipher <- randomCipherString()
- memory <- list()
- bestCipher <- currentCipher
- score <- 0
- for ( i in 1:n )
- {
- memory[i] <- currentCipher
- nextCipher <- swapCipher( currentCipher )
- currentScore <- scoreCipher(text, currentCipher, languageCounts)
- nextScore <- scoreCipher(text, nextCipher, languageCounts)
- p.accept <- min(1, exp(nextScore - currentScore))
- if ( currentScore > score )
- {
- bestCipher <- currentCipher
- }
- if ( biasedCoin(p.accept) )
- {
- currentCipher <- nextCipher
- }
- if ( i %% 500 == 0 )
- {
- cat("iter ", i, ": ", substr(applyDecipher(text, currentCipher),1,99),"\n")
- }
- }
- return(list(best = bestCipher, memory = memory))
- }
- ### TESTING ### -----------------------------------------------------
- text <- paste("As Oliver gave this first proof",
- "of the free and proper action of his lungs",
- "the patchwork coverlet which was carelessly",
- "flung over the iron bedstead, rustled;",
- "the pale face of a young woman was raised",
- "feebly from the pillow; and a faint voice",
- "imperfectly articulated the words, Let me",
- "see the child, and die. The surgeon had been",
- "sitting with his face turned towards the fire:",
- "giving the palms of his hands a warm and a rub",
- "alternately. As the young woman spoke, he rose,",
- "and advancing to the bed's head, said, with more",
- "kindness than might have been expected of him: ")
- ## Use the text of any large book here, e.g. War and Peace.
- languageCounts <- getLanguageCounts("warandpeace.txt")
- cipher <- "XEBPROHYAUFTIDSJLKZMWVNGQC"
- encrypted.text <- applyCipher(text, cipher)
- cat("Text To Decode:", encrypted.text, "\n")
- result = crackMCMC(10000, encrypted.text, languageCounts)
- cat("\n", "Decoded Text:", applyDecipher(encrypted.text, result["best"]), "\n")
- cat("MCMC KEY FOUND:", result["best"], "\n")
- cat(" ACTUAL KEY:", cipher, "\n")
Add Comment
Please, Sign In to add comment