SHARE
TWEET

Untitled

a guest Nov 28th, 2019 103 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1.  
  2. create_model <- function(file_name, ... ){
  3.   gtp_data <- read.csv(file=file_name, header=FALSE, sep=",")
  4.   gtp_data <- as.matrix(gtp_data)
  5.  
  6.   temp_mat = matrix(data=strsplit(gtp_data[1], ""))
  7.   for (i in 2:length(gtp_data)) {
  8.     temp_mat <- rbind(temp_mat, strsplit(gtp_data[i], ""))
  9.   }
  10.  
  11.   gtp_data <- matrix(unlist(temp_mat), ncol = length(temp_mat[[1]]), byrow = TRUE)
  12.   match_columns = c()
  13.  
  14.   for (i in 1:ncol(gtp_data)) {
  15.     if (length(which(gtp_data[,i] == '-')) < nrow(gtp_data) * 0.5) {
  16.       match_columns = append(match_columns, i)
  17.     }
  18.   }
  19.  
  20.   L <- length(match_columns) + 1
  21.   temp <- c()
  22.  
  23.   for (i in 1:ncol(gtp_data)) {
  24.     temp = append(temp, unique(gtp_data[,i]))
  25.   }
  26.   symbols_list = unique(temp)
  27.   symbols_list = sort(symbols_list)
  28.   symbols_list = symbols_list[-1]
  29.  
  30.   match_emissions = matrix(0, nrow = length(symbols_list), ncol = L)
  31.   insert_emissions = matrix(0, nrow = length(symbols_list), ncol = L)
  32.  
  33.   rownames(match_emissions) <- symbols_list
  34.   rownames(insert_emissions) <- symbols_list
  35.  
  36.   state_transitions = matrix(0, nrow = 9, ncol = L)
  37.   rownames(state_transitions) <- c("M-M", "M-D", "M-I", "I-M", "I-D", "I-I", "D-M", "D-D", "D-I")
  38.  
  39.   was_previous_match = TRUE
  40.   was_previous_insertion = FALSE
  41.   was_previous_deletion = FALSE
  42.   match_number = 0
  43.  
  44.   for (i in 1:nrow(gtp_data)) {
  45.     was_previous_match = TRUE
  46.     was_previous_insertion = FALSE
  47.     was_previous_deletion = FALSE
  48.     match_number = 0
  49.     for (j in 1:ncol(gtp_data)) {
  50.       if (j %in% match_columns) {
  51.         if (gtp_data[i, j] == "-") {
  52.           if (was_previous_match == TRUE) {
  53.             state_transitions["M-D", match_number + 1] = state_transitions["M-D", match_number + 1] + 1
  54.           }
  55.           else if (was_previous_deletion == TRUE) {
  56.             state_transitions["D-D", match_number + 1] = state_transitions["D-D", match_number + 1] + 1
  57.           }
  58.           else {
  59.             state_transitions["I-D", match_number + 1] = state_transitions["I-D", match_number + 1] + 1
  60.           }
  61.           was_previous_match = FALSE
  62.           was_previous_insertion = FALSE
  63.           was_previous_deletion = TRUE
  64.         }
  65.         else {
  66.           if (was_previous_match == TRUE) {
  67.             state_transitions["M-M", match_number + 1] = state_transitions["M-M", match_number + 1] + 1
  68.             match_emissions[gtp_data[i, j], match_number + 2] = match_emissions[gtp_data[i, j], match_number + 2] + 1
  69.           }
  70.           else if (was_previous_deletion == TRUE) {
  71.             state_transitions["D-M", match_number + 1] = state_transitions["D-M", match_number + 1] + 1
  72.             match_emissions[gtp_data[i, j], match_number + 2] = match_emissions[gtp_data[i, j], match_number + 2] + 1
  73.           }
  74.           else {
  75.             state_transitions["I-M", match_number + 1] = state_transitions["I-M", match_number + 1] + 1
  76.             match_emissions[gtp_data[i, j], match_number + 2] = match_emissions[gtp_data[i, j], match_number + 2] + 1
  77.           }
  78.           was_previous_match = TRUE
  79.           was_previous_insertion = FALSE
  80.           was_previous_deletion = FALSE
  81.         }
  82.         match_number = match_number + 1
  83.         if (j == ncol(gtp_data)) {
  84.           if (gtp_data[i, j] == "-") {
  85.             state_transitions["D-M", L] = state_transitions["D-M", L] + 1
  86.           }
  87.           else {
  88.             state_transitions["M-M", L] = state_transitions["M-M", L] + 1
  89.           }
  90.         }
  91.       }
  92.       else {
  93.         if (gtp_data[i, j] != "-") {
  94.           if (i == 1) print(match_number)
  95.           if (was_previous_match == TRUE) {
  96.             state_transitions["M-I", match_number + 1] = state_transitions["M-I", match_number + 1] + 1
  97.             insert_emissions[gtp_data[i, j], match_number + 1] = insert_emissions[gtp_data[i, j], match_number + 1] + 1
  98.           }
  99.           else if (was_previous_deletion == TRUE) {
  100.             state_transitions["D-I", match_number + 1] = state_transitions["D-I", match_number + 1] + 1
  101.             insert_emissions[gtp_data[i, j], match_number + 1] = insert_emissions[gtp_data[i, j], match_number + 1] + 1
  102.           }
  103.           else {
  104.             state_transitions["I-I", match_number + 1] = state_transitions["I-I", match_number + 1] + 1
  105.             insert_emissions[gtp_data[i, j], match_number + 1] = insert_emissions[gtp_data[i, j], match_number + 1] + 1
  106.           }
  107.           was_previous_match = FALSE
  108.           was_previous_insertion = TRUE
  109.           was_previous_deletion = FALSE
  110.           if (j == ncol(gtp_data)) {
  111.             state_transitions["I-M", L] = state_transitions["I-M", L] + 1
  112.           }
  113.         }
  114.         else {
  115.           if (j == ncol(gtp_data)) {
  116.             if (was_previous_match == TRUE) {
  117.               state_transitions["M-M", L] = state_transitions["I-M", L] + 1
  118.             }
  119.             else if (was_previous_deletion == TRUE) {
  120.               state_transitions["D-M", L] = state_transitions["I-M", L] + 1
  121.             }
  122.             else {
  123.               state_transitions["I-M", L] = state_transitions["I-M", L] + 1
  124.             }
  125.           }
  126.         }
  127.       }
  128.     }
  129.   }
  130.  
  131.   for (i in 2:ncol(match_emissions)) {
  132.     if (length(which(match_emissions[,i] == 0)) > 0) {
  133.       match_emissions[,i] = match_emissions[,i] + 1
  134.     }
  135.     match_emissions[,i] = match_emissions[,i]/(sum(match_emissions[,i]))
  136.   }
  137.   for (i in 1:ncol(insert_emissions)) {
  138.     if (length(which(insert_emissions[,i] == 0)) > 0) {
  139.       insert_emissions[,i] = insert_emissions[,i] + 1
  140.     }
  141.     insert_emissions[,i] = insert_emissions[,i]/(sum(insert_emissions[,i]))
  142.   }
  143.   for (i in 1:ncol(state_transitions)) {
  144.     if (length(which(state_transitions[1:3,i] == 0)) > 0) {
  145.       state_transitions[1:3,i] = state_transitions[1:3,i] + 1
  146.     }
  147.     state_transitions[1:3,i] = state_transitions[1:3,i]/(sum(state_transitions[1:3,i]))
  148.   }
  149.   for (i in 1:ncol(state_transitions)) {
  150.     if (length(which(state_transitions[4:6,i] == 0)) > 0) {
  151.       state_transitions[4:6,i] = state_transitions[4:6,i] + 1
  152.     }
  153.     state_transitions[4:6,i] = state_transitions[4:6,i]/(sum(state_transitions[4:6,i]))
  154.   }
  155.   for (i in 2:ncol(state_transitions)) {
  156.     if (length(which(state_transitions[7:9,i] == 0)) > 0) {
  157.       state_transitions[7:9,i] = state_transitions[7:9,i] + 1
  158.     }
  159.     state_transitions[7:9,i] = state_transitions[7:9,i]/(sum(state_transitions[7:9,i]))
  160.   }
  161.  
  162.   return(list(match_emissions, insert_emissions, state_transitions))
  163. }
  164.  
  165. M1 <- create_model("/Users/Pawel/Desktop/test_data.txt")
  166. M2 <- create_model("/Users/Pawel/Desktop/test_data.txt")
  167.  
  168. # 4. Report the length of both M1 and M2 models (1 point)
  169.  
  170. # M1 length
  171. ncol(M1[[1]]) - 1
  172.  
  173. # M2 length
  174. ncol(M2[[1]]) - 1
  175.  
  176. # 5. Report the dimensions of your T, mE and iE for both models (1 point).
  177.  
  178. # M1 dimensions
  179.  
  180. dim(M1[[1]])
  181. dim(M1[[2]])
  182. dim(M1[[3]])
  183.  
  184. # M2 dimensions
  185.  
  186. dim(M2[[1]])
  187. dim(M2[[2]])
  188. dim(M2[[3]])
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Top