Advertisement
Guest User

Untitled

a guest
Nov 22nd, 2019
125
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.95 KB | None | 0 0
  1. # BaumWelsh ---------------------------------------------------------------
  2.  
  3.  
  4. # initializations ---------------------------------------------------------
  5.  
  6.  
  7. States = c("1","2")
  8. Letters = c("A","T","C","G")
  9. iter = 0
  10. transitionMatrix = matrix(c(.7,.3,.6,.4),2)
  11. emissionProbabilities=matrix(c(.2,.1,.4,.2,.2,.5,.2,.2),2)
  12. sequence = c("A","G","C","G","C","C","C","T")
  13. initialProbabilities = rep(1/2,2)
  14. initialProbabilities[1]=0.7
  15. initialProbabilities[2]=0.3
  16.  
  17. names(initialProbabilities) = States
  18. dimnames(transitionMatrix)= list(from=States,to=States)
  19. dimnames(emissionProbabilities)= list(states=States,symbols=Letters)
  20.  
  21. delta=1e-09
  22.  
  23. transitionMatrix[is.na(transitionMatrix)] = 0
  24. emissionProbabilities[is.na(emissionProbabilities)] = 0
  25. diff = c()
  26.  
  27.  
  28. # main algorithm ----------------------------------------------------------
  29.  
  30.  
  31. for(i in 1:100)
  32. {
  33. transitionransitionMatrix = transitionMatrix
  34. transitionransitionMatrix[,] = 0
  35. emissionmissionMatrix = emissionProbabilities
  36. emissionmissionMatrix[,] = 0
  37.  
  38.  
  39.  
  40. # forward calculation -----------------------------------------------------
  41.  
  42.  
  43. transitionMatrix[is.na(transitionMatrix)] = 0
  44. emissionProbabilities[is.na(emissionProbabilities)] = 0
  45. sequenceLength = length(sequence)
  46. statesLength = length(States)
  47. fwd = array(NA,c(statesLength,sequenceLength))
  48. dimnames(fwd)= list(states=States,index=1:sequenceLength)
  49. # Initialization
  50. for(state in States)
  51. {
  52. fwd[state,1] =initialProbabilities[state] * emissionProbabilities[state,sequence[1]]
  53. }
  54. # Iteration
  55. for(k in 2:sequenceLength)
  56. {
  57. for(state in States)
  58. {
  59. temp2 = 0
  60. for(previousState in States)
  61. {
  62. temp = fwd[previousState,k-1] * transitionMatrix[previousState,state]
  63. temp2 = temp2 + temp
  64. }
  65. fwd[state,k] = (emissionProbabilities[state,sequence[k]]) * temp2
  66. }
  67. }
  68.  
  69. # bacward calculation -----------------------------------------------------
  70.  
  71. transitionMatrix[is.na(transitionMatrix)] = 0
  72. emissionProbabilities[is.na(emissionProbabilities)] = 0
  73. sequenceLength = length(sequence)
  74. statesLength = length(States)
  75. bwd = array(NA,c(statesLength,sequenceLength))
  76. dimnames(bwd)= list(states=States,index=1:sequenceLength)
  77. # Initialization
  78. for(state in States)
  79. {
  80. bwd[state,sequenceLength] = initialProbabilities[state] * emissionProbabilities[state,"G"]
  81. }
  82. # Iteration
  83. for(k in (sequenceLength-1):1)
  84. {
  85. for(state in States)
  86. {
  87.  
  88. temp2 = 0
  89. for(nextState in States)
  90. {
  91. temp = bwd[nextState,k+1] * transitionMatrix[state,nextState]*emissionProbabilities[nextState,sequence[k+1]]
  92. temp2 = temp2 + temp
  93.  
  94. }
  95. bwd[state,k] = temp2
  96. }
  97. }
  98.  
  99.  
  100. probabilityOperations = fwd[1,length(sequence)]
  101. for(i in 2:length(States))
  102. {
  103. j = fwd[i,length(sequence)]
  104. if(j > - Inf)
  105. {
  106. probabilityOperations = j + probabilityOperations
  107. }
  108. }
  109. for(x in States)
  110. {
  111. for(y in States)
  112. {
  113. temp = fwd[x,1] + transitionMatrix[x,y] +
  114. emissionProbabilities[y,sequence[1+1]] + bwd[y,1+1]
  115. for(i in 2:(length(sequence)-1))
  116. {
  117. j = fwd[x,i] + transitionMatrix[x,y] +
  118. emissionProbabilities[y,sequence[i+1]] + bwd[y,i+1]
  119.  
  120. temp = j + temp
  121.  
  122. }
  123. temp = temp - probabilityOperations
  124. transitionransitionMatrix[x,y] = temp
  125. }
  126. }
  127. for(x in States)
  128. {
  129. for(s in Letters)
  130. {
  131. temp = 0
  132. for(i in 1:length(sequence))
  133. {
  134. if(s == sequence[i])
  135. {
  136. j = fwd[x,i] + bwd[x,i]
  137.  
  138. temp = j + temp
  139.  
  140. }
  141. }
  142. temp = temp - probabilityOperations
  143. emissionmissionMatrix[x,s] = abs(temp)
  144. }
  145. }
  146. bw = (list(transitionransitionMatrix=transitionransitionMatrix,emissionmissionMatrix=emissionmissionMatrix))
  147.  
  148.  
  149. transition = bw$transitionransitionMatrix
  150. emission = bw$emissionmissionMatrix
  151.  
  152. transition[!is.na(transitionMatrix)] = transition[!is.na(transitionMatrix)]
  153. emission[!is.na(emissionProbabilities)] = emission[!is.na(emissionProbabilities)]
  154.  
  155. transition = (transition/apply(transition,1,sum))
  156. emission = (emission/apply(emission,1,sum))
  157. d = sqrt(sum((transitionMatrix-transition)^2)) + sqrt(sum((emissionProbabilities-emission)^2))
  158. diff = c(diff, d)
  159. transitionMatrix = transition
  160. emissionProbabilities = emission
  161. iter = iter + 1
  162. print(iter)
  163.  
  164. if(d < delta)
  165. {
  166. break
  167. }
  168. }
  169. transitionMatrix[is.na(transitionMatrix)] = NA
  170. emissionProbabilities[is.na(emissionProbabilities)] = NA
  171.  
  172. # final answer ------------------------------------------------------------
  173.  
  174.  
  175. finalBW = (list(transitionransitionMatrix=transitionMatrix,emissionmissionProbabilities=emissionProbabilities,difference=diff))
  176. finalBW$transitionransitionMatrix
  177. finalBW$emissionmissionProbabilities
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement