Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # BaumWelsh ---------------------------------------------------------------
- # initializations ---------------------------------------------------------
- States = c("1","2")
- Letters = c("A","T","C","G")
- iter = 0
- transitionMatrix = matrix(c(.7,.3,.6,.4),2)
- emissionProbabilities=matrix(c(.2,.1,.4,.2,.2,.5,.2,.2),2)
- sequence = c("A","G","C","G","C","C","C","T")
- initialProbabilities = rep(1/2,2)
- initialProbabilities[1]=0.7
- initialProbabilities[2]=0.3
- names(initialProbabilities) = States
- dimnames(transitionMatrix)= list(from=States,to=States)
- dimnames(emissionProbabilities)= list(states=States,symbols=Letters)
- delta=1e-09
- transitionMatrix[is.na(transitionMatrix)] = 0
- emissionProbabilities[is.na(emissionProbabilities)] = 0
- diff = c()
- # main algorithm ----------------------------------------------------------
- for(i in 1:100)
- {
- transitionransitionMatrix = transitionMatrix
- transitionransitionMatrix[,] = 0
- emissionmissionMatrix = emissionProbabilities
- emissionmissionMatrix[,] = 0
- # forward calculation -----------------------------------------------------
- transitionMatrix[is.na(transitionMatrix)] = 0
- emissionProbabilities[is.na(emissionProbabilities)] = 0
- sequenceLength = length(sequence)
- statesLength = length(States)
- fwd = array(NA,c(statesLength,sequenceLength))
- dimnames(fwd)= list(states=States,index=1:sequenceLength)
- # Initialization
- for(state in States)
- {
- fwd[state,1] =initialProbabilities[state] * emissionProbabilities[state,sequence[1]]
- }
- # Iteration
- for(k in 2:sequenceLength)
- {
- for(state in States)
- {
- temp2 = 0
- for(previousState in States)
- {
- temp = fwd[previousState,k-1] * transitionMatrix[previousState,state]
- temp2 = temp2 + temp
- }
- fwd[state,k] = (emissionProbabilities[state,sequence[k]]) * temp2
- }
- }
- # bacward calculation -----------------------------------------------------
- transitionMatrix[is.na(transitionMatrix)] = 0
- emissionProbabilities[is.na(emissionProbabilities)] = 0
- sequenceLength = length(sequence)
- statesLength = length(States)
- bwd = array(NA,c(statesLength,sequenceLength))
- dimnames(bwd)= list(states=States,index=1:sequenceLength)
- # Initialization
- for(state in States)
- {
- bwd[state,sequenceLength] = initialProbabilities[state] * emissionProbabilities[state,"G"]
- }
- # Iteration
- for(k in (sequenceLength-1):1)
- {
- for(state in States)
- {
- temp2 = 0
- for(nextState in States)
- {
- temp = bwd[nextState,k+1] * transitionMatrix[state,nextState]*emissionProbabilities[nextState,sequence[k+1]]
- temp2 = temp2 + temp
- }
- bwd[state,k] = temp2
- }
- }
- probabilityOperations = fwd[1,length(sequence)]
- for(i in 2:length(States))
- {
- j = fwd[i,length(sequence)]
- if(j > - Inf)
- {
- probabilityOperations = j + probabilityOperations
- }
- }
- for(x in States)
- {
- for(y in States)
- {
- temp = fwd[x,1] + transitionMatrix[x,y] +
- emissionProbabilities[y,sequence[1+1]] + bwd[y,1+1]
- for(i in 2:(length(sequence)-1))
- {
- j = fwd[x,i] + transitionMatrix[x,y] +
- emissionProbabilities[y,sequence[i+1]] + bwd[y,i+1]
- temp = j + temp
- }
- temp = temp - probabilityOperations
- transitionransitionMatrix[x,y] = temp
- }
- }
- for(x in States)
- {
- for(s in Letters)
- {
- temp = 0
- for(i in 1:length(sequence))
- {
- if(s == sequence[i])
- {
- j = fwd[x,i] + bwd[x,i]
- temp = j + temp
- }
- }
- temp = temp - probabilityOperations
- emissionmissionMatrix[x,s] = abs(temp)
- }
- }
- bw = (list(transitionransitionMatrix=transitionransitionMatrix,emissionmissionMatrix=emissionmissionMatrix))
- transition = bw$transitionransitionMatrix
- emission = bw$emissionmissionMatrix
- transition[!is.na(transitionMatrix)] = transition[!is.na(transitionMatrix)]
- emission[!is.na(emissionProbabilities)] = emission[!is.na(emissionProbabilities)]
- transition = (transition/apply(transition,1,sum))
- emission = (emission/apply(emission,1,sum))
- d = sqrt(sum((transitionMatrix-transition)^2)) + sqrt(sum((emissionProbabilities-emission)^2))
- diff = c(diff, d)
- transitionMatrix = transition
- emissionProbabilities = emission
- iter = iter + 1
- print(iter)
- if(d < delta)
- {
- break
- }
- }
- transitionMatrix[is.na(transitionMatrix)] = NA
- emissionProbabilities[is.na(emissionProbabilities)] = NA
- # final answer ------------------------------------------------------------
- finalBW = (list(transitionransitionMatrix=transitionMatrix,emissionmissionProbabilities=emissionProbabilities,difference=diff))
- finalBW$transitionransitionMatrix
- finalBW$emissionmissionProbabilities
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement