Advertisement
Guest User

Untitled

a guest
May 28th, 2015
212
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.17 KB | None | 0 0
  1. module GlobalAlignmentProblem(
  2. solve
  3. ) where
  4.  
  5. import Data.Array
  6. import qualified Data.Map as M
  7.  
  8. data Input = Input {string1 :: String,
  9. string2 :: String
  10. }
  11. deriving Show
  12.  
  13. data Pointer = Unoccupied | DEL | INS | SCORE deriving (Show, Eq)
  14.  
  15. defaultInput :: Input
  16. defaultInput = Input "PLEASANTLY" "MEANLY"
  17.  
  18. solve :: [String] -> IO ()
  19. solve [] = print $ globalAlignmentProblem $ defaultInput
  20. solve [inputFile] = do
  21. input <- parseFile inputFile
  22. print $ globalAlignmentProblem $ input
  23. solve [inputFile, outputFile] = do
  24. input <- parseFile inputFile
  25. let (num, (one, two)) = globalAlignmentProblem input
  26. writeFile outputFile $ show num
  27. appendFile outputFile "\n"
  28. appendFile outputFile $ one
  29. appendFile outputFile "\n"
  30. appendFile outputFile $ two
  31.  
  32. parseFile :: String -> IO Input
  33. parseFile file = do
  34. contents <- readFile file
  35. let [str1,str2] = lines contents
  36. return $ Input str1 str2
  37.  
  38. {-globalAlignmentProblem :: Input -> IO (Int, (String, String))-}
  39. {-globalAlignmentProblem input = do-}
  40. {-print str1-}
  41. {-print str2-}
  42. {-print $ matrix!0-}
  43. {-print matrix-}
  44. {-print backtrack-}
  45. {-print $ outputLCS backtrack ("","") (str1, str2) len1 len2-}
  46. {-return (1, ("asd", "asd"))-}
  47. globalAlignmentProblem :: Input -> (Int, (String, String))
  48. globalAlignmentProblem input = ((matrix!len1)!len2, outputLCS backtrack ("","") (str1, str2) len1 len2)
  49. where
  50. str1 = string1 input
  51. str2 = string2 input
  52. len1 = length str1
  53. len2 = length str2
  54. matrix = listArray (0, len1) (map populateMatrixRow [0..len1])
  55. populateMatrixRow i = listArray (0, len2) [fillCell i j | j <- [0.. len2]]
  56. fillCell i j
  57. | i == 0 = if j == 0 then 0 else (-5)*j
  58. | j == 0 = if i == 0 then 0 else (-5)*i
  59. | otherwise = maximum [matrix!(i-1)!j - 5, (matrix!i)!(j-1) - 5, (matrix!(i-1))!(j-1) + blosum62 (str1!!(i-1), str2!!(j-1))]
  60. backtrack = listArray (0, len1) (map populateBacktrackRow [0.. len1])
  61. populateBacktrackRow i = listArray(0, len2) [fillBacktrack i j | j <- [0.. len2]]
  62. fillBacktrack i j
  63. | i == 0 = if j == 0 then Unoccupied else INS
  64. | j == 0 = if i == 0 then Unoccupied else DEL
  65. | (matrix!i)!j == (matrix!(i-1))!j - 5 = DEL
  66. | (matrix!i)!j == (matrix!i)!(j-1) - 5 = INS
  67. | (matrix!i)!j == ((matrix!(i-1))!(j-1) + blosum62 (str1!!(i-1), str2!!(j-1))) = SCORE
  68. | otherwise = Unoccupied
  69.  
  70. outputLCS :: Array Int (Array Int Pointer) -> (String, String) -> (String, String) -> Int -> Int -> (String, String)
  71. outputLCS backtrack alignment v i j
  72. | i == 0 && j == 0 = alignment
  73. | (backtrack!i)!j == DEL = outputLCS backtrack ((str1!!(i-1)):curr1, '-':curr2) v (i-1) j
  74. | (backtrack!i)!j == INS = outputLCS backtrack ('-':curr1, str2!!(j-1):curr2) v i (j-1)
  75. | (backtrack!i)!j == SCORE = outputLCS backtrack (str1!!(i-1):curr1, str2!!(j-1):curr2) v (i-1) (j-1)
  76. | otherwise = alignment
  77. where
  78. curr1 = fst alignment
  79. curr2 = snd alignment
  80. str1 = fst v
  81. str2 = snd v
  82.  
  83. blosum62 :: (Char, Char) -> Int
  84. blosum62 m = M.findWithDefault (-4) m $ M.fromList
  85. [(('A','A'),4), (('A','B'),-2), (('A','C'),0), (('A','D'),-2),
  86. (('A','E'),-1), (('A','F'),-2), (('A','G'),0), (('A','H'),-2),
  87. (('A','I'),-1), (('A','K'),-1), (('A','L'),-1), (('A','M'),-1),
  88. (('A','N'),-2), (('A','P'),-1), (('A','Q'),-1), (('A','R'),-1),
  89. (('A','S'),1), (('A','T'),0), (('A','V'),0), (('A','W'),-3),
  90. (('A','X'),-1), (('A','Y'),-2), (('A','Z'),-1), (('B','A'),-2),
  91. (('B','B'),4), (('B','C'),-3), (('B','D'),4), (('B','E'),1),
  92. (('B','F'),-3), (('B','G'),-1), (('B','H'),0), (('B','I'),-3),
  93. (('B','K'),0), (('B','L'),-4), (('B','M'),-3), (('B','N'),3),
  94. (('B','P'),-2), (('B','Q'),0), (('B','R'),-1), (('B','S'),0),
  95. (('B','T'),-1), (('B','V'),-3), (('B','W'),-4), (('B','X'),-1),
  96. (('B','Y'),-3), (('B','Z'),1), (('C','A'),0), (('C','B'),-3),
  97. (('C','C'),9), (('C','D'),-3), (('C','E'),-4), (('C','F'),-2),
  98. (('C','G'),-3), (('C','H'),-3), (('C','I'),-1), (('C','K'),-3),
  99. (('C','L'),-1), (('C','M'),-1), (('C','N'),-3), (('C','P'),-3),
  100. (('C','Q'),-3), (('C','R'),-3), (('C','S'),-1), (('C','T'),-1),
  101. (('C','V'),-1), (('C','W'),-2), (('C','X'),-1), (('C','Y'),-2),
  102. (('C','Z'),-3), (('D','A'),-2), (('D','B'),4), (('D','C'),-3),
  103. (('D','D'),6), (('D','E'),2), (('D','F'),-3), (('D','G'),-1),
  104. (('D','H'),-1), (('D','I'),-3), (('D','K'),-1), (('D','L'),-4),
  105. (('D','M'),-3), (('D','N'),1), (('D','P'),-1), (('D','Q'),0),
  106. (('D','R'),-2), (('D','S'),0), (('D','T'),-1), (('D','V'),-3),
  107. (('D','W'),-4), (('D','X'),-1), (('D','Y'),-3), (('D','Z'),1),
  108. (('E','A'),-1), (('E','B'),1), (('E','C'),-4), (('E','D'),2),
  109. (('E','E'),5), (('E','F'),-3), (('E','G'),-2), (('E','H'),0),
  110. (('E','I'),-3), (('E','K'),1), (('E','L'),-3), (('E','M'),-2),
  111. (('E','N'),0), (('E','P'),-1), (('E','Q'),2), (('E','R'),0),
  112. (('E','S'),0), (('E','T'),-1), (('E','V'),-2), (('E','W'),-3),
  113. (('E','X'),-1), (('E','Y'),-2), (('E','Z'),4), (('F','A'),-2),
  114. (('F','B'),-3), (('F','C'),-2), (('F','D'),-3), (('F','E'),-3),
  115. (('F','F'),6), (('F','G'),-3), (('F','H'),-1), (('F','I'),0),
  116. (('F','K'),-3), (('F','L'),0), (('F','M'),0), (('F','N'),-3),
  117. (('F','P'),-4), (('F','Q'),-3), (('F','R'),-3), (('F','S'),-2),
  118. (('F','T'),-2), (('F','V'),-1), (('F','W'),1), (('F','X'),-1),
  119. (('F','Y'),3), (('F','Z'),-3), (('G','A'),0), (('G','B'),-1),
  120. (('G','C'),-3), (('G','D'),-1), (('G','E'),-2), (('G','F'),-3),
  121. (('G','G'),6), (('G','H'),-2), (('G','I'),-4), (('G','K'),-2),
  122. (('G','L'),-4), (('G','M'),-3), (('G','N'),0), (('G','P'),-2),
  123. (('G','Q'),-2), (('G','R'),-2), (('G','S'),0), (('G','T'),-2),
  124. (('G','V'),-3), (('G','W'),-2), (('G','X'),-1), (('G','Y'),-3),
  125. (('G','Z'),-2), (('H','A'),-2), (('H','B'),0), (('H','C'),-3),
  126. (('H','D'),-1), (('H','E'),0), (('H','F'),-1), (('H','G'),-2),
  127. (('H','H'),8), (('H','I'),-3), (('H','K'),-1), (('H','L'),-3),
  128. (('H','M'),-2), (('H','N'),1), (('H','P'),-2), (('H','Q'),0),
  129. (('H','R'),0), (('H','S'),-1), (('H','T'),-2), (('H','V'),-3),
  130. (('H','W'),-2), (('H','X'),-1), (('H','Y'),2), (('H','Z'),0),
  131. (('I','A'),-1), (('I','B'),-3), (('I','C'),-1), (('I','D'),-3),
  132. (('I','E'),-3), (('I','F'),0), (('I','G'),-4), (('I','H'),-3),
  133. (('I','I'),4), (('I','K'),-3), (('I','L'),2), (('I','M'),1),
  134. (('I','N'),-3), (('I','P'),-3), (('I','Q'),-3), (('I','R'),-3),
  135. (('I','S'),-2), (('I','T'),-1), (('I','V'),3), (('I','W'),-3),
  136. (('I','X'),-1), (('I','Y'),-1), (('I','Z'),-3), (('K','A'),-1),
  137. (('K','B'),0), (('K','C'),-3), (('K','D'),-1), (('K','E'),1),
  138. (('K','F'),-3), (('K','G'),-2), (('K','H'),-1), (('K','I'),-3),
  139. (('K','K'),5), (('K','L'),-2), (('K','M'),-1), (('K','N'),0),
  140. (('K','P'),-1), (('K','Q'),1), (('K','R'),2), (('K','S'),0),
  141. (('K','T'),-1), (('K','V'),-2), (('K','W'),-3), (('K','X'),-1),
  142. (('K','Y'),-2), (('K','Z'),1), (('L','A'),-1), (('L','B'),-4),
  143. (('L','C'),-1), (('L','D'),-4), (('L','E'),-3), (('L','F'),0),
  144. (('L','G'),-4), (('L','H'),-3), (('L','I'),2), (('L','K'),-2),
  145. (('L','L'),4), (('L','M'),2), (('L','N'),-3), (('L','P'),-3),
  146. (('L','Q'),-2), (('L','R'),-2), (('L','S'),-2), (('L','T'),-1),
  147. (('L','V'),1), (('L','W'),-2), (('L','X'),-1), (('L','Y'),-1),
  148. (('L','Z'),-3), (('M','A'),-1), (('M','B'),-3), (('M','C'),-1),
  149. (('M','D'),-3), (('M','E'),-2), (('M','F'),0), (('M','G'),-3),
  150. (('M','H'),-2), (('M','I'),1), (('M','K'),-1), (('M','L'),2),
  151. (('M','M'),5), (('M','N'),-2), (('M','P'),-2), (('M','Q'),0),
  152. (('M','R'),-1), (('M','S'),-1), (('M','T'),-1), (('M','V'),1),
  153. (('M','W'),-1), (('M','X'),-1), (('M','Y'),-1), (('M','Z'),-1),
  154. (('N','A'),-2), (('N','B'),3), (('N','C'),-3), (('N','D'),1),
  155. (('N','E'),0), (('N','F'),-3), (('N','G'),0), (('N','H'),1),
  156. (('N','I'),-3), (('N','K'),0), (('N','L'),-3), (('N','M'),-2),
  157. (('N','N'),6), (('N','P'),-2), (('N','Q'),0), (('N','R'),0),
  158. (('N','S'),1), (('N','T'),0), (('N','V'),-3), (('N','W'),-4),
  159. (('N','X'),-1), (('N','Y'),-2), (('N','Z'),0), (('P','A'),-1),
  160. (('P','B'),-2), (('P','C'),-3), (('P','D'),-1), (('P','E'),-1),
  161. (('P','F'),-4), (('P','G'),-2), (('P','H'),-2), (('P','I'),-3),
  162. (('P','K'),-1), (('P','L'),-3), (('P','M'),-2), (('P','N'),-2),
  163. (('P','P'),7), (('P','Q'),-1), (('P','R'),-2), (('P','S'),-1),
  164. (('P','T'),-1), (('P','V'),-2), (('P','W'),-4), (('P','X'),-1),
  165. (('P','Y'),-3), (('P','Z'),-1), (('Q','A'),-1), (('Q','B'),0),
  166. (('Q','C'),-3), (('Q','D'),0), (('Q','E'),2), (('Q','F'),-3),
  167. (('Q','G'),-2), (('Q','H'),0), (('Q','I'),-3), (('Q','K'),1),
  168. (('Q','L'),-2), (('Q','M'),0), (('Q','N'),0), (('Q','P'),-1),
  169. (('Q','Q'),5), (('Q','R'),1), (('Q','S'),0), (('Q','T'),-1),
  170. (('Q','V'),-2), (('Q','W'),-2), (('Q','X'),-1), (('Q','Y'),-1),
  171. (('Q','Z'),3), (('R','A'),-1), (('R','B'),-1), (('R','C'),-3),
  172. (('R','D'),-2), (('R','E'),0), (('R','F'),-3), (('R','G'),-2),
  173. (('R','H'),0), (('R','I'),-3), (('R','K'),2), (('R','L'),-2),
  174. (('R','M'),-1), (('R','N'),0), (('R','P'),-2), (('R','Q'),1),
  175. (('R','R'),5), (('R','S'),-1), (('R','T'),-1), (('R','V'),-3),
  176. (('R','W'),-3), (('R','X'),-1), (('R','Y'),-2), (('R','Z'),0),
  177. (('S','A'),1), (('S','B'),0), (('S','C'),-1), (('S','D'),0),
  178. (('S','E'),0), (('S','F'),-2), (('S','G'),0), (('S','H'),-1),
  179. (('S','I'),-2), (('S','K'),0), (('S','L'),-2), (('S','M'),-1),
  180. (('S','N'),1), (('S','P'),-1), (('S','Q'),0), (('S','R'),-1),
  181. (('S','S'),4), (('S','T'),1), (('S','V'),-2), (('S','W'),-3),
  182. (('S','X'),-1), (('S','Y'),-2), (('S','Z'),0), (('T','A'),0),
  183. (('T','B'),-1), (('T','C'),-1), (('T','D'),-1), (('T','E'),-1),
  184. (('T','F'),-2), (('T','G'),-2), (('T','H'),-2), (('T','I'),-1),
  185. (('T','K'),-1), (('T','L'),-1), (('T','M'),-1), (('T','N'),0),
  186. (('T','P'),-1), (('T','Q'),-1), (('T','R'),-1), (('T','S'),1),
  187. (('T','T'),5), (('T','V'),0), (('T','W'),-2), (('T','X'),-1),
  188. (('T','Y'),-2), (('T','Z'),-1), (('V','A'),0), (('V','B'),-3),
  189. (('V','C'),-1), (('V','D'),-3), (('V','E'),-2), (('V','F'),-1),
  190. (('V','G'),-3), (('V','H'),-3), (('V','I'),3), (('V','K'),-2),
  191. (('V','L'),1), (('V','M'),1), (('V','N'),-3), (('V','P'),-2),
  192. (('V','Q'),-2), (('V','R'),-3), (('V','S'),-2), (('V','T'),0),
  193. (('V','V'),4), (('V','W'),-3), (('V','X'),-1), (('V','Y'),-1),
  194. (('V','Z'),-2), (('W','A'),-3), (('W','B'),-4), (('W','C'),-2),
  195. (('W','D'),-4), (('W','E'),-3), (('W','F'),1), (('W','G'),-2),
  196. (('W','H'),-2), (('W','I'),-3), (('W','K'),-3), (('W','L'),-2),
  197. (('W','M'),-1), (('W','N'),-4), (('W','P'),-4), (('W','Q'),-2),
  198. (('W','R'),-3), (('W','S'),-3), (('W','T'),-2), (('W','V'),-3),
  199. (('W','W'),11), (('W','X'),-1), (('W','Y'),2), (('W','Z'),-3),
  200. (('X','A'),-1), (('X','B'),-1), (('X','C'),-1), (('X','D'),-1),
  201. (('X','E'),-1), (('X','F'),-1), (('X','G'),-1), (('X','H'),-1),
  202. (('X','I'),-1), (('X','K'),-1), (('X','L'),-1), (('X','M'),-1),
  203. (('X','N'),-1), (('X','P'),-1), (('X','Q'),-1), (('X','R'),-1),
  204. (('X','S'),-1), (('X','T'),-1), (('X','V'),-1), (('X','W'),-1),
  205. (('X','X'),-1), (('X','Y'),-1), (('X','Z'),-1), (('Y','A'),-2),
  206. (('Y','B'),-3), (('Y','C'),-2), (('Y','D'),-3), (('Y','E'),-2),
  207. (('Y','F'),3), (('Y','G'),-3), (('Y','H'),2), (('Y','I'),-1),
  208. (('Y','K'),-2), (('Y','L'),-1), (('Y','M'),-1), (('Y','N'),-2),
  209. (('Y','P'),-3), (('Y','Q'),-1), (('Y','R'),-2), (('Y','S'),-2),
  210. (('Y','T'),-2), (('Y','V'),-1), (('Y','W'),2), (('Y','X'),-1),
  211. (('Y','Y'),7), (('Y','Z'),-2), (('Z','A'),-1), (('Z','B'),1),
  212. (('Z','C'),-3), (('Z','D'),1), (('Z','E'),4), (('Z','F'),-3),
  213. (('Z','G'),-2), (('Z','H'),0), (('Z','I'),-3), (('Z','K'),1),
  214. (('Z','L'),-3), (('Z','M'),-1), (('Z','N'),0), (('Z','P'),-1),
  215. (('Z','Q'),3), (('Z','R'),0), (('Z','S'),0), (('Z','T'),-1),
  216. (('Z','V'),-2), (('Z','W'),-3), (('Z','X'),-1), (('Z','Y'),-2),
  217. (('Z','Z'),4)]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement