Advertisement
dawrehxyz

gringimadre

Sep 9th, 2016
107
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. module F2 where
  2. import Data.List
  3.  
  4. dnaLetter = "ACGT"
  5. data SeqTypes = Dna | Protein deriving (Show, Eq)
  6. data MolSeq = MolSeq String String SeqTypes deriving (Show)
  7.  
  8. string2seq :: String -> String -> MolSeq
  9. string2seq namn sekvens = MolSeq namn sekvens (string2seqHelper namn sekvens)
  10. string2seqHelper :: String -> String -> SeqTypes
  11. string2seqHelper namn sekvens
  12.    | (null sekvens) = Dna
  13.    | (head sekvens) `elem` dnaLetter = string2seqHelper namn (tail sekvens)
  14.    | otherwise = Protein
  15.  
  16. seqName :: MolSeq -> String
  17. seqName (MolSeq namn sekvens typ) = namn
  18.  
  19. seqSequence :: MolSeq -> String
  20. seqSequence (MolSeq namn sekvens typ) = sekvens
  21.  
  22. seqType :: MolSeq -> SeqTypes
  23. seqType (MolSeq namn sekvens typ) = typ
  24.  
  25. seqLength :: MolSeq -> Double
  26. seqLength (MolSeq namn sekvens typ) = sum [ 1 | _ <- sekvens] :: Double
  27.  
  28. getAlpha :: MolSeq -> MolSeq -> Double
  29. getAlpha (MolSeq namn1 sekvens1 typ1) (MolSeq namn2 sekvens2 typ2) = sum [ if x == y then 0 else 1 | (x,y) <- (zip sekvens1 sekvens2)] :: Double
  30. checkValidity :: MolSeq -> MolSeq -> Bool
  31. checkValidity (MolSeq namn1 sekvens1 typ1) (MolSeq namn2 sekvens2 typ2) = if typ1 == typ2 then True else False
  32. eDistanceDNA :: MolSeq -> MolSeq -> Double
  33. eDistanceDNA (MolSeq namn1 sekvens1 typ1) (MolSeq namn2 sekvens2 typ2)
  34.   | alpha > 0.74 = 3.3
  35.   | otherwise = (-3/4)*log(1-(4*alpha/3))
  36.   where alpha = gA/(seqLength (MolSeq namn1 sekvens1 typ1))
  37.         gA = getAlpha (MolSeq namn1 sekvens1 typ1) (MolSeq namn2 sekvens2 typ2)
  38. eDistanceProtein :: MolSeq -> MolSeq -> Double
  39. eDistanceProtein (MolSeq namn1 sekvens1 typ1) (MolSeq namn2 sekvens2 typ2)
  40.   | alpha > 0.94 = 3.7
  41.   | otherwise = (-19/20)*log(1-(20*alpha/19))
  42.   where alpha = gA/(seqLength (MolSeq namn1 sekvens1 typ1))
  43.         gA = getAlpha (MolSeq namn1 sekvens1 typ1) (MolSeq namn2 sekvens2 typ2)
  44. seqDistance :: MolSeq -> MolSeq -> Double
  45. seqDistance molseq1 molseq2 =
  46.   if (checkValidity molseq1 molseq2)
  47.   then (if (seqType molseq1) == Dna then eDistanceDNA molseq1 molseq2 else eDistanceProtein molseq1 molseq2)
  48.   else error "FU BRO"
  49.  
  50. -------------------UPPGIFT2-----------------------------------
  51. -------------------UPPGIFT2-----------------------------------
  52. -------------------UPPGIFT2-----------------------------------
  53.  
  54. data Profile = Profile [[Double]] SeqTypes Int String deriving (Show)
  55.  
  56. molseqs2profile :: String -> [MolSeq] -> Profile
  57. molseqs2profile s mArr = Profile (getM mArr) (getSeqUpp3 mArr) (length mArr) s
  58. -- getM xs = [ getMHelper x | x <- xs ]
  59. -- getMHelper [] = []
  60. -- getMHelper (x:xs) = snd x : getMHelper xs
  61. -- getSeqUpp2 profMatrix = string2seqHelper "notImportnat" [ fst x | x <- (concat profMatrix)]
  62. -- getkukiLength molseqArr = (length (head (makeProfileMatrix molseqArr)))
  63. -- getM :: [MolSeq] -> [[Int]]
  64. getM molseqArr = [ getMHelper (fromIntegral (length molseqArr)) x | x <- xs ]
  65. --getM molseqArr = [ getMHelper x | x <- xs ]
  66.   where xs = (makeProfileMatrix molseqArr)
  67. -- getMHelper :: Int -> [[(Char, Int)]] -> [[Int]]
  68. getMHelper mydiv [] = []
  69. getMHelper mydiv (x:xs) = (fromIntegral (snd x)) / (fromIntegral mydiv) : getMHelper mydiv xs
  70. getSeqUpp3 molseqArr = string2seqHelper "notImportnat" [ fst x | x <- (concat profMatrix)]
  71.   where profMatrix = makeProfileMatrix molseqArr
  72.  
  73.  
  74. nucleotides = "ACGT"
  75. aminoacids = sort "ARNDCEQGHILKMFPSTWYVX"
  76. makeProfileMatrix :: [MolSeq] -> [[(Char, Int)]]
  77. makeProfileMatrix [] = error "Empty sequence list"
  78. makeProfileMatrix sl = res
  79.   where
  80.     t = seqType (head sl)
  81.     defaults =
  82.       if (t == Dna) then
  83.         zip nucleotides (replicate (length nucleotides) 0) -- Rad (i)
  84.       else
  85.         zip aminoacids (replicate (length aminoacids) 0)   -- Rad (ii)
  86.     strs = map seqSequence sl                              -- Rad (iii)
  87.     tmp1 = map (map (\x -> ((head x), (length x))) . group . sort)
  88.                (transpose strs)                            -- Rad (iv)
  89.     equalFst a b = (fst a) == (fst b)
  90.     res = map sort (map (\l -> unionBy equalFst l defaults) tmp1)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement