Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- module F2 where
- import Data.List
- dnaLetter = "ACGT"
- data SeqTypes = Dna | Protein deriving (Show, Eq)
- data MolSeq = MolSeq String String SeqTypes deriving (Show)
- string2seq :: String -> String -> MolSeq
- string2seq namn sekvens = MolSeq namn sekvens (string2seqHelper namn sekvens)
- string2seqHelper :: String -> String -> SeqTypes
- string2seqHelper namn sekvens
- | (null sekvens) = Dna
- | (head sekvens) `elem` dnaLetter = string2seqHelper namn (tail sekvens)
- | otherwise = Protein
- seqName :: MolSeq -> String
- seqName (MolSeq namn sekvens typ) = namn
- seqSequence :: MolSeq -> String
- seqSequence (MolSeq namn sekvens typ) = sekvens
- seqType :: MolSeq -> SeqTypes
- seqType (MolSeq namn sekvens typ) = typ
- seqLength :: MolSeq -> Double
- seqLength (MolSeq namn sekvens typ) = sum [ 1 | _ <- sekvens] :: Double
- getAlpha :: MolSeq -> MolSeq -> Double
- getAlpha (MolSeq namn1 sekvens1 typ1) (MolSeq namn2 sekvens2 typ2) = sum [ if x == y then 0 else 1 | (x,y) <- (zip sekvens1 sekvens2)] :: Double
- checkValidity :: MolSeq -> MolSeq -> Bool
- checkValidity (MolSeq namn1 sekvens1 typ1) (MolSeq namn2 sekvens2 typ2) = if typ1 == typ2 then True else False
- eDistanceDNA :: MolSeq -> MolSeq -> Double
- eDistanceDNA (MolSeq namn1 sekvens1 typ1) (MolSeq namn2 sekvens2 typ2)
- | alpha > 0.74 = 3.3
- | otherwise = (-3/4)*log(1-(4*alpha/3))
- where alpha = gA/(seqLength (MolSeq namn1 sekvens1 typ1))
- gA = getAlpha (MolSeq namn1 sekvens1 typ1) (MolSeq namn2 sekvens2 typ2)
- eDistanceProtein :: MolSeq -> MolSeq -> Double
- eDistanceProtein (MolSeq namn1 sekvens1 typ1) (MolSeq namn2 sekvens2 typ2)
- | alpha > 0.94 = 3.7
- | otherwise = (-19/20)*log(1-(20*alpha/19))
- where alpha = gA/(seqLength (MolSeq namn1 sekvens1 typ1))
- gA = getAlpha (MolSeq namn1 sekvens1 typ1) (MolSeq namn2 sekvens2 typ2)
- seqDistance :: MolSeq -> MolSeq -> Double
- seqDistance molseq1 molseq2 =
- if (checkValidity molseq1 molseq2)
- then (if (seqType molseq1) == Dna then eDistanceDNA molseq1 molseq2 else eDistanceProtein molseq1 molseq2)
- else error "FU BRO"
- -------------------UPPGIFT2-----------------------------------
- -------------------UPPGIFT2-----------------------------------
- -------------------UPPGIFT2-----------------------------------
- data Profile = Profile [[Double]] SeqTypes Int String deriving (Show)
- molseqs2profile :: String -> [MolSeq] -> Profile
- molseqs2profile s mArr = Profile (getM mArr) (getSeqUpp3 mArr) (length mArr) s
- -- getM xs = [ getMHelper x | x <- xs ]
- -- getMHelper [] = []
- -- getMHelper (x:xs) = snd x : getMHelper xs
- -- getSeqUpp2 profMatrix = string2seqHelper "notImportnat" [ fst x | x <- (concat profMatrix)]
- -- getkukiLength molseqArr = (length (head (makeProfileMatrix molseqArr)))
- -- getM :: [MolSeq] -> [[Int]]
- getM molseqArr = [ getMHelper (fromIntegral (length molseqArr)) x | x <- xs ]
- --getM molseqArr = [ getMHelper x | x <- xs ]
- where xs = (makeProfileMatrix molseqArr)
- -- getMHelper :: Int -> [[(Char, Int)]] -> [[Int]]
- getMHelper mydiv [] = []
- getMHelper mydiv (x:xs) = (fromIntegral (snd x)) / (fromIntegral mydiv) : getMHelper mydiv xs
- getSeqUpp3 molseqArr = string2seqHelper "notImportnat" [ fst x | x <- (concat profMatrix)]
- where profMatrix = makeProfileMatrix molseqArr
- nucleotides = "ACGT"
- aminoacids = sort "ARNDCEQGHILKMFPSTWYVX"
- makeProfileMatrix :: [MolSeq] -> [[(Char, Int)]]
- makeProfileMatrix [] = error "Empty sequence list"
- makeProfileMatrix sl = res
- where
- t = seqType (head sl)
- defaults =
- if (t == Dna) then
- zip nucleotides (replicate (length nucleotides) 0) -- Rad (i)
- else
- zip aminoacids (replicate (length aminoacids) 0) -- Rad (ii)
- strs = map seqSequence sl -- Rad (iii)
- tmp1 = map (map (\x -> ((head x), (length x))) . group . sort)
- (transpose strs) -- Rad (iv)
- equalFst a b = (fst a) == (fst b)
- res = map sort (map (\l -> unionBy equalFst l defaults) tmp1)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement