Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {-# LANGUAGE ScopedTypeVariables #-}
- {-# LANGUAGE TypeOperators #-}
- {-# LANGUAGE FlexibleContexts #-}
- {-# LANGUAGE FlexibleInstances #-}
- {-# LANGUAGE CPP #-}
- -- Naive translation of shen_anova1_batch.m into Accelerate.
- module Main (main) where
- import Data.Array.Accelerate as A
- import Data.Array.Accelerate.CUDA as R
- import Prelude as P
- import Data.List hiding (group)
- import Data.List.Split (splitEvery)
- -- import Data.Set as S
- import Control.Monad (when, mapM_, forM_)
- import Data.Array.Unboxed (IArray, UArray, elems, listArray)
- import System.Random
- type Matrix a = Array DIM2 a
- type EI = Exp Int -- Shorthand for the lazy.
- runE x = R.run (unit x) -- Run for scalars
- dbg = False -- TOGGLE me.
- --------------------------------------------------------------------------------
- -- Test harness -- set up and run the test.
- test_anova1 :: Int -> Int -> IO ()
- test_anova1 nsbj width = do
- let
- nsnp = 1
- nsbj' = constant nsbj
- area = constant (width*width)
- -- imdata = rand( width*width, nsbj )
- imdata :: Acc (Matrix Float) = generate (index2 area nsbj') inorder
- inorder x = let (r, c) = unlift (unindex2 x) in
- A.fromIntegral (r + c * area) -- Count by ones in row major..
- -- Ammendment -- for now fixing nsnp == 1 and just doing a vector rather than a matrix for snpdata:
- snpdata :: Acc (Vector Int)
- snpdata = generate (index1 nsbj') ((`mod` 3) . unindex1) -- Return 0,1, or 2
- putStrLn "=== Accelerate naive anova1 batch ==="
- putStrLn$ "Image width: "++ show width
- putStrLn$ "SNP category membership across patients:"++ pp(R.run snpdata)
- -- tic
- -- forM [0 .. nsnp-1] $ \ i -> do
- anova1_batch imdata snpdata
- -- fmap1(:,i) = F; ?????
- -- toc
- return ()
- ----------------------------------------------------------------------------------------------------
- -- Shen's implementation of anova1 in a batch mode
- -- 'voxels' is a matrix
- -- 'group' is a column vector
- -- 'voxel' columns have the same group memebership
- anova1_batch :: Acc (Matrix Float) -> Acc (Vector Int) -> IO ()
- -- RRN: I believe group should represent a set of individual's single
- -- nucleotide values at a specific location (a specific SNP).
- -- 'voxel's represents a /flat/ row of voxels for each individual in
- -- the group. Here we analyze how variance at each voxel correlates
- -- with the different categories of individuals created by 'group'.
- --
- -- Specifically, in this case there are three /subgroups/ within
- -- 'group', corresponding to snp=0, snp=1, snp=2.
- anova1_batch voxels group = do
- let d = shape voxels
- -- n = total number of voxels in each image
- -- r = number of parallel anova runs == number of individuals in group
- (n::EI, r::EI) = unlift$ unindex2 d
- -- FIXME:
- -- uniqueSnps = unique (R.run group) -- 'g': All the unique SNP settings.
- -- numUniqueSNPs = size g -- 'p': How many unique values.
- uniqueSNPs = [0,1,2] -- TEMP, FIXME -- hardcoding
- numUniqueSNPs = 3 -- TEMP, FIXME -- hardcoding
- -- NOTE: We could try to keep everything within Accelerate, but
- -- for the "outermost" loop here, over different SNP values, we
- -- allow ourselves the rconvenience of using lists, and keeping
- -- things in regular Haskell.
- -- First, how much variance is there across ALL voxels (per sample):
- var_v = variance1of2 voxels :: Acc (Vector Float)
- ss_T = A.map (df_T *) var_v;
- -- Next, we compute per-group variance.
- ------------------------------------------------------------
- -- Group is a mapping between index->groupID, and we effectively want to invert it
- -- to get groupID->indices. To accomplish that we first explicitly attach indices:
- zippedGroup = A.zip (iota r) group
- -- For each observed SNP value {0,1,2} ...
- idxs = P.map (\ snpVal ->
- let
- -- Here we want the index of *all* of the voxels belonging to
- -- individuals with a particular snpVal.
- idx = A.map A.fst $ filterAcc isMatch zippedGroup
- isMatch pr = let (_::EI,thisSnp::EI) = unlift pr in
- thisSnp ==* constant snpVal
- in idx
- ) uniqueSNPs
- -- Now we're ready to extract the set of voxels that belong to this group of patients:
- groupedVoxels :: [Acc (Matrix Float)]
- -- rowGroups = selectRows idx voxels
- groupedVoxels = P.map (`selectRows` voxels) idxs
- -- Whew, now we have finally achieved unflat = X(idx,:) in the original code....
- -- Next we look at variance within each voxel for individuals in THIS snp category:
- -- The result is a per-voxel variance:
- -- variances = A.map (* (A.fromIntegral$ newCols - 1)) (variance2of2 unflat)
- variances :: [Acc (Vector Float)] = P.map variance2of2 groupedVoxels
- -- What is the size of each group, minus 1:
- lens :: [Exp Float] = P.map ((\x -> A.fromIntegral (x - 1)) . unindex1 . shape) idxs
- -- Do a sum reduction, the result is an array of per-voxel statistics:
- ss_W = foldl1 (.+) (P.zipWith (liftScalar (.*)) lens variances)
- ------------------------------------------------------------
- df_B :: Exp Float = A.fromIntegral$ numUniqueSNPs - 1
- df_W :: Exp Float = A.fromIntegral$ n - numUniqueSNPs
- df_T :: Exp Float = A.fromIntegral$ n - 1
- -- Well, this is quite verbose due to there being no scalar/array overloading:
- ms_W = liftScalarR (./) ss_W df_W
- ss_B = ss_T .- ss_W;
- ms_B = liftScalarR (./) ss_B df_B
- f_statistic = ms_B ./ ms_W
- putStrLn$ "Finally, here's the F statistic: " ++ pp(run ss_W)
- return ()
- -- TEMP -- hardcoding this to a specific dimensionality until I fix the general version below
- -- This sums along the RIGHT of two dimensions (INNERMOST).
- -- If interpreted as a (row,column) matrix, this collapses each row.
- variance2of2 :: (Elt a, IsFloating a)
- => Acc (Array DIM2 a) -> Acc (Array DIM1 a)
- variance2of2 arr =
- A.map (/ (denom-1))
- (A.zipWith (-) sum2
- (A.zipWith (*) mean sum1))
- where
- mean = A.map (/ denom) sum1
- denom = A.fromIntegral rows
- Z :. (rows::EI) :. (cols::EI) = unlift sh1
- sh1 :: Exp ( Z :. Int :. Int) = shape arr
- -- Sum along LEFT dimension:
- sum1 = fold (+) 0 arr
- sum2 = fold (+) 0 sqrs
- sqrs = A.zipWith (*) arr arr
- -- TEMP -- hardcoding this to a specific dimensionality until I fix the general version below
- -- This iterates along the LEFT of two dimensions (OUTERMOST).
- -- If interpreted as a (row,column) matrix, this collapses each column,
- -- which is the same behavior as the var() function in matlab applied to a matrix.
- variance1of2 :: (Elt a, IsFloating a)
- => Acc (Array DIM2 a) -> Acc (Array DIM1 a)
- variance1of2 arr =
- A.map (/ (denom-1))
- (A.zipWith (-) sum2
- (A.zipWith (*) mean sum1))
- where
- mean = A.map (/ denom) sum1
- denom = A.fromIntegral rows
- Z :. (rows::EI) :. (cols::EI) = unlift sh1
- sh1 :: Exp ( Z :. Int :. Int) = shape arr
- -- Sum along LEFT dimension:
- swapped = swap2Dims arr
- sum1 = fold (+) 0 swapped
- sum2 = fold (+) 0 sqrs
- sqrs = A.zipWith (*) swapped swapped
- -- A swap function fixed to exactly two dimensions.
- swap2Dims :: (Elt e, Num e) => Acc (Matrix e) -> Acc (Matrix e)
- swap2Dims a =
- backpermute new_extent swap2 a -- Perform a gather.
- where
- new_extent = swap2 (shape a)
- swap2 :: Exp DIM2 -> Exp DIM2
- swap2 ind = let (Z :.i2 :.i1) = unlift ind in
- lift (Z :. (i1::EI) :. (i2::EI))
- ----------------------------------------------------------------------------------------------------
- -- Let's print matrices nicely.
- padleft n str | length str >= n = str
- padleft n str | otherwise = P.take (n - length str) (repeat ' ') ++ str
- class NiceShow a where
- pp :: a -> String
- instance Show a => NiceShow (Array DIM1 a) where
- pp arr =
- capends$ concat$
- intersperse " " $
- P.map (padleft maxpad) ls
- where
- ls = P.map show $ toList arr
- maxpad = maximum$ P.map length ls
- capends x = "| "++x++" |"
- -- This could be much more efficient:
- instance Show a => NiceShow (Array DIM2 a) where
- pp arr = concat $
- intersperse "\n" $
- P.map (capends .
- concat .
- intersperse " " .
- P.map (padleft maxpad))
- rowls
- where (Z :. rows :. cols) = arrayShape arr
- ls = P.map show $ toList arr
- maxpad = maximum$ P.map length ls
- rowls = splitEvery cols ls
- ----------------------------------------------------------------------------------------------------
- -- General Utility functions:
- ----------------------------------------------------------------------------------------------------
- -- Project a subset of the columns (second dim) from a two dimensional matrix.
- --
- -- @selectRows sl xs@ is similar to xs(sl,:) in Matlab.
- selectRows :: Elt e => Acc (Vector Int) -> Acc (Array DIM2 e) -> Acc (Array DIM2 e)
- selectRows sl xs =
- let Z :. rows = unlift $ shape sl
- Z :. _ :. cols = unlift $ shape xs :: Z :. Exp Int :. Exp Int
- in
- backpermute
- (index2 rows cols)
- (\ix -> let Z :. j :. i = unlift ix in index2 (sl ! index1 j) i)
- xs
- -- Filter -- from accelerate-examples
- -- ------
- filterAcc :: Elt a
- => (Exp a -> Exp Bool)
- -> Acc (Vector a)
- -> Acc (Vector a)
- filterAcc p arr
- = let -- arr = A.use vec
- flags = A.map (boolToInt . p) arr
- (targetIdx, len) = A.scanl' (+) 0 flags
- arr' = A.backpermute (index1 $ the len) id arr
- in
- A.permute const arr' (\ix -> flags!ix ==* 0 ? (ignore, index1 $ targetIdx!ix)) arr
- -- FIXME: This is abusing 'permute' in that the first two arguments are
- -- only justified because we know the permutation function will
- -- write to each location in the target exactly once.
- -- Instead, we should have a primitive that directly encodes the
- -- compaction pattern of the permutation function.
- -- Create a vector containing integers in the range [0,n)
- -- iota :: (Elt n, IsNum n) => Exp Int -> Acc (Vector n)
- iota :: Exp Int -> Acc (Vector Int)
- iota n = A.generate (index1 n) (A.fromIntegral . unindex1)
- -- Perform a scalar/array operation by lifting an array/array binary
- -- operator.
- liftScalar :: (Shape sh, Elt e)
- => (Acc (Array sh e) -> Acc (Array sh e) -> Acc (Array sh e))
- -> Exp e -> Acc (Array sh e) -> Acc (Array sh e)
- liftScalar op left right = op left' right
- where
- -- I can't figure out how to use replicate here. This is a spurious data dependency introduced by map:
- left' = A.map (\_ -> left) right
- -- left' = A.replicate (shape right) (A.unit left)
- -- Same but flipped:
- liftScalarR :: (Shape sh, Elt e)
- => (Acc (Array sh e) -> Acc (Array sh e) -> Acc (Array sh e))
- -> Acc (Array sh e) -> Exp e -> Acc (Array sh e)
- liftScalarR op = flip (liftScalar (flip op))
- --------------------------------------------------------------------------------
- -- Elementwise operation matching matlab:
- infixl 7 ./
- infixl 7 .*
- infixl 6 .+
- infixl 6 .-
- a ./ b = A.zipWith (/) a b
- a .* b = A.zipWith (*) a b
- a .+ b = A.zipWith (+) a b
- a .- b = A.zipWith (-) a b
- --------------------------------------------------------------------------------
- small = test_anova1 6 3
- big = test_anova1 300 256
- main = small
- ----------------------------------------------------------------------------------------------------
Add Comment
Please, Sign In to add comment