• API
• FAQ
• Tools
• Archive
SHARE
TWEET

# randomwalker

a guest Nov 28th, 2009 1,016 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. #!/usr/bin/python
2.
3. #calculates the entropy of a 13-STR CODIS (FBI) DNA profile
4. #from observed allele frequencies
5.
6. import math
7.
8. def shannonEntropy(freqs):
9.     """plain old Shannon entropy of a probability distribution"""
10.     return -sum(p * math.log(p, 2) for p in freqs)
11.
12. def locusEntropy(allelefreqs):
13.     """given the set of individual allele frequencies at a locus
14.    compute the genotype frequencies assuming Hardy-Weinberg equilibrium
15.    then calculate the Shannon entropy of the observed genotype frequencies"""
16.     genotypes = [2 * p * q  for (i, p) in enumerate(allelefreqs)
17.                             for (j, q) in enumerate(allelefreqs)
18.                             if i < j] + \
19.                 [p**2 for p in allelefreqs]
20.     return shannonEntropy(genotypes)
21.
23. # save as 'allelefreqs.csv'
24. def main():
26.     data = [line.split(',') for line in open('allelefreqs.csv')]
27.
28.     # ignore first 2 header rows
29.     data = data[2:]
30.
31.     # use caucasian data because sample size is highest
32.     caucasian_colids = range(1, 13*3, 3)
33.
34.     # list of allele frequencies at each locus
35.     allelefreqs = [[float(row[colid]) for row in data
36.                     if len(row[colid]) > 0]
37.                     for colid in caucasian_colids]
38.
39.     entropies = [locusEntropy(x) for x in allelefreqs]
40.
41.     print "locus entropies:\n\t", '\n\t'.join('%.2f' % s for s in entropies)
42.     print "total entropy: %.2f" % sum(entropies)
43.
44. if __name__ == "__main__":
45.     main()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy.

Top