Want more features on Pastebin? Sign Up, it's FREE!
Guest

Checking Alignment

By: a guest on Feb 18th, 2013  |  syntax: None  |  size: 2.69 KB  |  views: 28  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. import random
  2. import string
  3.  
  4. #takes as input file haps.dat (population haplotype file), simped.in (pedigree file with haplotypes as alleles)
  5. #produces as output file simped.ped (pedigree file with alleles replaced by random population haplotypes)
  6.  
  7. maxallele=32  #maximum number of alleles (haplotypes) used by a single family
  8. hap=['']    #initialize the pedigree haplotypes as a list containing a blank
  9. for i in range (1,maxallele+1):
  10.     hap.append('')   #add blanks to the list up to the max allowed
  11.  
  12. # open file of population haplotypes, named haps.dat
  13. # haps.dat is file of haplotypes, one haplotype per line
  14. # each line is genotype space genotype etc.
  15. # the relative probability of each haplotype is controlled by how often it appears
  16.  
  17. hapfile=open('haps.dat','r')
  18.  
  19. # get the population haplotypes as a list
  20.  
  21. pophaps=hapfile.readlines()
  22.  
  23. # len(haps) will be the number of population haplotypes
  24. # strip haplotype strings of tabs, spaces, commas, newlines
  25. for i in range(len(pophaps)):
  26.     pophaps[i]=pophaps[i].replace(' ','')
  27.     pophaps[i]=pophaps[i].replace('\t','')
  28.     pophaps[i]=pophaps[i].replace('\n','')
  29.     pophaps[i]=pophaps[i].replace(',','')
  30.  
  31. # hapltoypes are now packed, need to check (and remove) if any are null (from blank newlines)
  32. j=pophaps.count('')
  33. for i in range (j):
  34.     pophaps.remove('')
  35. # make hap[0] hap all zeroes (used for missing data)
  36.     hap[0]='0'*len(pophaps[0])
  37.  
  38. # open simped.in, pedigree file with alleles to indicate haplotypes
  39. # simped.ped is the modified pedigree file with alleles expanded to haplotypes
  40.  
  41. pedinfile=open('simped.doc','r')
  42. pedoutfile=open('simpedout.doc','w')
  43. # set the number of the last pedigree checked to 0
  44. lastped='    '
  45. for line in pedinfile:
  46.     currped=line.split()[0]
  47.  
  48. #first/new ped - need to zero all haplotype assignments x hap[0]
  49.     if lastped<>currped:
  50.         for i in range(1,maxallele+1):
  51.             hap[i]=''
  52.         lastped=currped
  53.  
  54. #get haplotype alleles for this line
  55.  
  56.     h1=int(line.split()[6])
  57.     h2=int(line.split()[7])
  58.     if hap[h1]=='':
  59.  
  60. #haplotype for this allele is currently null, need to assign haplotype 1
  61.  
  62.         hap[h1]=pophaps[random.randint(0,len(pophaps)-1)]
  63.  
  64.     if hap[h2]=='':
  65.  
  66. #need to assign haplotype 2
  67.  
  68.         hap[h2]=pophaps[random.randint(0,len(pophaps)-1)]
  69.  
  70. # make front of line
  71.     newline=line.split()[0]+'\t'
  72.     for i in range (1,6):
  73.         newline=newline+line.split()[i]+'\t'
  74. # add in haplotypes, alternating from h1 and h2
  75.     for i in range(len(hap[0])):
  76.         newline=newline+' '+hap[h1][i:i+1]+' '+hap[h2][i:i+1]+' '
  77. #add end of line
  78.         newline=newline+'\n'
  79.         pedoutfile.write(newline)
  80.  
  81. hapfile.close()
  82. pedinfile.close()
  83. pedoutfile.close()
clone this paste RAW Paste Data