1. import random
  2. import string
  3.  
  4. #takes as input file haps.dat (population haplotype file), simped.in (pedigree file with haplotypes as alleles)
  5. #produces as output file simped.ped (pedigree file with alleles replaced by random population haplotypes)
  6.  
  7. maxallele=32 #maximum number of alleles (haplotypes) used by a single family
  8. hap=[''] #initialize the pedigree haplotypes as a list containing a blank
  9. for i in range (1,maxallele+1):
  10. hap.append('') #add blanks to the list up to the max allowed
  11.  
  12. # open file of population haplotypes, named haps.dat
  13. # haps.dat is file of haplotypes, one haplotype per line
  14. # each line is genotype space genotype etc.
  15. # the relative probability of each haplotype is controlled by how often it appears
  16.  
  17. hapfile=open('haps.dat','r')
  18.  
  19. # get the population haplotypes as a list
  20.  
  21. pophaps=hapfile.readlines()
  22.  
  23. # len(haps) will be the number of population haplotypes
  24. # strip haplotype strings of tabs, spaces, commas, newlines
  25. for i in range(len(pophaps)):
  26. pophaps[i]=pophaps[i].replace(' ','')
  27. pophaps[i]=pophaps[i].replace('\t','')
  28. pophaps[i]=pophaps[i].replace('\n','')
  29. pophaps[i]=pophaps[i].replace(',','')
  30.  
  31. # hapltoypes are now packed, need to check (and remove) if any are null (from blank newlines)
  32. j=pophaps.count('')
  33. for i in range (j):
  34. pophaps.remove('')
  35. # make hap[0] hap all zeroes (used for missing data)
  36. hap[0]='0'*len(pophaps[0])
  37.  
  38. # open simped.in, pedigree file with alleles to indicate haplotypes
  39. # simped.ped is the modified pedigree file with alleles expanded to haplotypes
  40.  
  41. pedinfile=open('simped.doc','r')
  42. pedoutfile=open('simpedout.doc','w')
  43. # set the number of the last pedigree checked to 0
  44. lastped=' '
  45. for line in pedinfile:
  46. currped=line.split()[0]
  47.  
  48. #first/new ped - need to zero all haplotype assignments x hap[0]
  49. if lastped<>currped:
  50. for i in range(1,maxallele+1):
  51. hap[i]=''
  52. lastped=currped
  53.  
  54. #get haplotype alleles for this line
  55.  
  56. h1=int(line.split()[6])
  57. h2=int(line.split()[7])
  58. if hap[h1]=='':
  59.  
  60. #haplotype for this allele is currently null, need to assign haplotype 1
  61.  
  62. hap[h1]=pophaps[random.randint(0,len(pophaps)-1)]
  63.  
  64. if hap[h2]=='':
  65.  
  66. #need to assign haplotype 2
  67.  
  68. hap[h2]=pophaps[random.randint(0,len(pophaps)-1)]
  69.  
  70. # make front of line
  71. newline=line.split()[0]+'\t'
  72. for i in range (1,6):
  73. newline=newline+line.split()[i]+'\t'
  74. # add in haplotypes, alternating from h1 and h2
  75. for i in range(len(hap[0])):
  76. newline=newline+' '+hap[h1][i:i+1]+' '+hap[h2][i:i+1]+' '
  77. #add end of line
  78. newline=newline+'\n'
  79. pedoutfile.write(newline)
  80.  
  81. hapfile.close()
  82. pedinfile.close()
  83. pedoutfile.close()