import random
import string
#takes as input file haps.dat (population haplotype file), simped.in (pedigree file with haplotypes as alleles)
#produces as output file simped.ped (pedigree file with alleles replaced by random population haplotypes)
maxallele=32 #maximum number of alleles (haplotypes) used by a single family
hap=[''] #initialize the pedigree haplotypes as a list containing a blank
for i in range (1,maxallele+1):
hap.append('') #add blanks to the list up to the max allowed
# open file of population haplotypes, named haps.dat
# haps.dat is file of haplotypes, one haplotype per line
# each line is genotype space genotype etc.
# the relative probability of each haplotype is controlled by how often it appears
hapfile=open('haps.dat','r')
# get the population haplotypes as a list
pophaps=hapfile.readlines()
# len(haps) will be the number of population haplotypes
# strip haplotype strings of tabs, spaces, commas, newlines
for i in range(len(pophaps)):
pophaps[i]=pophaps[i].replace(' ','')
pophaps[i]=pophaps[i].replace('\t','')
pophaps[i]=pophaps[i].replace('\n','')
pophaps[i]=pophaps[i].replace(',','')
# hapltoypes are now packed, need to check (and remove) if any are null (from blank newlines)
j=pophaps.count('')
for i in range (j):
pophaps.remove('')
# make hap[0] hap all zeroes (used for missing data)
hap[0]='0'*len(pophaps[0])
# open simped.in, pedigree file with alleles to indicate haplotypes
# simped.ped is the modified pedigree file with alleles expanded to haplotypes
pedinfile=open('simped.doc','r')
pedoutfile=open('simpedout.doc','w')
# set the number of the last pedigree checked to 0
lastped=' '
for line in pedinfile:
currped=line.split()[0]
#first/new ped - need to zero all haplotype assignments x hap[0]
if lastped<>currped:
for i in range(1,maxallele+1):
hap[i]=''
lastped=currped
#get haplotype alleles for this line
h1=int(line.split()[6])
h2=int(line.split()[7])
if hap[h1]=='':
#haplotype for this allele is currently null, need to assign haplotype 1
hap[h1]=pophaps[random.randint(0,len(pophaps)-1)]
if hap[h2]=='':
#need to assign haplotype 2
hap[h2]=pophaps[random.randint(0,len(pophaps)-1)]
# make front of line
newline=line.split()[0]+'\t'
for i in range (1,6):
newline=newline+line.split()[i]+'\t'
# add in haplotypes, alternating from h1 and h2
for i in range(len(hap[0])):
newline=newline+' '+hap[h1][i:i+1]+' '+hap[h2][i:i+1]+' '
#add end of line
newline=newline+'\n'
pedoutfile.write(newline)
hapfile.close()
pedinfile.close()
pedoutfile.close()