Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import random
- import string
- #takes as input file haps.dat (population haplotype file), simped.in (pedigree file with haplotypes as alleles)
- #produces as output file simped.ped (pedigree file with alleles replaced by random population haplotypes)
- maxallele=32 #maximum number of alleles (haplotypes) used by a single family
- hap=[''] #initialize the pedigree haplotypes as a list containing a blank
- for i in range (1,maxallele+1):
- hap.append('') #add blanks to the list up to the max allowed
- # open file of population haplotypes, named haps.dat
- # haps.dat is file of haplotypes, one haplotype per line
- # each line is genotype space genotype etc.
- # the relative probability of each haplotype is controlled by how often it appears
- hapfile=open('haps.dat','r')
- # get the population haplotypes as a list
- pophaps=hapfile.readlines()
- # len(haps) will be the number of population haplotypes
- # strip haplotype strings of tabs, spaces, commas, newlines
- for i in range(len(pophaps)):
- pophaps[i]=pophaps[i].replace(' ','')
- pophaps[i]=pophaps[i].replace('\t','')
- pophaps[i]=pophaps[i].replace('\n','')
- pophaps[i]=pophaps[i].replace(',','')
- # hapltoypes are now packed, need to check (and remove) if any are null (from blank newlines)
- j=pophaps.count('')
- for i in range (j):
- pophaps.remove('')
- # make hap[0] hap all zeroes (used for missing data)
- hap[0]='0'*len(pophaps[0])
- # open simped.in, pedigree file with alleles to indicate haplotypes
- # simped.ped is the modified pedigree file with alleles expanded to haplotypes
- pedinfile=open('simped.doc','r')
- pedoutfile=open('simpedout.doc','w')
- # set the number of the last pedigree checked to 0
- lastped=' '
- for line in pedinfile:
- currped=line.split()[0]
- #first/new ped - need to zero all haplotype assignments x hap[0]
- if lastped<>currped:
- for i in range(1,maxallele+1):
- hap[i]=''
- lastped=currped
- #get haplotype alleles for this line
- h1=int(line.split()[6])
- h2=int(line.split()[7])
- if hap[h1]=='':
- #haplotype for this allele is currently null, need to assign haplotype 1
- hap[h1]=pophaps[random.randint(0,len(pophaps)-1)]
- if hap[h2]=='':
- #need to assign haplotype 2
- hap[h2]=pophaps[random.randint(0,len(pophaps)-1)]
- # make front of line
- newline=line.split()[0]+'\t'
- for i in range (1,6):
- newline=newline+line.split()[i]+'\t'
- # add in haplotypes, alternating from h1 and h2
- for i in range(len(hap[0])):
- newline=newline+' '+hap[h1][i:i+1]+' '+hap[h2][i:i+1]+' '
- #add end of line
- newline=newline+'\n'
- pedoutfile.write(newline)
- hapfile.close()
- pedinfile.close()
- pedoutfile.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement