Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- import sys
- import math
- import os
- barcodefile = sys.argv[1]
- fname = sys.argv[2]
- fullsequences = open(fname, "r")
- #Create simple (barcode-less) sequence files
- codes = open(barcodefile, "r")
- for barcode in codes:
- barcode = barcode.strip() #strip whitespace
- outfname = "%s.%s" % (fname,barcode) #output file for each barcode
- outf = open(outfname, "w")
- for line in fullsequences:
- seqid,barseq = line.split() #split seq ID
- possbarcode = barseq[:len(barcode)]
- seq = barseq[len(barcode):]
- if possbarcode == barcode: #try to match barcode
- outseq = seqid + " " + seq + "\n"
- outf.write(outseq)
- outf.close()
- fastafname = outfname + ".fasta"
- mafftfname = fastafname + ".mafft"
- stockfname = mafftfname + ".stock" #final stock file name
- distancefname = stockfname + ".distances" #matrix file name
- handle = open(outfname, "r")
- outffasta = open(fastafname, "w")
- for line in handle: #convert to fasta
- linearr = line.split()
- seqidfasta = linearr[0]
- seqfasta = linearr[1]
- outffasta.write(">%s\n%s\n" % (seqidfasta,seqfasta))
- outffasta.close()
- handle.close()
- cmd = "mafft %s > %s" % (fastafname,mafftfname) #fasta to mafft
- os.system(cmd)
- cmd = "fasta_to_stockholm %s > %s" % (mafftfname,stockfname) #stock
- os.system(cmd)
- cmd = "quicktree -out m %s > %s" % (stockfname,distancefname)
- os.system(cmd)
- x = [] #(re-)initialize array
- mathhandle = open(distancefname, "r") #open matrix file
- mathhandle.next()
- i = 2
- for line in mathhandle:
- linearr = line.split()
- for s in linearr[i:]:
- x.append(float(s))
- i += 1
- avg = sum(x)/len(x)
- ssq = 0.0
- for y in x:
- ssq += (y-avg)*(y-avg) #calc sum of sqaure
- var = ssq / (len(x)-1) #calc variance
- sdev = math.sqrt(var) #calc std dev
- stderr = sdev / math.sqrt(len(x)) #calc std err
- sys.stdout.write(barcode + " %s +/- %s\n" % (avg,stderr))
- mathhandle.close()
- fullsequences.close()
- codes.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement