Advertisement
Guest User

Untitled

a guest
Apr 24th, 2017
58
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.41 KB | None | 0 0
  1. #!/usr/bin/python
  2.  
  3. import sys
  4. import math
  5. import os
  6.  
  7. barcodefile = sys.argv[1]
  8. fname = sys.argv[2]
  9.  
  10. fullsequences = open(fname, "r")
  11.  
  12. #Create simple (barcode-less) sequence files
  13.  
  14. codes = open(barcodefile, "r")
  15.  
  16. for barcode in codes:
  17. barcode = barcode.strip() #strip whitespace
  18. outfname = "%s.%s" % (fname,barcode) #output file for each barcode
  19. outf = open(outfname, "w")
  20. for line in fullsequences:
  21. seqid,barseq = line.split() #split seq ID
  22. possbarcode = barseq[:len(barcode)]
  23. seq = barseq[len(barcode):]
  24. if possbarcode == barcode: #try to match barcode
  25. outseq = seqid + " " + seq + "\n"
  26. outf.write(outseq)
  27. outf.close()
  28. fastafname = outfname + ".fasta"
  29. mafftfname = fastafname + ".mafft"
  30. stockfname = mafftfname + ".stock" #final stock file name
  31. distancefname = stockfname + ".distances" #matrix file name
  32. handle = open(outfname, "r")
  33. outffasta = open(fastafname, "w")
  34. for line in handle: #convert to fasta
  35. linearr = line.split()
  36. seqidfasta = linearr[0]
  37. seqfasta = linearr[1]
  38. outffasta.write(">%s\n%s\n" % (seqidfasta,seqfasta))
  39. outffasta.close()
  40. handle.close()
  41. cmd = "mafft %s > %s" % (fastafname,mafftfname) #fasta to mafft
  42. os.system(cmd)
  43. cmd = "fasta_to_stockholm %s > %s" % (mafftfname,stockfname) #stock
  44. os.system(cmd)
  45. cmd = "quicktree -out m %s > %s" % (stockfname,distancefname)
  46. os.system(cmd)
  47.  
  48. x = [] #(re-)initialize array
  49.  
  50. mathhandle = open(distancefname, "r") #open matrix file
  51. mathhandle.next()
  52. i = 2
  53. for line in mathhandle:
  54. linearr = line.split()
  55. for s in linearr[i:]:
  56. x.append(float(s))
  57. i += 1
  58. avg = sum(x)/len(x)
  59. ssq = 0.0
  60. for y in x:
  61. ssq += (y-avg)*(y-avg) #calc sum of sqaure
  62. var = ssq / (len(x)-1) #calc variance
  63. sdev = math.sqrt(var) #calc std dev
  64. stderr = sdev / math.sqrt(len(x)) #calc std err
  65. sys.stdout.write(barcode + " %s +/- %s\n" % (avg,stderr))
  66.  
  67. mathhandle.close()
  68.  
  69. fullsequences.close()
  70. codes.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement