Advertisement
Guest User

Untitled

a guest
Jul 1st, 2016
49
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.04 KB | None | 0 0
  1. #!/usr/bin/env python
  2. import argparse
  3. from biom import parse_table
  4.  
  5. __author__ = "Gene Blanchard"
  6. __email__ = "me@geneblanchard.com"
  7.  
  8.  
  9. # Scrub that junk!
  10. def symbolic_cleaner(pathway):
  11. for symbol in ['[', ']', '(', ')', ':', ';', "'", '"', '{', '}', ',', ' ']:
  12. pathway = pathway.replace(symbol, '_')
  13. return pathway
  14.  
  15.  
  16. def main():
  17. # Argument Parser
  18. parser = argparse.ArgumentParser(description='Do Vince`s bidding on BioBakery Nonsense')
  19.  
  20. # Input file
  21. parser.add_argument('-i', '--input', dest='input', help='The input biom')
  22. # Output file
  23. parser.add_argument('-o', '--output', dest='output', help='The output jazz')
  24.  
  25. # Parse arguments
  26. args = parser.parse_args()
  27. infile = args.input
  28. outfile = args.output
  29.  
  30. # Lets open up our files
  31. with open(infile, 'r') as biomhandle, open(outfile, 'w') as outhandle:
  32. # Parse the biom table using the built in function
  33. # I have no idea if specifying the axis has any result
  34. table = parse_table(biomhandle, axis='observation')
  35. # Convert the table into a tsv-like object
  36. tsv = table.__str__().split('\n')
  37. # Parse the TSV
  38. for line in tsv:
  39. # Screw headers!
  40. if not line.startswith('#'):
  41. # Get the Kegg ID
  42. kegg_id = line.split('\t', 1)[0]
  43. # Find the pathway(s)
  44. pathways = table.metadata(id=kegg_id, axis='observation')['KEGG_Pathways']
  45. # For every pathway we find, we need a new line
  46. for pathway in pathways:
  47. # Add the kegg_id id
  48. pathway.append(kegg_id)
  49. parsed_pathway = '.'.join(pathway)
  50. # Clean up the symbols and write out the line
  51. outhandle.write("{}\t{}\n".format(line, symbolic_cleaner(parsed_pathway)))
  52. # Oh shit maybe we did need this header
  53. if line.startswith("#OTU ID\t"):
  54. outhandle.write("{}\tKEGG_Pathways\n".format(line.rstrip('\n')))
  55.  
  56.  
  57. if __name__ == '__main__':
  58. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement