Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- import argparse
- from biom import parse_table
- __author__ = "Gene Blanchard"
- __email__ = "me@geneblanchard.com"
- # Scrub that junk!
def symbolic_cleaner(pathway):
    """Return *pathway* with brackets, quotes, separators and spaces turned into '_'.

    The characters replaced are: [ ] ( ) : ; ' " { } , and the space.
    Every other character is passed through unchanged.
    """
    unwanted = {'[', ']', '(', ')', ':', ';', "'", '"', '{', '}', ',', ' '}
    # One pass over the text instead of one replace() call per symbol.
    return ''.join('_' if char in unwanted else char for char in pathway)
def main():
    """Annotate each observation row of a BIOM table with its KEGG pathways.

    Command-line arguments:
        -i / --input   path of the BIOM file to read (required)
        -o / --output  path of the tab-separated file to write (required)

    The table is rendered to text via ``str(table)`` and processed line by
    line:

    * The ``#OTU ID`` header line is written with a ``KEGG_Pathways`` column
      appended; every other ``#`` line is dropped.
    * Each data line is written once per entry in that observation's
      ``KEGG_Pathways`` metadata. The extra trailing column is the pathway's
      items plus the observation ID joined with '.', passed through
      ``symbolic_cleaner``. A data line whose pathway list is empty is not
      written at all.

    Raises whatever ``open``, ``parse_table`` or the metadata lookup raise
    (for example ``KeyError`` when ``KEGG_Pathways`` metadata is absent).
    """
    parser = argparse.ArgumentParser(description='Do Vince`s bidding on BioBakery Nonsense')
    # Both paths are mandatory: without them open() would be handed None and
    # fail with an unhelpful TypeError instead of a usage message.
    parser.add_argument('-i', '--input', dest='input', required=True,
                        help='The input biom')
    parser.add_argument('-o', '--output', dest='output', required=True,
                        help='The output jazz')
    args = parser.parse_args()

    with open(args.input, 'r') as biomhandle, open(args.output, 'w') as outhandle:
        # NOTE(review): `axis` presumably only matters when `ids` is also
        # passed to parse_table -- confirm against the biom documentation.
        table = parse_table(biomhandle, axis='observation')

        for line in str(table).split('\n'):
            # A blank line carries no observation ID to look up.
            if not line:
                continue

            if line.startswith('#'):
                # Only the column-header line is kept, with the new column named.
                if line.startswith("#OTU ID\t"):
                    outhandle.write("{}\tKEGG_Pathways\n".format(line))
                continue

            # The first tab-separated field is the observation (KEGG) ID.
            kegg_id = line.split('\t', 1)[0]
            pathways = table.metadata(id=kegg_id, axis='observation')['KEGG_Pathways']

            # One output line per pathway the observation belongs to.
            for pathway in pathways:
                # Build a fresh list rather than appending to `pathway`:
                # appending would modify the table's own metadata in place.
                parsed_pathway = '.'.join(list(pathway) + [kegg_id])
                outhandle.write("{}\t{}\n".format(line, symbolic_cleaner(parsed_pathway)))
# Run the conversion only when this file is executed as a script, not on import.
if "__main__" == __name__:
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement