# Untitled

#!/usr/bin/env python
import argparse
from biom import parse_table

# Module authorship metadata.
__author__ = "Gene Blanchard"
__email__ = "me@geneblanchard.com"


# Scrub that junk!
def symbolic_cleaner(pathway):
    """Return ``pathway`` with troublesome punctuation replaced by ``_``.

    Every bracket, parenthesis, brace, colon, semicolon, single or double
    quote, comma and space character becomes a single underscore; all other
    characters are kept as they are.
    """
    unwanted = frozenset('[]():;\'"{}, ')
    return ''.join('_' if char in unwanted else char for char in pathway)


def main():
    """Annotate every observation row of a BIOM table with its KEGG pathways.

    Command-line arguments:
        -i/--input   path of the BIOM file to read (required)
        -o/--output  path of the tab-separated file to write (required)

    The table is rendered as tab-separated text and processed line by line:

    * the ``#OTU ID`` header line is written with an extra ``KEGG_Pathways``
      column name appended;
    * every other ``#`` line is dropped;
    * each data row is written once per entry in that observation's
      ``KEGG_Pathways`` metadata, with an extra column holding the pathway
      levels and the observation id joined by ``.`` and passed through
      ``symbolic_cleaner``.  A row whose pathway list is empty therefore
      produces no output line.
    """
    parser = argparse.ArgumentParser(description='Do Vince`s bidding on BioBakery Nonsense')
    # Both paths go straight into open(); let argparse reject a missing one
    # with a usage message instead of failing later with a TypeError.
    parser.add_argument('-i', '--input', dest='input', required=True, help='The input biom')
    parser.add_argument('-o', '--output', dest='output', required=True, help='The output jazz')
    args = parser.parse_args()

    with open(args.input, 'r') as biomhandle, open(args.output, 'w') as outhandle:
        # NOTE(review): `axis` presumably only matters together with an `ids`
        # filter, which is not passed here - confirm against the biom docs.
        table = parse_table(biomhandle, axis='observation')

        # Work on the table's text rendering so the output rows carry
        # exactly the values that rendering produces.
        for line in str(table).split('\n'):
            if not line:
                # An empty line has no observation id to look up.
                continue

            if line.startswith('#'):
                # Only the column-header line is kept, extended with the
                # name of the column appended to every data row below.
                if line.startswith("#OTU ID\t"):
                    outhandle.write("{}\tKEGG_Pathways\n".format(line))
                continue

            # The first tab-separated field is the observation (KEGG) id.
            kegg_id = line.split('\t', 1)[0]
            pathways = table.metadata(id=kegg_id, axis='observation')['KEGG_Pathways']

            # One output row per pathway the observation belongs to.
            for pathway in pathways:
                # Build a new list rather than appending to `pathway`, which
                # is owned by the table's metadata and must stay untouched.
                parsed_pathway = '.'.join(list(pathway) + [kegg_id])
                outhandle.write("{}\t{}\n".format(line, symbolic_cleaner(parsed_pathway)))


# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()