Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # Imports
- bExitForImportFailed=0
- try:
- import sys
- from time import strftime
- import argparse
- from optparse import OptionParser, OptionGroup
- import copy
- import subprocess
- import os
- import shutil
- import tarfile
- import time
- import fnmatch
- import platform
- import urllib
- from shutil import copyfile
- from os.path import expanduser
- import zipfile
- import glob
- import csv
- from collections import OrderedDict
- #import IMGAP
- except Exception, e:
- print 'Basic imports failed!'
- print e
- bExitForImportFailed=1
- ################################################################################
- # Color messages
class Highlighter:
    '''Wrap text in ANSI escape sequences selected by a message level tag.'''

    def __init__(self):
        # Level tag -> ANSI SGR sequence emitted before the message text.
        self._msgTypes = {
            'INF': '\033[0m',      # default attributes
            'IMP': '\033[1;32m',   # bold green
            'DEV': '\033[1;34m',   # bold blue
            'ERR': '\033[1;31m',   # bold red
            'WRN': '\033[1;33m',   # bold yellow
        }
        # Sequence appended after every message to restore default attributes.
        self._reset = '\033[0m'
        # Level used when the requested one is not in the table.
        self._default = 'INF'

    def ColorMsg(self, msg, msgLevel='INF'):
        '''Return *msg* wrapped in the escape codes for *msgLevel*.

        msg      -- text to colour (must be a string).
        msgLevel -- one of the keys of the level table; any unknown or
                    unhashable value falls back to the default level.
        '''
        try:
            prefix = self._msgTypes[msgLevel]
        except (KeyError, TypeError):
            # Only a failed lookup selects the fallback; interrupts and other
            # unrelated errors are no longer swallowed by a bare except.
            prefix = self._msgTypes[self._default]
        return prefix + msg + self._reset
def ColorOutput(msg, msgLevel='INF'):
    '''Return *msg* coloured for *msgLevel* via a freshly created Highlighter.'''
    return Highlighter().ColorMsg(msg, msgLevel)
- ################################################################################
- #
def _run_or_exit(cmd, cwd=None):
    '''Run *cmd* (an argument list, no shell) and abort the script on failure.'''
    try:
        status = subprocess.call(cmd, cwd=cwd)
    except OSError as err:
        # Raised when the executable is missing or cannot be started.
        failure = str(err)
    else:
        failure = ('exit status %s' % status) if status else None
    if failure:
        sys.stderr.write(strftime("%Y-%m-%d %H:%M:%S") +
                         ColorOutput(' Command failed (%s): %s\n'
                                     % (failure, ' '.join(cmd)), 'ERR'))
        # Non-zero status so callers/pipelines can see the failure.
        sys.exit(1)


def _build_gmap_indexes(directory, fasta_suffixes):
    '''Run gmap_build inside *directory* for every file with a FASTA suffix.'''
    sys.stdout.write(strftime("%Y-%m-%d %H:%M:%S") +
                     ColorOutput(' Generating Genome indexes......\n', 'IMP'))
    sys.stdout.flush()
    for name in os.listdir(directory):
        if name.endswith(fasta_suffixes):
            # The database name is everything before the first dot.
            genome = name.split('.')[0]
            # cwd=directory keeps "-D ." and the bare file name relative to
            # the iteration directory without changing the process-wide cwd.
            _run_or_exit(['gmap_build', '-D', '.', '-d', genome, name],
                         cwd=directory)


def reference_indexing(args):
    '''Prepare <outDir>/reference/iteration<N> and build its genome index.

    args must provide:
      iterations -- iteration number N (converted with int()).
      reference  -- path of the reference FASTA (used when N == 1).
      outDir     -- output directory; <outDir>/reference must already exist.

    When the iteration directory holds fewer than two entries, the input is
    symlinked into it (the reference for N == 1, otherwise
    <outDir>/scaffolds/iteration<N-1>/final_Scaffolds.fa) and gmap_build is
    run on every *.fasta/*.fa/*.fna file found there.  Otherwise the existing
    entries are only checked for the expected index-file suffixes.

    Returns 1 (the stage number).  Exits with status 1 if an external command
    fails; raises OSError if a required directory is missing.  On normal
    return the working directory is the module-level run_path.
    '''
    iteration = int(args.iterations)
    # Resolve every user-supplied path up front so relative --out / --ref
    # values do not depend on the current working directory later on.
    out_dir = os.path.abspath(args.outDir)
    reference_path = os.path.abspath(args.reference)
    itr_dir_path = os.path.join(out_dir, "reference",
                                "iteration" + str(iteration))
    fasta_suffixes = ('.fasta', '.fa', '.fna')
    index_suffixes = ('chromosome', 'chromosome.iit', 'contig', 'contig.iit',
                      'genomecomp', 'salcpexc', 'sarray', 'version')

    if not os.path.isdir(itr_dir_path):
        os.mkdir(itr_dir_path)

    if len(os.listdir(itr_dir_path)) < 2:
        if iteration == 1:
            _run_or_exit(['ln', '-s', reference_path, itr_dir_path])
        else:
            scaffolds_path = os.path.join(out_dir, "scaffolds",
                                          "iteration" + str(iteration - 1))
            # One link is enough: every match pointed at the same target.
            if any(name.startswith("final_Scaffolds")
                   for name in os.listdir(scaffolds_path)):
                scaffold_file = os.path.join(scaffolds_path,
                                             "final_Scaffolds.fa")
                _run_or_exit(['ln', '-sf', scaffold_file, itr_dir_path])
        _build_gmap_indexes(itr_dir_path, fasta_suffixes)
    else:
        # NOTE(review): nothing is reported or rebuilt when the count does
        # not match -- confirm that silently continuing is intended.
        found = sum(1 for name in os.listdir(itr_dir_path)
                    if name.endswith(index_suffixes))
        if found == len(index_suffixes):
            sys.stdout.write(strftime("%Y-%m-%d %H:%M:%S") +
                             ColorOutput(' Genome index files found......\n',
                                         'IMP'))
            sys.stdout.flush()

    # Kept for callers that rely on ending up in the launch directory.
    os.chdir(run_path)
    return 1
def getargs():
    '''Retrieve the args passed from the command line

    Options:
      --itr <N>     iteration number for the indexing step (int, required)
      --ref <FILE>  reference FASTA; must exist (required)
      --out <DIR>   output directory (required)
      -v/--version  print the program version and exit

    Returns the argparse namespace with attributes ``iterations``,
    ``reference`` and ``outDir``.
    '''
    # NOTE(review): __version__ is not defined in the visible part of this
    # file; fall back instead of dying with NameError before parsing starts.
    version = "index " + str(globals().get("__version__", "unknown"))
    parser = argparse.ArgumentParser(
        # This string was previously passed positionally, i.e. as ``prog``;
        # keep that effect but make it explicit.
        prog="python index.py",
        formatter_class=lambda name: argparse.HelpFormatter(
            name, max_help_position=100, width=200))
    # ArgumentParser(version=...) is deprecated in 2.7 and rejected by
    # Python 3; the explicit "version" action provides the same -v/--version.
    parser.add_argument("-v", "--version", action="version", version=version)
    required = parser.add_argument_group('required arguments')
    required.add_argument("--itr", dest="iterations", required=True,
                          help="Iteration number for indexing step",
                          metavar="<N>", type=int)
    required.add_argument("--ref", dest="reference", required=True,
                          help="References in FASTA file with multiple sequences",
                          metavar="<FILE>", type=is_file_exist)
    # Listed under "required arguments" and joined into paths unconditionally
    # by reference_indexing(), so a missing value is now reported by argparse
    # instead of crashing later on None.
    required.add_argument("--out", dest="outDir", required=True,
                          help="Output directory", metavar="<DIR>")
    # Parse the args
    return parser.parse_args()
def is_file_exist(arg):
    '''argparse ``type=`` callback: return *arg* unchanged if the path exists.

    Any existing path passes (the check is os.path.exists).  A missing path
    raises argparse.ArgumentTypeError, which argparse reports as a usage error.
    '''
    if os.path.exists(arg):
        return arg
    raise argparse.ArgumentTypeError("{0} does not exist".format(arg))
# Absolute path of the directory the script was started from; main() and
# reference_indexing() chdir back to it.
run_path = os.path.abspath(os.getcwd())
# Home directory of the current user.
home = expanduser("~")
def main():
    '''Script entry point: parse the command line and run the indexing step.

    Returns without doing anything further when the module-level import
    check flagged a failure.
    '''
    os.chdir(run_path)
    if bExitForImportFailed:
        # The import failure was already printed at module load.
        return
    args = getargs()
    # Message
    stamp = strftime("%Y-%m-%d %H:%M:%S")
    sys.stdout.write(stamp + ColorOutput(' Starting indexing\n', 'IMP'))
    sys.stdout.flush()
    reference_indexing(args)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement