Advertisement
Guest User

Untitled

a guest
May 5th, 2017
121
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.78 KB | None | 0 0
  1. #!/usr/bin/python
  2.  
  3. # Imports
  4.  
  5. bExitForImportFailed=0
  6. try:
  7.     import sys
  8.     from time import strftime
  9.     import argparse
  10.     from optparse import OptionParser, OptionGroup
  11.     import copy
  12.     import subprocess
  13.     import os
  14.     import shutil
  15.     import tarfile
  16.     import time
  17.     import fnmatch
  18.     import platform
  19.     import urllib
  20.     from shutil import copyfile
  21.     from os.path import expanduser
  22.     import zipfile
  23.     import glob
  24.     import csv
  25.     from collections import OrderedDict
  26.     #import IMGAP
  27. except Exception, e:
  28.     print 'Basic imports failed!'
  29.     print e
  30.     bExitForImportFailed=1
  31.  
  32. ################################################################################
  33. # Color messages
  34.  
  35. class Highlighter:
  36.     def __init__(self):
  37.         self._msgTypes={'INF':'\033[0m',
  38.                 'IMP':'\033[1;32m',
  39.                 'DEV':'\033[1;34m',
  40.                 'ERR':'\033[1;31m',
  41.                 'WRN':'\033[1;33m'}
  42.         self._reset='\033[0m'
  43.         self._default='INF'
  44.     def ColorMsg(self,msg,msgLevel='INF'):
  45.         try:
  46.             s=self._msgTypes[msgLevel]+msg+self._reset
  47.         except:s=s=self._msgTypes[self._default]+msg+self._reset
  48.         return s
  49.  
  50. def ColorOutput(msg,msgLevel='INF'):
  51.     o=Highlighter()
  52.     return o.ColorMsg(msg,msgLevel)
  53.  
  54. ################################################################################
  55. #
  56.  
  57. def reference_indexing(args):
  58.     stage = 1
  59.     iteration = int(args.iterations)
  60.     itr_dir_name = "iteration"
  61.     itr_dir = str(itr_dir_name) + str(iteration)
  62.     reference_path=os.path.abspath(args.reference)
  63.     reference_index=os.path.abspath(os.path.join(args.outDir,"reference"))
  64.  
  65.     os.chdir(reference_index)
  66.  
  67.     list_itr = os.listdir(reference_index)
  68.     indexing = ['chromosome', 'chromosome.iit', 'contig', 'contig.iit', 'genomecomp','salcpexc','sarray','version']
  69.     t0_time = time.time()
  70.     gsnapFiles = 0
  71.  
  72.     if os.path.isdir(itr_dir):
  73.         pass
  74.     else:
  75.         os.mkdir(itr_dir)
  76.  
  77.     itr_dir_path=os.path.abspath(os.path.join(reference_index,itr_dir))
  78.  
  79.     file_ext = ['.fasta', '.fa', '.fna']
  80.  
  81.     if len(os.listdir(itr_dir)) < 2:
  82.         if iteration == 1:
  83.             os.chdir(itr_dir)
  84.             cmd='ln -s %s %s' % (reference_path,os.curdir)
  85.             if os.system(cmd):exit()
  86.             sys.stdout.write(strftime("%Y-%m-%d %H:%M:%S")+
  87.                 ColorOutput(' Generating Genome indexes......\n','IMP'))
  88.            
  89.             sys.stdout.flush()
  90.             files = os.listdir(os.curdir)
  91.             for file in files:
  92.                 if file.endswith(tuple(file_ext)):
  93.                     genome = file.split('.')[0]
  94.                     cmd = 'gmap_build -D . -d %s %s' % (genome,file)
  95.                     if os.system(cmd):exit()
  96.         else:
  97.             prev_itr_name="iteration"
  98.             prev_iteration = int(`iteration-1`)
  99.             prev_itr = str(prev_itr_name) + str(prev_iteration)
  100.             scaffolds_path=os.path.join(args.outDir,"scaffolds",prev_itr)
  101.             scaffolds_fasta=os.listdir(scaffolds_path)
  102.             for file in scaffolds_fasta:
  103.                 if file.startswith("final_Scaffolds"):
  104.                     scaffold_file=os.path.join(scaffolds_path,"final_Scaffolds.fa")
  105.                     cmd='ln -sf %s %s' % (scaffold_file,itr_dir_path)
  106.                     if os.system(cmd):exit()
  107.             sys.stdout.write(strftime("%Y-%m-%d %H:%M:%S")+
  108.                 ColorOutput(' Generating Genome indexes......\n','IMP'))
  109.             sys.stdout.flush()
  110.             os.chdir(itr_dir_path)
  111.             files = os.listdir(itr_dir_path)
  112.             for file in files:
  113.                 if file.endswith(tuple(file_ext)):
  114.                     genome = file.split('.')[0]
  115.                     cmd = 'gmap_build -D . -d %s %s' % (genome,file)
  116.                     if os.system(cmd):exit()
  117.     else:
  118.         refs = os.listdir(itr_dir)
  119.         for file in refs:
  120.             if file.endswith(tuple(indexing)):
  121.                 pass
  122.                 gsnapFiles = gsnapFiles + 1
  123.         if gsnapFiles == len(indexing):
  124.             sys.stdout.write(strftime("%Y-%m-%d %H:%M:%S")+
  125.                 ColorOutput(' Genome index files found......\n','IMP'))
  126.             sys.stdout.flush()
  127.  
  128.     os.chdir(run_path)
  129.     return stage
  130.  
  131. def getargs():
  132.     '''Retrieve the args passed from the command line'''
  133.  
  134.     usage = "python index.py"
  135.     version="index "+__version__
  136.     parser = argparse.ArgumentParser(usage,version=version,formatter_class=lambda prog: argparse.HelpFormatter(prog,
  137.     max_help_position=100, width=200))
  138.     required = parser.add_argument_group('required arguments')
  139.     required.add_argument("--itr", dest="iterations", required=True,
  140.                     help="Iteration number for indexing step", metavar="<N>", type=int)
  141.     required.add_argument("--ref", dest="reference", required=True,
  142.                     help="References in FASTA file with multiple sequences", metavar="<FILE>",
  143.                     type=is_file_exist)
  144.     required.add_argument("--out", dest="outDir", required=False, metavar="<DIR>")
  145.    
  146.     # Parse the args
  147.     args = parser.parse_args()
  148.     return args
  149.  
  150. def is_file_exist(arg):
  151.     if not os.path.exists(arg):
  152.         #parser.error("The file %s does not exist!" % arg)
  153.         raise argparse.ArgumentTypeError("{0} does not exist".format(arg))
  154.     else:
  155.         return arg  
  156.  
  157. run_path = os.path.abspath(os.curdir)
  158. home = expanduser("~")
  159.  
  160. def main():
  161.     os.chdir(run_path)
  162.    
  163.     if bExitForImportFailed:
  164.         pass    
  165.     else:
  166.         args = getargs()
  167.             # Message
  168.         sys.stdout.write(strftime("%Y-%m-%d %H:%M:%S")+
  169.             ColorOutput(' Starting indexing\n','IMP'))
  170.         sys.stdout.flush()
  171.  
  172.         reference_indexing(args)
  173.  
  174. if __name__ == '__main__':
  175.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement