Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- from pipeline.bin.fastaSplit import fastaCut
- from pipeline.config import *
- from pipeline.lib.shell import *
- from pipeline.lib.system import *
- from pipeline.software.software import *
- LOG = logging.getLogger(__name__)
def HPCblast(cut_num, query, db, task, out="", cwd="", num_threads=3, done="blast.done", **params):
    """
    Split the query fasta, submit one blast job per piece with qsub, wait for
    every job to finish and concatenate the per-piece results into one file.

    :param cut_num: cut number, passed to fastaCut together with mode="size"
    :param query: query fasta file
    :param db: blast database (subject) file
    :param task: blast task, must be a key of BLAST_TASK
    :param out: merged blast out file, default is "<query>.blast"
    :param cwd: working dir holding the cut files, the job dirs and the done file
    :param num_threads: cpus used to blast, see more details in blastn -help
    :param done: done filename (created inside cwd), default is "blast.done"
    :param params: other blast param in ALLOWED_BLAST_PARAMS
    :return: merged blast out file (same value on a fresh run and on a rerun)
    :raises ValueError: if task is not a key of BLAST_TASK
    """
    query, db = check_paths(query, db)

    if task not in BLAST_TASK:
        msg = "not allowed blast task %r" % task
        LOG.error(msg)
        # ValueError is still caught by callers that catch Exception
        raise ValueError(msg)

    # abspath("") resolves to the current directory, so cwd is never empty here
    cwd = mkdir(os.path.abspath(cwd))

    # resolve the default output name up front so that the logged params and
    # both return paths (rerun / fresh run) agree on it
    if not out:
        out = query + ".blast"

    params = dict(params)
    params.update({"query": query, "db": db, "out": out, "num_threads": num_threads})
    LOG.info("running blast with %r ", params2str(params))

    done = "%s/%s" % (cwd, done)
    # check for rerun
    if os.path.exists(done):
        LOG.info("%s has exists, pass blast step", done)
        return out

    # cut query file
    cut_cwd = mkdir("%s/%s.cut" % (cwd, os.path.basename(query)))
    cut_files = fastaCut(query, cut_num, mode="size", cwd=cut_cwd)

    # make blast database
    # NOTE(review): the returned command was never executed in the original
    # (the os.system call was commented out); whether makeblastdb() itself
    # builds the database cannot be told from here - confirm.
    makeblastdb(input_file=db, dbtype=BLAST_TASK[task]["dbtype"])

    done_files = []
    out_files = []
    # qsub one blast script per cut file; job dirs are numbered from 1
    for job_id, cut_file in enumerate(cut_files, 1):
        out_dir = mkdir("%s/job_%s" % (cwd, job_id))
        out_fn = "%s/%s.blast" % (out_dir, os.path.basename(cut_file))
        done_fn = out_fn + ".done"

        if os.path.isfile(done_fn):
            LOG.info("%s.sh had done, pass it", os.path.basename(out_fn))
        else:
            params.update({"task": task, "query": cut_file, "db": db, "out": out_fn, "num_threads": num_threads})
            script = script_run_blast(params)
            script_name = write_script(script, out_fn + ".sh", done_fn)
            qsub(cpu=num_threads, queue=QUEUE, script=script_name)

        # track every piece, including those finished by a previous run,
        # so that the merged output is complete
        done_files.append(done_fn)
        out_files.append(out_fn)

    # check blast status
    LOG.info("check blast status")
    check_status(done_files, 120)
    LOG.info("blast done")

    cat(out_files, out)
    touch(done)
    return out
def main():
    """Command line entry point; currently does nothing."""
# run main() only when this file is executed as a script, not when imported
if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment