Advertisement
Guest User

Untitled

a guest
Nov 30th, 2015
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.69 KB | None | 0 0
  1. import tempfile
  2. import gffutils
  3. import os
  4. from argparse import ArgumentParser
  5.  
  6. def get_gtf_db(gtf, in_memory=False):
  7. """
  8. create a gffutils DB
  9. """
  10. db_file = gtf + ".db"
  11. if os.path.exists(db_file):
  12. return gffutils.FeatureDB(db_file)
  13. db_file = ":memory:" if in_memory else db_file
  14. if in_memory or not os.path.exists(db_file):
  15. infer_extent = guess_infer_extent(gtf)
  16. db = gffutils.create_db(gtf, dbfn=db_file,
  17. infer_gene_extent=infer_extent)
  18. if in_memory:
  19. return db
  20. else:
  21. return gffutils.FeatureDB(db_file)
  22.  
  23. def guess_infer_extent(gtf_file):
  24. """
  25. guess if we need to use the gene extent option when making a gffutils
  26. database by making a tiny database of 1000 lines from the original
  27. GTF and looking for all of the features
  28. """
  29. _, ext = os.path.splitext(gtf_file)
  30. tmp_out = tempfile.NamedTemporaryFile(suffix=".gtf", delete=False).name
  31. with open(tmp_out, "w") as out_handle:
  32. count = 0
  33. in_handle = open(gtf_file) if ext != ".gz" else gzip.open(gtf_file)
  34. for line in in_handle:
  35. if count > 1000:
  36. break
  37. out_handle.write(line)
  38. count += 1
  39. in_handle.close()
  40. db = gffutils.create_db(tmp_out, dbfn=":memory:", infer_gene_extent=False)
  41. os.remove(tmp_out)
  42. features = [x for x in db.featuretypes()]
  43. if "gene" in features and "transcript" in features:
  44. return False
  45. else:
  46. return True
  47.  
  48. if __name__ == "__main__":
  49. parser = ArgumentParser()
  50. parser.add_argument("gtf_file")
  51. args = parser.parse_args()
  52. db = get_gtf_db(args.gtf_file)
  53. for x in db.featuretypes():
  54. print x
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement