Guest User

Untitled

a guest
Apr 25th, 2018
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.39 KB | None | 0 0
  1. import sqlite3, re, math, os
  2.  
  3. con = sqlite3.connect(':memory:')
  4. db = con.cursor()
  5.  
  6. contigs = [390,389,166,690,6907,6993,2656,3501,6990,1530,7291,7069,7277,7290,6922,7257,7126,1500,7214]
  7.  
  8. # Set filename and open input
  9. in_file = open("trimmed.ace", "r")
  10.  
  11. # Compile regex for parsing each line
  12. AF_re = re.compile("AF\s(?P<name>.*?)\.(?P<range>\d+-\d+)(?P<link1>(?:\.fm|\.to)\d+)(?P<link2>(?:\.fm|\.to)\d+)?\s(?P<dir>[UC])")
  13.  
  14. # Create table
  15. db.execute('''CREATE TABLE 'read_index' ('name' varchar(255) NOT NULL, 'contig' int(11) NOT NULL, 'range_low' int(11) NOT NULL, 'range_high' int(11) NOT NULL);''')
  16. db.execute('''CREATE INDEX 'name' ON 'read_index' ('name')''')
  17. db.execute('''CREATE INDEX 'contig' ON 'read_index' ('contig')''')
  18.  
  19. # Parse each line in the trimmed ace file and insert it into the database
  20. for line in in_file:
  21. if line.startswith("CO"):
  22. # Current line is a contig, set current_contig value
  23. current_contig = int(line.strip()[9:14])
  24. else:
  25. # Current line is a read, parse it and insert it into the database with the current contig
  26. match = AF_re.match(line.strip())
  27. read_name = match.group('name')
  28. range = map(lambda x: int(x), match.group("range").split("-"))
  29. db.execute("INSERT INTO read_index (name, contig, range_low, range_high) VALUES (?, ?, ?, ?);", (read_name, current_contig, range[0], range[1]))
  30.  
  31. # Commit changes to database
  32. con.commit()
  33.  
  34. # SELECT a list of all reads contained in the database that have the contigs we're looking for
  35. read_list = db.execute("SELECT DISTINCT * FROM read_index WHERE contig=%s" % (' OR contig='.join(map(lambda x: str(x), contigs[1:])))).fetchall()
  36.  
  37. # Make a dictionary of reads and their associated contigs
  38. read_contigs = {}
  39. for read in read_list:
  40. read_contigs[str(read[0])] = map(lambda x: x[0], db.execute("SELECT contig FROM read_index WHERE name=?", (str(read[0]),)).fetchall())
  41.  
  42. # Get the paths that each read traces out by sorting their ranges and contigs associated with each range
  43. read_paths = {}
  44. for read, contig_list in read_contigs.items():
  45. contigs = db.execute("SELECT contig FROM read_index WHERE name=? ORDER BY range_low, range_high", (read,)).fetchall()
  46. path = '->'.join(map(lambda x: str(x[0]), contigs))
  47. try:
  48. # Increment path counter
  49. read_paths[path] += 1
  50. except:
  51. # New path so set counter to 1
  52. read_paths[path] = 1
  53.  
  54. # Close the database
  55. db.close()
  56.  
  57. # Print each path and frequency
  58. for path, frequency in read_paths.items():
  59. print path, frequency
Add Comment
Please, Sign In to add comment