Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python3
- from Bio.Blast import NCBIXML
- import time, datetime
- class BlastResult:
- def __init__(self):
- self.all_found_querys = set()
- self.all_found_accessions = set()
- self.all_found_pairs = {}
- self.all_hps_per_hit = {}
- self.all_hsp_per_sbjct = {}
- self.all_sbjcts = set()
- #self.records = []
- class BlastRecord: #TODO is wel nodig voor hps's visualiseren / of deprecated
- def __init__(self, query_name, query_length):
- self.name = query_name
- self.length = query_length
- self.hits = []
- class BlastHit:
- def __init__(self, accession, title, length):
- self.accession = accession
- self.title = title
- self.length = length
- self.hsps = []
- class BlastHsp:
- def __init__(self, hsp_id, acc_id, expect, identities, query, match, subject, score):
- self.hsp_id = hsp_id
- self.acc_id = acc_id
- self.expect = float(expect)
- self.identities = identities
- self.query = query
- self.match = match
- self.subject = subject
- self.score = float(score)
- """ origineel
- class BlastHandle:
- @staticmethod
- def obj_to_dict(obj):
- return obj.__dict__
- @staticmethod
- def parse_blast_outfile(outfile="outfile.xml") -> BlastResult:
- hsp_id = 1;
- acc_id = 1;
- blast_result_object = BlastResult()
- with open(outfile) as file:
- records = NCBIXML.parse(file)
- for record in records:
- blast_result_object.all_found_querys.add(record.query)
- for hit in record.alignments:
- all_hsp_per_hit = []
- if record.query != hit.title[hit.title.find(" ") + 1:]: # filter results where query == subject
- evalue = 1
- blast_hit = BlastHit(hit.accession,
- hit.title,
- hit.length)
- blast_result_object.all_found_accessions.add(hit.title)
- for hsp in hit.hsps:
- blast_hsp = BlastHsp(hsp_id,
- acc_id,
- float(hsp.expect),
- hsp.identities,
- hsp.query,
- hsp.match,
- hsp.sbjct,
- float(hsp.score))
- if hsp.expect < evalue:
- #data structuur: key als basis naam query + naam hit.
- #in principe de basis voor opbouw van tabel key=row+header, value=cel
- blast_result_object.all_found_pairs[record.query + "-" + hit.title] = [blast_hsp, blast_hit]
- evalue = hsp.expect
- all_hsp_per_hit.append(BlastHandle.obj_to_dict(blast_hsp))
- hsp_id += 1
- blast_result_object.all_hps_per_hit[acc_id] = all_hsp_per_hit
- acc_id += 1
- return blast_result_object
- """
- class BlastHandle:
- @staticmethod
- def obj_to_dict(obj):
- return obj.__dict__
- @staticmethod
- def parse_blast_outfile(outfile="testdata/GCF_000313135.1_Acastellanii.strNEFF_v1_genomic.xml") -> BlastResult:
- hsp_id = 1;
- acc_id = 1;
- blast_result_object = BlastResult()
- with open(outfile) as file:
- records = NCBIXML.parse(file)
- for record in records:
- blast_result_object.all_found_querys.add(record.query)
- for hit in record.alignments:
- all_hsp_per_hit = []
- if record.query != hit.title[hit.title.find(" ") + 1:]: # filter results where query == subject
- evalue = 1
- blast_hit = BlastHit(hit.accession,
- hit.title,
- hit.length)
- blast_result_object.all_found_accessions.add(hit.title)
- for hsp in hit.hsps:
- blast_hsp = BlastHsp(hsp_id,
- acc_id,
- float(hsp.expect),
- hsp.identities,
- hsp.query,
- hsp.match,
- hsp.sbjct,
- float(hsp.score))
- #extra logic
- blast_result_object.all_sbjcts.add(blast_hsp.subject)
- h = hash(record.query+"-"+hit.title+"-"+blast_hsp.subject)
- blast_result_object.all_hsp_per_sbjct[h] = blast_hsp
- #end extra logic
- """
- if hsp.expect < evalue:
- #data structuur: key als basis naam query + naam hit.
- #in principe de basis voor opbouw van tabel key=row+header, value=cel
- blast_result_object.all_found_pairs[record.query + "-" + hit.title] = [blast_hsp, blast_hit]
- evalue = hsp.expect
- all_hsp_per_hit.append(BlastHandle.obj_to_dict(blast_hsp))
- """
- hsp_id += 1
- #blast_result_object.all_hps_per_hit[acc_id] = all_hsp_per_hit
- acc_id += 1
- return blast_result_object
- print("start")
- print(datetime.datetime.now())
- start = time.time()
- test = BlastHandle.parse_blast_outfile()
- stop = time.time()
- print("done reading: " + str(int(stop - start)) + " - seconden")
- print("lengte querys: "+ str(len(test.all_found_querys)))
- print("lengte subjects: "+ str(len(test.all_found_accessions)))
- print("lengte unieke hsps: "+ str(len(test.all_sbjcts)))
- for row in test.all_sbjcts:
- for column in test.all_sbjcts:
- x = 1
- stop2 = time.time()
- print("done creating table: " + str(int(stop2 - stop)) + " - seconden")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement