Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from Bio import Entrez
from Bio import SeqIO
import os

# Identify this client to the Entrez service; replace the placeholder.
Entrez.email = 'user@example.org'  # Put your email here

file_name = "proteome1.faa"

# NOTE(review): ``id`` is given an organism name here; efetch presumably
# expects record UIDs/accessions, so this request may not return the
# intended records -- confirm against the NCBI efetch documentation.
net_handle = Entrez.efetch(db="protein", id="trichoderma_harzianum", rettype="gp", retmode="text")
try:
    # The ``with`` block closes the output file even if the download or
    # the write fails part-way through.
    with open(file_name, "w") as out_handle:
        out_handle.write(net_handle.read())
finally:
    net_handle.close()
print("Saved")

# Re-read the saved file as GenBank records and print a short summary of
# each one: identifier, sequence repr, and sequence length.
for seq_record in SeqIO.parse(file_name, "genbank"):
    print(seq_record.id)
    print(repr(seq_record.seq))
    print(len(seq_record))
from Bio import Entrez

# Contact address configured on the Entrez module; the value below is a
# placeholder.
Entrez.email = "user@example.org"  # Put your email here
def get_tax_id(species):
    """Return the first taxonomy ID found for *species*.

    Args:
        species: Organism name. Underscores are replaced with ``+`` and
            surrounding whitespace is stripped before searching.

    Returns:
        The first entry of the ``IdList`` in the parsed search result.

    Raises:
        IndexError: If the search returns no IDs.
    """
    term = species.replace('_', '+').strip()
    search = Entrez.esearch(term=term, db='taxonomy', retmode='xml')
    try:
        record = Entrez.read(search)
    finally:
        # Release the response handle whether or not parsing succeeded.
        search.close()

    id_list = record['IdList']
    if not id_list:
        # Same exception type as a bare ``[0]`` on an empty list, but with
        # a message that names the organism that was not found.
        raise IndexError('No taxonomy ID found for {!r}'.format(species))
    return id_list[0]
if __name__ == '__main__':
    # Look up the taxonomy ID of each demo organism, in order, and print
    # the resulting list.
    species_names = ['lophodermium_seditiosum', 'trichoderma_harzianum']
    found_ids = list(map(get_tax_id, species_names))
    print(found_ids)
def get_proteomes(taxid, max_results=2000):
    """Fetch the protein records for a taxonomy ID as FASTA text.

    Args:
        taxid: Taxonomy ID, interpolated into a ``txid<ID>[Organism:exp]``
            protein search.
        max_results: Largest number of matching records to download.
            Defaults to 2000.

    Returns:
        The body returned by the FASTA fetch, or an empty string when the
        search matches no records.

    Raises:
        RuntimeError: If the search reports more than *max_results* hits.
    """
    query = 'txid{}[Organism:exp] '.format(taxid)
    # Without an explicit ``retmax`` the search returns only its default
    # page of IDs (20 per the NCBI docs), so larger result sets would be
    # silently truncated before the fetch.
    handle = Entrez.esearch(db='protein', term=query, retmax=max_results)
    try:
        result = Entrez.read(handle)
    finally:
        handle.close()

    if int(result['Count']) > max_results:
        raise RuntimeError('Too many results!')

    ids = ','.join(result['IdList'])
    if not ids:
        # Nothing matched; skip a fetch request with an empty ID list.
        return ''

    fetch_handle = Entrez.efetch(db='protein', id=ids, rettype='fasta')
    try:
        return fetch_handle.read()
    finally:
        fetch_handle.close()
if __name__ == '__main__':
    # Demo run: download the FASTA text for one hard-coded taxonomy ID
    # and print it.
    fasta_text = get_proteomes('128023')
    print(fasta_text)
- >ADI44294.1 actin, partial [Lophodermium seditiosum]
- CKAGFAGDDAPRAVFPSIVGRPRHHGIMIGMGQKDS
- >ADI44293.1 actin, partial [Lophodermium seditiosum]
- CKAGFAGDDAPRAVFPSIVGRPRHHGIMIGMGQKDS
Add Comment
Please, Sign In to add comment