• API
• FAQ
• Tools
• Archive
SHARE
TWEET Question a guest May 26th, 2019 75 in 21 hours
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
# Answers Questions 2-4 of the worm cDNA exercise for the FASTA file 'input.fa'.
# Each question prints its result to stdout.
from collections import Counter

from Bio import SeqIO
# NOTE(review): newer Biopython releases deprecate GC in favour of
# gc_fraction -- confirm against the installed version.
from Bio.SeqUtils import GC

WINDOW = 100             # number of bp examined at the start / end of a cDNA
ECORI_SITE = 'GAATTC'    # EcoRI recognition sequence
MIN_CDNA_LENGTH = 2000   # Question 3 length cut-off (bp)
MIN_ECORI_SITES = 3      # Question 3 site cut-off
MIN_GLUTAMINES = 10      # Question 4 glutamine cut-off
MIN_TOP_RESIDUE = 100    # Question 4 most-common-residue cut-off

worms_f = 'input.fa'
# SeqIO.parse returns a lazy, single-pass iterator.  The records are read into
# a list while the handle is still open so that all three questions below can
# loop over them (closing the handle first, or re-using the iterator, would
# leave the later loops with nothing to read).
with open(worms_f, 'r') as worms_h:
    worms_seq = list(SeqIO.parse(worms_h, 'fasta'))

#Question 2
#Count the number of cDNAs that have, within 100 bp of the sequence start,
#   GC content of; (1) <30%, (2) 30%-40%, (3) 40%-50%, (4) 50%<
# The ranges are treated as half-open so every value lands in exactly one
# bucket: [0, 30), [30, 40), [40, 50), [50, 100].
t = 0      # GC < 30%
t_f = 0    # 30% <= GC < 40%
f_fi = 0   # 40% <= GC < 50%
fi = 0     # GC >= 50%
for record in worms_seq:
    # GC() returns a percentage; [:WINDOW] is exactly the first 100 bases.
    gc_percent = GC(record.seq[:WINDOW])
    if gc_percent < 30:
        t += 1
    elif gc_percent < 40:
        t_f += 1
    elif gc_percent < 50:
        f_fi += 1
    else:
        fi += 1
print(t, t_f, f_fi, fi, sep='\n')

#Question 3
#Find all cDNA >= 2000 bp long w/ 3 or more
#   EcoR1 ('GAATTC') sites within 100 bp of the end of the sequence
count3 = []  # ids of the matching records
for cdna in worms_seq:
    if len(cdna.seq) < MIN_CDNA_LENGTH:
        continue
    # Upper-case the last 100 bases so soft-masked (lower-case) sequence is
    # matched as well; str.count is case-sensitive.
    tail = str(cdna.seq[-WINDOW:]).upper()
    if tail.count(ECORI_SITE) >= MIN_ECORI_SITES:
        count3.append(cdna.id)
print(count3)

#Question 4
#Find all the cDNAs that have 10 <= Glutamine residues and
#   their most common amino acids occur 100 or more times
count4 = []  # ids of the matching records
for cdna in worms_seq:
    # Translate in frame 1, as the original script did.  A trailing partial
    # codon is trimmed first because Biopython warns about (and may in future
    # reject) sequences whose length is not a multiple of three.
    usable = len(cdna.seq) - len(cdna.seq) % 3
    protein = str(cdna.seq[:usable].translate())
    residue_counts = Counter(protein)
    residue_counts.pop('*', None)  # '*' marks a stop codon, not an amino acid
    top = residue_counts.most_common(1)  # [(residue, count)], or [] if empty
    if (protein.count('Q') >= MIN_GLUTAMINES
            and top and top[0][1] >= MIN_TOP_RESIDUE):
        count4.append(cdna.id)
print(count4)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy.

Top