SHARE
TWEET

Question

a guest May 26th, 2019 75 in 21 hours
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. from Bio import SeqIO
  2. from Bio.SeqUtils import GC
  3. from collections import Counter
  4. worms_f = 'input.fa'
  5. worms_h = open(worms_f, 'r')
  6. worms_seq = SeqIO.parse(worms_h, 'fasta')
  7. worms_h.close()
  8. #Question 2
  9. #Count the number of cDNAs that have, within 100 bp of the sequence start,
  10. #   GC content of; (1) <30%, (2) 30%-40%, (3) 40%-50%, (4) 50%<
  11. t = 0
  12. t_f = 0
  13. f_fi = 0
  14. fi = 0
  15. for gc in worms_seq:
  16.     gc = GC(gc.seq[:101])
  17.     if gc < 30:
  18.         t += 1
  19.     elif gc >= 30 and gc <= 40:
  20.         t_f += 1
  21.     elif gc >= 40 and gc < 50:
  22.         f_fi += 1
  23.     else:
  24.         fi += 1
  25. print(str(t) + '\n' + str(t_f) + '\n' + str(f_fi) + '\n' + str(fi))
  26. #Question 3
  27. #Find all cDNA >= 2000 bp long w/ 3 or more
  28. #   EcoR1 ('GAATTC') sites within 100 bp of the end of the sequence
  29. count3 = []
  30. for cdna in worms_seq:
  31.     length = len(cdna.seq)
  32.     ecor1_rec = cdna.count('gaattc', length, int(length) - 101)
  33.     if length >= 2000 and ecor1_rec >= 3:
  34.         count3.append(cdna.id)
  35. print(count3)
  36. #Question 4
  37. #Find all the cDNAs that have 10 <= Glutamine residues and
  38. #   their most common amino acids occur 100 or more times
  39. count4 = ''
  40. for x in worms_seq:
  41.     qc = x.seq.translate().count('Q')
  42.     cnt = Counter(x)
  43.     mcb = cnt.most_common(1)
  44.     base = list(mcb[0])
  45.     if qc >= 10 and base[1] > 100:
  46.         count4 += 1
  47. print(count4)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top