Advertisement
Guest User

aswdefrthyuil

a guest
Feb 17th, 2020
100
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.26 KB | None | 0 0
  1. def readFile(filename):
  2.     f=open(filename)
  3.     seqs={}
  4.     for row in f:
  5.         if row[0] == '>':
  6.             data=row[1:].strip().split(':')
  7.             name,localization=data[0],data[1]
  8.             if not seqs.has_key(localization):
  9.                 seqs[localization]=[]
  10.             seqs[localization].append(name)
  11.     return seqs
  12.  
  13. def countorganism_by_localization(prots):
  14.     orgbyloc={}
  15.     for (localization,data) in prots.items():
  16.         orgs={}
  17.         for name in data:
  18.             org = name.split("_")[1]
  19.             if not orgs.has_key(org):
  20.                 orgs[org]=1
  21.             else:
  22.                 orgs[org]+=1
  23.         orgbyloc[localization]=orgs
  24.     return orgbyloc
  25.  
  26. def print_counts(counts_by_loc):
  27.     for localization,counts in counts_by_loc.items():
  28.         print localization
  29.         countlist=[(count,org) for (org,count) in counts.items()]
  30.         countlist.sort()
  31.         countlist.reverse()
  32.         print " ".join(["%d:%s" %(count,org) for (count,org) in countlist])
  33.  
  34. def compute_organismcount_by_localization(filename):
  35.     prots=readFile(filename)
  36.     counts_by_loc=countorganism_by_localization(prots)
  37.     print_counts(counts_by_loc)
  38.  
  39.  
  40. filename=raw_input("Name of file: ")
  41. compute_organismcount_by_localization(filename)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement