Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def readFile(filename):
    """Group sequence names by localization, using the file's header lines.

    Only lines starting with '>' are considered. The text after '>' is
    stripped and split on ':'; the first field is taken as the name and the
    second as the localization. Any further ':'-separated fields are ignored.

    Args:
        filename: Path of the text file to read.

    Returns:
        A dict mapping each localization string to the list of names found
        under it, in the order they appear in the file.

    Raises:
        IndexError: If a header line contains no ':' separator.
    """
    seqs = {}
    # The context manager closes the handle even when a malformed header
    # raises part-way through the file.
    with open(filename) as f:
        for row in f:
            if not row.startswith('>'):
                continue
            data = row[1:].strip().split(':')
            name, localization = data[0], data[1]
            # setdefault replaces the dict.has_key() check, which no longer
            # exists on Python 3.
            seqs.setdefault(localization, []).append(name)
    return seqs
def countorganism_by_localization(prots):
    """Count, for every localization, how many names belong to each organism.

    The organism is taken to be the second '_'-separated field of a name.

    Args:
        prots: Dict mapping a localization to an iterable of names, such as
            the dict returned by readFile.

    Returns:
        A dict mapping each localization to a dict of organism -> count.

    Raises:
        IndexError: If a name contains no '_' separator.
    """
    orgbyloc = {}
    for localization, names in prots.items():
        orgs = {}
        for name in names:
            org = name.split("_")[1]
            # dict.get with a default of 0 replaces the has_key()/else
            # branch; has_key() is not available on Python 3.
            orgs[org] = orgs.get(org, 0) + 1
        orgbyloc[localization] = orgs
    return orgbyloc
def print_counts(counts_by_loc):
    """Print every localization followed by its organism counts.

    For each localization two lines are written to standard output: the
    localization itself, then space-separated "count:organism" entries
    ordered from highest to lowest (count, organism) pair.

    Args:
        counts_by_loc: Dict mapping a localization to a dict of
            organism -> integer count.
    """
    for localization, counts in counts_by_loc.items():
        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(localization)
        # Putting the count first makes the tuples sort by count, with the
        # organism name breaking ties.
        countlist = sorted(
            ((count, org) for org, count in counts.items()),
            reverse=True,
        )
        print(" ".join("%d:%s" % (count, org) for count, org in countlist))
def compute_organismcount_by_localization(filename):
    """Read `filename`, tally organisms per localization and print the tallies.

    Args:
        filename: Path of the file handed to readFile.
    """
    # Pipeline: parse headers -> count organisms per localization -> print.
    print_counts(countorganism_by_localization(readFile(filename)))
# raw_input exists only on Python 2; on Python 3 the same behaviour is
# provided by input. raw_input is tried first because Python 2's input()
# would evaluate whatever the user typed.
try:
    _read_line = raw_input
except NameError:
    _read_line = input

# Ask for the file to analyse and print its organism counts.
filename = _read_line("Name of file: ")
compute_organismcount_by_localization(filename)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement