Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/python
"""Reducer: total the counts for each path and emit the most frequent path(s).

Reads tab-separated ``path<TAB>count`` rows from stdin, sums the counts per
path, and writes every path tied for the highest total to stdout as a
tab-separated, fully quoted ``path<TAB>total`` row.
"""
import sys
import re
import csv
from collections import defaultdict


def tabulate_counts(rows):
    """Return a dict mapping each path to the sum of its counts.

    Args:
        rows: iterable of ``[path, count]`` sequences, such as the rows
            yielded by ``csv.reader``. ``count`` must be convertible by
            ``int()``.

    Returns:
        A plain ``dict`` of ``{path: total_count}``; empty when ``rows``
        yields nothing.

    Raises:
        ValueError: if a non-empty row does not have exactly two fields or
            its count is not an integer.
    """
    totals = defaultdict(int)
    for row in rows:
        # csv.reader yields [] for a blank input line; there is nothing to
        # count, so skip it instead of failing on the unpacking below.
        if not row:
            continue
        path, count = row
        totals[path] += int(count)
    return dict(totals)


def most_requested(totals):
    """Return the ``(path, total)`` pairs that share the highest total.

    Args:
        totals: mapping of ``{path: total_count}``.

    Returns:
        A list with one ``(path, total)`` tuple per path whose total equals
        the maximum. Several tuples are returned on a tie; the list is empty
        when ``totals`` is empty.
    """
    # max() raises ValueError on an empty sequence, so guard empty input.
    if not totals:
        return []
    highest = max(totals.values())
    return [(path, total) for path, total in totals.items() if total == highest]


def main():
    """Run the reducer: read rows from stdin, write the top path(s) to stdout."""
    reader = csv.reader(sys.stdin, delimiter='\t')
    writer = csv.writer(sys.stdout, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)
    for path, total in most_requested(tabulate_counts(reader)):
        writer.writerow([path, str(total)])


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement