desdemona

better_all_reducer

Jun 8th, 2016
477
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.66 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. from itertools import groupby
  4. from operator import itemgetter
  5. import sys
  6.  
  7. def read_mapper_output(file, separator='\t'):
  8.     for line in file:
  9.         yield line.rstrip().split(separator, 1)
  10.  
  11.  
  12. def main(separator='\t'):
  13.     # input comes from STDIN (standard input)
  14.     data = read_mapper_output(sys.stdin, separator=separator)
  15.  
  16.     for current_word, group in groupby(data, itemgetter(0)):
  17.         try:
  18.             total_count = sum(int(count) for current_word, count in group)
  19.             print "%s%s%d" % (current_word, separator, total_count)
  20.         except ValueError:
  21.  
  22.             pass
  23.  
# Run the reducer with the default tab separator only when this file is
# executed as a script; importing the module has no side effects.
if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment