desdemona

better_wc_reducer

Jun 8th, 2016
457
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.66 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. from itertools import groupby
  4. from operator import itemgetter
  5. import sys
  6.  
  7. def read_mapper_output(file, separator='\t'):
  8.     for line in file:
  9.         yield line.rstrip().split(separator, 1)
  10.  
  11.  
  12.  
  13. def main(separator='\t'):
  14.     # input comes from STDIN (standard input)
  15.     data = read_mapper_output(sys.stdin, separator=separator)
  16.  
  17.     for current_word, group in groupby(data, itemgetter(0)):
  18.         try:
  19.             total_count = sum(int(count) for current_word, count in group)
  20.             print "%s%s%d" % (current_word, separator, total_count)
  21.         except ValueError:
  22.  
  23.             pass
  24.  
  25. if __name__ == "__main__":
  26.     main()
Add Comment
Please, Sign In to add comment