Advertisement
Guest User

word_count.py

a guest
Jul 21st, 2017
241
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.93 KB | None | 0 0
  1. import sys
  2. from flink.plan.Environment import get_environment
  3. from flink.plan.Constants import WriteMode
  4. from flink.functions.GroupReduceFunction import GroupReduceFunction
  5.  
  6.  
  class Adder(GroupReduceFunction):
      """Group reducer that folds a group of ``(count, word)`` records into one."""

      def reduce(self, iterator, collector):
          """Emit a single ``(total, word)`` pair for the records in ``iterator``.

          The first record supplies both the starting count and the word;
          from every later record only field 0 is read and added to the total.
          ``iterator.next()`` is called unconditionally, so an empty iterator
          is not handled here.
          """
          head_count, word = iterator.next()
          # The first record is already consumed; the generator walks the rest.
          total = head_count + sum(record[0] for record in iterator)
          collector.collect((total, word))
  12.  
  if __name__ == "__main__":
      # Source and sink locations on HDFS.
      input_file = 'hdfs://cdh01.sbgdinc.com:8020/viktor/in.txt'
      output_file = 'hdfs://cdh01.sbgdinc.com:8020/viktor/out.txt'

      env = get_environment()
      env.set_parallelism(3)
      data = env.read_text(input_file)

      # One (1, word) pair per whitespace-separated, lower-cased token.
      pairs = data.flat_map(lambda x, c: [(1, word) for word in x.lower().split()])

      # Group on field 1 (the word slot of each pair) and let Adder total
      # the counts of every group.
      totals = pairs.group_by(1).reduce_group(Adder(), combinable=True)

      # Render each (count, word) result as a text line and write it out
      # with WriteMode.OVERWRITE.
      report = totals.map(lambda y: 'Count: %s Word: %s' % (y[0], y[1]))
      report.write_text(output_file, write_mode=WriteMode.OVERWRITE)

      env.execute()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement