Advertisement
Guest User

Untitled

a guest
Jan 20th, 2017
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.79 KB | None | 0 0
  1. from mrjob.job import MRJob
  2. from mrjob.step import MRStep
  3. from stemming.porter2 import stem
  class MRWordCount(MRJob):
      """Two-step mrjob job: count stemmed words, then re-key them by count.

      Step 1 splits every input line on whitespace, lower-cases and stems each
      token, and sums the occurrences per stem.
      Step 2 swaps each (stem, count) pair so the count becomes the key, then
      emits (stem, count-as-string) pairs grouped by that key.
      """

      # Width of the zero-padded count used as the step-2 shuffle key.
      # The original padded to 4 digits only, so a count >= 10000 produced a
      # longer key that compared out of order as a string ('10000' < '9999').
      # 20 digits covers any 64-bit count.
      # NOTE(review): this assumes the framework orders string keys
      # lexicographically, which is what the original zero-padding implies.
      _SORT_KEY_WIDTH = 20

      def steps(self):
          """Return the two MRSteps that make up the job, in execution order."""
          return [
              MRStep(mapper=self.mapper,
                     reducer=self.reducer),
              MRStep(mapper=self.secondmapper,
                     reducer=self.secondreducer),
          ]

      def mapper(self, _, lines):
          """Yield (stem, 1) for every whitespace-separated token of the line.

          The incoming key is ignored; ``lines`` is the text of one record.
          """
          words = lines.strip().split()
          for w in words:
              yield stem(w.lower()), 1

      def reducer(self, key, values):
          """Yield (stem, total) where total is the sum of the partial counts."""
          yield key, sum(values)

      def secondmapper(self, key, value):
          """Yield (fixed-width count, stem) so pairs are keyed by the count."""
          # '%0*d' takes the pad width from the argument tuple.
          yield '%0*d' % (self._SORT_KEY_WIDTH, int(value)), key

      def secondreducer(self, key, values):
          """Yield (stem, count) for every stem that shares this count.

          The count is rendered with the original 4-digit zero padding, so the
          emitted pairs are identical to what the job produced before the
          shuffle key was widened.
          """
          count = '%04d' % int(key)
          for v in values:
              yield v, count
  # Script entry point: hand control to mrjob's runner only when this file is
  # executed directly, not when it is imported.
  if __name__ == '__main__':
      MRWordCount.run()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement