Advertisement
Guest User

Untitled

a guest
Nov 14th, 2019
139
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.06 KB | None | 0 0
  #!/usr/bin/env bash
  #
  # Two-stage Hadoop Streaming pipeline over /data/wiki/en_articles.
  #
  #   Stage 1 ("113_1"): runs mapper.py / reducer.py with NUM_REDUCERS reducers
  #                      and writes its result to "${OUT_DIR}.tmp".
  #   Stage 2 ("113_2"): identity map/reduce (cat) through a single reducer, used
  #                      only to order the stage-1 records. The first two fields
  #                      of each record form the key, and keys are compared on
  #                      field 2 as a number, in descending order.
  #
  # Finally the first lines of the sorted result (head's default count) are
  # printed to stdout.
  #
  # mapper.py and reducer.py are shipped to the cluster with -files, so they are
  # expected to be present in the current working directory.

  # Abort on the first failing command or unset variable. Without this, a failed
  # stage 1 would still wipe the previous ${OUT_DIR} and launch stage 2 on
  # missing input. pipefail is deliberately NOT enabled: the final
  # `hadoop fs -cat | head` pipeline is expected to have its producer cut short
  # once head has read enough lines.
  set -eu

  readonly OUT_DIR="out"
  readonly NUM_REDUCERS=4
  readonly STREAMING_JAR="/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/hadoop-streaming.jar"

  # --- Stage 1 ---------------------------------------------------------------

  # Streaming refuses to write into an existing output directory, so clear any
  # leftover from a previous run. -f makes a missing directory a non-error;
  # ${OUT_DIR:?} aborts instead of deleting an unintended path if the variable
  # is ever empty.
  hadoop fs -rm -r -f -skipTrash "${OUT_DIR:?}.tmp" >/dev/null

  hadoop jar "${STREAMING_JAR}" \
    -D mapreduce.job.name="113_1" \
    -D mapreduce.job.reduces="${NUM_REDUCERS}" \
    -files mapper.py,reducer.py \
    -mapper mapper.py \
    -reducer reducer.py \
    -input /data/wiki/en_articles \
    -output "${OUT_DIR}.tmp" >/dev/null

  # --- Stage 2 ---------------------------------------------------------------

  # Only reached when stage 1 succeeded, so an earlier good result in
  # ${OUT_DIR} is not destroyed by a failed run.
  hadoop fs -rm -r -f -skipTrash "${OUT_DIR:?}" >/dev/null

  # A single reducer gives one globally ordered output file (part-00000).
  # NOTE(review): the trailing " 1" in the comparator options does not look like
  # a key spec and is presumably ignored by the comparator; kept byte-for-byte
  # to preserve behavior -- confirm before cleaning up.
  hadoop jar "${STREAMING_JAR}" \
    -D mapreduce.job.name="113_2" \
    -D mapreduce.job.reduces=1 \
    -D mapred.output.key.comparator.class=org.apache.hadoop.mapred.lib.KeyFieldBasedComparator \
    -D mapred.text.key.comparator.options="-k2nr 1" \
    -D stream.num.map.output.key.fields=2 \
    -mapper cat \
    -reducer cat \
    -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner \
    -input "${OUT_DIR}.tmp" \
    -output "${OUT_DIR}" >/dev/null

  # --- Report ----------------------------------------------------------------

  # stderr is silenced on purpose: head closes the pipe early, which can make
  # `hadoop fs -cat` complain about the broken pipe.
  hadoop fs -cat "${OUT_DIR}/part-00000" 2>/dev/null | head
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement