desdemona

download_data

May 24th, 2016
445
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 0.65 KB | None | 0 0
  1. #!/bin/bash
  2. set -x
  3.  
  4. cd /usr/local/hadoop/
  5. bin/hdfs namenode -format
  6.  
  7. sbin/start-dfs.sh
  8.  
  9. bin/hdfs dfs -mkdir /user
  10. bin/hdfs dfs -mkdir /user/hduser
  11.  
  12. bin/hdfs dfs -put /home/domi/Desktop/wikiscraps/en/* wc_input
  13. bin/hdfs dfs -cat wc_input/*
  14.  
  15.  
  16. hadoop jar share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar -input wc_input -output wc_output -mapper /home/domi/PycharmProjects/wiki_scraper/wc_mapper.py -reducer /home/domi/PycharmProjects/wiki_scraper/wc_reducer.py
  17.  
  18. bin/hdfs dfs -cat wc_output/* > /home/hduser/wc_output.txt
  19.  
  20. bin/hdfs dfs -cat wc_output/*
  21.  
  22. head -200 /home/hduser/wc_output.txt > /home/hduser/en_200_most_frequent.txt
  23.  
  24. sbin/stop-dfs.sh
Add Comment
Please, Sign In to add comment