
Hadoop installation

# run this script as the user `hduser` (or any non-root user of your choice)
# jdk
wget https://github.com/frekele/oracle-java/releases/download/8u212-b10/jdk-8u212-linux-x64.tar.gz
tar zxvf jdk-8u212-linux-x64.tar.gz
sudo mv jdk1.8.0_212 /usr/java

# ssh (the Hadoop start scripts need passwordless ssh to localhost)
sudo apt-get install openssh-server # 'sudo dnf install openssh-server' for centos
ssh-keygen -t rsa -P ""
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 0600 ~/.ssh/authorized_keys
# verification: this should log you in without asking for a password
ssh localhost
# if you get "connection closed", redo the ssh steps above and try again
exit

# hadoop
wget https://archive.apache.org/dist/hadoop/common/hadoop-3.3.0/hadoop-3.3.0.tar.gz
tar xzf hadoop-3.3.0.tar.gz
sudo mv hadoop-3.3.0 /opt/hadoop
sudo chown -R "$(whoami)" /opt/hadoop

# env variables
cd ~ && nano .bashrc
# paste and save
export JAVA_HOME=/usr/java
export PATH=$PATH:$JAVA_HOME/bin
export HADOOP_HOME=/opt/hadoop
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_INSTALL/lib/native"
export HADOOP_CLASSPATH=$(hadoop classpath)
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$JAVA_HOME/jre/lib:$JAVA_HOME/lib:$JAVA_HOME/lib/tools.jar

source .bashrc
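# sanity check: both commands should print version info if the paths above are
# correct (this assumes the JDK really ended up in /usr/java)
java -version
hadoop version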

nano $HADOOP_HOME/etc/hadoop/hadoop-env.sh
# add and save
export JAVA_HOME=/usr/java

# /opt/hadoop already belongs to the current user, so no sudo here
# (root-owned data dirs would break the namenode/datanode later)
mkdir -p /opt/hadoop/hadoopdata/namenode
mkdir -p /opt/hadoop/hadoopdata/datanode

# config files
cd $HADOOP_HOME/etc/hadoop
nano core-site.xml
# paste and save
    <configuration>
        <property>
            <name>fs.defaultFS</name>
            <value>hdfs://localhost:9000</value>
        </property>
    </configuration>

nano hdfs-site.xml
# paste and save
    <configuration>
        <property>
            <name>dfs.replication</name>
            <value>1</value>
        </property>
        <property>
            <name>dfs.namenode.name.dir</name>
            <value>file:/opt/hadoop/hadoopdata/namenode</value>
        </property>
        <property>
            <name>dfs.datanode.data.dir</name>
            <value>file:/opt/hadoop/hadoopdata/datanode</value>
        </property>
    </configuration>

nano mapred-site.xml
# paste and save
    <configuration>
        <property>
            <name>mapreduce.framework.name</name>
            <value>yarn</value>
        </property>
        <property>
            <name>mapreduce.application.classpath</name>
            <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
        </property>
    </configuration>

nano yarn-site.xml
# paste and save
    <configuration>
        <property>
            <name>yarn.nodemanager.aux-services</name>
            <value>mapreduce_shuffle</value>
        </property>
        <property>
            <name>yarn.nodemanager.env-whitelist</name>
            <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
        </property>
    </configuration>
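# optional: Hadoop 3 includes a small validator for the XML config files
# (assuming `conftest` is available in your 3.3.0 build, it reports each file as valid)
hadoop conftest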

# format the hdfs (only needed once, on first setup)
hdfs namenode -format

# start the hdfs and yarn daemons
start-all.sh
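# quick check: `jps` should list NameNode, DataNode, SecondaryNameNode,
# ResourceManager and NodeManager; the NameNode web UI is normally at
# http://localhost:9870 and the ResourceManager UI at http://localhost:8088
jps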


## Python lab (word count with Hadoop Streaming)
sudo apt-get install python2 # 'sudo dnf install python2' for centos
cd ~
# put some words in `wcount.txt`
nano wcount.txt
hdfs dfs -put wcount.txt /wcount.txt
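# quick check: the file should now be listed at the HDFS root
hdfs dfs -ls /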

# code
# https://www.michael-noll.com/tutorials/writing-an-hadoop-mapreduce-program-in-python/
mkdir WordCountPython
# copy/paste the mapper and reducer code into these two files (a minimal sketch follows below)
nano WordCountPython/WordCount_mapper.py
nano WordCountPython/WordCount_reducer.py
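# a minimal sketch of the two scripts, in the spirit of the tutorial linked
# above (not its exact code): the mapper emits one "word<TAB>1" line per word,
# the reducer sums the counts of consecutive identical words in the sorted stream
#
# WordCountPython/WordCount_mapper.py:
import sys

# read text from stdin and emit "<word>\t1" for every word
for line in sys.stdin:
    for word in line.strip().split():
        print('%s\t%s' % (word, 1))

# WordCountPython/WordCount_reducer.py:
import sys

current_word = None
current_count = 0

# the shuffle/sort phase guarantees that all counts for a given word arrive together
for line in sys.stdin:
    line = line.strip()
    if not line:
        continue
    word, count = line.split('\t', 1)
    try:
        count = int(count)
    except ValueError:
        continue
    if word == current_word:
        current_count += count
    else:
        if current_word is not None:
            print('%s\t%s' % (current_word, current_count))
        current_word = word
        current_count = count

# emit the last word
if current_word is not None:
    print('%s\t%s' % (current_word, current_count))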

# test the pipeline locally, without hadoop
cat wcount.txt | python2 WordCountPython/WordCount_mapper.py | sort -k1,1 | python2 WordCountPython/WordCount_reducer.py

# hadoop
hdfs dfsadmin -safemode leave # make sure the namenode is not stuck in safe mode
# execute (copy the whole command; note that each script gets its own -file flag)
hadoop jar /opt/hadoop/share/hadoop/tools/lib/hadoop-streaming-3.3.0.jar \
    -file WordCountPython/WordCount_mapper.py -file WordCountPython/WordCount_reducer.py \
    -mapper "python2 WordCount_mapper.py" -reducer "python2 WordCount_reducer.py" \
    -input /wcount.txt -output /wcountpython

# result
hdfs dfs -cat /wcountpython/part-00000
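# note: a streaming job will not overwrite an existing output directory;
# remove it before re-running the job
hdfs dfs -rm -r /wcountpython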