Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env bash
#
# Export (path_id, product_id) pairs from the recent_items table, run them
# through two Hadoop streaming passes (reduce.rb, then reduce2.rb), and load
# the final result into MySQL with mysqlimport.
#
# Usage:
#   <this script> USER HOST DATABASE [PASSWORD]
#
# Environment:
#   HADOOP_HOME  Root of the Hadoop install. The streaming jar is expected at
#                $HADOOP_HOME/mapred/contrib/streaming/hadoop-0.21.0-streaming.jar
#
# Files:
#   reduce.rb and reduce2.rb must live next to this script.
#   /tmp/input, /tmp/output and /tmp/final_output are wiped and recreated on
#   every run.
#
# Exit status: non-zero as soon as any step fails, so a broken export or
# Hadoop job never reaches the --replace import.

set -euo pipefail

: "${HADOOP_HOME:?HADOOP_HOME is not set}"

die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if (( $# < 3 )); then
  printf 'Usage: %s USER HOST DATABASE [PASSWORD]\n' "${0##*/}" >&2
  exit 2
fi

user=$1
host=$2
database=$3
# The password is optional; an empty value is passed through as an empty
# password, exactly as the unquoted original did.
# NOTE: a password given on the command line is visible in the process list.
pass=${4-}

# Absolute path, so the reducer scripts are found regardless of the working
# directory the streaming job runs its tasks in.
script_path=$(cd -- "$(dirname -- "$0")" && pwd) || die "cannot resolve script directory"

readonly input_dir=/tmp/input
readonly output_dir=/tmp/output
readonly final_dir=/tmp/final_output
readonly jarfile="$HADOOP_HOME/mapred/contrib/streaming/hadoop-0.21.0-streaming.jar"

[[ -f "$jarfile" ]] || die "streaming jar not found: $jarfile"

#######################################
# Run one Hadoop streaming job with an identity (cat) mapper.
# Globals:   HADOOP_HOME, jarfile (read)
# Arguments: $1 - input path
#            $2 - output directory (must not exist yet)
#            $3 - reducer executable
# Returns:   exit status of the hadoop command
#######################################
run_streaming_job() {
  local input=$1
  local output=$2
  local reducer=$3

  "$HADOOP_HOME/bin/hadoop" jar "$jarfile" \
    -input "$input" \
    -output "$output" \
    -mapper cat \
    -reducer "$reducer"
}

# Clean up output from prior runs; Hadoop refuses to write into an existing
# output directory.
for dir in "$input_dir" "$output_dir" "$final_dir"; do
  if [[ -d "$dir" ]]; then
    rm -rf -- "$dir"
  fi
done
mkdir -p -- "$input_dir"

# Step 1: dump the source rows as tab-separated text.
# NOTE(review): --batch still prints a "path_id<TAB>product_id" header row,
# which ends up in the job input. Confirm reduce.rb tolerates it, or add
# --skip-column-names here.
mysql -u "$user" --password="$pass" --host="$host" --batch \
  -e "SELECT path_id, product_id FROM recent_items" \
  "$database" > "$input_dir/recent_items.tsv"

# Step 2: first pass, then merge the part files into a single input for the
# second pass.
run_streaming_job "$input_dir" "$output_dir" "$script_path/reduce.rb"
cat "$output_dir"/part* > "$output_dir/related.tsv"

# Step 3: second pass and merge. The merged file name matters: mysqlimport
# derives the target table name (viewed_products) from it.
run_streaming_job "$output_dir/related.tsv" "$final_dir" "$script_path/reduce2.rb"
cat "$final_dir"/part* > "$final_dir/viewed_products.tsv"

# Step 4: load the result, replacing rows that collide on a unique key.
# The password is passed here as well; the original omitted it, so the import
# could not authenticate with the same account the export used.
mysqlimport --local --compress -u "$user" --password="$pass" --host="$host" \
  --columns=source_product_id,target_product_id,count \
  --replace "$database" \
  "$final_dir/viewed_products.tsv"
Add Comment
Please, Sign In to add comment