Guest User

Untitled

a guest
Oct 28th, 2018
246
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.85 KB | None | 0 0
  1. for x in *; do hadoop fs -put $x /user/unravel/; done
  2.  
  3. cd /opt/h*/driver_data
  4. for x in *; do hadoop fs -put $x /user/unravel/; done
  5.  
  6. hive -e "create table temp_drivers (col_value STRING); load data inpath '/user/unravel/drivers.csv' OVERWRITE INTO TABLE temp_drivers;"
  7. hive -e "select * from temp_drivers;"
  8.  
  9. hive -e "CREATE TABLE IF NOT EXISTS drivers (driverId INT, name STRING, ssn BIGINT, location STRING, certified STRING, wageplan STRING); insert overwrite table drivers SELECT regexp_extract(col_value, '^(?:([^,]*),?){1}', 1) driverId,regexp_extract(col_value, '^(?:([^,]*),?){2}', 1) name, regexp_extract(col_value, '^(?:([^,]*),?){3}', 1) ssn, regexp_extract(col_value, '^(?:([^,]*),?){4}', 1) location, regexp_extract(col_value, '^(?:([^,]*),?){5}', 1) certified, regexp_extract(col_value, '^(?:([^,]*),?){6}', 1) wageplan from temp_drivers;"
  10. hive -e " select * from drivers"
  11.  
  12.  
  13. hive -e "CREATE TABLE IF NOT EXISTS temp_timesheet (col_value string); LOAD DATA INPATH '/user/unravel/timesheet.csv' OVERWRITE INTO TABLE temp_timesheet;"
  14. hive -e "select * from temp_timesheet;"
  15.  
  16. hive -e "CREATE TABLE IF NOT EXISTS timesheet (driverId INT, week INT, hours_logged INT , miles_logged INT); insert overwrite table timesheet SELECT regexp_extract(col_value, '^(?:([^,]*),?){1}', 1) driverId, regexp_extract(col_value, '^(?:([^,]*),?){2}', 1) week, regexp_extract(col_value, '^(?:([^,]*),?){3}', 1) hours_logged, regexp_extract(col_value, '^(?:([^,]*),?){4}', 1) miles_logged from temp_timesheet;"
  17. hive -e "select * from timesheet;"
  18.  
  19.  
  20. export WORKFLOW_NAME="Notif4MissedSLA"
  21. #export WORKFLOW_NAME="WealthMgmt"
  22. export UTC_TIME_STAMP=$(date -u '+%Y%m%dT%H%M%SZ')
  23. #export HS2='vego1.unraveldatalab.com'
  24. #export HS2='127.0.0.1'
  25.  
  26. beeline -u "jdbc:hive2://$HS2:10000" -n hdfs --hiveconf unravel.workflow.name=${WORKFLOW_NAME} --hiveconf unravel.workflow.utctimestamp=${UTC_TIME_STAMP} -e "use default; SELECT d.driverId, d.name, t.total_hours, t.total_miles from drivers d JOIN (SELECT driverId, sum(hours_logged)total_hours, sum(miles_logged)total_miles FROM timesheet GROUP BY driverId ) t ON (d.driverId = t.driverId);"
  27.  
  28. sleep 35
  29.  
  30. beeline -u "jdbc:hive2://$HS2:10000" -n hdfs --hiveconf unravel.workflow.name=${WORKFLOW_NAME} --hiveconf unravel.workflow.utctimestamp=${UTC_TIME_STAMP} -e "use default; SELECT driverId, sum(hours_logged), sum(miles_logged) FROM timesheet GROUP BY driverId;"
  31.  
  32.  
  33. sleep 40
  34. #beeline -u "jdbc:hive2://$HS2:10000" -n hdfs --hiveconf unravel.workflow.name=${WORKFLOW_NAME} --hiveconf unravel.workflow.utctimestamp=${UTC_TIME_STAMP} -e "use tpcds_text_30; select count(*) from reason;"
  35.  
  36. spark-submit --conf "spark.unravel.workflow.name=${WORKFLOW_NAME}" --conf "spark.unravel.workflow.utctimestamp=$UTC_TIME_STAMP" --conf "spark.eventLog.enabled=true" --class org.apache.spark.examples.SparkPi --master yarn-cluster /opt/cloudera/parcels/CDH/lib/spark/lib/spark-examples.jar 10 100
Add Comment
Please, Sign In to add comment