# Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
# Stage the driver/timesheet CSV data in HDFS, load it into Hive tables, then
# run two beeline queries and a SparkPi job, each tagged with an Unravel
# workflow name and a UTC timestamp.
#
# Environment:
#   HS2  HiveServer2 host used in the beeline JDBC URL (required; this script
#        does not set it -- see the commented-out examples further down).

# Upload every entry of the current directory to /user/unravel/ in HDFS.
upload_cwd_to_hdfs() {
  local entry
  for entry in *; do
    # With no matches the glob stays a literal '*'; skip it instead of
    # handing a non-existent path to hadoop.
    [ -e "$entry" ] || continue
    hadoop fs -put "$entry" /user/unravel/
  done
}

# Run one SQL string ($1) through beeline against HiveServer2 at $HS2:10000
# as user hdfs, passing the workflow name and timestamp as hiveconf settings.
run_beeline() {
  beeline -u "jdbc:hive2://${HS2}:10000" -n hdfs \
    --hiveconf "unravel.workflow.name=${WORKFLOW_NAME}" \
    --hiveconf "unravel.workflow.utctimestamp=${UTC_TIME_STAMP}" \
    -e "$1"
}

# --- Stage input files in HDFS ----------------------------------------------
# First upload whatever directory the script was started in, then the
# driver_data directory.
upload_cwd_to_hdfs
# Stop if the data directory is missing; carrying on would re-upload the
# starting directory instead of the driver data.
cd /opt/h*/driver_data || { printf '%s\n' "cannot cd to /opt/h*/driver_data" >&2; exit 1; }
upload_cwd_to_hdfs

# --- drivers table ----------------------------------------------------------
# Raw CSV lines land in a one-column staging table. IF NOT EXISTS mirrors the
# temp_timesheet statement below, so a rerun reaches the LOAD instead of
# aborting on "table already exists".
hive -e "create table if not exists temp_drivers (col_value STRING); load data inpath '/user/unravel/drivers.csv' OVERWRITE INTO TABLE temp_drivers;"
hive -e "select * from temp_drivers;"
# Each regexp_extract(..., '^(?:([^,]*),?){N}', 1) picks the N-th
# comma-separated field of the raw line.
hive -e "CREATE TABLE IF NOT EXISTS drivers (driverId INT, name STRING, ssn BIGINT, location STRING, certified STRING, wageplan STRING); insert overwrite table drivers SELECT regexp_extract(col_value, '^(?:([^,]*),?){1}', 1) driverId,regexp_extract(col_value, '^(?:([^,]*),?){2}', 1) name, regexp_extract(col_value, '^(?:([^,]*),?){3}', 1) ssn, regexp_extract(col_value, '^(?:([^,]*),?){4}', 1) location, regexp_extract(col_value, '^(?:([^,]*),?){5}', 1) certified, regexp_extract(col_value, '^(?:([^,]*),?){6}', 1) wageplan from temp_drivers;"
hive -e " select * from drivers"

# --- timesheet table --------------------------------------------------------
# Same staging-then-split pattern as the drivers table, with four fields.
hive -e "CREATE TABLE IF NOT EXISTS temp_timesheet (col_value string); LOAD DATA INPATH '/user/unravel/timesheet.csv' OVERWRITE INTO TABLE temp_timesheet;"
hive -e "select * from temp_timesheet;"
hive -e "CREATE TABLE IF NOT EXISTS timesheet (driverId INT, week INT, hours_logged INT , miles_logged INT); insert overwrite table timesheet SELECT regexp_extract(col_value, '^(?:([^,]*),?){1}', 1) driverId, regexp_extract(col_value, '^(?:([^,]*),?){2}', 1) week, regexp_extract(col_value, '^(?:([^,]*),?){3}', 1) hours_logged, regexp_extract(col_value, '^(?:([^,]*),?){4}', 1) miles_logged from temp_timesheet;"
hive -e "select * from timesheet;"

# --- Tagged workflow run ----------------------------------------------------
export WORKFLOW_NAME="Notif4MissedSLA"
#export WORKFLOW_NAME="WealthMgmt"
# Assign first, export second, so a failing `date` is not hidden by export's
# own exit status.
UTC_TIME_STAMP=$(date -u '+%Y%m%dT%H%M%SZ')
export UTC_TIME_STAMP
#export HS2='vego1.unraveldatalab.com'
#export HS2='127.0.0.1'
# Both HS2 assignments above are commented out; fail with a clear message
# rather than connecting to "jdbc:hive2://:10000".
: "${HS2:?HS2 must be set to the HiveServer2 host}"

run_beeline "use default; SELECT d.driverId, d.name, t.total_hours, t.total_miles from drivers d JOIN (SELECT driverId, sum(hours_logged)total_hours, sum(miles_logged)total_miles FROM timesheet GROUP BY driverId ) t ON (d.driverId = t.driverId);"
# Fixed pauses between the jobs are kept as-is.
# NOTE(review): their purpose is not visible here -- confirm before changing.
sleep 35
run_beeline "use default; SELECT driverId, sum(hours_logged), sum(miles_logged) FROM timesheet GROUP BY driverId;"
sleep 40
#run_beeline "use tpcds_text_30; select count(*) from reason;"
# "--master yarn --deploy-mode cluster" is the non-deprecated spelling of the
# old "yarn-cluster" master string.
spark-submit --conf "spark.unravel.workflow.name=${WORKFLOW_NAME}" --conf "spark.unravel.workflow.utctimestamp=${UTC_TIME_STAMP}" --conf "spark.eventLog.enabled=true" --class org.apache.spark.examples.SparkPi --master yarn --deploy-mode cluster /opt/cloudera/parcels/CDH/lib/spark/lib/spark-examples.jar 10 100
# Add Comment
# Please, Sign In to add comment