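# Untitled: uploads driver/timesheet CSVs to HDFS, loads them into Hive tables, then runs workflow-tagged beeline and Spark jobs.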

# Upload every entry of the current working directory to /user/unravel/ on HDFS.
for x in *; do
  # An unmatched glob stays as the literal '*'; skip it instead of passing it on.
  [ -e "$x" ] || continue
  # Quoted so names containing spaces or glob characters stay a single argument.
  hadoop fs -put "$x" /user/unravel/
done

# Move into the driver_data directory (the h* glob is resolved by the shell) and
# upload its entries too. Stop if the cd fails: otherwise the loop below would
# silently re-upload the previous directory instead of the driver data.
cd /opt/h*/driver_data || {
  echo "cannot cd to /opt/h*/driver_data" >&2
  exit 1
}
for x in *; do
  [ -e "$x" ] || continue
  hadoop fs -put "$x" /user/unravel/
done

# Stage the raw drivers CSV: one STRING column per input line, loaded from the
# HDFS path and replacing any previous contents (OVERWRITE).
# "if not exists" matches the other CREATE statements in this script, so a
# re-run does not fail on the already-existing staging table.
hive -e "create table if not exists temp_drivers (col_value STRING); load data inpath '/user/unravel/drivers.csv' OVERWRITE INTO TABLE temp_drivers;"
# Dump the staged rows for a visual check.
hive -e "select * from temp_drivers;"

# Build the typed drivers table from the staged lines.
# Each regexp_extract uses '^(?:([^,]*),?){N}': the group is repeated N times and
# capture group 1 keeps the text of the last repetition, i.e. the Nth
# comma-separated field. Fields are split on every comma ([^,]*), so the pattern
# has no notion of quoted commas.
hive -e "CREATE TABLE IF NOT EXISTS drivers (driverId INT, name STRING, ssn BIGINT, location STRING, certified STRING, wageplan STRING);   insert overwrite table drivers SELECT regexp_extract(col_value, '^(?:([^,]*),?){1}', 1) driverId,regexp_extract(col_value, '^(?:([^,]*),?){2}', 1) name, regexp_extract(col_value, '^(?:([^,]*),?){3}', 1) ssn,  regexp_extract(col_value, '^(?:([^,]*),?){4}', 1) location, regexp_extract(col_value, '^(?:([^,]*),?){5}', 1) certified, regexp_extract(col_value, '^(?:([^,]*),?){6}', 1) wageplan from temp_drivers;"
# Dump the resulting table for a visual check.
hive -e " select * from drivers"


# Stage the raw timesheet CSV: one STRING column per input line, loaded from the
# HDFS path and replacing any previous contents (OVERWRITE).
hive -e "CREATE TABLE IF NOT EXISTS temp_timesheet (col_value string); LOAD DATA INPATH '/user/unravel/timesheet.csv' OVERWRITE INTO TABLE temp_timesheet;"
# Dump the staged rows for a visual check.
hive -e "select * from temp_timesheet;"

# Build the typed timesheet table from the staged lines.
# Each regexp_extract uses '^(?:([^,]*),?){N}': the group is repeated N times and
# capture group 1 keeps the text of the last repetition, i.e. the Nth
# comma-separated field (driverId, week, hours_logged, miles_logged in order).
hive -e "CREATE TABLE IF NOT EXISTS timesheet (driverId INT, week INT, hours_logged INT , miles_logged INT); insert overwrite table timesheet SELECT  regexp_extract(col_value, '^(?:([^,]*),?){1}', 1) driverId,  regexp_extract(col_value, '^(?:([^,]*),?){2}', 1) week, regexp_extract(col_value, '^(?:([^,]*),?){3}', 1) hours_logged, regexp_extract(col_value, '^(?:([^,]*),?){4}', 1) miles_logged from temp_timesheet;"
# Dump the resulting table for a visual check.
hive -e "select * from timesheet;"


# Workflow name passed to the jobs below through their unravel.workflow.name setting.
export WORKFLOW_NAME="Notif4MissedSLA"
#export WORKFLOW_NAME="WealthMgmt"
# UTC timestamp in YYYYmmddTHHMMSSZ form, captured once so every job in this run
# is submitted with the same value. Assignment is kept separate from export so a
# failure of date is not masked by export's own exit status.
UTC_TIME_STAMP=$(date -u '+%Y%m%dT%H%M%SZ')
export UTC_TIME_STAMP
# HS2 is the host part of the beeline JDBC URLs (jdbc:hive2://$HS2:10000).
# Uncomment one of the lines below, or set HS2 in the environment before running.
#export HS2='vego1.unraveldatalab.com'
#export HS2='127.0.0.1'
# Fail early with a clear message rather than building a URL with an empty host.
: "${HS2:?HS2 is not set; export HS2=<HiveServer2 host> before running}"

# Both beeline calls connect as user hdfs to port 10000 on the host named by HS2
# (which must be set before this point) and pass the workflow name and timestamp
# as hiveconf settings. The values are quoted, matching the spark-submit line,
# so each --hiveconf stays a single argument whatever the variables contain.

# Query 1: per-driver totals of hours and miles from timesheet, joined to drivers
# to add the driver name.
beeline -u "jdbc:hive2://${HS2}:10000" -n hdfs \
  --hiveconf "unravel.workflow.name=${WORKFLOW_NAME}" \
  --hiveconf "unravel.workflow.utctimestamp=${UTC_TIME_STAMP}" \
  -e "use default; SELECT d.driverId, d.name, t.total_hours, t.total_miles from drivers d JOIN (SELECT driverId, sum(hours_logged)total_hours, sum(miles_logged)total_miles FROM timesheet GROUP BY driverId ) t ON (d.driverId = t.driverId);"

# Pause between jobs (presumably to space out the workflow's jobs; TODO confirm).
sleep 35

# Query 2: per-driver totals of hours and miles from timesheet only.
beeline -u "jdbc:hive2://${HS2}:10000" -n hdfs \
  --hiveconf "unravel.workflow.name=${WORKFLOW_NAME}" \
  --hiveconf "unravel.workflow.utctimestamp=${UTC_TIME_STAMP}" \
  -e "use default; SELECT driverId, sum(hours_logged), sum(miles_logged) FROM timesheet GROUP BY driverId;"


sleep 40
# Disabled third query against the tpcds_text_30 database, kept for reference.
#beeline -u "jdbc:hive2://$HS2:10000" -n hdfs --hiveconf unravel.workflow.name=${WORKFLOW_NAME}      --hiveconf unravel.workflow.utctimestamp=${UTC_TIME_STAMP}  -e "use tpcds_text_30; select count(*) from reason;"

# Final job of the run: submit the SparkPi example class from the CDH
# spark-examples jar with master yarn-cluster. It carries the same workflow name
# and timestamp as the beeline jobs (as spark.unravel.workflow.* settings) and
# enables the Spark event log. "10 100" are passed through as application
# arguments.
spark-submit \
  --conf "spark.unravel.workflow.name=${WORKFLOW_NAME}" \
  --conf "spark.unravel.workflow.utctimestamp=$UTC_TIME_STAMP" \
  --conf "spark.eventLog.enabled=true" \
  --class org.apache.spark.examples.SparkPi \
  --master yarn-cluster \
  /opt/cloudera/parcels/CDH/lib/spark/lib/spark-examples.jar 10 100