Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
-  * Computes summary statistics (min, max, mean, approximate median, variance,
-  * record count) of the lag between the `dt` and `pub_date_ms` fields of the
-  * records under /project/jsn/newscred/latencytest_01/.
-  *
-  * Sample input record:
-  * {"pub_date":"2018-07-25 22:25:31","datetime":"1532617798985","guid":"6e6259f0905b11e89d270af1c0e54c8c","pub_date_ms":"1532557531000"}
-  *
-  * DataFu jar providing the Median / VAR UDFs:
-  * /usr/hdp/current/pig-client/lib/datafu.jar
-  *
-  * NOTE(review): the sample record's key is "datetime" while the loader schema
-  * names the second field `dt` -- confirm the JsonLoader in use maps fields the
-  * way this script expects.
-  */
- define Median datafu.pig.stats.StreamingMedian();
- define VAR datafu.pig.stats.VAR();
- -- A previous TextLoader LOAD into `raw` was immediately overwritten by this
- -- statement, so only the JsonLoader LOAD is kept.
- raw = LOAD '/project/jsn/newscred/latencytest_01/*' using JsonLoader('pub_date:chararray, dt:chararray, guid:chararray, pub_date_ms:chararray');
- -- time_lag stays a long so MIN/MAX/AVG/Median/COUNT output is unchanged.
- -- time_lag_d is the same value as a double and is used only for VAR: earlier
- -- runs on the long column reported a negative variance, which is consistent
- -- with overflow when squaring lags of ~9e10 (TODO confirm against the DataFu
- -- version deployed).
- diff = FOREACH raw GENERATE guid, pub_date, dt, pub_date_ms, ((long)dt-(long)pub_date_ms) as time_lag, (double)((long)dt-(long)pub_date_ms) as time_lag_d;
- -- GROUP ... all collapses every record into a single group for global stats.
- gg = GROUP diff all;
- stats = FOREACH gg GENERATE MIN(diff.time_lag) as min_lag, MAX(diff.time_lag) as max_lag, AVG(diff.time_lag) as avg_lag, Median(diff.time_lag) as median_lag, VAR(diff.time_lag_d) as var_lag, COUNT(diff.time_lag) as record_count;
- dump stats;
- -- Output of earlier runs (var_lag was computed on the long column there, hence
- -- the negative values):
- --(163732,91943808248,2.747976673345342E8,(8931603.0),-7.8245800454064688E16,1943)
- --(163732,91954486270,1.597113799494665E8,(7736237.0),-2.48276543688097E16,11715)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement