Advertisement
josephxsxn

newscred_latency.pig

Jul 26th, 2018
125
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 1.01 KB | None | 0 0
  1. /*
  2.  * newscred_latency.pig
  3.  *
  4.  * Computes summary statistics (min, max, mean, approximate median, variance,
  5.  * record count) of the lag between each record's pub_date_ms and its dt
  6.  * timestamp, over every file under /project/jsn/newscred/latencytest_01/.
  7.  *
  8.  * Sample input record:
  9.  * {"pub_date":"2018-07-25 22:25:31","datetime":"1532617798985","guid":"6e6259f0905b11e89d270af1c0e54c8c","pub_date_ms":"1532557531000"}
  10.  *
  11.  * DataFu jar location noted by the original author (REGISTER it if it is not
  12.  * already on the Pig classpath):
  13.  * /usr/hdp/current/pig-client/lib/datafu.jar
  14.  */
  15.  
  16.  
  17. -- StreamingMedian is DataFu's streaming quantile estimator, so median_lag is
  18. -- an estimate rather than an exact median.
  19. define Median datafu.pig.stats.StreamingMedian();
  20. define VAR datafu.pig.stats.VAR();
  21.  
  22. -- The schema names the second field dt while the sample record's key is
  23. -- "datetime". NOTE(review): presumably renamed because datetime is a Pig type
  24. -- keyword; the recorded results below show the field did load under this
  25. -- schema, but confirm how your Pig version's JsonLoader matches fields.
  26. raw = LOAD '/project/jsn/newscred/latencytest_01/*' using JsonLoader('pub_date:chararray, dt:chararray, guid:chararray, pub_date_ms:chararray');
  27.  
  28. -- time_lag is the exact long difference (both timestamps look like epoch
  29. -- milliseconds in the sample record). time_lag_d carries the same value as a
  30. -- double and exists only to feed VAR: squaring lags of ~9e10 overflows 64-bit
  31. -- long arithmetic, which is what produced the negative variances recorded at
  32. -- the bottom of this script.
  33. diff = FOREACH raw GENERATE guid, pub_date, dt, pub_date_ms,
  34.            ((long)dt-(long)pub_date_ms) as time_lag,
  35.            (double)((long)dt-(long)pub_date_ms) as time_lag_d;
  36.  
  37. -- A single group containing every record, so the aggregates are global.
  38. gg = GROUP diff all;
  39. stats = FOREACH gg GENERATE
  40.             MIN(diff.time_lag) as min_lag,
  41.             MAX(diff.time_lag) as max_lag,
  42.             AVG(diff.time_lag) as avg_lag,
  43.             Median(diff.time_lag) as median_lag,
  44.             VAR(diff.time_lag_d) as var_lag,
  45.             COUNT(diff.time_lag) as record_count;
  46. dump stats;
  47.  
  48. -- Results recorded from earlier runs, as
  49. -- (min_lag,max_lag,avg_lag,(median_lag),var_lag,record_count).
  50. -- The negative var_lag values predate the double cast above and are not valid
  51. -- variances.
  52. --(163732,91943808248,2.747976673345342E8,(8931603.0),-7.8245800454064688E16,1943)
  53. --(163732,91954486270,1.597113799494665E8,(7736237.0),-2.48276543688097E16,11715)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement