Advertisement
josephxsxn

HDFS CMDs Ranger Log Report

Mar 27th, 2017
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.38 KB | None | 0 0
  1.  
  -- Job name under which this script is submitted.
  set job.name HDFS-CMD-COUNTER;

  -- Pig: combine small input files into splits of up to 1573741824 bytes
  -- (about 1.5 GB), compress temporary files, cap the number of reducers.
  set pig.splitCombination true;
  set pig.maxCombinedSplitSize 1573741824;
  set pig.tmpfilecompression true;
  set pig.exec.reducers.max 15000;

  -- MapReduce engine: split size pinned to the same byte count as above
  -- (min == max), followed by sort buffer and container/heap sizes.
  set mapreduce.input.fileinputformat.split.minsize 1573741824;
  set mapreduce.input.fileinputformat.split.maxsize 1573741824;
  set mapreduce.task.io.sort.mb 2047;
  -- 4096 MB map containers with a 3 GB heap.
  set mapreduce.map.memory.mb 4096;
  set mapreduce.map.java.opts '-Xmx3g';
  -- 16192 MB reduce containers with a 15 GB heap.
  set mapreduce.reduce.memory.mb 16192;
  set mapreduce.reduce.java.opts '-Xmx15g';

  -- Tez engine: same split-grouping size (min == max), one wave of tasks.
  set tez.am.grouping.min-size 1573741824;
  set tez.am.grouping.max-size 1573741824;
  set tez.am.grouping.split-waves 1.0;
  set tez.runtime.io.sort.mb 2047;
  -- Application master: 32000 MB container, 30 GB heap, GC logging enabled.
  set tez.am.resource.memory.mb 32000;
  set tez.am.launch.cmd-opts '-XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps -XX:+UseNUMA -XX:+UseParallelGC -Xmx30g';
  -- Tasks: 4096 MB container, 3500 MB heap, GC logging enabled.
  set tez.task.resource.memory.mb 4096;
  set tez.task.launch.cmd-opts '-XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps -XX:+UseNUMA -XX:+UseParallelGC -Xmx3500m';
  26.  
  -- Remove the previous report so the final STORE can recreate the directory.
  -- rmf is used instead of rm: rm aborts the script when the path does not
  -- exist (e.g. on the very first run), rmf silently skips a missing path.
  rmf hdfs-reports/cmduse/
  28.  
  -- Script parameters with their fallback values; both can be overridden
  -- on the pig command line with -param NAME=value.
  %default USER 'jniemie8'
  %default RANGERHDFSLOGSINPUT '/ranger/audit/$USER/hdfs/*/*'

  -- Load the JSON audit records. Every field is declared as chararray,
  -- including the numeric-looking ones (seq_num, event_count, event_dur_ms).
  rawhdfsrangerlog = LOAD '$RANGERHDFSLOGSINPUT'
      USING JsonLoader('repotype:chararray, repo:chararray, reqUser:chararray, evtTime:chararray, access:chararray, resource:chararray, resType:chararray, action:chararray, result:chararray, policy:chararray, enforcer:chararray, sess:chararray, cliType:chararray, cliIP:chararray, reqData:chararray, agentHost:chararray, logType:chararray, id:chararray, seq_num:chararray, event_count:chararray, event_dur_ms:chararray');
  32.  
  -- Distinct list of access types present in the input.
  -- DISTINCT is applied directly to the one-column projection; running it on
  -- the output of GROUP was redundant (GROUP already yields one row per key)
  -- and carried every grouped tuple along in a bag.
  -- NOTE(review): nothing in the visible script STOREs or DUMPs `types`, so
  -- Pig will not execute these statements unless a consumer is added.
  types_proj = FOREACH rawhdfsrangerlog GENERATE access;
  types = DISTINCT types_proj;
  36.  
  37.  
  -- One row per audit event: the calendar day of the event and the requesting
  -- user. evtTime is parsed once and formatted back as yyyy-MM-dd. Only the
  -- two fields the report needs are projected. `day` is kept as the first
  -- field because COUNT skips tuples whose first field is null.
  TypeUserDate = FOREACH rawhdfsrangerlog GENERATE
      ToString(ToDate(evtTime, 'yyyy-MM-dd HH:mm:ss.SSS'), 'yyyy-MM-dd') AS day,
      reqUser;

  -- Events per day across all users; the user column holds the literal 'TOTAL'.
  TotalPerDayGroups = GROUP TypeUserDate BY day;
  PerDayCount = FOREACH TotalPerDayGroups GENERATE
      group AS day,
      'TOTAL' AS reqUser,
      COUNT(TypeUserDate) AS count;

  -- Events per day per requesting user.
  TotalPerUserGroups = GROUP TypeUserDate BY (day, reqUser);
  PerDayPerUserCount = FOREACH TotalPerUserGroups GENERATE
      FLATTEN(group) AS (day, reqUser),
      COUNT(TypeUserDate) AS count;

  -- Both branches have the layout (day, reqUser, count). Sort by day, then by
  -- count; PARALLEL 1 runs the sort with a single reducer.
  unionForStorage = UNION PerDayCount, PerDayPerUserCount;
  orderedStorageUnion = ORDER unionForStorage BY $0, $2 PARALLEL 1;

  -- Written with PigStorage's default field delimiter.
  STORE orderedStorageUnion INTO 'hdfs-reports/cmduse/' USING PigStorage();
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement