Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- -- Alias GFV for the GetFieldValue UDF, constructed with the single argument
- -- 'transforms.conf'. NOTE(review): presumably that file names the extractable
- -- fields (logRecordType, timestamp, requestId, threadId) -- confirm against the UDF.
- define GFV GetFieldValue('transforms.conf');
- -- Variant A --
- -- Schema-less load: no `as (...)` clause, so each value is obtained by calling
- -- GFV(*, '<field>') on the whole tuple (presumably a by-name field lookup --
- -- confirm against the GetFieldValue UDF).
- students = load 'roster' using PigStorage();
- -- Drop rows whose grade is any letter 'E' through 'Z'. The grade is the value
- -- read as 'requestId' (see the projection below), so the filter tests that
- -- field. Testing 'threadId' here would filter on the value projected as
- -- hometown and would not match Variant B's `grade != ...` filter.
- passing = filter students by GFV(*, 'requestId') != 'F'
-     and GFV(*, 'requestId') != 'E'
-     and GFV(*, 'requestId') != 'G'
-     and GFV(*, 'requestId') != 'H'
-     and GFV(*, 'requestId') != 'I'
-     and GFV(*, 'requestId') != 'J'
-     and GFV(*, 'requestId') != 'K'
-     and GFV(*, 'requestId') != 'L'
-     and GFV(*, 'requestId') != 'M'
-     and GFV(*, 'requestId') != 'N'
-     and GFV(*, 'requestId') != 'O'
-     and GFV(*, 'requestId') != 'P'
-     and GFV(*, 'requestId') != 'Q'
-     and GFV(*, 'requestId') != 'R'
-     and GFV(*, 'requestId') != 'S'
-     and GFV(*, 'requestId') != 'T'
-     and GFV(*, 'requestId') != 'U'
-     and GFV(*, 'requestId') != 'V'
-     and GFV(*, 'requestId') != 'W'
-     and GFV(*, 'requestId') != 'X'
-     and GFV(*, 'requestId') != 'Y'
-     and GFV(*, 'requestId') != 'Z';
- -- Project the four values under the column names the rest of the script
- -- relies on (age, name, grade, hometown); age is cast to int for AVG later.
- pruneFields = foreach passing generate
-     (int) GFV(*, 'logRecordType') as age,
-     GFV(*, 'timestamp') as name,
-     GFV(*, 'requestId') as grade,
-     GFV(*, 'threadId') as hometown;
- -- -- -- -- --
- -- Variant B --
- -- students = load 'roster' using PigStorage()
- -- as (age,
- -- name:chararray,
- -- grade:chararray,
- -- hometown:chararray);
- --
- -- passing = filter students by grade != 'F'
- -- and grade != 'E'
- -- and grade != 'G'
- -- and grade != 'H'
- -- and grade != 'I'
- -- and grade != 'J'
- -- and grade != 'K'
- -- and grade != 'L'
- -- and grade != 'M'
- -- and grade != 'N'
- -- and grade != 'O'
- -- and grade != 'P'
- -- and grade != 'Q'
- -- and grade != 'R'
- -- and grade != 'S'
- -- and grade != 'T'
- -- and grade != 'U'
- -- and grade != 'V'
- -- and grade != 'W'
- -- and grade != 'X'
- -- and grade != 'Y'
- -- and grade != 'Z';
- --
- -- pruneFields = foreach passing generate (int) age, name, grade, hometown;
- -- -- -- -- --
- -- Route the projected rows into one relation per letter grade. Only 'A'
- -- through 'D' have a destination; a row matching none of the conditions is
- -- not assigned to any output relation.
- split pruneFields into
-     gradeA if grade == 'A',
-     gradeB if grade == 'B',
-     gradeC if grade == 'C',
-     gradeD if grade == 'D';
- -- Append a constant `description` column to each grade bucket.
- labelledA = foreach gradeA generate *, 'Superb' as description;
- labelledB = foreach gradeB generate *, 'Good' as description;
- labelledC = foreach gradeC generate *, 'Average' as description;
- labelledD = foreach gradeD generate *, 'Okay' as description;
- -- Recombine the four labelled buckets into a single relation.
- mergeAll = union labelledA, labelledB, labelledC, labelledD;
- -- Group the labelled rows by the (hometown, description) pair; `parallel 1`
- -- requests a single reduce task for this operator.
- byTownAndLabel = group mergeAll by (hometown, description) parallel 1;
- -- For every group emit: the group key, the number of rows, the COUNT of the
- -- distinct names, and the average age.
- report = foreach byTownAndLabel {
-     studentNames = mergeAll.name;
-     distinctNames = distinct studentNames;
-     generate
-         group,
-         COUNT_STAR(mergeAll) as numStudents,
-         COUNT(distinctNames) as numNames,
-         AVG(mergeAll.age) as avgAge;
- }
- -- Persist the per-group summary.
- store report into 'roster-out';
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement