-- Roster report.
--
-- Loads student records from 'roster', drops rows with a failing grade,
-- attaches a textual description to each remaining grade band (A-D), and
-- stores per-(hometown, description) statistics into 'roster-out'.
--
-- GFV is the GetFieldValue UDF constructed with 'transforms.conf'; it is
-- called as GFV(*, '<field name>') to fetch one field of the current tuple.
-- NOTE(review): the UDF and the config file live outside this script;
-- presumably the config maps field names to tuple positions -- confirm.
define GFV GetFieldValue('transforms.conf');

-- ---------------------------------------------------------------------------
-- Variant A: schema-less load, every field is fetched by name through GFV.
-- ---------------------------------------------------------------------------
students = load 'roster' using PigStorage();

-- Keep only rows whose grade is none of the letters 'E'..'Z'.
-- The grade is the 'requestId' field (see the aliases in pruneFields below),
-- so the filter must test 'requestId'. It previously tested 'threadId', which
-- is the hometown column, and therefore disagreed with Variant B.
passing = filter students by
        GFV(*, 'requestId') != 'F'
    and GFV(*, 'requestId') != 'E'
    and GFV(*, 'requestId') != 'G'
    and GFV(*, 'requestId') != 'H'
    and GFV(*, 'requestId') != 'I'
    and GFV(*, 'requestId') != 'J'
    and GFV(*, 'requestId') != 'K'
    and GFV(*, 'requestId') != 'L'
    and GFV(*, 'requestId') != 'M'
    and GFV(*, 'requestId') != 'N'
    and GFV(*, 'requestId') != 'O'
    and GFV(*, 'requestId') != 'P'
    and GFV(*, 'requestId') != 'Q'
    and GFV(*, 'requestId') != 'R'
    and GFV(*, 'requestId') != 'S'
    and GFV(*, 'requestId') != 'T'
    and GFV(*, 'requestId') != 'U'
    and GFV(*, 'requestId') != 'V'
    and GFV(*, 'requestId') != 'W'
    and GFV(*, 'requestId') != 'X'
    and GFV(*, 'requestId') != 'Y'
    and GFV(*, 'requestId') != 'Z';

-- Project the four fields used downstream and give them the same aliases
-- that Variant B declares in its load schema.
pruneFields = foreach passing generate
    (int) GFV(*, 'logRecordType') as age,
    GFV(*, 'timestamp')           as name,
    GFV(*, 'requestId')           as grade,
    GFV(*, 'threadId')            as hometown;

-- ---------------------------------------------------------------------------
-- Variant B (disabled): same pipeline using a declared load schema instead
-- of GFV lookups. Kept for reference.
-- ---------------------------------------------------------------------------
-- students = load 'roster' using PigStorage()
--     as (age,
--         name:chararray,
--         grade:chararray,
--         hometown:chararray);
--
-- passing = filter students by grade != 'F'
--     and grade != 'E'
--     and grade != 'G'
--     and grade != 'H'
--     and grade != 'I'
--     and grade != 'J'
--     and grade != 'K'
--     and grade != 'L'
--     and grade != 'M'
--     and grade != 'N'
--     and grade != 'O'
--     and grade != 'P'
--     and grade != 'Q'
--     and grade != 'R'
--     and grade != 'S'
--     and grade != 'T'
--     and grade != 'U'
--     and grade != 'V'
--     and grade != 'W'
--     and grade != 'X'
--     and grade != 'Y'
--     and grade != 'Z';
--
-- pruneFields = foreach passing generate (int) age, name, grade, hometown;

-- ---------------------------------------------------------------------------
-- Common tail: label each grade band and aggregate.
-- ---------------------------------------------------------------------------

-- Route rows by grade. A row whose grade matches none of the four conditions
-- is not written to any of the output relations.
split pruneFields into
    aStudents if grade == 'A',
    bStudents if grade == 'B',
    cStudents if grade == 'C',
    dStudents if grade == 'D';

-- Append a constant description column to each band.
aStudents = foreach aStudents generate *, 'Superb'  as description;
bStudents = foreach bStudents generate *, 'Good'    as description;
cStudents = foreach cStudents generate *, 'Average' as description;
dStudents = foreach dStudents generate *, 'Okay'    as description;

-- Recombine the four labelled bands into one relation.
mergeAll = union aStudents, bStudents, cStudents, dStudents;

-- Group by (hometown, description); 'parallel 1' requests a single reducer.
agg = group mergeAll by (hometown, description) parallel 1;

-- Per group: total row count, number of distinct names, and average age.
report = foreach agg {
    names     = mergeAll.name;
    uniqNames = distinct names;
    generate
        group,
        COUNT_STAR(mergeAll) as numStudents,
        COUNT(uniqNames)     as numNames,
        AVG(mergeAll.age)    as avgAge;
}

store report into 'roster-out';