-- GFV: GetFieldValue UDF instantiated with 'transforms.conf'. It is called below
-- as GFV(*, '<fieldName>') to pull a single named value out of the whole tuple.
-- NOTE(review): the field-name -> column mapping presumably lives in
-- transforms.conf; confirm against that file.
define GFV GetFieldValue('transforms.conf');
-- Variant A --
-- Schema-less load: every column is accessed through GFV instead of a declared
-- schema (Variant B below is the declared-schema equivalent).
students = load 'roster' using PigStorage();
-- Drop every row whose grade is one of the letters E..Z.
-- The grade is the 'requestId' field (see the projection below, which aliases
-- 'requestId' as grade and 'threadId' as hometown). The filter previously
-- tested 'threadId', i.e. the hometown column, which disagreed with Variant B
-- (which filters on grade) and with the later split on grade.
passing = filter students by GFV(*, 'requestId') != 'F'
and GFV(*, 'requestId') != 'E'
and GFV(*, 'requestId') != 'G'
and GFV(*, 'requestId') != 'H'
and GFV(*, 'requestId') != 'I'
and GFV(*, 'requestId') != 'J'
and GFV(*, 'requestId') != 'K'
and GFV(*, 'requestId') != 'L'
and GFV(*, 'requestId') != 'M'
and GFV(*, 'requestId') != 'N'
and GFV(*, 'requestId') != 'O'
and GFV(*, 'requestId') != 'P'
and GFV(*, 'requestId') != 'Q'
and GFV(*, 'requestId') != 'R'
and GFV(*, 'requestId') != 'S'
and GFV(*, 'requestId') != 'T'
and GFV(*, 'requestId') != 'U'
and GFV(*, 'requestId') != 'V'
and GFV(*, 'requestId') != 'W'
and GFV(*, 'requestId') != 'X'
and GFV(*, 'requestId') != 'Y'
and GFV(*, 'requestId') != 'Z';
-- Project the four fields used downstream and give them the same names that
-- Variant B declares in its schema: age (cast to int), name, grade, hometown.
pruneFields = foreach passing generate
(int) GFV(*, 'logRecordType') as age,
GFV(*, 'timestamp') as name,
GFV(*, 'requestId') as grade,
GFV(*, 'threadId') as hometown;
-- -- -- -- --
-- Variant B --
-- students = load 'roster' using PigStorage()
-- as (age,
-- name:chararray,
-- grade:chararray,
-- hometown:chararray);
--
-- passing = filter students by grade != 'F'
-- and grade != 'E'
-- and grade != 'G'
-- and grade != 'H'
-- and grade != 'I'
-- and grade != 'J'
-- and grade != 'K'
-- and grade != 'L'
-- and grade != 'M'
-- and grade != 'N'
-- and grade != 'O'
-- and grade != 'P'
-- and grade != 'Q'
-- and grade != 'R'
-- and grade != 'S'
-- and grade != 'T'
-- and grade != 'U'
-- and grade != 'V'
-- and grade != 'W'
-- and grade != 'X'
-- and grade != 'Y'
-- and grade != 'Z';
--
-- pruneFields = foreach passing generate (int) age, name, grade, hometown;
-- -- -- -- --
-- Fan the pruned roster out into one relation per letter grade A-D.
-- Rows whose grade matches none of the four conditions go to no output.
SPLIT pruneFields INTO
    gradeA IF grade == 'A',
    gradeB IF grade == 'B',
    gradeC IF grade == 'C',
    gradeD IF grade == 'D';

-- Tag every row with a human-readable description of its grade bucket.
aStudents = FOREACH gradeA GENERATE *, 'Superb' AS description;
bStudents = FOREACH gradeB GENERATE *, 'Good' AS description;
cStudents = FOREACH gradeC GENERATE *, 'Average' AS description;
dStudents = FOREACH gradeD GENERATE *, 'Okay' AS description;

-- Recombine the four labelled buckets into a single relation.
mergeAll = UNION aStudents, bStudents, cStudents, dStudents;
-- Group the labelled students by (hometown, description), using one reducer.
agg = GROUP mergeAll BY (hometown, description) PARALLEL 1;

-- Per group, emit: the group key, the total row count, the number of distinct
-- names, and the average age.
report = FOREACH agg {
    allNames = mergeAll.name;
    distinctNames = DISTINCT allNames;
    GENERATE
        group,
        COUNT_STAR(mergeAll) AS numStudents,
        COUNT(distinctNames) AS numNames,
        AVG(mergeAll.age) AS avgAge;
}

-- Write the per-group summary out.
STORE report INTO 'roster-out';