Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- -- Purpose: repair comma-delimited records that were broken across several input lines,
- -- then write the repaired records together with the records that were already intact.
- -- Parameters (Pig parameter substitution): ${input} source path, ${output} destination path,
- -- ${novars} minimum comma-separated piece count for a line to be treated as intact.
- --
- -- Read every input line as a single chararray field named 'line'.
- -- NOTE(review): no USING clause is given, so Pig's default load function applies; if that
- -- loader splits on a field delimiter, anything after the first delimiter in a line would not
- -- land in 'line' -- confirm, or consider an explicit line-oriented loader.
- loadData = load '${input}' as (line:chararray);
- -- For each line, split it on commas into a bag and emit the piece count next to the raw line.
- recordCount = foreach loadData {
- splitToBg = STRSPLITTOBAG(line,'\\,');
- cout = COUNT(splitToBg);
- generate cout as countBg,line;
- };
- -- Route lines with fewer than ${novars} pieces to the repair path; all others pass through.
- SPLIT recordCount INTO loadSplCharRecords if (countBg<${novars}), loadNrmlRecords if (countBg>=${novars}); -- novars = num of columns from feed -1
- -- Drop the count column; only the raw text is needed from here on.
- nrmlRecords = foreach loadNrmlRecords generate line;
- splCharRecords = foreach loadSplCharRecords generate line;
- -- Hide every real comma behind the placeholder '@krsna@' so that ',' can be reused below
- -- as a record-boundary marker.
- replaceComma = foreach splCharRecords generate REPLACE(line,'\\,','@krsna@') as line1;
- -- Flag fragments that begin with six digits: spl is ',' for them and '' otherwise.
- -- NOTE(review): presumably a six-digit prefix identifies the first fragment of a record -- confirm.
- addDelimiterRecords = foreach replaceComma generate (line1 matches '^[0-9]{6}.*' ? ',':'') as spl,line1;
- -- Flagged fragments become ',@krsna@' + line1; unflagged fragments are kept unchanged.
- concatComma = foreach addDelimiterRecords generate (spl matches '^[,]{1}.*' ? CONCAT(spl,CONCAT('@krsna@',line1)) : line1) as line3;
- -- Collect all fragments into one bag and concatenate them with an empty separator.
- -- NOTE(review): this relies on the bag keeping the fragments in input order; nothing in this
- -- script enforces an order after GROUP ALL -- verify that fragments cannot be stitched out of order.
- grpAllRecords = GROUP concatComma ALL;
- bgToStr = foreach grpAllRecords generate BagToString(concatComma,'') as line4;
- -- bgToStr = foreach (GROUP concatComma ALL) generate FLATTEN(TOKENIZE(BagToString(concatComma,''),',')) as line2;
- -- The only ',' characters left in line4 are the boundary markers added above, so tokenizing
- -- on ',' yields one token per reassembled record, each starting with '@krsna@'.
- tokenizeRecords = foreach bgToStr generate FLATTEN(TOKENIZE(line4,',')) as line2;
- -- Swap the placeholder back (intended to restore the original commas).
- strReplaceComma = foreach tokenizeRecords generate REPLACE(line2,'@krsna@','\\,') as line5;
- -- strReplaceComma = foreach bgToStr generate REPLACE(line2,'@krsna@','\\,') as line5;
- -- Split each repaired record on commas and flatten the pieces into positional fields.
- splitRecords = foreach strReplaceComma generate FLATTEN(STRSPLIT(line5,'\\,'));
- -- Keep positional fields $1 through $62. Each record starts with the restored placeholder,
- -- so $0 is presumably the empty piece before that leading comma -- confirm.
- -- NOTE(review): 62 is hard-coded here while the expected width elsewhere comes from ${novars};
- -- verify the two stay in sync with the feed layout.
- splNrmlRecords = foreach splitRecords generate $1..$62 as line6;
- -- Combine repaired records (many fields) with intact records (a single 'line' field) and
- -- store them using ',' as the field delimiter.
- unionBadGood = UNION splNrmlRecords,nrmlRecords;
- store unionBadGood into '${output}' using PigStorage(',');
Add Comment
Please, Sign In to add comment