Guest User

Untitled

a guest
Feb 22nd, 2018
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.50 KB | None | 0 0
  -- Pig Latin script. Reads raw text lines from ${input}, separates the lines
  -- whose comma-separated piece count is below ${novars} from the rest, glues
  -- those short lines back together into rows, re-splits the rows into columns,
  -- and stores them together with the untouched lines in ${output}.
  -- Parameters read by the script: ${input}, ${novars}, ${output}.
  -- NOTE(review): the short lines are presumably records that were broken
  -- across several physical lines -- confirm against the feed.
  --
  -- No load function is named, so Pig's default loader is used and only the
  -- first field is given a name (line).
  -- NOTE(review): the default loader is tab-delimited, so a line containing a
  -- tab would presumably be cut at the first tab -- confirm the feed has none.
  1. loadData = load '${input}' as (line:chararray);
  -- For every line, count the pieces obtained by splitting on a literal comma
  -- (the pattern '\\,' is a regex-escaped comma) and keep the count with the line.
  2. recordCount = foreach loadData {
  3. splitToBg = STRSPLITTOBAG(line,'\\,');
  4. cout = COUNT(splitToBg);
  5. generate cout as countBg,line;
  6. };
  -- Lines with fewer than ${novars} pieces go to loadSplCharRecords, all others
  -- to loadNrmlRecords.
  -- NOTE(review): a row whose countBg is null would satisfy neither condition
  -- and be dropped from both outputs -- confirm this cannot happen.
  7. SPLIT recordCount INTO loadSplCharRecords if (countBg<${novars}), loadNrmlRecords if (countBg>=${novars}); -- novars = number of columns in the feed minus 1
  -- Drop the helper count column from both branches.
  8. nrmlRecords = foreach loadNrmlRecords generate line;
  9. splCharRecords = foreach loadSplCharRecords generate line;
  -- Hide every real comma behind the marker '@krsna@' so that the only commas
  -- left in the data are the ones inserted in step 12.
  10. replaceComma = foreach splCharRecords generate REPLACE(line,'\\,','@krsna@') as line1;
  -- spl is ',' when the line starts with six digits and '' otherwise. 'matches'
  -- has to match the whole string, hence the trailing '.*'.
  -- NOTE(review): the six leading digits presumably identify the first line of
  -- a record -- confirm.
  11. addDelimiterRecords = foreach replaceComma generate (line1 matches '^[0-9]{6}.*' ? ',':'') as spl,line1;
  -- Flagged lines become ',' + '@krsna@' + line1; all other lines pass through
  -- unchanged.
  12. concatComma = foreach addDelimiterRecords generate (spl matches '^[,]{1}.*' ? CONCAT(spl,CONCAT('@krsna@',line1)) : line1) as line3;
  -- Collect every line into a single bag and concatenate the bag into one string
  -- with an empty delimiter.
  -- NOTE(review): the result depends on the order of tuples inside the bag,
  -- which no statement in this script sorts -- confirm the original line order
  -- is actually preserved. GROUP ... ALL also puts the whole data set into one
  -- group.
  13. grpAllRecords = GROUP concatComma ALL;
  14. bgToStr = foreach grpAllRecords generate BagToString(concatComma,'') as line4;
  15. -- bgToStr = foreach (GROUP concatComma ALL) generate FLATTEN(TOKENIZE(BagToString(concatComma,''),',')) as line2;
  -- Cut the big string at the commas inserted in step 12; each token becomes one
  -- output row (a flagged line followed by the unflagged lines after it).
  16. tokenizeRecords = foreach bgToStr generate FLATTEN(TOKENIZE(line4,',')) as line2;
  -- Turn the '@krsna@' markers back into commas. Every token starts with the
  -- marker added in step 12, so line5 starts with a comma.
  17. strReplaceComma = foreach tokenizeRecords generate REPLACE(line2,'@krsna@','\\,') as line5;
  18. -- strReplaceComma = foreach bgToStr generate REPLACE(line2,'@krsna@','\\,') as line5;
  -- Split each row on commas and flatten the resulting tuple into columns.
  -- Because line5 starts with a comma, $0 is presumably an empty string.
  19. splitRecords = foreach strReplaceComma generate FLATTEN(STRSPLIT(line5,'\\,'));
  -- Keep columns $1 through $62, skipping $0.
  -- NOTE(review): the column range is hard-coded here while the SPLIT threshold
  -- comes from ${novars} -- confirm the two agree for every feed.
  20. splNrmlRecords = foreach splitRecords generate $1..$62 as line6;
  -- Combine the rebuilt rows (many columns) with the untouched lines (one column
  -- named line) and write the result using ',' as the field delimiter.
  21. unionBadGood = UNION splNrmlRecords,nrmlRecords;
  22. store unionBadGood into '${output}' using PigStorage(',');
Add Comment
Please, Sign In to add comment