Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
-- Register the project jar; its directory is supplied through the ${binjarr} parameter.
-- Presumably this jar provides the FundTyp UDF defined on the next line — confirm.
- REGISTER '${binjarr}/com.aexp.scb-1.0.jar';
-- Bind the short name FundTyp to the UDF class FundTyp, constructed with no arguments.
-- It is applied to the AD branch further down in this script.
- DEFINE FundTyp FundTyp();
-- Read the raw input named by the ${input} parameter; only one chararray column, `line`, is declared.
- A = LOAD '${input}' AS (line:chararray);
-- Drop trailing whitespace from every line.
- A1 = FOREACH A GENERATE RTRIM(line) AS linea:chararray;
-- Remove every literal '&', '~' and '^' from the data. Later statements in this script insert
-- these three characters as record markers and tokenize on them, so any occurrence in the
-- raw input would be mistaken for a marker.
-- REPLACE interprets its second argument as a regular expression: an unescaped '^' is the
-- start-of-input anchor and would remove nothing, so the caret is escaped here.
- AREP = FOREACH A1 GENERATE REPLACE(REPLACE(REPLACE(linea,'&',''),'~',''),'\\^','') as line1;
-- Keep only lines whose text starts with 02, 03, 88 or 16. Every other line goes to D,
-- which is not referenced again in this script.
-- NOTE(review): these record codes resemble the BAI2 layout (02 group, 03 account,
-- 16 detail, 88 continuation) — confirm against the feed specification.
- SPLIT AREP INTO A2 IF (line1 MATCHES '02.*' or line1 MATCHES '03.*' or line1 MATCHES '88.*' or line1 MATCHES '16.*'), D OTHERWISE;
-- Put a marker column in front of every kept line: '&' for 03, '~' for 16, '^' for 02,
-- and an empty string for the remaining (88) lines.
- A3 = FOREACH A2 GENERATE (line1 MATCHES '03.*' ? '&' : (line1 MATCHES '16.*' ? '~' : (line1 MATCHES '02.*' ? '^' : ''))),line1;
-- Collapse the whole relation into one comma-joined string, then:
--   1. delete every '/,,88,' sequence, i.e. a '/' followed by the empty marker of an 88 line
--      and that line's leading '88,', which splices the 88 line onto the preceding text;
--   2. delete every remaining '/';
--   3. cut the string at each '^' marker, producing one row (line3) per 02 line together
--      with everything up to the next 02 line.
-- NOTE(review): this relies on the bag built by GROUP ... ALL keeping the input line order — confirm.
- A4 = FOREACH (GROUP A3 ALL) GENERATE FLATTEN(TOKENIZE(REPLACE(REPLACE(BagToString(A3, ','),'/,,88,',''),'/',''),'^')) AS line3;
-- Split each chunk on commas into at most 9 columns; the ninth keeps the unsplit remainder.
-- Each chunk starts with the comma that followed its '^' marker, so $0 is expected to be empty.
- A5 = FOREACH A4 GENERATE FLATTEN(STRSPLIT(line3,'\\,',9));
-- Keep $4, build the string '20' + $5[0..2) + '-' + $5[2..4) + '-' + $5[4..6)
-- (presumably $5 is a YYMMDD date — confirm), and cut the remainder ($8) at each '&' marker,
-- giving one row per piece.
- A6 = FOREACH A5 GENERATE $4,CONCAT('20',SUBSTRING($5,0,2),'-',SUBSTRING($5,2,4),'-',SUBSTRING($5,4,6)),FLATTEN(TOKENIZE($8,'&'));
-- Carry the two leading columns and split each '&' piece on commas into at most 29 columns,
-- which land in $2..$30.
- A7 = FOREACH A6 GENERATE $0,$1,FLATTEN(STRSPLIT($2,'\\,',29));
-- Keep $0, $1, $4 and $5, and cut the remainder ($30) at each '~' marker; each piece is named line5.
- A8 = FOREACH A7 GENERATE $0,$1,$4,$5,FLATTEN(TOKENIZE($30,'~')) as line5;
-- Pieces that are exactly '/,' or empty go to A9 (not referenced again in this script);
-- everything else continues as A10.
-- NOTE(review): every '/' was already removed when A4 was built, so the '/,' test looks unreachable — confirm.
- SPLIT A8 INTO A9 IF (line5 matches '/,' or line5 matches ''), A10 OTHERWISE;
-- Keep the four carried columns and split line5 on every comma (no limit) into columns starting at $4.
- A11 = FOREACH A10 GENERATE $0,$1,$2,$3,FLATTEN(STRSPLIT(line5,'\\,'));
-- Fan the rows out four ways on column $8: 'V' -> AV, 'S' -> SA, 'D' -> AD, anything else -> AO.
-- NOTE(review): $8 looks like a funds-type code — confirm against the feed specification.
- SPLIT A11 INTO AV If ($8 matches 'V'), SA if ($8 matches 'S'), AD if ($8 matches 'D'), AO OTHERWISE;
- -- A13 = FOREACH AV GENERATE $0,$1,$2,$3,$6,$7,$11,CONCAT($12,' ',$13);
- -- A13 = FOREACH AV GENERATE $0,$1,$2,$3,$6,$7,$11,CONCAT($7,' ',$8,' ',$9,' ',$10,' ',$11,' ',$12,' ',$13);
- -- A13 = FOREACH AV GENERATE $0,$1,$2,$3,$6,$7,$11,CONCAT($7,$8,$9,$10,$11,$12,$13);
-- AV rows: project $0..$3, $6, $7, $11 and append one column holding $7 through $13
-- concatenated with no separator, each operand cast to chararray.
-- The three commented-out statements above are earlier variants of this one.
- A13 = FOREACH AV GENERATE $0,$1,$2,$3,$6,$7,$11,CONCAT((chararray)$7,CONCAT((chararray)$8,CONCAT((chararray)$9,CONCAT((chararray)$10,CONCAT((chararray)$11,CONCAT((chararray)$12,(chararray)$13))))));
- -- A14 = FOREACH SA GENERATE $0,$1,$2,$3,$6,$7,$12,CONCAT($13,' ',$14);
- -- A14 = FOREACH SA GENERATE $0,$1,$2,$3,$6,$7,$12,CONCAT($7,' ',$8,' ',$9,' ',$10,' ',$11,' ',$12,' ',$13,' ',$14);
- -- A14 = FOREACH SA GENERATE $0,$1,$2,$3,$6,$7,$12,CONCAT($7,$8,$9,$10,$11,$12,$13,$14);
-- SA rows: project $0..$3, $6, $7, $12 and append one column holding $7 through $14
-- concatenated with no separator.
-- NOTE(review): $12 is the only CONCAT operand here without a (chararray) cast — confirm this is intentional.
- A14 = FOREACH SA GENERATE $0,$1,$2,$3,$6,$7,$12,CONCAT((chararray)$7,CONCAT((chararray)$8,CONCAT((chararray)$9,CONCAT((chararray)$10,CONCAT((chararray)$11,CONCAT($12,CONCAT((chararray)$13,(chararray)$14)))))));
-- AD rows: pass the whole tuple to the FundTyp UDF and split its result on commas into columns.
-- The UDF's output layout is not visible in this script.
- A17 = FOREACH AD GENERATE FLATTEN(STRSPLIT(FundTyp(*),'\\,'));
- -- A18 = FOREACH AO GENERATE $0,$1,$2,$3,$6,$7,,CONCAT($9,' ',$10,' ',$11);
-- AO rows: project $0..$3, $6, $7, an empty string (CONCAT('','')), then $9 through $16
-- with every null replaced by a single space.
- A18 = FOREACH AO GENERATE $0..$3,$6,$7,CONCAT('',''),($9 IS NOT NULL ? $9 :' '),
- ($10 IS NOT NULL ? $10 :' '),($11 IS NOT NULL ? $11 :' '),($12 IS NOT NULL ? $12 :' '),($13 IS NOT NULL ? $13 :' '),
- ($14 IS NOT NULL ? $14 :' '),($15 IS NOT NULL ? $15 :' '),($16 IS NOT NULL ? $16 :' ');
-- Keep the first seven columns and collapse everything from $7 onward into one right-trimmed string.
- A19 = FOREACH A18 GENERATE $0..$6,RTRIM(CONCAT($7..));
-- Pass the AV, SA and AO results through GROUP ... ALL and flatten them straight back out;
-- the tuples themselves are unchanged.
-- NOTE(review): the purpose is not evident from the script (possibly forcing a reduce phase
-- before the UNION) — confirm before removing.
- A15 = FOREACH (GROUP A13 ALL) GENERATE FLATTEN(A13);
- A16 = FOREACH (GROUP A14 ALL) GENERATE FLATTEN(A14);
- A20 = FOREACH (GROUP A19 ALL) GENERATE FLATTEN(A19);
-- Merge the four branches (V, S, D and other) into a single relation.
- AU = UNION A15,A16,A17,A20;
-- Prepend a rank column to every row (RANK with no BY clause).
- ran = rank AU;
-- Build one header row: the literal 'H', the number of ranked rows, and the ${dat} parameter.
- HDC = FOREACH (GROUP ran ALL) GENERATE 'H',COUNT(ran),'${dat}';
-- Combine the header row with the ranked rows.
-- NOTE(review): the two inputs have different column layouts, and UNION does not guarantee
-- tuple order, so the header row is not guaranteed to be first in the output — confirm that
-- downstream consumers tolerate this.
- HAU = UNION HDC,ran;
- -- dump HAU;
-- Write the result as comma-delimited text to the location given by ${output}.
- store HAU into '${output}' using PigStorage(',');
Add Comment
Please, Sign In to add comment