Advertisement
Guest User

Untitled

a guest
Jan 20th, 2020
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.90 KB | None | 0 0
  /* Physical locations: &dir receives the results, &dir_in holds the source tables. */
  %let dir    = /home/u45056142/sasuser.v94/Wyjscie;
  %let dir_in = /home/u45056142/sasuser.v94/;

  /* OUT = results library, IN = source library. */
  libname out "&dir";
  libname in  "&dir_in";

  /* Write the physical path behind the OUT libref to the log. */
  %let kat_tree = %sysfunc(pathname(out));
  %put &kat_tree.;
  7.  
  /*
   * WORK.Transactions: IN.Transactions plus derived columns.
   *
   *   seniority  months between fin_period and period (both read as yyyymm text)
   *   vin1-vin3  1 when due_installments >= 1 / 2 / 3, else 0
   *   vin12_1-3  same thresholds, but only counted when seniority > 12
   *
   * Rows with status 'B' or 'C' dated on or before 200812 are carried forward:
   * one extra row is written for every following month up to 200812, with
   * period and seniority advanced.
   *
   * Fix: vin12_1-vin12_3 depend on seniority, so they are now recomputed for
   * every carried-forward row. Previously those rows kept the flags of the
   * source row, so a row that first passes seniority 12 while being carried
   * forward still reported vin12_* = 0.
   */
  data Transactions;
      set in.Transactions;

      seniority = intck('month', input(fin_period, yymmn6.), input(period, yymmn6.));

      vin1 = (due_installments >= 1);
      vin2 = (due_installments >= 2);
      vin3 = (due_installments >= 3);

      vin12_1 = (seniority > 12 and due_installments >= 1);
      vin12_2 = (seniority > 12 and due_installments >= 2);
      vin12_3 = (seniority > 12 and due_installments >= 3);

      /* The row as observed. */
      output;

      if status in ('B','C') and period <= '200812' then do;
          /* Number of monthly rows still missing up to 200812. */
          n_steps = intck('month', input(period, yymmn6.), input('200812', yymmn6.));

          do i = 1 to n_steps;
              /* Advance one month and refresh everything that depends on it. */
              period    = put(intnx('month', input(period, yymmn6.), 1, 'end'), yymmn6.);
              seniority = intck('month', input(fin_period, yymmn6.), input(period, yymmn6.));

              vin12_1 = (seniority > 12 and due_installments >= 1);
              vin12_2 = (seniority > 12 and due_installments >= 2);
              vin12_3 = (seniority > 12 and due_installments >= 3);

              output;
          end;
      end;

      /* Loop bookkeeping is not part of the result. */
      drop n_steps i;
  run;
  24.  
  /* Working copy of the production table. */
  data Production;
      set in.Production;
  run;

  /* Both tables are ordered by customer/account so they can be combined BY cid aid;
     transactions are additionally ordered by seniority within an account. */
  proc sort data=production;
      by cid aid;
  run;

  proc sort data=transactions;
      by cid aid seniority;
  run;
  34.  
  /* Attach app_loan_amount from Production to every transaction row.
     Keys that appear only in Production are discarded. */
  data Transactions_loan_amt;
      merge Transactions (in=has_txn)
            Production   (keep=cid aid app_loan_amount);
      by cid aid;

      if not has_txn then delete;
  run;
  40.  
  /* One row per (cid, aid) holding the maximum of every vin* flag over that
     account's transaction rows. Requires Transactions to be ordered by cid aid. */
  proc means data=Transactions noprint nway;
      by cid aid;
      var vin:;
      output out=due(drop=_freq_ _type_) max=;
  run;

  proc sort data=due;
      by cid aid;
  run;

  /* Production rows enriched with the per-account flags; only keys present
     in both tables are kept. */
  data Production_due;
      merge Production (in=in_prod)
            due        (in=in_due);
      by cid aid;

      if in_prod and in_due;
  run;
  54.  
  55.  
  56.  
  /*
   * WORK.zmienne: catalogue of the candidate variables of Production_due
   * (names starting with app_, act_, ags or agr).
   *
   *   id     running row number
   *   group  first three characters of the variable name
   *   name   variable name
   *   type2  'int' for numeric variables (type = 1), 'nom' for character
   *          variables (type = 2), blank otherwise
   *
   * Changes compared with the previous version (stored values are identical):
   *   - id uses the automatic variable _N_ instead of the undocumented
   *     MONOTONIC() function;
   *   - group and type2 are assigned their three-character values explicitly
   *     instead of relying on silent truncation of longer strings into $3
   *     variables ('interval' -> 'int', 'nominal' -> 'nom').
   */
  proc contents data=Production_due(keep=app_: act_: ags: agr:)
                out=zmienne(keep=name type)
                noprint;
  run;

  data zmienne;
      length id 8 group $3;
      set zmienne;
      length type2 $3;

      id    = _N_;
      group = substr(name, 1, 3);

      select (type);
          when (1) type2 = 'int';
          when (2) type2 = 'nom';
          otherwise;
      end;

      drop type;
  run;
  62.  
  /* Copy the three prepared tables from WORK into the OUT library. */
  proc datasets library=work memtype=data nolist;
      copy out=out;
          select Transactions_loan_amt
                 Production_due
                 zmienne;
  run;
  64.  
  /* COUNT-BASED VINTAGE */
  %let zm    = vin1;
  %let nazwa = vintage_ile1;

  /* Per fin_period x seniority: number of rows (production) and mean of &zm. */
  proc means data=Transactions_loan_amt noprint nway;
      class fin_period seniority;
      var &zm.;
      format &nazwa. nlpct12.2;
      output out=vintagr_il(drop=_freq_ _type_)
             n()=production
             mean()=&nazwa.;
  run;

  /* Row count per fin_period at seniority 0. */
  proc means data=Transactions_loan_amt noprint nway;
      where seniority=0;
      class fin_period;
      var &zm.;
      output out=production_il(drop=_freq_ _type_)
             n()=production;
  run;

  /* Wide layout: one months_after_<seniority> column per seniority value. */
  proc transpose data=vintagr_il
                 out=vintage_il(drop=_name_)
                 prefix=months_after_;
      by fin_period production;
      id seniority;
      var &nazwa.;
  run;
  75.  
  /* AMOUNT-WEIGHTED VINTAGE */
  %let zm    = vin1;
  %let nazwa = vintage_kw1;

  /* Per fin_period x seniority: sum of weights (amount) and the
     app_loan_amount-weighted mean of &zm. */
  proc means data=Transactions_loan_amt noprint nway;
      class fin_period seniority;
      weight app_loan_amount;
      var &zm.;
      format &nazwa. nlpct12.2;
      output out=vintagr_kw(drop=_freq_ _type_)
             sumwgt()=amount
             mean()=&nazwa.;
  run;

  /* Sum of weights per fin_period at seniority 0. */
  proc means data=Transactions_loan_amt noprint nway;
      where seniority=0;
      class fin_period;
      weight app_loan_amount;
      var &zm.;
      format amount comma10.0;
      output out=production_kw(drop=_freq_ _type_)
             sumwgt()=amount;
  run;

  /* Wide layout: one months_after_<seniority> column per seniority value. */
  proc transpose data=vintagr_kw
                 out=vintage_kw(drop=_name_)
                 prefix=months_after_;
      by fin_period amount;
      id seniority;
      var &nazwa.;
      format amount comma10.0;
  run;
  83.  
  /* VARIABLE CATEGORISATION */
  /*
   * GroupingScoring(input, output, target)
   *
   *   input   data set reference (data set options allowed) holding cid, aid,
   *           product, period, &target and every variable listed in WORK.zmienne
   *   output  data set that receives the ranked variable list
   *   target  name of the binary target variable; the event level is '1'
   *
   * For every variable listed in WORK.zmienne the macro
   *   1. fits a depth-1 PROC HPSPLIT tree (at most &maxbranch branches) of
   *      &target on that variable; variables with fewer than 20 distinct
   *      levels are treated as nominal regardless of their type,
   *   2. stores the resulting node number as GRP_<variable> (0 when missing)
   *      in WORK.prod,
   *   3. stores Somers' D C|R of &target by GRP_<variable> as Gini in &output.
   *
   * Afterwards it writes
   *   out.Production_due_&target        WORK.prod with all GRP_ columns
   *   work.zmienne_scoring_&target      every variable, sorted by group and
   *                                     descending Gini
   *   &output                           the top &top_groups variables per group
   *
   * Changes compared with the previous version:
   *   - every macro variable used internally is declared %LOCAL, so %LET,
   *     %DO and CALL SYMPUTX can no longer overwrite same-named macro
   *     variables of the caller or the global scope;
   *   - max_id and leafsize start from defined values, so an empty
   *     WORK.zmienne or an empty input no longer leaves them unresolved;
   *     leafsize is never below 1;
   *   - PROC DATASETS runs with NOWARN where it deletes tables that may not
   *     exist yet, and is closed with QUIT.
   *
   * NOTE(review): the two side tables are named after &target only, so calls
   * that share a target but differ in input overwrite each other's
   * out.Production_due_&target and work.zmienne_scoring_&target.
   * NOTE(review): _SMDCR_ is a signed measure computed on node numbers; confirm
   * that ranking by the signed value (rather than its magnitude) is intended.
   */
  %macro GroupingScoring(input, output, target);
      %local maxbranch criterion top_groups max_id leafsize id name level Gini;

      %let maxbranch  = 3;
      %let criterion  = entropy;
      %let top_groups = 5;
      %let max_id     = 0;
      %let leafsize   = 1;

      /* Result skeleton; the last id read is the number of variables to process. */
      data &output.;
          set zmienne;
          length Gini 8.;
          format Gini nlpct12.2;
          call symputx('max_id', id);
      run;

      /* Base table for the GRP_ columns; leaf size is 5% of the row count. */
      data prod;
          set &input.;
          call symputx('leafsize', max(1, floor(_N_ * 0.05)));
          keep cid aid product period &target.;
      run;

      proc sort data=prod;
          by cid aid;
      run;

      %do id = 1 %to &max_id.;

          /* Name and measurement level of the current variable. */
          data _null_;
              set zmienne;
              where id = &id.;
              call symputx('name', name);
              call symputx('level', type2);
          run;

          data a1;
              set &input.;
              keep cid aid &target. &name.;
          run;

          /* Distinct-level count decides whether the variable is forced to nominal. */
          proc freq data=a1(keep=&name.) nlevels;
              ods exclude onewayfreqs;
              ods output nlevels=n_levels(keep=tablevar nlevels);
          run;

          data _null_;
              set n_levels;
              if nlevels < 20 then call symputx('level', 'nom');
          run;

          proc hpsplit data=a1 maxdepth=1 maxbranch=&maxbranch. leafsize=&leafsize. event='1';
              id cid aid;
              target &target.;
              input &name. / level=&level.;
              criterion &criterion.;
              prune none;
              score out=a2(keep=cid aid _Node_);
          run;

          /* Rows without a node are put into group 0. */
          data a2;
              set a2;
              if missing(_Node_) then _Node_ = 0;
          run;

          proc datasets library=work nolist;
              modify a2;
                  rename _Node_ = GRP_&name.;
                  label GRP_&name. = "GRP_&name.";
          quit;

          proc sort data=a2;
              by cid aid;
          run;

          data prod;
              merge prod(in=a) a2(in=b);
              by cid aid;
              if a;
          run;

          /* Remove the previous iteration's result so a failed PROC FREQ cannot
             leave a stale value behind; the table does not exist on the first pass. */
          proc datasets library=work nolist nowarn;
              delete smdcr;
          quit;

          proc freq data=prod(keep=&target. GRP_&name.) noprint;
              tables &target.*GRP_&name. / measures;
              output out=smdcr(keep=_SMDCR_) smdcr;
          run;

          %if %sysfunc(exist(smdcr)) %then %do;
              data _null_;
                  set smdcr;
                  call symputx('Gini', _smdcr_);
              run;
          %end;
          %else %do;
              %let Gini = 0;
          %end;

          /* Record the score and the level actually used for this variable. */
          data &output.;
              set &output.;
              if name = "&name." then do;
                  Gini  = &Gini.;
                  type2 = "&level.";
              end;
          run;

      %end;

      data out.Production_due_&target.;
          set prod;
      run;

      proc sort data=&output.;
          by group descending Gini;
      run;

      data work.zmienne_scoring_&target.;
          set &output.;
      run;

      /* Keep the &top_groups best-scoring variables within each group. */
      data &output.;
          length i 8.;
          set &output.;
          retain i 0;
          by group;

          if first.group then i = 0;
          i = i + 1;

          if i <= &top_groups. then output;
          drop id;
      run;

      /* Some of these tables do not exist when no variable was processed. */
      proc datasets library=work nolist nowarn;
          delete prod a1 a2 smdcr;
      quit;
  %mend GroupingScoring;
  164.  
  165.  
  /*
   * ImpactLogistic(input, target, category, variables, output)
   *
   * Fits a logistic regression of &target (event level '1') on &variables
   * plus &category, where &category is a CLASS effect with reference-cell
   * coding. The ParameterEstimates ODS table is saved as &output.
   */
  %macro ImpactLogistic(input, target, category, variables, output);
      proc logistic data=&input.;
          class &category. (param=ref);
          model &target. (event='1') = &variables. &category.;
          ods output ParameterEstimates=&output.;
      run;
  %mend ImpactLogistic;
  171.  
  /* Reload the prepared table from the OUT library into WORK. */
  data Production_Due;
      set out.Production_Due;
  run;

  /* Each target is scored three times: on all rows, on product 'css' only,
     and on product 'ins' only. */

  /* Target vin12_1 */
  %GroupingScoring(Production_Due,                        out.zm_best_vin12_1,     vin12_1);
  %GroupingScoring(Production_Due(where=(product='css')), out.zm_best_vin12_1_css, vin12_1);
  %GroupingScoring(Production_Due(where=(product='ins')), out.zm_best_vin12_1_ins, vin12_1);

  /* Target vin12_2 */
  %GroupingScoring(Production_Due,                        out.zm_best_vin12_2,     vin12_2);
  %GroupingScoring(Production_Due(where=(product='css')), out.zm_best_vin12_2_css, vin12_2);
  %GroupingScoring(Production_Due(where=(product='ins')), out.zm_best_vin12_2_ins, vin12_2);

  /* Target vin12_3 */
  %GroupingScoring(Production_Due,                        out.zm_best_vin12_3,     vin12_3);
  %GroupingScoring(Production_Due(where=(product='css')), out.zm_best_vin12_3_css, vin12_3);
  %GroupingScoring(Production_Due(where=(product='ins')), out.zm_best_vin12_3_ins, vin12_3);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement