Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /* Input side: folder path and the IN libref that the source tables are read from. */
- %let dir_in=/home/u45056142/sasuser.v94/;
- libname in "&dir_in.";
- /* Output side: folder path and the OUT libref that result tables are written to. */
- %let dir=/home/u45056142/sasuser.v94/Wyjscie;
- libname out "&dir.";
- /* Keep the physical path behind the OUT libref in kat_tree and echo it to the log. */
- %let kat_tree=%sysfunc(pathname(out));
- %put &kat_tree.;
- /* Build WORK.Transactions from in.Transactions: add seniority and overdue flags, */
- /* then replicate rows of status B or C accounts month by month up to 200812.    */
- data Transactions;
-     set in.Transactions;
-     /* Number of month boundaries between fin_period and period (both parsed with yymmn6.). */
-     seniority = intck('month', input(fin_period, yymmn6.), input(period, yymmn6.));
-     /* 0/1 flags: at least 1, 2 or 3 due installments on this row. */
-     vin1 = (due_installments >= 1);
-     vin2 = (due_installments >= 2);
-     vin3 = (due_installments >= 3);
-     /* Same thresholds, but only counted when seniority is above 12. */
-     vin12_1 = (due_installments >= 1 and seniority > 12);
-     vin12_2 = (due_installments >= 2 and seniority > 12);
-     vin12_3 = (due_installments >= 3 and seniority > 12);
-     /* Emit the row as read. */
-     output;
-     /* For status B or C rows dated 200812 or earlier, emit one extra row per month */
-     /* up to and including 200812, advancing period and recomputing seniority.      */
-     /* NOTE(review): vin12_1-vin12_3 are not recomputed on these generated rows,    */
-     /* so they keep the value from the source row - confirm this is intended.       */
-     if period <= '200812' and status in ('B', 'C') then do;
-         n_steps = intck('month', input(period, yymmn6.), input('200812', yymmn6.));
-         do i = 1 to n_steps;
-             period = put(intnx('month', input(period, yymmn6.), 1, 'end'), yymmn6.);
-             seniority = intck('month', input(fin_period, yymmn6.), input(period, yymmn6.));
-             output;
-         end;
-     end;
-     /* Loop helpers are not part of the result. */
-     drop n_steps i;
- run;
- /* Bring in.Production into WORK as Production, ordered by cid aid. */
- proc sort data=in.Production out=Production;
-     by cid aid;
- run;
- /* Order the transactions by account and then by seniority. */
- proc sort data=Transactions;
-     by cid aid seniority;
- run;
- /* Attach app_loan_amount from Production to every transaction row; */
- /* Production rows without a matching transaction are discarded.   */
- data Transactions_loan_amt;
-     merge Transactions(in=in_trans)
-           Production(keep=cid aid app_loan_amount);
-     by cid aid;
-     if not in_trans then delete;
- run;
- /* For each cid aid pair take the maximum of every variable whose name starts with vin. */
- /* BY processing expects Transactions to be ordered by cid aid already.                 */
- proc means data=Transactions nway noprint;
-     by cid aid;
-     var vin:;
-     output out=due(drop=_type_ _freq_) max=;
- run;
- proc sort data=due;
-     by cid aid;
- run;
- /* Keep only accounts present in both Production and due. */
- data Production_due;
-     merge Production(in=in_prod) due(in=in_due);
-     by cid aid;
-     if not (in_prod and in_due) then delete;
- run;
- /* Catalogue the Production_due columns whose names start with app_, act_, ags or agr: */
- /* one row per column with its name and storage type (1 = numeric, 2 = character).     */
- proc contents data=Production_due(keep=app_: act_: ags: agr:) out=zmienne(keep=name type) noprint;
- run;
- /* Decorate the catalogue with a running id, a 3-character group and a level label. */
- data zmienne;
-     length id 8.;
-     length group $3;
-     set zmienne;
-     length type2 $3;
-     /* Running row number. _N_ is the documented DATA step iteration counter and  */
-     /* replaces the undocumented MONOTONIC() function; values are 1, 2, 3, ...    */
-     id = _N_;
-     /* group is $3, so only the first three characters of name are kept. */
-     group = name;
-     /* type2 is $3, so the stored values are the first three characters (int / nom). */
-     select (type);
-         when (1) type2 = 'interval';
-         when (2) type2 = 'nominal';
-         otherwise;
-     end;
-     drop type;
- run;
- /* Persist the three working tables into the OUT library. */
- proc datasets lib=work memtype=data nolist;
-     copy out=out;
-     select Transactions_loan_amt Production_due zmienne;
- run;
- /* PROC DATASETS is interactive: end it explicitly instead of relying on the next step. */
- quit;
- /* QUANTITY-BASED VINTAGE */
- /* zm is the analysed 0/1 flag, nazwa the name given to its mean in the output. */
- %let zm = vin1;
- %let nazwa = vintage_ile1;
- /* Per fin_period and seniority: number of non-missing rows (production) and mean of the flag. */
- proc means data=Transactions_loan_amt nway noprint;
-     class fin_period seniority;
-     var &zm.;
-     format &nazwa. nlpct12.2;
-     output out=vintagr_il(drop=_type_ _freq_) n()=production mean()=&nazwa.;
- run;
- /* Per fin_period: number of non-missing rows at seniority 0. */
- proc means data=Transactions_loan_amt nway noprint;
-     where seniority=0;
-     class fin_period;
-     var &zm.;
-     output out=production_il(drop=_type_ _freq_) n()=production;
- run;
- /* Pivot to one row per fin_period and production, with one months_after_<seniority> column each. */
- /* NOTE(review): BY fin_period production presumably relies on production being constant          */
- /* across seniority within a fin_period - confirm against the data.                              */
- proc transpose data=vintagr_il prefix=months_after_ out=vintage_il(drop=_name_);
-     by fin_period production;
-     id seniority;
-     var &nazwa.;
- run;
- /* AMOUNT-BASED VINTAGE */
- /* zm is the analysed 0/1 flag, nazwa the name given to its weighted mean in the output. */
- %let zm = vin1;
- %let nazwa = vintage_kw1;
- /* Per fin_period and seniority: sum of weights (amount) and mean of the flag, weighted by app_loan_amount. */
- proc means data=Transactions_loan_amt nway noprint;
-     class fin_period seniority;
-     weight app_loan_amount;
-     var &zm.;
-     format &nazwa. nlpct12.2;
-     output out=vintagr_kw(drop=_type_ _freq_) sumwgt()=amount mean()=&nazwa.;
- run;
- /* Per fin_period: sum of app_loan_amount weights at seniority 0. */
- proc means data=Transactions_loan_amt nway noprint;
-     where seniority=0;
-     class fin_period;
-     weight app_loan_amount;
-     var &zm.;
-     format amount comma10.0;
-     output out=production_kw(drop=_type_ _freq_) sumwgt()=amount;
- run;
- /* Pivot to one row per fin_period and amount, with one months_after_<seniority> column each. */
- /* NOTE(review): BY fin_period amount presumably relies on amount being constant              */
- /* across seniority within a fin_period - confirm against the data.                          */
- proc transpose data=vintagr_kw prefix=months_after_ out=vintage_kw (drop=_name_);
-     by fin_period amount;
-     id seniority;
-     var &nazwa.;
-     format amount comma10.0;
- run;
- /* VARIABLE CATEGORIZATION */
- /* GroupingScoring(input, output, target)                                                   */
- /*   input  - data set (data set options allowed) holding cid, aid, product, period,        */
- /*            the target and every variable listed in WORK.zmienne.                         */
- /*   output - data set that receives, per group, the top_groups variables ranked by Gini.   */
- /*   target - name of the 0/1 target variable; the event level is '1'.                      */
- /* For every variable listed in WORK.zmienne the macro:                                     */
- /*   1. grows a depth-1 PROC HPSPLIT tree (at most maxbranch branches) on that variable,    */
- /*   2. stores the node number as GRP_<variable> (0 when the node is missing),              */
- /*   3. records Somers D C|R of target by GRP_<variable> from PROC FREQ as Gini.            */
- /* Side effects: writes out.Production_due_<target>, work.zmienne_scoring_<target> and the  */
- /* output data set; leaves WORK.n_levels behind; deletes WORK prod, a1, a2 and smdcr.       */
- /* NOTE(review): the two persisted tables are named by target only, so calls that share a   */
- /* target but pass a different input overwrite each other - confirm this is intended.       */
- %macro GroupingScoring(input, output, target);
-     /* Keep every working macro variable local so same-named variables in the caller are untouched. */
-     %local maxbranch criterion top_groups max_id leafsize id name level Gini;
-     %let maxbranch = 3;
-     %let criterion = entropy;
-     %let top_groups = 5;
-     /* Defaults used when zmienne or the input has no rows: the loop is skipped, leafsize stays valid. */
-     %let max_id = 0;
-     %let leafsize = 1;
-     /* Start the result from the variable catalogue and remember the id of its last row. */
-     data &output.;
-         set zmienne;
-         length Gini 8.;
-         format Gini nlpct12.2;
-         call symputx('max_id', id);
-     run;
-     /* Base table for the GRP_ columns. leafsize is 5 percent of the rows read, computed once */
-     /* on the last row and never below 1.                                                     */
-     data prod;
-         set &input. end=_eof;
-         if _eof then call symputx('leafsize', max(1, floor(_N_ * 0.05)));
-         keep cid aid product period &target.;
-     run;
-     proc sort data = prod;
-         by cid aid;
-     run;
-     %do id = 1 %to &max_id.;
-         /* Fetch the name and level (int / nom) of the current variable. */
-         data _null_;
-             set zmienne;
-             where id = &id.;
-             call symputx('name', name);
-             call symputx('level', type2);
-         run;
-         data a1;
-             set &input.;
-             keep cid aid &target. &name.;
-         run;
-         /* Count distinct levels; variables with fewer than 20 levels are treated as nominal. */
-         proc freq data = a1(keep=&name.) nlevels;
-             ods exclude onewayfreqs;
-             ods output nlevels = n_levels (keep = tablevar nlevels);
-         run;
-         data _null_;
-             set n_levels;
-             if nlevels < 20 then do;
-                 call symputx('level', 'nom');
-             end;
-         run;
-         /* Single split on the current variable; the scored node number is the group. */
-         proc hpsplit data=a1 maxdepth = 1 maxbranch = &maxbranch. leafsize = &leafsize. event = '1';
-             id cid aid;
-             target &target.;
-             input &name. / level = &level.;
-             criterion &criterion.;
-             prune none;
-             score out = a2(keep=cid aid _Node_);
-         run;
-         data a2;
-             set a2;
-             if missing(_Node_) then _Node_ = 0;
-         run;
-         proc datasets library=work nolist;
-             modify a2;
-             rename _Node_ = GRP_&name.;
-             label GRP_&name. = "GRP_&name.";
-         quit;
-         proc sort data = a2;
-             by cid aid;
-         run;
-         data prod;
-             merge prod(in=a) a2(in=b);
-             by cid aid;
-             if a;
-         run;
-         /* Drop any previous smdcr so the EXIST check below reflects this iteration only. */
-         /* NOWARN: the table does not exist on the first pass.                            */
-         proc datasets library=work nolist nowarn;
-             delete smdcr;
-         quit;
-         proc freq data=prod(keep=&target. GRP_&name.) noprint;
-             tables &target.*GRP_&name. / measures;
-             output out=smdcr(keep=_SMDCR_ ) smdcr;
-         run;
-         %if %sysfunc(exist(smdcr)) %then %do;
-             data _null_;
-                 set smdcr;
-                 call symputx('Gini', _smdcr_);
-             run;
-         %end;
-         %else %do;
-             /* PROC FREQ produced no statistics table: score the variable as 0. */
-             %let Gini = 0;
-         %end;
-         /* Write the score and the level actually used back to the row of this variable. */
-         data &output.;
-             set &output.;
-             if name = "&name." then do;
-                 Gini = &Gini.;
-                 type2 = "&level.";
-             end;
-         run;
-     %end;
-     data out.Production_due_&target.;
-         set prod;
-     run;
-     /* Rank variables inside each group by descending Gini and keep the full ranking. */
-     proc sort data = &output.;
-         by group descending Gini;
-     run;
-     data work.zmienne_scoring_&target.;
-         set &output.;
-     run;
-     /* Keep the first top_groups rows of every group; i is the rank inside the group. */
-     data &output.;
-         length i 8.;
-         set &output.;
-         retain i 0;
-         by group;
-         if first.group then i = 0;
-         i = i + 1;
-         if i <= &top_groups. then output;
-         drop id;
-     run;
-     /* Clean up and end PROC DATASETS so it is not left running after the macro returns. */
-     proc datasets library=work nolist nowarn;
-         delete prod a1 a2 smdcr;
-     quit;
- %mend GroupingScoring;
- /* ImpactLogistic(input, target, category, variables, output)                            */
- /* Fits PROC LOGISTIC on input with target (event '1') modelled on the listed variables */
- /* plus category, where category is a CLASS effect with reference parameterization.     */
- /* The ParameterEstimates ODS table is saved to output.                                 */
- %macro ImpactLogistic(input, target, category, variables, output);
-     ods output ParameterEstimates = &output.;
-     proc logistic data = &input.;
-         class &category. (param=ref);
-         model &target. (event='1') = &variables. &category.;
-     run;
- %mend ImpactLogistic;
- /* Reload Production_Due into WORK from the OUT library. */
- data Production_Due;
-     set out.Production_Due;
- run;
- /* Run GroupingScoring for targets vin12_1, vin12_2 and vin12_3. For each target the */
- /* order is: all rows, then product = 'css', then product = 'ins'.                  */
- %macro RunGroupingScoring;
-     %local k;
-     %do k = 1 %to 3;
-         %GroupingScoring(Production_Due, out.zm_best_vin12_&k., vin12_&k.)
-         %GroupingScoring(Production_Due(where=(product='css')), out.zm_best_vin12_&k._css, vin12_&k.)
-         %GroupingScoring(Production_Due(where=(product='ins')), out.zm_best_vin12_&k._ins, vin12_&k.)
-     %end;
- %mend RunGroupingScoring;
- %RunGroupingScoring
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement