Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /* import danych */
- data loan;
- set tmp1.loan;
- run;
- data client;
- set tmp1.client;
- run;
- data disp;
- set tmp1.disp;
- run;
- data account;
- set tmp1.account;
- run;
- data card;
- set tmp1.card;
- run;
- data district;
- set tmp1.district;
- run;
- data order;
- set tmp1.order;
- run;
- data trans;
- set tmp1.trans;
- run;
- /* zmienna zalezna */
- data loan2;
- set loan;
- If status in ('A', 'C') then default=0;
- If status in ('B', 'D') then default=1;
- proc sql;
- CREATE table datakredytu as
- SELECT loan.*, client.*, disp.*
- FROM ((disp
- INNER JOIN loan ON disp.account_id = loan.account_id)
- INNER JOIN client ON disp.client_id = client.client_id)
- WHERE type = 'OWNER';
- quit;
- data asd.zwiek(keep=client_id account_id birth_number date data_ur wiek plec where=(wiek>=18));
- set datakredytu;
- if (substrn(birth_number, 3, 2) > 12) then
- do;
- plec = 1;
- data_ur = INPUT(PUT((birth_number - 5000 + 19000000),8.),YYMMDD8.);
- FORMAT data_ur YYMMDD8.;
- wiek = floor((date - data_ur)/365);
- end;
- else
- do;
- plec = 0;
- data_ur = INPUT(PUT((birth_number + 19000000),8.),YYMMDD8.);
- FORMAT data_ur YYMMDD8.;
- wiek = floor((date - data_ur)/365);
- end;
- run;
- proc sort data=asd.zwiek;
- by wiek;
- quit;
- proc sql;
- CREATE table stazklienta as
- SELECT account.account_id, account.date as data_konta, client.*, disp.*, loan.date as data_pozyczki, loan.account_id
- FROM (((disp
- INNER JOIN account ON disp.account_id = account.account_id)
- INNER JOIN loan ON loan.account_id = disp.account_id)
- INNER JOIN client ON disp.client_id = client.client_id)
- WHERE type = 'OWNER';
- quit;
- data asd.stazklienta;
- set stazklienta;
- staz_dni = data_pozyczki - data_konta;
- run;
- proc sql;
- CREATE table kartaczygold as
- SELECT card.disp_id, card.type as typ_karty, disp.disp_id, disp.type as typ_disp
- FROM (disp
- INNER JOIN card ON disp.disp_id = card.disp_id)
- WHERE typ_disp = 'OWNER';
- quit;
- data asd.kartagold;
- set kartaczygold;
- if( typ_karty = 'gold') then
- do;
- czy_gold = 1;
- end;
- else
- do;
- czy_gold = 0;
- end;
- run;
- data districtzid;
- set district(rename=(A1 = district_id));
- run;
- proc sort data=districtzid;
- by district_id;
- run;
- proc sort data=client;
- by district_id;
- run;
- data templiczbamieszsas;
- merge districtzid(in=districtzid) client(in=client);
- by district_id;
- run;
- data asd.liczbamiesz(keep=client_id district_id liczba_mieszkancow);
- set templiczbamieszsas(rename=(A4 = liczba_mieszkancow));
- run;
- data districtzid;
- set district(rename=(A1 = district_id));
- run;
- proc sort data=districtzid;
- by district_id;
- run;
- proc sort data=client;
- by district_id;
- run;
- data templiczbamiastsas;
- merge districtzid(in=districtzid) client(in=client);
- by district_id;
- run;
- data asd.liczbamiast(keep=client_id district_id liczba_miast);
- set templiczbamiastsas(rename=(A9 = liczba_miast));
- run;
- data districtzid;
- set district(rename=(A1 = district_id));
- run;
- proc sort data=districtzid;
- by district_id;
- run;
- proc sort data=client;
- by district_id;
- run;
- data tempsredniapensja;
- merge districtzid(in=districtzid) client(in=client);
- by district_id;
- run;
- data asd.sredniapensja(keep=client_id district_id srednia_pensja);
- set tempsredniapensja(rename=(A11 = srednia_pensja));
- run;
- data districtzid;
- set district(rename=(A1 = district_id));
- run;
- proc sort data=districtzid;
- by district_id;
- run;
- proc sort data=client;
- by district_id;
- run;
- data tempudzialmiastowych;
- merge districtzid(in=districtzid) client(in=client);
- by district_id;
- run;
- data asd.udzialmiastowych(keep=client_id district_id udzial_miesz_miast);
- set tempudzialmiastowych(rename=(A10 = udzial_miesz_miast));
- run;
- data districtzid;
- set district(rename=(A1 = district_id));
- run;
- proc sort data=districtzid;
- by district_id;
- run;
- proc sort data=client;
- by district_id;
- run;
- data tempwskprzedsiebiorcow;
- merge districtzid(in=districtzid) client(in=client);
- by district_id;
- run;
- data asd.wskaprzedsiebiorcow(keep=client_id district_id wsk_przedsiebiorcow_1000);
- set tempwskprzedsiebiorcow(rename=(A14 = wsk_przedsiebiorcow_1000));
- run;
- data districtzid;
- set district(rename=(A1 = district_id));
- run;
- proc sort data=districtzid;
- by district_id;
- run;
- proc sort data=client;
- by district_id;
- run;
- data tempbezrobocie;
- merge districtzid(in=districtzid) client(in=client);
- by district_id;
- run;
- data asd.bezrob(keep=client_id district_id poziom_bezrobocia_95 poziom_bezrobocia_96 przyrost_bezrob_proc);
- set tempbezrobocie(rename=(A12 = poziom_bezrobocia_95 A13 = poziom_bezrobocia_96));
- przyrost_bezrob_proc = (poziom_bezrobocia_96/poziom_bezrobocia_95)*100;
- run;
- proc sort data=account;
- by account_id;
- run;
- proc sort data=order;
- by account_id;
- run;
- data tempzlecstale;
- merge account(in=account) order(in=order);
- by account_id;
- run;
- data asd.zlecstale(keep=account_id );
- set tempzlecstale;
- run;
- data stale;
- set order;
- run;
- proc sql;
- CREATE table zlecstale2ubezsipo as
- SELECT stale.account_id, sum(stale.amount) as laczne_wydatki_dom,
- avg(stale.amount) as srednia_wydatki_ubez
- FROM stale
- WHERE k_symbol = 'SIPO'
- GROUP BY stale.account_id;
- quit;
- proc sql;
- CREATE table zlecstale2ubezpojistne as
- SELECT stale.account_id, sum(stale.amount) as laczne_wydatki_ubez,
- avg(stale.amount) as srednia_wydatki_ubez
- FROM stale
- WHERE k_symbol = 'POJISTNE'
- GROUP BY stale.account_id;
- quit;
- proc sql;
- CREATE table zlecstale2ubezleasing as
- SELECT stale.account_id, sum(stale.amount) as laczne_wydatki_leasing,
- avg(stale.amount) as srednia_wydatki_leasing
- FROM stale
- WHERE k_symbol = 'LEASING'
- GROUP BY stale.account_id;
- quit;
- proc sql;
- create table ujemne_konto as
- select trans.account_id,
- trans.date as date_trans, trans.k_symbol,
- loan.account_id, loan.date as date_loan
- FROM (trans
- INNER JOIN loan ON loan.account_id = trans.account_id)
- where date_trans < date_loan
- group by loan.account_id, date_trans;
- quit;
- data czydluznik;
- set ujemne_konto;
- if( k_symbol = 'SANKC. UROK') then
- do;
- zm_czydluznik = 1;
- end;
- else
- do;
- zm_czydluznik = 0;
- end;
- run;
- data czydluznik2;
- set czydluznik;
- by account_id;
- if last.account_id then output;
- keep account_id zm_czydluznik;
- run;
- proc sql;
- CREATE table trans_wplywy as
- SELECT account_id, sum(amount) as laczne_wplywy,
- avg(amount) as srednie_wplywy
- FROM trans
- WHERE type = 'PRIJEM'
- GROUP BY account_id;
- quit;
- proc sql;
- CREATE table trans_wplywy_vklad as
- SELECT account_id, sum(amount) as laczne_wplywy_vklad,
- avg(amount) as srednie_wplywy_vklad
- FROM trans
- WHERE (type = 'PRIJEM' and operation = 'VKLAD')
- GROUP BY account_id;
- quit;
- proc sql;
- CREATE table trans_wydatki as
- SELECT account_id, sum(amount) as laczne_wydatki,
- avg(amount) as srednie_wydatki
- FROM trans
- WHERE type = 'VYDAJ'
- GROUP BY account_id;
- quit;
- proc sql;
- CREATE table trans_wydatki_vyberkart as
- SELECT account_id, sum(amount) as laczne_wydatki_vyberkart,
- avg(amount) as srednie_wydatki_vyberkart
- FROM trans
- WHERE (type = 'VYDAJ' and operation = 'VYBER KARTOU')
- GROUP BY account_id;
- quit;
- proc sql;
- CREATE table trans_wydatki_vyber as
- SELECT account_id, sum(amount) as laczne_wydatki_vyber,
- avg(amount) as srednie_wydatki_vyber
- FROM trans
- WHERE (type = 'VYDAJ' and operation = 'VYBER')
- GROUP BY account_id;
- quit;
- data trans_wp1;
- set trans_wplywy_vklad;
- run;
- data trans_wp2;
- set trans_wplywy;
- run;
- data temp_wsk_wklad;
- merge trans_wp1(in=trans_wp1) trans_wp2(in=trans_wp2);
- by account_id;
- run;
- data wsk_wklad(keep = account_id wsk_wkl_proc);
- set temp_wsk_wklad;
- wsk_wkl_proc = (laczne_wplywy_vklad/laczne_wplywy)*100;
- run;
- data temp_wsk_vyber_p;
- merge trans_wydatki_vyber(in=trans_wydatki_vyber) trans_wydatki(in=trans_wydatki);
- by account_id;
- run;
- data wsk_vyber_p(keep = account_id wsk_vyber_p_proc);
- set temp_wsk_vyber_p;
- wsk_vyber_p_proc = (laczne_wydatki_vyber/laczne_wydatki)*100;
- run;
- data temp_wsk_vyber_k;
- merge trans_wydatki_vyberkart(in=trans_wydatki_vyberkart) trans_wydatki(in=trans_wydatki);
- by account_id;
- run;
- data wsk_vyber_k(keep = account_id wsk_vyber_k_proc);
- set temp_wsk_vyber_k;
- wsk_vyber_k_proc = (laczne_wydatki_vyberkart/laczne_wydatki)*100;
- run;
- data wsk_vyber_k2;
- set wsk_vyber_k;
- if wsk_vyber_k_proc=. then do wsk_vyber_k_proc=0;
- end;
- run;
- data zwiek;
- set asd.zwiek;
- run;
- data zlecstale;
- set asd.zlecstale;
- run;
- data liczbamiesz;
- set asd.liczbamiesz;
- run;
- data bezrob;
- set asd.bezrob;
- run;
- data czydluznik;
- set asd.czydluznik;
- run;
- data liczbamiast;
- set asd.liczbamiast;
- run;
- data sredniapensja;
- set asd.sredniapensja;
- run;
- data stazklienta;
- set asd.stazklienta;
- run;
- data udzialmiastowych;
- set asd.udzialmiastowych;
- run;
- data wskaprzedsiebiorcow;
- set asd.wskaprzedsiebiorcow;
- run;
- proc sql;
- CREATE table tabelakoncowa as
- SELECT *
- FROM (((((((zwiek
- INNER JOIN liczbamiesz ON liczbamiesz.client_id = zwiek.client_id)
- INNER JOIN liczbamiast ON liczbamiast.client_id = zwiek.client_id)
- INNER JOIN sredniapensja ON sredniapensja.client_id = zwiek.client_id)
- INNER JOIN stazklienta ON stazklienta.client_id = zwiek.client_id)
- INNER JOIN udzialmiastowych ON udzialmiastowych.client_id = zwiek.client_id)
- INNER JOIN wskaprzedsiebiorcow ON wskaprzedsiebiorcow.client_id = zwiek.client_id)
- INNER JOIN bezrob ON bezrob.client_id = zwiek.client_id);
- quit;
- proc sort data = tabelakoncowa;
- by account_id;
- quit;
- proc sql;
- CREATE TABLE tabelakoncowa2 as
- SELECT *
- FROM (((((((((((tabelakoncowa
- LEFT JOIN czydluznik2 ON czydluznik2.account_id= tabelakoncowa.account_id)
- LEFT JOIN trans_wplywy ON trans_wplywy.account_id= tabelakoncowa.account_id)
- LEFT JOIN trans_wplywy_vklad ON trans_wplywy_vklad.account_id= tabelakoncowa.account_id)
- LEFT JOIN trans_wydatki ON trans_wydatki.account_id= tabelakoncowa.account_id)
- LEFT JOIN trans_wydatki_vyber ON trans_wydatki_vyber.account_id= tabelakoncowa.account_id)
- LEFT JOIN trans_wydatki_vyberkart ON trans_wydatki_vyberkart.account_id= tabelakoncowa.account_id)
- LEFT JOIN wsk_vyber_k2 ON wsk_vyber_k2.account_id= tabelakoncowa.account_id)
- LEFT JOIN wsk_vyber_p ON wsk_vyber_p.account_id= tabelakoncowa.account_id)
- LEFT JOIN wsk_wklad ON wsk_wklad.account_id= tabelakoncowa.account_id)
- LEFT JOIN zlecstale2ubezsipo ON zlecstale2ubezsipo.account_id= tabelakoncowa.account_id)
- LEFT JOIN zlecstale2ubezpojistne ON zlecstale2ubezpojistne.account_id= tabelakoncowa.account_id);
- quit;
- data projekt;
- set tabelakoncowa2;
- if laczne_wydatki_vyberkart = . then do laczne_wydatki_vyberkart = 0;
- end;
- if srednie_wydatki_vyberkart = . then do srednie_wydatki_vyberkart = 0;
- end;
- if laczne_wydatki_dom = . then do laczne_wydatki_dom = 0;
- end;
- if srednia_wydatki_ubez = . then do srednia_wydatki_ubez = 0;
- end;
- if laczne_wydatki_ubez = . then do laczne_wydatki_ubez = 0;
- end;
- run;
- proc sql;
- create table projekt2 as
- select *
- from (loan2
- left join projekt on loan2.account_id = projekt.account_id);
- quit;
- data projektkonc (drop = account_id date client_id district_id disp_id);
- set projekt2;
- run;
- data tabela_finalna1;
- set Tmp1.tabela_finalna(rename=(data_ur=data_urodzenia srednia_pensja=srednie_zarobki udzial_miesz_miast=proc_miastowych data_konta = data_utw_konta
- wsk_przedsiebiorcow_1000=wsk_przedsiebiorcow
- przyrost_bezrob_proc=wsk_bezrobocia zm_czydluznik=dluznik laczne_wplywy=suma_wplywow srednie_wplywy=srednia_wplywow laczne_wplywy_vklad=suma_wpl_vklad
- srednie_wplywy_vklad = srednia_wpl_vklad));
- drop birth_number;
- run;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement