Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /* PSTAT 130 Homework Winter 2020*/
- /* By: */
- /* Priscilla Lee */
- /* Nivi Lakshminarayanan */
- /* Deni Stoyanova */
- /* TASK 1: COMPUTATION OF FUTURE COSTS */
- /* Instructions 1-11: */
- /* 1 */
- /* Assign the project library that holds the permanent input tables. */
- libname data "/folders/myfolders/Project/Database";
- /* proc contents data=data.ptf; run; */
- /* Copy the portfolio into WORK and derive the age band of each policyholder. */
- /* Bands: "-21" (21 completed years or fewer), "21-35", "35-60", "+60"; */
- /* age is left blank when birthdate is missing. */
- data work.ptf;
- set data.ptf;
- length age $8 default=8;
- /* A missing value compares lower than any number in SAS, so without this guard */
- /* a missing birthdate would satisfy "LE 21" and land in the "-21" band. */
- if missing(birthdate) then age = " ";
- else do;
- /* YRDIF with the 'AGE' basis counts completed years and respects leap years, */
- /* whereas days/365 drifts by roughly one day every four years. */
- agenum = floor(yrdif(birthdate, date(), 'AGE'));
- if agenum LE 21 then age = "-21";
- else if agenum LE 35 then age = "21-35";
- else if agenum LE 60 then age = "35-60";
- else age = "+60";
- end;
- /* agenum is only a working variable; keep it out of the output table. */
- drop agenum;
- run;
- /* test */
- /* proc print data=ptf (obs=10); */
- /* run; */
- /* 2 */
- /* Read cars.csv into WORK.CARS; the first row of the file supplies the variable names. */
- /* REPLACE lets the step overwrite WORK.CARS when the script is rerun. */
- proc import out=work.cars
- datafile="/folders/myfolders/Project/Database/cars.csv"
- dbms=csv
- replace;
- getnames=yes;
- /* datarow=2; */
- run;
- /* Add hp, a horsepower band: "low" (<= 150), "medium" (> 150 and <= 300), "high" (> 300). */
- /* hp is left blank when horsepower is missing. */
- data work.cars;
- set work.cars;
- /* Declare the length first so hp is created as a 10-character variable. */
- length hp $10;
- /* Format hp alone: the earlier "hp:" form is a name-prefix list that addresses */
- /* every variable whose name starts with hp, not just this one. */
- format hp $15.;
- /* A missing value compares lower than any number in SAS, so it has to be excluded */
- /* explicitly; otherwise cars with unknown horsepower would be labelled "low". */
- if missing(horsepower) then hp = " ";
- else if horsepower LE 150 then hp = "low";
- else if horsepower LE 300 then hp = "medium";
- else hp = "high";
- run;
- /* Attach the car attributes to every policy, matching on cars_id. */
- /* Both inputs must be ordered by the BY variable before a match-merge. */
- proc sort data=work.ptf;
- by cars_id;
- run;
- proc sort data=work.cars;
- by cars_id;
- run;
- data work.ptf;
- /* work.ptf stays last so that, for any variable present in both tables, */
- /* the value from the policy table is the one that is kept. */
- merge work.cars work.ptf (in=in_ptf);
- by cars_id;
- /* Keep policy rows only: a car that no policy refers to would otherwise */
- /* add a record whose policy fields are all missing. */
- if in_ptf;
- run;
- /* proc print data=ptf (obs=10); */
- /* run; */
- /* 3 */
- /* Read CA_ZIP_CODE.txt into WORK.CA_ZIP_CODE, taking variable names from its first row. */
- /* NOTE(review): no DELIMITER= statement is given, so PROC IMPORT uses its default */
- /* delimiter (a blank, per the SAS documentation) - TODO confirm the file is blank-delimited. */
- proc import out=work.CA_ZIP_CODE
- datafile="/folders/myfolders/Project/Database/CA_ZIP_CODE.txt"
- dbms=dlm
- replace;
- getnames=yes;
- run;
- /* proc print data=CA_ZIP_CODE (obs=10); */
- /* run; */
- /* Add density, a population band per ZIP code: */
- /* "low" (<= 4000), "medium" (> 4000 and <= 30000), "high" (> 30000). */
- /* density is left blank when population is missing. */
- data work.CA_ZIP_CODE;
- set work.CA_ZIP_CODE;
- /* Declare the length first so density is created as a 10-character variable. */
- length density $10;
- /* Format density alone: the earlier "density:" form is a name-prefix list that */
- /* addresses every variable whose name starts with density, not just this one. */
- format density $15.;
- /* A missing value compares lower than any number in SAS, so it has to be excluded */
- /* explicitly; otherwise ZIP codes with unknown population would be labelled "low". */
- if missing(population) then density = " ";
- else if population LE 4000 then density = "low";
- else if population LE 30000 then density = "medium";
- else density = "high";
- run;
- /* Attach the ZIP-code attributes (including density) to every policy, matching on zip_code. */
- /* Both inputs must be ordered by the BY variable before a match-merge. */
- proc sort data=work.ptf;
- by zip_code;
- run;
- proc sort data=work.CA_ZIP_CODE;
- by zip_code;
- run;
- data work.ptf;
- /* work.ptf stays last so that, for any variable present in both tables, */
- /* the value from the policy table is the one that is kept. */
- merge work.CA_ZIP_CODE work.ptf (in=in_ptf);
- by zip_code;
- /* Keep policy rows only: a ZIP code without any policy would otherwise */
- /* add a record whose policy fields are all missing. */
- if in_ptf;
- run;
- /* density, hp, and age are the important variables */
- /* 4 */
- /* Build WORK.CLAIMS: every claim together with the attributes of its policy, */
- /* matched on POLICYHOLDER_ID and POLICY_STARTING_DATE. */
- /* The permanent DATA.CLAIMS table is only read here: it is sorted into a WORK copy */
- /* (OUT=) instead of being rewritten and re-sorted in place. */
- proc sort data=work.ptf;
- by POLICYHOLDER_ID POLICY_STARTING_DATE;
- run;
- proc sort data=data.claims out=work.claims_sorted;
- by POLICYHOLDER_ID POLICY_STARTING_DATE;
- run;
- data work.claims;
- /* work.ptf stays last so that, for any variable present in both tables, */
- /* the value from the policy table is the one that is kept. */
- merge work.claims_sorted (IN=A) work.ptf;
- by POLICYHOLDER_ID POLICY_STARTING_DATE;
- /* Keep only the rows that come from the claims table. */
- IF A;
- run;
- /* 5 */
- /* Add year: the calendar year of policy_starting_date, displayed with width 4. */
- data work.claims;
- set work.claims;
- year = year(policy_starting_date);
- attrib year format=4.;
- run;
- /* Mean claim cost (cost) and number of claims (num_claims) for every */
- /* age / hp / density / year combination, written to WORK.CLAIMS_SUMMARY. */
- /* BY-group processing requires the input to be ordered by every BY variable, */
- /* so year has to be part of the sort key: sorting on age hp density alone */
- /* leaves year unordered inside each group and PROC MEANS stops with a */
- /* "not sorted" error. */
- proc sort data=work.claims;
- by age hp density year;
- run;
- proc means data=work.claims noprint;
- var CLAIMS_COST;
- by age hp density year;
- /* _TYPE_ is dropped because it is constant when no CLASS statement is used. */
- output out=work.claims_summary (drop = _TYPE_)
- mean(claims_cost) = cost
- N(claims_cost) = num_claims;
- /* types policyholder_id*year; */
- run;
- /* Preview the first 10 rows of the summary table. */
- proc print data=work.claims_summary (obs=10); run;
- /* merge this with the work.ptf database */
- /* TASK 2: DEFINITION OF THE BEST INSURANCE PRICE STRATEGY */
- /* Instructions 12-19 */
- /* TASK 3: CREATE A REPORT */
- /* Instructions 20-29 */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement