Advertisement
Guest User

Untitled

a guest
Feb 21st, 2020
127
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.93 KB | None | 0 0
  1. /* PSTAT 130 Homework Winter 2020*/
  2. /* By: */
  3. /* Priscilla Lee */
  4. /* Nivi Lakshminarayanan */
  5. /* Deni Stoyanova */
  6.  
  7. /* TASK 1: COMPUTATION OF FUTURE COSTS */
  8. /* Instructions 1-11: */
  9.  
  10. /* 1: Load the portfolio and band each policyholder's age as of today's date. */
  11. libname data "/folders/myfolders/Project/Database";
  12. /* proc contents data=data.ptf; run; */
  13. /* agenum = completed years of age; YRDIF's 'AGE' basis is leap-year aware, unlike days/365. */
  14. data work.ptf;
  15. set data.ptf;
  16. length age $8;
  17. if not missing(birthdate) then agenum = floor(yrdif(birthdate, date(), 'AGE'));
  18. /* Missing sorts below every number in SAS, so test for it before the LE 21 band. */
  19. if missing(agenum) then age = " ";
  20. else if agenum LE 21 then age = "-21";
  21. else if agenum LE 35 then age = "21-35";
  22. else if agenum LE 60 then age = "35-60";
  23. else age = "+60";
  24. drop agenum;
  25. run;
  26.  
  27. /* test */
  28. /* proc print data=ptf (obs=10); */
  29. /* run; */
  30.  
  31. /* 2: Import the car table and band horsepower into low / medium / high. */
  32. proc import datafile="/folders/myfolders/Project/Database/cars.csv"
  33. out=work.cars
  34. dbms=CSV replace;
  35. getnames=yes;
  36. /* datarow=2; */
  37. run;
  38. /* hp is declared with LENGTH only (no "hp:" prefix-list FORMAT); it stays blank when horsepower is missing. */
  39. data work.cars;
  40. set work.cars;
  41. length hp $10;
  42. if missing(horsepower) then hp = " ";
  43. else if horsepower LE 150 then hp = "low";
  44. else if horsepower LE 300 then hp = "medium";
  45. else hp = "high";
  46. run;
  47.  
  48. /* Attach the car attributes (including hp) to each portfolio row by cars_id. */
  49. /* Only rows that come from work.ptf are kept, so cars that no policy references add no rows. */
  50. proc sort data=work.ptf;
  51. by cars_id;
  52. run;
  53. proc sort data=work.cars;
  54. by cars_id;
  55. run;
  56.  
  57. data work.ptf;
  58. merge work.cars work.ptf (in=in_ptf);
  59. by cars_id; if in_ptf;
  60. run;
  61.  
  62. /* proc print data=ptf (obs=10); */
  63. /* run; */
  64.  
  65. /* 3: Import the California ZIP code table and band population into a density class. */
  66. proc import datafile="/folders/myfolders/Project/Database/CA_ZIP_CODE.txt"
  67. out=work.CA_ZIP_CODE
  68. dbms=dlm replace;
  69. getnames=yes;
  70. run;
  71. /* NOTE(review): no DELIMITER= is given, so the DLM default (blank) applies - confirm against the file. */
  72. /* proc print data=CA_ZIP_CODE (obs=10); */
  73. /* run; */
  74. /* density is declared with LENGTH only (no "density:" prefix-list FORMAT); blank when population is missing. */
  75. data work.CA_ZIP_CODE;
  76. set work.CA_ZIP_CODE;
  77. length density $10;
  78. if missing(population) then density = " ";
  79. else if population LE 4000 then density = "low";
  80. else if population LE 30000 then density = "medium";
  81. else density = "high";
  82. run;
  83. /* Attach the ZIP code attributes (including density) to each portfolio row; ZIP codes with no policy add no rows. */
  84. proc sort data=work.ptf;
  85. by zip_code;
  86. run;
  87.  
  88. proc sort data=work.CA_ZIP_CODE;
  89. by zip_code;
  90. run;
  91.  
  92. data work.ptf;
  93. merge work.CA_ZIP_CODE work.ptf (in=in_ptf);
  94. by zip_code; if in_ptf;
  95. run;
  96. /* density, hp and age are the important variables from here on. */
  97.  
  98. /* 4: Attach the portfolio attributes to every claim, matched on policyholder and policy start date. */
  99. /* The permanent data.claims table is only read here: it is sorted into work.claims_sorted */
  100. /* instead of being rewritten by a no-op DATA step and re-sorted in place. */
  101. /* Every claim is kept (IF A), including claims with no matching portfolio row. */
  102.  
  103. proc sort data=work.ptf;
  104. by POLICYHOLDER_ID POLICY_STARTING_DATE;
  105. run;
  106.  
  107. proc sort data=data.claims out=work.claims_sorted;
  108. by POLICYHOLDER_ID POLICY_STARTING_DATE;
  109. run;
  110.  
  111. data work.claims;
  112. merge work.claims_sorted (IN=A) work.ptf;
  113. by POLICYHOLDER_ID POLICY_STARTING_DATE;
  114. IF A;
  115. run;
  116.  
  117.  
  118. /* 5: Add the calendar year of the policy start date to each claim. */
  119. data work.claims;
  120. set work.claims;
  121. year = year(policy_starting_date);
  122. attrib year format=4.;
  123. run;
  124. /* Mean claim cost and number of claims for each age x hp x density x year cell. */
  125. proc sort data=work.claims;
  126. by age hp density year;
  127. run;
  128. /* The sort above must list every BY variable used below, or PROC MEANS stops with a sort-order error. */
  129. proc means data=work.claims noprint;
  130. var CLAIMS_COST;
  131. by age hp density year;
  132. output out=work.claims_summary (drop = _TYPE_)
  133. mean(claims_cost) = cost
  134. N(claims_cost) = num_claims;
  135. /* types policyholder_id*year; */
  136. run;
  137.  
  138. /* TODO how to do the above? */
  139. /* Preview the first 10 rows of work.claims_summary. */
  140. proc print data=work.claims_summary (obs=10); run;
  141. /* TODO: merge this summary with the work.ptf data set. */
  142.  
  143. /* TASK 2: DEFINITION OF THE BEST INSURANCE PRICE STRATEGY */
  144. /* Instructions 12-19 */
  145.  
  146. /* TASK 3: CREATE A REPORT */
  147. /* Instructions 20-29 */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement