Guest User

Untitled

a guest
Feb 21st, 2020
99
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /* PSTAT 130 Homework Winter 2020*/
  2. /* By: */
  3. /* Priscilla Lee */
  4. /* Nivi Lakshminarayanan */
  5. /* Deni Stoyanova */
  6.  
  7. /* TASK 1: COMPUTATION OF FUTURE COSTS */
  8. /* Instructions 1-11: */
  9.  
  10. /* 1 */
  11. libname data "/home/u45009682/sasuser.v94/HOMEWORK";
  12. proc contents data=data.ptf; run;
  13.  
  14. data work.ptf;
  15. set data.ptf;
  16. length age $8 default=8;
  17. today = DATE();
  18. days = today - birthdate;
  19. agenum = floor(days/365);
  20. if (agenum LE 21) then age = "-21";
  21. if (agenum GT 21) and (agenum LE 35) then age = "21-35";
  22. if (agenum GT 35) and (agenum LE 60) then age = "35-60";
  23. if (agenum GT 60) then age = "+60";
  24. drop days today agenum;
  25. run;
  26.  
  27. /* test */
  28. /* proc print data=ptf (obs=10); */
  29. /* run; */
  30.  
  31.  
  32. /* 2 */
  33. proc import datafile="/home/u45009682/sasuser.v94/HOMEWORK/cars.csv"
  34. out=work.cars
  35. dbms=CSV replace;
  36. getnames=yes;
  37. datarow=2;
  38. run;
  39.  
  40. data work.cars;
  41. set work.cars;
  42. informat hp:$15.;
  43. length hp $10;
  44. if (horsepower LE 150) then hp = "low";
  45. if (horsepower GT 150) and (horsepower LE 300) then hp = "medium";
  46. if (horsepower GT 300) then hp = "high";
  47. run;
  48.  
  49.  
  50.  
  51. proc sort data=work.ptf;
  52. by cars_id;
  53. run;
  54. proc sort data=work.cars;
  55. by cars_id;
  56. run;
  57.  
  58. data work.ptf;
  59. merge work.cars work.ptf;
  60. by cars_id;
  61. run;
  62.  
  63. /* proc print data=ptf (obs=10); */
  64. /* run; */
  65.  
  66. /* 3 */
  67. proc import datafile="/home/u45009682/sasuser.v94/HOMEWORK/CA_ZIP_CODE.TXT"
  68. out=work.CA_ZIP_CODE
  69. dbms=dlm replace;
  70. getnames=yes;
  71. run;
  72.  
  73. /* proc print data=CA_ZIP_CODE (obs=10); */
  74. /* run; */
  75.  
  76. data work.CA_ZIP_CODE;
  77. set work.CA_ZIP_CODE;
  78. informat density:$15.;
  79. length density $10;
  80. if (population LE 4000) then density = "low";
  81. if (population GT 4000) and (population LE 30000) then density = "medium";
  82. if (population GT 30000) then density = "high";
  83. run;
  84.  
  85. proc sort data=work.ptf;
  86. by zip_code;
  87. run;
  88.  
  89. proc sort data=work.CA_ZIP_CODE;
  90. by zip_code;
  91. run;
  92.  
  93. data work.ptf;
  94. merge work.CA_ZIP_CODE work.ptf;
  95. by zip_code;
  96. run;
  97. /* density hp and age r importante :) */
  98.  
  99. /* 4 */
  100.  
  101. libname data "/home/u45009682/sasuser.v94/HOMEWORK";
  102. proc contents data=data.ptf; run;
  103.  
  104. data data.claims;
  105. set data.claims;
  106. run;
  107.  
  108. proc sort data=work.ptf;
  109. by POLICYHOLDER_ID POLICY_STARTING_DATE;
  110. run;
  111.  
  112. proc sort data=data.claims;
  113. by POLICYHOLDER_ID POLICY_STARTING_DATE;
  114. run;
  115.  
  116. data work.claims;
  117. merge data.claims (IN=A) work.ptf (IN=B);
  118. by POLICYHOLDER_ID POLICY_STARTING_DATE;
  119. IF A;
  120. run;
  121.  
  122.  
  123. /* 5 */
  124. data work.claims;
  125. set work.claims;
  126. informat year 4.;
  127. year=year(policy_starting_date);
  128. run;
  129.  
  130. proc sort data=work.claims;
  131. by year;
  132. run;
  133.  
  134. proc means data=work.claims noprint;
  135. var CLAIMS_COST;
  136. class year age hp density;
  137. output out=work.claims_summary (drop = _TYPE_ _FREQ_)
  138. mean(claims_cost) = cost
  139. N(claims_cost) = nb_claims;
  140. /* types policyholder_id*year; */
  141. run;
  142.  
  143. /* TODO how to do the above? */
  144.  
  145. /* merge this with the work.ptf database */
  146.  
  147. /* 6 */
  148. data work.ptf;
  149. set work.ptf;
  150. format year 4.;
  151. year=year(policy_starting_date);
  152. run;
  153.  
  154. proc sort data=work.ptf;
  155. by year;
  156. run;
  157.  
  158. proc means data=work.ptf;
  159. var policyholder_id;
  160. class year age hp density;
  161. output out=work.ptf_summary (drop = _TYPE_ _FREQ_)
  162. N(policyholder_id) = nb;
  163. run;
  164.  
  165. /* 7 */
  166. proc sort data=work.claims_summary;
  167. by age density hp year;
  168. run;
  169.  
  170. proc sort data=work.ptf_summary;
  171. by age density hp year;
  172. run;
  173.  
  174. data work.summary;
  175. merge work.claims_summary work.ptf_summary;
  176. by age density hp year;
  177. run;
  178.  
  179. data work.summary;
  180. set work.summary;
  181. informat freq:6.3;
  182. freq=nb_claims/nb;
  183. run;
  184.  
  185. proc sort data=work.summary;
  186. by descending freq age hp density;
  187. run;
  188.  
  189. proc sort data=work.summary out=work.freq nodupkey;
  190. by age hp density;
  191. run;
  192.  
  193. proc sort data=work.freq;
  194. by year age hp density;
  195. run;
  196.  
  197. proc print data=work.freq;
  198. run;
  199.  
  200.  
  201. /* 8 */
  202. data work.freq;
  203. set work.freq;
  204. keep age hp density freq;
  205. run;
  206.  
  207.  
  208. /* TASK 2: DEFINITION OF THE BEST INSURANCE PRICE STRATEGY */
  209. /* Instructions 12-19 */
  210.  
  211. /* TASK 3: CREATE A REPORT */
  212. /* Instructions 20-29 */
RAW Paste Data