Advertisement
Guest User

Untitled

a guest
Feb 21st, 2020
100
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.31 KB | None | 0 0
  1. /* PSTAT 130 Homework Winter 2020*/
  2. /* By: */
  3. /* Priscilla Lee */
  4. /* Nivi Lakshminarayanan */
  5. /* Deni Stoyanova */
  6.  
  7. /* TASK 1: COMPUTATION OF FUTURE COSTS */
  8. /* Instructions 1-11: */
  9.  
  /* 1 */
  /* Copy the permanent portfolio table into WORK and add AGE, a character
     age band derived from BIRTHDATE as of the day the program runs. */
  libname data "/folders/myfolders/Project/Database";
  /* proc contents data=data.ptf; run; */

  data work.ptf;
    set data.ptf;
    length age $8;

    /* Completed years of age. YRDIF with the 'AGE' basis accounts for leap
       years, unlike dividing a day count by 365. The guard keeps AGENUM
       missing when BIRTHDATE is missing instead of calling YRDIF on it. */
    if not missing(birthdate) then
      agenum = floor(yrdif(birthdate, date(), 'AGE'));

    /* A missing numeric compares lower than any number in SAS, so it must be
       handled first or it would be classified as "-21". */
    if missing(agenum) then age = " ";
    else if agenum le 21 then age = "-21";
    else if agenum le 35 then age = "21-35";
    else if agenum le 60 then age = "35-60";
    else age = "+60";

    drop agenum;
  run;
  26.  
  27. /* test */
  28. /* proc print data=ptf (obs=10); */
  29. /* run; */
  30.  
  /* 2 */
  /* Import the car reference file and add HP, a horsepower band
     (low / medium / high). */
  proc import datafile="/folders/myfolders/Project/Database/cars.csv"
    out=work.cars
    dbms=CSV replace;
    getnames=yes;
    /* datarow=2; */
  run;

  data work.cars;
    set work.cars;

    /* LENGTH alone declares the new character variable. The original also
       had `format hp:$15.`, where `hp:` is a name-prefix list written before
       HP existed; a 10-byte character variable needs no display format. */
    length hp $10;

    /* Missing horsepower is tested first: a missing numeric sorts below every
       number, so `horsepower LE 150` alone would label it "low". */
    if missing(horsepower) then hp = " ";
    else if horsepower le 150 then hp = "low";
    else if horsepower le 300 then hp = "medium";
    else hp = "high";
  run;
  47.  
  48.  
  49.  
  /* Attach the car attributes to every policy, matching on CARS_ID. */
  proc sort data=work.ptf;
    by cars_id;
  run;

  proc sort data=work.cars;
    by cars_id;
  run;

  data work.ptf;
    /* work.cars stays first so that, for any variable present in both
       tables, the value from work.ptf (read last) is the one kept. */
    merge work.cars
          work.ptf (in=in_ptf);
    by cars_id;

    /* Keep only rows that come from the portfolio. Without this filter a car
       that no policy references would be written to work.ptf as a row with
       no policyholder. */
    if in_ptf;
  run;
  61.  
  62. /* proc print data=ptf (obs=10); */
  63. /* run; */
  64.  
  /* 3 */
  /* Import the California ZIP code file and add DENSITY, a population band
     (low / medium / high). */
  /* NOTE(review): no DELIMITER= statement is given, so PROC IMPORT uses its
     default delimiter for DBMS=DLM - confirm that matches CA_ZIP_CODE.txt. */
  proc import datafile="/folders/myfolders/Project/Database/CA_ZIP_CODE.txt"
    out=work.CA_ZIP_CODE
    dbms=dlm replace;
    getnames=yes;
  run;

  /* proc print data=CA_ZIP_CODE (obs=10); */
  /* run; */

  data work.CA_ZIP_CODE;
    set work.CA_ZIP_CODE;

    /* LENGTH alone declares the new character variable. The original also
       had `format density:$15.`, where `density:` is a name-prefix list
       written before DENSITY existed. */
    length density $10;

    /* Missing population is tested first: a missing numeric sorts below every
       number, so `population LE 4000` alone would label it "low". */
    if missing(population) then density = " ";
    else if population le 4000 then density = "low";
    else if population le 30000 then density = "medium";
    else density = "high";
  run;
  83.  
  /* Attach the ZIP code attributes to every policy, matching on ZIP_CODE. */
  proc sort data=work.ptf;
    by zip_code;
  run;

  proc sort data=work.CA_ZIP_CODE;
    by zip_code;
  run;

  data work.ptf;
    /* work.CA_ZIP_CODE stays first so that, for any variable present in both
       tables, the value from work.ptf (read last) is the one kept. */
    merge work.CA_ZIP_CODE
          work.ptf (in=in_ptf);
    by zip_code;

    /* Keep only rows that come from the portfolio. Without this filter every
       ZIP code that has no policy would be written to work.ptf as a row with
       no policyholder. */
    if in_ptf;
  run;
  /* DENSITY, HP and AGE are the classification variables used by the
     summaries in the following steps. */
  97.  
  /* 4 */
  /* Build work.claims: every claim, enriched with the attributes of the
     policy it belongs to (matched on policyholder and policy start date). */
  proc sort data=work.ptf;
    by POLICYHOLDER_ID POLICY_STARTING_DATE;
  run;

  /* Sort into WORK rather than in place. The original rewrote data.claims
     onto itself (a no-op data step) and then sorted the permanent table;
     using OUT= leaves the source library untouched, the same way step 1
     copies data.ptf to WORK before changing it. */
  proc sort data=data.claims out=work.claims;
    by POLICYHOLDER_ID POLICY_STARTING_DATE;
  run;

  data work.claims;
    merge work.claims (in=A)
          work.ptf;
    by POLICYHOLDER_ID POLICY_STARTING_DATE;

    /* Keep claim rows only; policies without a claim are dropped here. */
    if A;
  run;
  116.  
  /* 5 */
  /* Add the policy start YEAR to each claim, then summarise claims by
     year x age x hp x density: mean claim cost and number of claims. */
  data work.claims;
    set work.claims;
    /* FORMAT (display), not INFORMAT (input parsing): YEAR is computed here,
       never read from raw text. Matches the FORMAT used for YEAR in step 6. */
    format year 4.;
    year = year(policy_starting_date);
  run;

  proc sort data=work.claims;
    by year;
  run;

  /* NWAY keeps only the rows for the full year*age*hp*density combination.
     Without it PROC MEANS also outputs the grand total and every partial
     combination, and since _TYPE_ is dropped those rows could no longer be
     told apart from real cells. */
  proc means data=work.claims noprint nway;
    class year age hp density;
    var CLAIMS_COST;
    output out=work.claims_summary (drop = _TYPE_ _FREQ_)
      mean(claims_cost) = cost
      N(claims_cost) = nb_claims;
  run;
  135.  
  136.  
  137. /* merge this with the work.ptf database */
  138.  
  /* 6 */
  /* Add the policy start YEAR to the portfolio, then count policies by
     year x age x hp x density. */
  data work.ptf;
    set work.ptf;
    format year 4.;
    year = year(policy_starting_date);
  run;

  proc sort data=work.ptf;
    by year;
  run;

  /* NWAY keeps only the rows for the full year*age*hp*density combination.
     Without it PROC MEANS also outputs the grand total and every partial
     combination, and since _TYPE_ is dropped those rows could no longer be
     told apart from real cells. */
  /* NOTE(review): N(policyholder_id) counts non-missing values and requires
     POLICYHOLDER_ID to be numeric - confirm against data.ptf. */
  proc means data=work.ptf noprint nway;
    class year age hp density;
    var policyholder_id;
    output out=work.ptf_summary (drop = _TYPE_ _FREQ_)
      N(policyholder_id) = nb;
  run;
  156.  
  /* 7 */
  /* Combine the claim and portfolio summaries, compute the claim frequency
     per cell, and keep for each age x hp x density group the row with the
     highest frequency. */
  proc sort data=work.claims_summary;
    by age density hp year;
  run;

  proc sort data=work.ptf_summary;
    by age density hp year;
  run;

  data work.summary;
    merge work.claims_summary work.ptf_summary;
    by age density hp year;
  run;

  data work.summary;
    set work.summary;
    /* FORMAT (display), not INFORMAT: FREQ is computed, not read from text.
       The original `informat freq:6.3` also turned the name into a
       name-prefix list because of the colon. */
    format freq 6.3;
    /* FREQ stays missing when there is no policy count to divide by, which
       avoids division-by-zero / missing-value notes in the log. */
    if nb > 0 then freq = nb_claims / nb;
  run;

  /* Highest frequency first ... */
  proc sort data=work.summary;
    by descending freq age hp density;
  run;

  /* ... then NODUPKEY keeps the first row met for each age/hp/density key.
     This relies on PROC SORT preserving the incoming order of rows with
     equal keys (its default EQUALS behaviour), so the kept row is the one
     with the highest FREQ. */
  proc sort data=work.summary out=work.freq nodupkey;
    by age hp density;
  run;

  proc sort data=work.freq;
    by year age hp density;
  run;
  188.  
  189. /* proc print data=work.freq; */
  190. /* run; */
  191.  
  192.  
  /* 8 */
  /* Reduce work.freq to the three classification variables and FREQ. */
  data work.freq (keep = age hp density freq);
    set work.freq;
  run;
  198.  
  /* 9 */
  /* For each age x hp x density group keep the summary row with the highest
     mean claim cost, store the result in work.cost and print it. */

  /* Order the summary so the most expensive rows come first. */
  proc sort data = work.summary;
    by descending cost age hp density;
  run;

  /* One row per age/hp/density key: NODUPKEY retains the first row met. */
  proc sort data = work.summary
            nodupkey
            out  = work.cost;
    by age hp density;
  run;

  /* Re-order the result for display. */
  proc sort data = work.cost;
    by year age hp density;
  run;

  proc print data = work.cost;
  run;
  214.  
  /* 10 */
  /* Combine the cost and frequency tables per age x density x hp group and
     compute PP = FREQ * COST. */
  proc sort data=work.cost;
    by age density hp;
  run;

  proc sort data=work.freq;
    by age density hp;
  run;

  data work.pp;
    /* work.cost is a copy of summary rows, so it still carries its own FREQ;
       work.freq is listed last so that its FREQ value is the one kept. */
    merge work.cost work.freq;
    by age density hp;
  run;

  data work.pp;
    set work.pp;
    /* FORMAT (display), not INFORMAT: PP is computed, not read from text.
       The original `informat pp:15.9` also turned the name into a
       name-prefix list because of the colon. */
    format pp 15.9;
    pp = freq * cost;
  run;
  234.  
  235. /* 11 */
  236.  
  237.  
  238.  
  239. /* TASK 2: DEFINITION OF THE BEST INSURANCE PRICE STRATEGY */
  240. /* Instructions 12-19 */
  241.  
  242. /* TASK 3: CREATE A REPORT */
  243. /* Instructions 20-29 */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement