Advertisement
Guest User

Untitled

a guest
Feb 23rd, 2020
155
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.92 KB | None | 0 0
  1. /* PSTAT 130 Homework Winter 2020*/
  2. /* By: */
  3. /* Priscilla Lee */
  4. /* Nivi Lakshminarayanan */
  5. /* Deni Stoyanova */
  6.  
  7. /* TASK 1: COMPUTATION OF FUTURE COSTS */
  8. /* Instructions 1-11: */
  9.  
  10. /* 1 */
  11. libname data "/home/u45009682/sasuser.v94/HOMEWORK";
  12. proc contents data=data.ptf; run;
  13.  
  14. data work.ptf;
  15. set data.ptf;
  16. length age $8 default=8;
  17. today = DATE();
  18. days = today - birthdate;
  19. agenum = floor(days/365);
  20. if (agenum LE 21) then age = "-21";
  21. if (agenum GT 21) and (agenum LE 35) then age = "21-35";
  22. if (agenum GT 35) and (agenum LE 60) then age = "35-60";
  23. if (agenum GT 60) then age = "+60";
  24. drop days today agenum;
  25. run;
  26.  
  27. /* test */
  28. /* proc print data=ptf (obs=10); */
  29. /* run; */
  30.  
  31.  
  32. /* 2 */
  33. proc import datafile="/home/u45009682/sasuser.v94/HOMEWORK/cars.csv"
  34. out=work.cars
  35. dbms=CSV replace;
  36. getnames=yes;
  37. datarow=2;
  38. run;
  39.  
  40. data work.cars;
  41. set work.cars;
  42. informat hp:$15.;
  43. length hp $10;
  44. if (horsepower LE 150) then hp = "low";
  45. if (horsepower GT 150) and (horsepower LE 300) then hp = "medium";
  46. if (horsepower GT 300) then hp = "high";
  47. run;
  48.  
  49.  
  50.  
  51. proc sort data=work.ptf;
  52. by cars_id;
  53. run;
  54. proc sort data=work.cars;
  55. by cars_id;
  56. run;
  57.  
  58. data work.ptf;
  59. merge work.cars work.ptf;
  60. by cars_id;
  61. run;
  62.  
  63. /* proc print data=ptf (obs=10); */
  64. /* run; */
  65.  
  66. /* 3 */
  67. proc import datafile="/home/u45009682/sasuser.v94/HOMEWORK/CA_ZIP_CODE.TXT"
  68. out=work.CA_ZIP_CODE
  69. dbms=dlm replace;
  70. getnames=yes;
  71. run;
  72.  
  73. /* proc print data=CA_ZIP_CODE (obs=10); */
  74. /* run; */
  75.  
  76. data work.CA_ZIP_CODE;
  77. set work.CA_ZIP_CODE;
  78. informat density:$15.;
  79. length density $10;
  80. if (population LE 4000) then density = "low";
  81. if (population GT 4000) and (population LE 30000) then density = "medium";
  82. if (population GT 30000) then density = "high";
  83. run;
  84.  
  85. proc sort data=work.ptf;
  86. by zip_code;
  87. run;
  88.  
  89. proc sort data=work.CA_ZIP_CODE;
  90. by zip_code;
  91. run;
  92.  
  93. data work.ptf;
  94. merge work.CA_ZIP_CODE work.ptf;
  95. by zip_code;
  96. run;
  97. /* density hp and age r importante :) */
  98.  
  99. /* 4 */
  100.  
  101. libname data "/home/u45009682/sasuser.v94/HOMEWORK";
  102. proc contents data=data.ptf; run;
  103.  
  104. data data.claims;
  105. set data.claims;
  106. run;
  107.  
  108. proc sort data=work.ptf;
  109. by POLICYHOLDER_ID POLICY_STARTING_DATE;
  110. run;
  111.  
  112. proc sort data=data.claims;
  113. by POLICYHOLDER_ID POLICY_STARTING_DATE;
  114. run;
  115.  
  116. data work.claims;
  117. merge data.claims (IN=A) work.ptf (IN=B);
  118. by POLICYHOLDER_ID POLICY_STARTING_DATE;
  119. IF A;
  120. run;
  121.  
  122.  
  123. /* 5 */
  124. data work.claims;
  125. set work.claims;
  126. informat year 4.;
  127. year=year(policy_starting_date);
  128. run;
  129.  
  130. proc sort data=work.claims;
  131. by year;
  132. run;
  133.  
  134. proc means data=work.claims noprint nway;
  135. var CLAIMS_COST;
  136. class year age hp density;
  137. output out=work.claims_summary (drop = _TYPE_ _FREQ_)
  138. mean(claims_cost) = cost
  139. N(claims_cost) = nb_claims;
  140. /* types policyholder_id*year; */
  141. run;
  142.  
  143. /* TODO how to do the above? */
  144.  
  145. /* merge this with the work.ptf database */
  146.  
  147. /* 6 USE SUM?*/
  148. data work.ptf;
  149. set work.ptf;
  150. format year 4.;
  151. year=year(policy_starting_date);
  152. run;
  153.  
  154. proc sort data=work.ptf;
  155. by year;
  156. run;
  157.  
  158. proc means data=work.ptf noprint nway;
  159. var policyholder_id;
  160. class year age hp density;
  161. output out=work.ptf_summary (drop = _TYPE_ _FREQ_)
  162. N(policyholder_id) = nb;
  163. run;
  164.  
  165. /* 7 */
  166. proc sort data=work.claims_summary;
  167. by age density hp year;
  168. run;
  169.  
  170. proc sort data=work.ptf_summary;
  171. by age density hp year;
  172. run;
  173.  
  174. data work.summary; /* in=a use */
  175. merge work.claims_summary work.ptf_summary;
  176. by age density hp year;
  177. run;
  178.  
  179. data work.summary;
  180. set work.summary;
  181. informat freq:6.3;
  182. freq=nb_claims/nb;
  183. run;
  184.  
  185. /* 8 */
  186. proc sort data=work.summary;
  187. by descending freq age hp density;
  188. run;
  189.  
  190. proc sort data=work.summary out=work.freq nodupkey;
  191. by age hp density;
  192. run;
  193.  
  194. proc sort data=work.freq;
  195. by year age hp density;
  196. run;
  197.  
  198. proc print data=work.freq;
  199. run;
  200.  
  201. data work.freq;
  202. set work.freq;
  203. keep age hp density freq;
  204. run;
  205.  
  206.  
  207. /* 9 */
  208. proc sort data=work.summary;
  209. by descending cost age hp density;
  210.  
  211. proc sort data=work.summary out=work.cost nodupkey;
  212. by age hp density;
  213. run;
  214.  
  215. proc sort data=work.cost;
  216. by year age hp density;
  217. run;
  218.  
  219.  
  220. /* 10 */
  221. proc sort data=work.cost;
  222. by age density hp;
  223. run;
  224.  
  225. proc sort data=work.freq;
  226. by age density hp;
  227. run;
  228.  
  229. data work.pp;
  230. merge work.cost work.freq;
  231. by age density hp;
  232. run;
  233.  
  234. data work.pp;
  235. set work.pp;
  236. informat pp:15.9;
  237. pp=freq*cost;
  238. run;
  239.  
  240. /* 11 */
  241. data work.pp;
  242. set work.pp;
  243. informat lp: 15.9;
  244. lp = pp*(1+0.05);
  245. run;
  246.  
  247. /* TASK 2: DEFINITION OF THE BEST INSURANCE PRICE STRATEGY */
  248. /* Instructions 12-19 */
  249.  
  250. /* 12 */
  251. data work.price;
  252. set work.pp;
  253. keep age density hp lp;
  254. run;
  255.  
  256. data work.price;
  257. set work.price;
  258. informat pA: 15.9 pB:15.9 pC:15.9;
  259. pA = lp*(1+0.05);
  260. pB = lp*(1+0.10);
  261. pC = lp*(1+0.15);
  262. run;
  263.  
  264.  
  265. /* 13 */
  266. PROC IMPORT OUT=work.prospect
  267. DATAFILE = "/home/u45009682/sasuser.v94/HOMEWORK/Prospect.csv"
  268. DBMS=CSV
  269. REPLACE;
  270. GETNAMES=YES ;
  271. DATAROW=2;
  272. delimiter=",";
  273. RUN;
  274.  
  275. data work.prospect;
  276. set work.prospect;
  277. length age $8 default=8;
  278. today = DATE();
  279. days = today - birthdate;
  280. agenum = floor(days/365);
  281. if (agenum LE 21) then age = "-21";
  282. if (agenum GT 21) and (agenum LE 35) then age = "21-35";
  283. if (agenum GT 35) and (agenum LE 60) then age = "35-60";
  284. if (agenum GT 60) then age = "+60";
  285. drop days today agenum;
  286. run;
  287.  
  288. proc sort data=work.prospect;
  289. by cars_id;
  290. run;
  291.  
  292. proc sort data=work.cars;
  293. by cars_id;
  294. run;
  295.  
  296. data work.prospect;
  297. merge work.cars work.prospect;
  298. by cars_id;
  299. run;
  300.  
  301. proc sort data=work.prospect;
  302. by zip_code;
  303. run;
  304.  
  305. proc sort data=work.CA_ZIP_CODE;
  306. by zip_code;
  307. run;
  308.  
  309. data work.prospect;
  310. merge work.CA_ZIP_CODE work.prospect;
  311. by zip_code;
  312. run;
  313.  
  314.  
  315. /* 14 */
  316. proc sort data=work.prospect;
  317. by age density hp;
  318. run;
  319.  
  320. proc sort data=work.price;
  321. by age density hp;
  322. run;
  323.  
  324. data work.prospect;
  325. merge work.prospect work.price;
  326. by age density hp;
  327. run;
  328.  
  329. /* 15 */
  330. data work.prospect;
  331. set work.prospect;
  332. informat prob_a:15.9 prob_b:15.9 prob_c:15.9;
  333. prob_a = 1/(1+exp((-0.1*(lp/pA))+(0.002*(pA-lp))));
  334. prob_b = 1/(1+exp((-0.1*(lp/pB))+(0.002*(pB-lp))));
  335. prob_c = 1/(1+exp((-0.1*(lp/pC))+(0.002*(pC-lp))));
  336. run;
  337.  
  338.  
  339. /* 16 */
  340. data work.prospect;
  341. set work.prospect;
  342. informat accept_a:15.9 accept_b:15.9 accept_c:15.9;
  343. if prob_A GT 0.5 then accept_a=1;
  344. else accept_a=0;
  345. if prob_B GT 0.5 then accept_b=1;
  346. else accept_b=0;
  347. if prob_C GT 0.5 then accept_c=1;
  348. else accept_c=0;
  349. run;
  350.  
  351.  
  352. /* 17 */
  353. proc means data=work.prospect noprint;
  354. var accept_a accept_b accept_c;
  355. output out=work.table_volume (drop = _TYPE_ _FREQ_)
  356. sum(accept_a) = A sum(accept_b) = B sum(accept_c) = C;
  357. run;
  358.  
  359.  
  360. /* 18 */
  361. data work.prospect;
  362. set work.prospect;
  363. informat pi_a:15.9 pi_b:15.9 pi_c:15.9;
  364. pi_a = (pA-lp)*accept_a;
  365. pi_b = (pA-lp)*accept_b;
  366. pi_c = (pA-lp)*accept_c;
  367. run;
  368.  
  369.  
  370. /* 19 */
  371. proc means data=work.prospect noprint;
  372. var pi_a pi_b pi_c;
  373. output out=work.table_profit (drop = _TYPE_ _FREQ_)
  374. sum(pi_a) = profit_A sum(pi_b) = profit_B sum(pi_c) = profit_C;
  375. run;
  376.  
  377.  
  378. /* TASK 3: CREATE A REPORT */
  379. /* Instructions 20-29 */
  380.  
  381. /* 20 */
  382. options nodate pdfpageview=fitpage;
  383. ods noproctitle;
  384. ods pdf style=journal file="/home/u45009682/sasuser.v94/HOMEWORK/final_report_lakshminarayanan_lee_stoyanova.pdf"
  385. startpage=no;
  386.  
  387. title1 "Homework Project PSTAT 130";
  388. title2 "Nivi Lakshminarayanan, Priscilla Lee, Deni Stoyanova";
  389. title3 "nlakshminarayanan@ucsb.edu, priscilla_lee@ucsb.edu, denitza@ucsb.edu";
  390.  
  391. ods layout gridded;
  392.  
  393. /* 21 */
  394. proc sort data=work.ptf;
  395. by age hp density;
  396. run;
  397.  
  398. proc sort data=work.pp;
  399. by age hp density;
  400. run;
  401.  
  402. data work.ptf_pp ;
  403. merge work.ptf work.pp;
  404. by age hp density;
  405. run;
  406.  
  407. data work.ptf_pp;
  408. set work.ptf_pp;
  409. where year=2019;
  410. run;
  411.  
  412.  
  413. /* 22 */
  414. title "Average Frequency by Age";
  415. proc means data=work.ptf_pp mean nonobs;
  416. var freq;
  417. class age;
  418. output out=work.average_claims_freq1 (drop= _FREQ_ _TYPE_);
  419. types age;
  420. run;
  421.  
  422. title "Average Frequency by Density";
  423. proc means data=work.ptf_pp mean nonobs;
  424. var freq;
  425. class density;
  426. output out=work.average_claims_freq2 (drop= _FREQ_ _TYPE_);
  427. types density;
  428. run;
  429.  
  430. title "Average Frequency by HP";
  431. proc means data=work.ptf_pp mean nonobs;
  432. var freq;
  433. class hp;
  434. output out=work.average_claims_freq3 (drop= _FREQ_ _TYPE_);
  435. types hp;
  436. run;
  437.  
  438. /* 23 */
  439. title "Average Claims Cost by Age";
  440. proc means data=work.ptf_pp mean nonobs;
  441. var cost;
  442. class age;
  443. output out=work.average_claims_cost1 (drop= _FREQ_ _TYPE_);
  444. types age;
  445. run;
  446.  
  447. title "Average Claims Cost by Density";
  448. proc means data=work.ptf_pp mean nonobs;
  449. var cost;
  450. class density;
  451. output out=work.average_claims_cost2 (drop= _FREQ_ _TYPE_);
  452. types density;
  453. run;
  454.  
  455. title "Average Claims Cost by HP";
  456. proc means data=work.ptf_pp mean nonobs;
  457. var cost;
  458. class hp;
  459. output out=work.average_claims_cost3 (drop= _FREQ_ _TYPE_);
  460. types hp;
  461. run;
  462.  
  463. /* 24 */
  464. title "Average Pure Premium of the Current Portfolio";
  465. proc means data=work.ptf_pp mean nonobs;
  466. var pp;
  467. output out=work.average_pp (drop= _FREQ_ _TYPE_);
  468. run;
  469.  
  470. /* 25 */
  471. proc sort data=work.ptf_pp;
  472. by pp;
  473. run;
  474.  
  475. title "Age, Density, and Horsepower with Lowest Pure Premium";
  476. proc print data=work.ptf_pp (obs=1) noobs; var age density hp; run;
  477.  
  478. /* 26 */
  479. proc sort data=work.ptf_pp;
  480. by descending pp;
  481. run;
  482.  
  483. title "Age, Density, and Horsepower with Highest Pure Premium";
  484. proc print data=work.ptf_pp (obs=1) noobs; var age density hp; run;
  485.  
  486. /* 27 */
  487. title "Volume of New Business Generated By Each Strategy";
  488. proc print data=work.table_volume label noobs;
  489. label A="Strategy A" B="Strategy B" C="Strategy C";
  490. run;
  491.  
  492. /* 28 */
  493. title "Profit Generated By Each Strategy";
  494. footnote "The Best Strategy is Strategy A";
  495. proc print data=work.table_profit label noobs;
  496. label profit_A="Strategy A" profit_B="Strategy B" profit_C="Strategy C";
  497. run;
  498.  
  499. /* 29 */
  500. data work.final_price;
  501. set work.price;
  502. keep age hp density pA; /* keeping pA since it is the highest profit based on the above instruciton*/
  503. rename pA=best_price;
  504. run;
  505.  
  506. title "Final Price";
  507. proc print data=work.final_price noobs; run;
  508.  
  509. ods layout end;
  510. ods PDF CLOSE;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement