# Untitled

a guest
Feb 21st, 2020
99
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. /* PSTAT 130 Homework Winter 2020*/
2. /* By: */
3. /* Priscilla Lee */
4. /* Nivi Lakshminarayanan */
5. /* Deni Stoyanova */
6.
7. /* TASK 1: COMPUTATION OF FUTURE COSTS */
8. /* Instructions 1-11: */
9.
10. /* 1 */
11. libname data "/home/u45009682/sasuser.v94/HOMEWORK";
12. proc contents data=data.ptf; run;
13.
14. data work.ptf;
15. set data.ptf;
16. length age \$8 default=8;
17. today = DATE();
18. days = today - birthdate;
19. agenum = floor(days/365);
20. if (agenum LE 21) then age = "-21";
21. if (agenum GT 21) and (agenum LE 35) then age = "21-35";
22. if (agenum GT 35) and (agenum LE 60) then age = "35-60";
23. if (agenum GT 60) then age = "+60";
24. drop days today agenum;
25. run;
26.
27. /* test */
28. /* proc print data=ptf (obs=10); */
29. /* run; */
30.
31.
32. /* 2 */
33. proc import datafile="/home/u45009682/sasuser.v94/HOMEWORK/cars.csv"
34. out=work.cars
35. dbms=CSV replace;
36. getnames=yes;
37. datarow=2;
38. run;
39.
40. data work.cars;
41. set work.cars;
42. informat hp:\$15.;
43. length hp \$10;
44. if (horsepower LE 150) then hp = "low";
45. if (horsepower GT 150) and (horsepower LE 300) then hp = "medium";
46. if (horsepower GT 300) then hp = "high";
47. run;
48.
49.
50.
51. proc sort data=work.ptf;
52. by cars_id;
53. run;
54. proc sort data=work.cars;
55. by cars_id;
56. run;
57.
58. data work.ptf;
59. merge work.cars work.ptf;
60. by cars_id;
61. run;
62.
63. /* proc print data=ptf (obs=10); */
64. /* run; */
65.
66. /* 3 */
67. proc import datafile="/home/u45009682/sasuser.v94/HOMEWORK/CA_ZIP_CODE.TXT"
68. out=work.CA_ZIP_CODE
69. dbms=dlm replace;
70. getnames=yes;
71. run;
72.
73. /* proc print data=CA_ZIP_CODE (obs=10); */
74. /* run; */
75.
76. data work.CA_ZIP_CODE;
77. set work.CA_ZIP_CODE;
78. informat density:\$15.;
79. length density \$10;
80. if (population LE 4000) then density = "low";
81. if (population GT 4000) and (population LE 30000) then density = "medium";
82. if (population GT 30000) then density = "high";
83. run;
84.
85. proc sort data=work.ptf;
86. by zip_code;
87. run;
88.
89. proc sort data=work.CA_ZIP_CODE;
90. by zip_code;
91. run;
92.
93. data work.ptf;
94. merge work.CA_ZIP_CODE work.ptf;
95. by zip_code;
96. run;
97. /* density hp and age r importante :) */
98.
99. /* 4 */
100.
101. libname data "/home/u45009682/sasuser.v94/HOMEWORK";
102. proc contents data=data.ptf; run;
103.
104. data data.claims;
105. set data.claims;
106. run;
107.
108. proc sort data=work.ptf;
109. by POLICYHOLDER_ID POLICY_STARTING_DATE;
110. run;
111.
112. proc sort data=data.claims;
113. by POLICYHOLDER_ID POLICY_STARTING_DATE;
114. run;
115.
116. data work.claims;
117. merge data.claims (IN=A) work.ptf (IN=B);
118. by POLICYHOLDER_ID POLICY_STARTING_DATE;
119. IF A;
120. run;
121.
122.
123. /* 5 */
124. data work.claims;
125. set work.claims;
126. informat year 4.;
127. year=year(policy_starting_date);
128. run;
129.
130. proc sort data=work.claims;
131. by year;
132. run;
133.
134. proc means data=work.claims noprint;
135. var CLAIMS_COST;
136. class year age hp density;
137. output out=work.claims_summary (drop = _TYPE_ _FREQ_)
138. mean(claims_cost) = cost
139. N(claims_cost) = nb_claims;
140. /* types policyholder_id*year; */
141. run;
142.
143. /* TODO how to do the above? */
144.
145. /* merge this with the work.ptf database */
146.
147. /* 6 */
148. data work.ptf;
149. set work.ptf;
150. format year 4.;
151. year=year(policy_starting_date);
152. run;
153.
154. proc sort data=work.ptf;
155. by year;
156. run;
157.
158. proc means data=work.ptf;
159. var policyholder_id;
160. class year age hp density;
161. output out=work.ptf_summary (drop = _TYPE_ _FREQ_)
162. N(policyholder_id) = nb;
163. run;
164.
165. /* 7 */
166. proc sort data=work.claims_summary;
167. by age density hp year;
168. run;
169.
170. proc sort data=work.ptf_summary;
171. by age density hp year;
172. run;
173.
174. data work.summary;
175. merge work.claims_summary work.ptf_summary;
176. by age density hp year;
177. run;
178.
179. data work.summary;
180. set work.summary;
181. informat freq:6.3;
182. freq=nb_claims/nb;
183. run;
184.
185. proc sort data=work.summary;
186. by descending freq age hp density;
187. run;
188.
189. proc sort data=work.summary out=work.freq nodupkey;
190. by age hp density;
191. run;
192.
193. proc sort data=work.freq;
194. by year age hp density;
195. run;
196.
197. proc print data=work.freq;
198. run;
199.
200.
201. /* 8 */
202. data work.freq;
203. set work.freq;
204. keep age hp density freq;
205. run;
206.
207.
208. /* TASK 2: DEFINITION OF THE BEST INSURANCE PRICE STRATEGY */
209. /* Instructions 12-19 */
210.
211. /* TASK 3: CREATE A REPORT */
212. /* Instructions 20-29 */
RAW Paste Data