Advertisement
Guest User

Untitled

a guest
Apr 21st, 2019
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.73 KB | None | 0 0
  1. FILENAME REFFILE '/home/apexmuaado0/dap_assignment/dap_data.xls';
  2.  
  3. PROC IMPORT DATAFILE=REFFILE
  4. DBMS=XLS
  5. OUT=dap_data;
  6. GETNAMES=YES;
  7. RUN;
  8.  
  9. /* drop the Donor ID column */
  10. data dap_data(drop=D_ID);
  11. set dap_data;
  12. run;
  13.  
  14.  
  15.  
  16.  
  17. /* list the first 10 observations */
  18. proc print data=work.dap_data (obs=10);
  19. title 'Listing of the Data Set';
  20. run;
  21. title;
  22.  
  23. /* summary of the table contents */
  24. PROC CONTENTS DATA=work.dap_1;
  25. RUN;
  26.  
  27. /* *******************************************************ENCODE CATEGORICAL VARIABLES*****************************************************************/
  28. /* ****************************************************************************************************************************************************/
  29. /* encode categorical variable in char to numeric
  30. Donor_Status:: A if active funder, S if star funder, N if new funder, E if inactive funder, F if first time funder, L if lapsing funder
  31. DUMMY VARS CREAteD For all THE DONORS EXCEPT for donor_status_l
  32. */
  33. data work.dap_data;
  34. set work.dap_data;
  35. if Donor_Status = 'A' then Donor_Status_a = 1;
  36. else if Donor_Status = 'S' then Donor_Status_s = 1;
  37. else if Donor_Status = 'N' then Donor_Status_n = 1;
  38. else if Donor_Status = 'E' then Donor_Status_e = 1;
  39. else if Donor_Status = 'F' then Donor_Status_f = 1;
  40. run;
  41.  
  42. /* missing values in the dummy variables are declared as zero : except donor status l*/
  43. data work.dap_data;
  44. set work.dap_data;
  45. if Donor_Status_a = . then Donor_Status_a = 0;
  46. if Donor_Status_s = . then Donor_Status_s = 0;
  47. if Donor_Status_n = . then Donor_Status_n = 0;
  48. if Donor_Status_e = . then Donor_Status_e = 0;
  49. if Donor_Status_f = . then Donor_Status_f = 0;
  50. run;
  51.  
  52. /* confirm the masking values are changed in the dataset */
  53. proc freq data = work.dap_1;
  54. table Donor_Status_n;
  55. run;
  56.  
  57. /*
  58. DemHomeOwner:: H - if home owner U - if unknown
  59. CREAtE ONE DUMMY Variable FOR HOME OWNER
  60. */
  61. data work.dap_data;
  62. set work.dap_data;
  63. if DemHomeOwner = 'H' then DemHomeOwner_n = 1;
  64. if DemHomeOwner = 'U' then DemHomeOwner_n = 0;
  65. run;
  66.  
  67. /* confirm the masking values are changed in the dataset */
  68. proc freq data = work.dap_data;
  69. table DemHomeOwner_n;
  70. run;
  71.  
  72. /*
  73. Gender:: M for Male : F for Female
  74. create two dummy variables for male and female
  75. */
  76. data work.dap_data;
  77. set work.dap_data;
  78. if Gender = 'M' then Gender_m = 1;
  79. if Gender = 'F' then Gender_f = 1;
  80. run;
  81.  
  82. /*declare misssing values of gender dummy variables as zero */
  83. data work.dap_data;
  84. set work.dap_data;
  85. if Gender_m = . then Gender_m = 0;
  86. if Gender_f = . then Gender_f = 0;
  87. run;
  88.  
  89. /* check if any missing values exisit */
  90. proc means data = work.dap_1 n nmiss ;
  91. var _numeric_;
  92. run;
  93.  
  94.  
  95. /* *********************END*************************************END**************************************END*******************************************/
  96. /* ****************************************************************************************************************************************************/
  97.  
  98. /* create a new dataset with the new encoded variables */
  99. data dap_1;
  100. set work.dap_data (drop=Gender DemHomeOwner Donor_Status);
  101. run;
  102.  
  103. /* view the new dataset with encoded variables */
  104. PROC CONTENTS DATA=work.dap_1;
  105. RUN;
  106.  
  107. /* change the vraiable type of the DemArea from char to numeric */
  108. data dap_1;
  109. set work.dap_1;
  110. DemArea2 = input(DemArea, best12.);
  111. run;
  112.  
  113. data dap_1;
  114. set work.dap_1 (drop = DemArea);
  115. run;
  116.  
  117. /* Outlier treatment *****************************************************************************************************************************/
  118. proc univariate data= WORK.dap_1 robustscale plot;
  119. var age areahomevalue areamedincome callcntall callcntcardall callcntcardp1
  120. callcntcardp2 callcntp1 callcntp2 demarea2 demhomeowner_n donavgcardp1 donavgall
  121. donavgcardp1 donavglast donavgp1 doncntall doncntcardall doncntcardp1 doncntp1
  122. donor donor_D donor_Status_n donor_status_prev_camp dontimefirst dontimelast Gender_m Gender_f;
  123. run;
  124.  
  125.  
  126. /* get the values for iqr 3rd quartile and 1st quartile*/
  127. proc means data=work.dap_1 stackods n qrange p25 p75;
  128. var age areahomevalue areamedincome callcntall callcntcardall callcntcardp1
  129. callcntcardp2 callcntp1 callcntp2 demarea2 demhomeowner_n donavgcardp1 donavgall
  130. donavgcardp1 donavglast donavgp1 doncntall doncntcardall doncntcardp1 doncntp1
  131. donor donor_D donor_Status_n donor_status_prev_camp dontimefirst dontimelast Gender_m Gender_f;
  132. ods output summary=ranges;
  133. run;
  134.  
  135. *macro to cap outliers;
  136. %macro cap(dset=,var=, lower=, upper=);
  137. data &dset;
  138. set &dset;
  139. if &var>&upper then &var=&upper;
  140. if &var<&lower then &var=&lower;
  141. run;
  142. %mend;
  143.  
  144. *create cutoffs and execute macro for each variable;
  145. data cutoffs;
  146. set ranges;
  147. lower=p25-1.5*qrange;
  148. upper=p75+1.5*qrange;
  149. string = catt('%cap(dset=dap_1, var=', variable, ", lower=", lower, ", upper=", upper ,");");
  150. call execute(string);
  151. run;
  152.  
  153. /*assign labels */
  154. data dap_1; set dap_1;
  155. label
  156. Donor_D = "Amount donated for previous campaigns"
  157. DonCntP1 = "Times the person funded in the last 36 months"
  158. DONCntAll = "Times the person donated over period of few years"
  159. DONCntCardP1 = "Times the person funded in the last 36 months with the help of references"
  160. DONCntCardAll = "Times the person donated over period of few years with the help of references"
  161. DONAvgLast = "Amount funded last by the person (in $ thousands)"
  162. DONAvgP1 = "Fund Amount in Average for 36 Months"
  163. DONAvgAll = "Fund Amount in Average for overall period of few years"
  164. DONAvgCardP1 = "Fund Amount in Average for 36 Months from the individual with the help of references"
  165. DONTimeLast = "No of months since the donator made last donation"
  166. DONTimeFirst = "No of months since the donator made first donation"
  167. CallCntP2 = "No of Calls made by GRDS to the individual for the last 12 months"
  168. CallCntP1 = "No of Calls made by GRDS to the individual for the last 36 months"
  169. CallCntAll = "No of Calls made by GRDS to the individual over the period of few years"
  170. CallCntCardP2 = "No of Calls made by References to the individual for the last 12 months"
  171. CallCntCardP1 = "No of Calls made by References to the individual for the last 36 months"
  172. CallCntCardAll = "No of Calls made by References to the individual over the period of few years"
  173. Donor_Status_a = "active funder"
  174. Donor_Status_s = "star funder"
  175. Donor_Status_n = "new funder"
  176. Donor_Status_e = "inactive funder"
  177. Donor_Status_f = "first time funder"
  178. DemHomeOwner_n = "Owns home or not"
  179. Donor_Status_Prev_Camp = "Donated to the last response"
  180. DemArea2 = "Demographic Area"
  181. Age = "Age"
  182. Gender_m = "Male"
  183. Gender_f = "Female"
  184. AreaHomeValue = "median home value"
  185. AreaMedIncome = "median income";
  186. RUN;
  187.  
  188.  
  189. data dap_1;
  190. set work.dap_1 (drop = Donor_D);
  191. run;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement