SHARE
TWEET

Untitled

a guest Apr 21st, 2019 71 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. FILENAME REFFILE '/home/apexmuaado0/dap_assignment/dap_data.xls';
  2.  
  3. PROC IMPORT DATAFILE=REFFILE
  4.     DBMS=XLS
  5.     OUT=dap_data;
  6.     GETNAMES=YES;
  7. RUN;
  8.  
  9. /* drop the Donor ID column */
  10. data dap_data(drop=D_ID);
  11.    set dap_data;
  12. run;
  13.  
  14.  
  15.  
  16.  
  17. /* list the first 10 observations */
  18. proc print data=work.dap_data (obs=10);
  19.    title 'Listing of the Data Set';
  20. run;
  21. title;
  22.  
  23. /* summary of the table contents */
  24. PROC CONTENTS DATA=work.dap_1;
  25. RUN;
  26.  
  27. /* *******************************************************ENCODE CATEGORICAL VARIABLES*****************************************************************/
  28. /* ****************************************************************************************************************************************************/
  29. /* encode categorical variable in char to numeric
  30. Donor_Status:: A if active funder, S if star funder, N if new funder, E if inactive funder, F if first time funder, L if lapsing funder
  31. DUMMY VARS CREAteD For all THE DONORS EXCEPT for donor_status_l
  32. */
  33. data work.dap_data;
  34. set work.dap_data;
  35. if Donor_Status = 'A' then Donor_Status_a = 1;
  36. else if Donor_Status = 'S' then Donor_Status_s = 1;
  37. else if Donor_Status = 'N' then Donor_Status_n = 1;
  38. else if Donor_Status = 'E' then Donor_Status_e = 1;
  39. else if Donor_Status = 'F' then Donor_Status_f = 1;
  40. run;
  41.  
  42. /* missing values in the dummy variables are declared as zero : except donor status l*/
  43. data work.dap_data;
  44. set work.dap_data;
  45. if Donor_Status_a = . then Donor_Status_a = 0;
  46. if Donor_Status_s = . then Donor_Status_s = 0;
  47. if Donor_Status_n = . then Donor_Status_n = 0;
  48. if Donor_Status_e = . then Donor_Status_e = 0;
  49. if Donor_Status_f = . then Donor_Status_f = 0;
  50. run;
  51.  
  52. /* confirm the masking values are changed in the dataset */
  53. proc freq data = work.dap_1;
  54. table Donor_Status_n;
  55. run;
  56.  
  57. /*
  58. DemHomeOwner:: H - if home owner U - if unknown
  59. CREAtE ONE DUMMY Variable FOR HOME OWNER
  60. */
  61. data work.dap_data;
  62. set work.dap_data;
  63. if DemHomeOwner = 'H' then DemHomeOwner_n = 1;
  64. if DemHomeOwner = 'U' then DemHomeOwner_n = 0;  
  65. run;
  66.  
  67. /* confirm the masking values are changed in the dataset */
  68. proc freq data = work.dap_data;
  69. table DemHomeOwner_n;
  70. run;
  71.  
  72. /*
  73. Gender:: M for Male : F for Female
  74. create two dummy variables for male and female
  75. */
  76. data work.dap_data;
  77. set work.dap_data;
  78. if Gender = 'M' then Gender_m = 1;
  79. if Gender = 'F' then Gender_f  = 1;
  80. run;
  81.  
  82. /*declare misssing values of gender dummy variables as zero */
  83. data work.dap_data;
  84. set work.dap_data;
  85. if Gender_m = . then Gender_m = 0;
  86. if Gender_f = . then Gender_f  = 0;
  87. run;
  88.  
  89. /* check if any missing values exisit */
  90. proc means data = work.dap_1 n nmiss ;
  91. var _numeric_;
  92. run;
  93.  
  94.  
  95. /* *********************END*************************************END**************************************END*******************************************/
  96. /* ****************************************************************************************************************************************************/
  97.  
  98. /* create a new dataset with the new encoded variables */
  99. data dap_1;
  100.    set work.dap_data (drop=Gender DemHomeOwner Donor_Status);
  101. run;
  102.  
  103. /* view the new dataset with encoded variables */
  104. PROC CONTENTS DATA=work.dap_1;
  105. RUN;
  106.  
  107. /* change the vraiable type of the DemArea from char to numeric */
  108. data dap_1;
  109.  set work.dap_1;
  110. DemArea2 = input(DemArea, best12.);
  111. run;
  112.  
  113. data dap_1;
  114.  set work.dap_1 (drop = DemArea);
  115. run;
  116.  
  117. /* Outlier treatment *****************************************************************************************************************************/
  118. proc univariate data= WORK.dap_1 robustscale plot;
  119. var  age areahomevalue areamedincome callcntall callcntcardall callcntcardp1
  120.    callcntcardp2 callcntp1 callcntp2 demarea2 demhomeowner_n donavgcardp1 donavgall
  121.    donavgcardp1 donavglast donavgp1 doncntall doncntcardall doncntcardp1 doncntp1
  122.    donor donor_D donor_Status_n donor_status_prev_camp dontimefirst dontimelast Gender_m Gender_f;
  123. run;
  124.  
  125.  
  126. /* get the values for iqr 3rd quartile and 1st quartile*/
  127. proc means data=work.dap_1 stackods n qrange p25 p75;
  128. var age areahomevalue areamedincome callcntall callcntcardall callcntcardp1
  129.    callcntcardp2 callcntp1 callcntp2 demarea2 demhomeowner_n donavgcardp1 donavgall
  130.    donavgcardp1 donavglast donavgp1 doncntall doncntcardall doncntcardp1 doncntp1
  131.    donor donor_D donor_Status_n donor_status_prev_camp dontimefirst dontimelast Gender_m Gender_f;
  132. ods output summary=ranges;
  133. run;
  134.  
  135. *macro to cap outliers;
  136. %macro cap(dset=,var=, lower=, upper=);
  137. data &dset;
  138.     set &dset;
  139.     if &var>&upper then &var=&upper;
  140.     if &var<&lower then &var=&lower;
  141. run;
  142. %mend;
  143.  
  144. *create cutoffs and execute macro for each variable;
  145. data cutoffs;
  146. set ranges;
  147. lower=p25-1.5*qrange;
  148. upper=p75+1.5*qrange;
  149. string = catt('%cap(dset=dap_1, var=', variable, ", lower=", lower, ", upper=", upper ,");");
  150. call execute(string);
  151. run;
  152.  
  153. /*assign labels */
  154. data dap_1; set dap_1;
  155.     label
  156.         Donor_D = "Amount donated for previous campaigns"
  157.         DonCntP1 = "Times  the person funded in the last 36 months"
  158.         DONCntAll = "Times the person donated over period of few years"
  159.         DONCntCardP1 = "Times  the person funded in the last 36 months with the help of references"
  160.         DONCntCardAll = "Times the person donated over period of few years with the help of references"
  161.         DONAvgLast = "Amount funded last  by the person (in $ thousands)"
  162.         DONAvgP1 = "Fund Amount in Average for 36 Months"
  163.         DONAvgAll = "Fund Amount in Average for overall period of few years"
  164.         DONAvgCardP1 = "Fund Amount in Average for 36 Months  from the individual with the help of references"
  165.         DONTimeLast = "No of months since the donator made last donation"
  166.         DONTimeFirst = "No of months since the donator made first donation"
  167.         CallCntP2 = "No of Calls made by GRDS to the individual for the last 12 months"
  168.         CallCntP1 = "No of Calls made by GRDS to the individual for the last 36 months"
  169.         CallCntAll = "No of Calls made by GRDS to the individual over the period of few years"
  170.         CallCntCardP2 = "No of Calls made by References to the individual for the last 12 months"
  171.         CallCntCardP1 = "No of Calls made by References to the individual for the last 36 months"
  172.         CallCntCardAll = "No of Calls made by References to the individual over the period of few years"
  173.         Donor_Status_a =  "active funder"
  174.         Donor_Status_s  =  "star funder"
  175.         Donor_Status_n   =  "new funder"
  176.         Donor_Status_e = "inactive funder"
  177.         Donor_Status_f = "first time funder"
  178.         DemHomeOwner_n = "Owns home or not"
  179.         Donor_Status_Prev_Camp = "Donated to the last response"
  180.         DemArea2 = "Demographic Area"
  181.         Age = "Age"
  182.         Gender_m = "Male"
  183.         Gender_f = "Female"
  184.         AreaHomeValue = "median home value"
  185.         AreaMedIncome = "median income";
  186. RUN;
  187.  
  188.  
  189. data dap_1;
  190.  set work.dap_1 (drop = Donor_D);
  191. run;
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top