Advertisement
Guest User

SAS SQL How TO

a guest
Apr 29th, 2019
451
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
SAS 30.07 KB | None | 0 0
  1. /**************************************************************
  2. Program: Code_For_SQL_How_SGF_2019 Lavery
  3. programmer: r lavery
  4. date: 2018/02/28
  5. Purpose: code for attendees to run and modify in SQL How
  6. **************************************************************/
  7. /*
  8. Topics are:
  9. Section A 1 OF 11) THE SAS DATA ENGINE          `      
  10. Section B 2 OF 11) INDEXING
  11. Section B 3 OF 11) THE SQL OPTIMIZER AND IMPROVING PERFORMANCE
  12. Section D 4 OF 11) SUB-QUERIES: CORRELATED AND UNCORRELATED
  13. Section E 5 OF 11) PLACEMENT OF SUB-QUERIES IN SQL SYNTAX
  14. Section F 6 OF 11) VIEWS                            
  15. Section G 7 OF 11) FUZZY MERGING
  16. Section H 8 OF 11) COALESCING                      
  17. Section I 9 OF 11) FINDING DUPLICATES
  18. Section J 10 of 11) REFLEXIVE JOINS
  19. Section K 11 of 11) USE SQL DICTOINARY TABLES TO DOCUMENT DATA SETS IN HYPERLINKED EXCEL WORKBOOK
  20. */
  21.  
  22.  
  23. /****Section: Data Set Creation *************************/
  24. Options msglevel=i;
  25. ODS Listing;
  26. ODS HTML close;
  27. options nocenter MCompilenote = all;
  28.  
  29.  
  30. PROC SQL;   create table MySchool
  31.   ( Name Char(8) , Sex Char(1) ,  age  num
  32.    ,  Height  num , weight  num
  33.    );
  34. insert into MySchool
  35. values('Joy'    ,'F', 11 , 51.3  , 50.5)
  36. values('Jane'   ,'F', 12 , 59.8  , 84.5)
  37. values('Jim'    ,'M', 12 , 57.3  , 83.0)
  38. values('Alice'  ,'F', 13 , 56.5  , 84.0)
  39. values('Jeff'   ,'M', 13 , 62.5  , 84.0)
  40. values('Bob'    ,'M', 14 , 64.2  , 90.0)
  41. values('Philip' ,'M', 16 , 72.0  ,150.0);
  42.  
  43. PROC SQL;   create table ExtraInfo
  44.    (  Name Char(8) , age num, Sex Char(1)      
  45.    , Allerg Char(4),  Diet Char(6) , sport num ,  ResDay Char(8)
  46.     );
  47. insert into ExtraInfo
  48. values('Joy'   , 11 ,'F', 'None', 'Vegan'   , 3  ,'Resident')
  49. values('Jane'  , 12 ,'F', 'Nuts',  'Meat'   , 1  ,'Day')
  50. values('Jim'   , 12 ,'M', 'None',  'Meat'   , 0  ,'Resident')
  51. values('Alice' , 13 ,'F', 'Nuts',  'NoMeat' , 1  ,'Resident')
  52. values('Jeff'  , 13 ,'M', 'Dust',  'Vegan'  , 1  ,'Day')
  53. values('Philip', 16 ,'M', 'None',  'NoMeat' , 2  ,'Resident');
  54. quit;
  55.  
  56. /*******************************************************************************************/
  57. /** Section A- 1 of 11: THE SAS DATA ENGINE ******/
  58. /*  The fact that different numbers of observations were read supports the idea
  59.      that a data engine exists that is close to the hard drive.*/
  60. /*******************************************************************************************/
  61. /*Example A_1*/
  62. Data A01_IF_example ;
  63.    /*Reads 19 obs*/
  64.    set sashelp.class ;
  65.    if sex NE "F";
  66.    run;
  67.  
  68.  
  69. Data A02_Where_Example ;
  70.    set sashelp.class ;
  71.    /*Reads 10 obs*/
  72.    Where sex NE "F";
  73.    run;
  74.  
  75.  
  76. /*******************************************************************************************/
  77. /** Section B - 2 of 11: INDEXING**/
  78. /*******************************************************************************************/
  79. /*Example B_1 run in small steps and look for notes in the log*/
  80. PROC SQL;
  81. /*Create a dataset on which you can create an index*/
  82. Create table B01_MyClass
  83.  as select *
  84.  from SASHelp.class;
  85. /*Data set B01 has an index created by SQL, B02 will create the index using a data step*/
  86. Create index name on B01_MyClass(name);
  87.  
  88. PROC Contents data=B01_MyClass centiles;
  89. Title "Example B_1 The PROC Contents tells you about indexes";
  90. TITLE2 "but it does not tell you much - read Mike Raithal's paper";
  91. run;
  92. title "";
  93.  
  94.  
  95. PROC SQL;
  96. Select *
  97.  /*B_01_A: No note in log*/
  98.  from B01_MyClass;
  99.  
  100. Select *
  101.  from B01_MyClass
  102. /*B_01_B: There is a note in the log
  103.  INFO: Index Name selected for WHERE clause optimization*/
  104.  where name="Jane";
  105.  
  106.  
  107. Select *
  108.  from B01_MyClass
  109. /*B_01_C: There is a note in the log
  110.  INFO: Index Name selected for WHERE clause optimization*/
  111.  where substr(name,1,1)="J";
  112.  
  113.  Select *
  114.  from B01_MyClass
  115. /*B_01_D: There is a note in the log
  116.  INFO: Index Name selected for WHERE clause optimization*/
  117.  where substr(name,2,1)="o";
  118.  
  119.  Select *
  120.  from B01_MyClass
  121.  /*B_01_E: No Note in log (violates the 10% rule?)*/
  122.  where substr(name,2,1)in ("a","e","i","o","u");
  123.  
  124.  Select *
  125.  from B01_MyClass
  126.  /*B_01_F: No note in log - we did not have an index on age*/
  127.  where age=13;
  128.  
  129. /*Only Difference between B01 and B02 is that the index for Data B02 was created in a data step - B01 used SQL*/
  130. /*Example B_2 Run code in steps and look for notes in the log - resuilts will match B01*/
  131. Data B02_My_Class(index=(name));
  132.  set SASHelp.class;
  133.  run;
  134.  
  135. PROC Contents data=B02_My_Class centiles;
  136. Title "Example B_2 The PROC Contents tells you about indexes";
  137. TITLE2 "but it does not tell you much - read Mike Raithal's paper";
  138. run;
  139. title "";
  140.  
  141. Data _null_;
  142.  set B02_My_Class;
  143.  /*No Where -- No INFO in log. SAS Reads ALL 19 obs*/
  144.  run;
  145.  
  146. Data _null_;
  147.  set B02_My_Class;
  148.  /*INFO: Index Name selected for WHERE clause optimization. is in log*/
  149.  where name="Jane";
  150.  run;
  151.  
  152.  Data _null_;
  153.  set B02_My_Class;
  154.  /*INFO: Index Name selected for WHERE clause optimization. is in log*/
  155.  where substr(name,1,1)="J";
  156.  run;
  157.  
  158.  Data _null_;
  159.  set B02_My_Class;
  160.  /*INFO: Index Name selected for WHERE clause optimization. is in log*/
  161.   where substr(name,2,1)="o";
  162.   run;
  163.  
  164. Data _null_;
  165.  set B02_My_Class;
  166.  /*No Note in log (violates the 10% rule?)*/
  167.     where substr(name,2,1)in ("a","e","i","o","u");
  168.  run;
  169.  
  170. Data _null_;
  171.  set B02_My_Class;
  172.  /*NO INFO in log, but reads 3 obs*/
  173.     where age=13;
  174.  run;
  175. title "";
  176.  
  177.  
  178. /*******************************************************************************************/
  179. /** Section C - 3 of 11: THE SQL OPTIMIZER AND IMPROVING PERFORMANCE **/
  180. /*******************************************************************************************/
  181. /*Example C_1 run this and look in the log.
  182.    The log will show a summary for the program that the optimizer plans to run*/
  183. PROC SQL _method _tree;
  184. title "Example C_1 Look in Log to see the explain plan. The output from _method and _Tree";
  185.  select name    from sashelp.class
  186.    where sex="M"    
  187.     order by Age;
  188. title "";
  189.  
  190. /*******************************************************************************************/
  191. /** Section D - 4 OF 11) SUB-QUERIES: CORRELATED AND UNCORRELATED*/
  192. /*******************************************************************************************/
  193. /*Example D_1*/
  194. PROC SQL _method _tree;
  195.   Title "Example D_1 UNcorrelated Sub query in the FROM clause";
  196.   Select *
  197.   From  (Select name , sex , height
  198.            from MySchool as I
  199.            where I.sex='F');
  200. title "";
  201.  
  202. /*Example D_2*/
  203. PROC SQL _method _tree;
  204.   Title "Example D_2 UNcorretated Sub query in the WHERE clause";
  205.   Select name , sex, age
  206.   From MySchool as O
  207.   Where O.Sex=
  208.     (select sex
  209.       from ExtraInfo
  210.       having Sport=max(Sport)
  211.     );
  212. title "";
  213.  
  214. /*Example D_3*/
  215. PROC Sql;
  216.   Title "Example D_3 use join to replace UNcorretated Sub query in the FROM clause";
  217.   select name , Outer.sex, age
  218.   from
  219.      MySchool as Outer
  220.   Inner join
  221.     (select sex
  222.      From ExtraInfo
  223.      having Sport=max(Sport)
  224.      ) as sub
  225.   on Outer.sex=sub.sex;
  226.  
  227. /*Example D_4*/
  228. PROC SQL;
  229.   Title "Example D_4 Correlated Sub query in the Where clause";
  230.   select name , sex, age
  231.   from MySchool as O
  232.   where O.Age =
  233.     (select Age from ExtraInfo as I
  234.        Where I.sex EQ O.Sex
  235.        Having Sport=Max(sport)
  236.      );
  237. title "";
  238.  
  239. /*Example D_5*/
  240. /*Correlated query vs Join*/
  241. PROC SQL;
  242. /*This is a correlated query in the WHERE clause ** note I.sex  NE O.Sex */
  243.   Create table D05_Corr_Sub_Q as
  244.   Select name , sex, age
  245.   From MySchool as O
  246.   Where EXISTS
  247.     (select * from ExtraInfo as I  
  248.       having  I.age  = O.age
  249.       and I.sex  NE O.Sex
  250.      ) ;
  251.  
  252. PROC SQL;
  253.   /*This is a join query" ** note inner join */
  254.   Create table D05_Equivolent_Join as
  255.   Select O.name , O.sex, O.age
  256.   From
  257.      MySchool as O
  258.   Inner join
  259.      (select name, sex, age
  260.        from ExtraInfo
  261.       ) as WasSub
  262.   On O.Age=WasSub.Age
  263.      and O.sex NE WasSub.sex;
  264.  
  265.  PROC compare base=D05_Corr_Sub_Q compare =D05_Equivolent_Join;
  266.  title "Example D_5 This compares the resuolts from the correlated query and the join";
  267.  run;
  268. title "";
  269.  
  270. /*******************************************************************************************/
  271. /** Section E 5 OF 11) PLACEMENT OF SUB-QUERIES IN SQL SYNTAX **/
  272. /*******************************************************************************************/
  273. /*Example E_1*/
  274. PROC SQL;
  275.   title "Example E_1 Subquery returns a 1 BY 1 to one 'cell' as SQL processes EACH ROW in the Select";
  276.   title2 "Exploring what shapes of data can be returned to what parts of a SQL Query";
  277.   select O.name, O.age
  278.  ,(select I.age
  279.     from ExtraInfo I
  280.     where Name='Joy') as Age_of_Joy
  281.  , O.sex
  282.     From MySchool as O;
  283. title "";
  284.  
  285. /*Example E_2 INTENTIONAL FAIL*/
  286. PROC SQL;
  287.   title "Example E_2  Subquery returns A COLUMN OF DATA to one 'cell' as SQL processes EACH ROW in the Select";
  288.   title2 "Exploring what shapes of data can be returned to what parts of a SQL Query";
  289.   select O.name
  290.         , O.age  
  291.         ,(select DISTINCT I.age
  292.             from ExtraInfo I) as Returns_col
  293.         , O.sex
  294.   From MySchool as O;
  295. title "";
  296.  
  297. /*Example E_3 INTENTIONAL FAIL*/
  298. PROC SQL;
  299.   title "Example E_3 Subquery returns MULTIPLE ROWS to one 'cell' as SQL processes EACH ROW in the Select";
  300.   title2 "Exploring what shapes of data can be returned to what parts of a SQL Query";
  301.   select O.name
  302.        , O.age
  303.       /*Subquery returns multiple vars TO EACH ROW - in the Select*/
  304.       ,(select I.name, I.Sex, I.sport
  305.           from ExtraInfo as I
  306.           where name='Joy') as Mult_Vars
  307.        , O.sex
  308. From MySchool as O;
  309. title "";
  310.  
  311. /*Example E_4 INTENTIONAL FAIL*/
  312. PROC SQL;
  313.   title "Example E_4 Subquery returns A TABLE to one 'cell' as SQL processes EACH ROW in the Select";
  314.   title2 "Exploring what shapes of data can be returned to what parts of a SQL Query";
  315.   select O.name
  316.     ,O.age
  317.     /*Subquery returns a table - in the select*/
  318.     ,(select I.age , I.diet , I.sport  
  319.       from ExtraInfo as I)
  320.      , O.sex
  321.      From MySchool as O;
  322. title "";
  323.  
  324. /*Example E_5*/
  325. /* Shapes allowed in the Where and having*/
  326. PROC SQL;  
  327.   title "Example E_5 UNcorrelated Subquery returns A 1 by 1 to the Where";
  328.   title2 "Exploring what shapes of data can be returned to what parts of a SQL Query";
  329.   select *  
  330.    From MySchool as O
  331.    /*UNCoirrelated Subquery returns 1 by 1 in the Where or Having*/
  332.      where O.Age LE
  333.         (Select MAX(I.age)
  334.          from ExtraInfo as I
  335.          );
  336. title "";
  337.  
  338. /*Example E_6*/
  339. PROC SQL;  
  340.   title "Example E_6 UNcorrelated Subquery returns A COLUMN of data to the Where";
  341.   title2 "Exploring what shapes of data can be returned to what parts of a SQL Query";
  342.   select O.*
  343.    From MySchool as O
  344.      /*Subquery returns column - in the Where or Having*/
  345.      where O.Age IN
  346.       (Select distinct I.age
  347.         from ExtraInfo  as I
  348.        );
  349. title "";
  350.  
  351. /*Example E_7*/
  352. PROC SQL;  
  353.   title "Example E_7 Correlated Subquery returns A ROW of data to the Where";
  354.   title2 "Exploring what shapes of data can be returned to what parts of a SQL Query";
  355.   title3 "if you are in MySchool AND in ExraINFO, you show up ";
  356.   title4 "We lose Bob";
  357.    select O.*
  358.    From MySchool as O
  359.    /*Subquery returns column - in the Where or Having*/
  360.    where  Exists
  361.           (Select I.Name, I.sex, I.sport
  362.           from ExtraInfo as I
  363.           where O.name=I.name
  364.           );
  365. title "";
  366.  
  367. /*Example E_8*/
  368. Proc SQL;  
  369.   title1 "Example E_8 Correlated Subquery returns A TABLE of data to the Where";
  370.   title2 "Exploring what shapes of data can be returned to what parts of a SQL Query";
  371.   title3 "There is one or more people in ExtraInfo of the same sex - you show up in the report";
  372.     select *  
  373.     From MySchool as O
  374.     /*Subquery returns table - in the Where or Having*/
  375.     where Exists
  376.         (Select I.*
  377.          from ExtraInfo as I
  378.          where I.sex= O.sex
  379.         );
  380. title "";
  381.  
  382. /*******************************************************************************************/
  383. /** Section F - 6 OF 11) Views**/
  384. /*******************************************************************************************/
  385. /*Example F_1 Show Views being used*/
  386. Proc SQL;
  387. Create View F1_Old_guys as
  388. select name, sex, age
  389.     , Weight/2.2 as Wt_KG
  390. from SAShelp.class as c
  391. where sex='M' and age > 13;
  392. quit;
  393.  
  394. proc print data=F1_Old_guys; run;
  395. title "xample F_1 Show Views being used";
  396. run;
  397. title "";
  398.  
  399. /*Example F_2 Show Views being used*/
  400. PROC Gchart data=F1_Old_guys;
  401. title "Example F_2 Show Views being used";
  402. pie age /discrete; run;
  403.  
  404. PROC Univariate data=F1_Old_guys;
  405. var age; run;
  406. title "";
  407.  
  408. /*Example F_3 Views being chained*/
  409. PROC SQL;
  410.   create VIEW F3_boys as
  411.   select * from sashelp.class
  412.   where sex='M';
  413.  
  414. PROC SQL;
  415.   create VIEW F4_Old_boys as
  416.   select * from F3_boys
  417.   where age GE 14;
  418.  
  419. PROC SQL;
  420.   CREATE TABLE F05_Chained_Views as
  421. /*"This is the result of chained views";*/
  422. /*"Note that the log only shows one data set being created";*/
  423. select
  424.   'number of old boys' as tag
  425.   ,count(*) as Nmbr_old_boys
  426. from F4_Old_boys
  427. ;
  428. QUIT;
  429.  
  430. PROC PRINT data=F05_Chained_Views;
  431. title "Example F_3 Views being chained This is the result of chained views";
  432. title2 "Note that the log only showed one data set being created when we created F01_Chained_Views";
  433. run;
  434.  
  435. /*Example F_4 table vs view*/
  436. /*Show the diifference between a table of data and a view of data*/
  437. PROC SQL;
  438.   Create table F06_Class_table as
  439.   select *
  440.   from SASHelp.class;
  441.  
  442.   /*Use the SQL command "Describe" to see "what is in" F02_Class_table..Data */
  443.   Describe table F06_Class_table;
  444.  
  445.  
  446. PROC SQL;
  447.   Create view F06_Class_View as
  448.   select *
  449.   from SASHelp.class;
  450.  
  451.   /*Use the SQL command "Describe" to see "what is in" F02_Class_View..INSTRUCTIONS */
  452.   DESCRIBE VIEW F06_Class_View;
  453.  
  454.  
  455.  
  456. /*Example F_5  Making a perm View - handling the libname issue*/
  457. /*The View ITSELF must contain a libname because not evey user uses the same libnames*/
  458. options noxsync noxwait;
  459. x "mkdir C:\temp";            /*Make a dir*/
  460. x "mkdir C:\Perm_Views";      /*Make a dir*/
  461.  
  462. Libname OneSpot "C:\temp";    /*define libraries to the session*/
  463. Libname ViewLoc "C:\Perm_Views";
  464.  
  465. /*Place some data in C:\temp
  466.   at a place that the SAS session thinks of as OneSpot*/
  467. Data OneSpot.F03_Class_data;
  468. set SASHelp.Class; run;
  469.  
  470.  
  471. Proc SQL ;
  472. /*"This is a Permanent view so has a two part name";*/
  473.  Create view  ViewLoc.F04_Eng_2_Met    as
  474.     select   name  
  475.             ,Weight/2.2 as Wt_Kg
  476.             ,height*2.25 as Hgt_cm
  477.     from PermLoc.F03_Class_data
  478.         /*Get some data From C:\temp
  479.         A place that the View thinks of as PermLoc*/    
  480.        using libname PermLoc 'C:\TEMP';
  481. quit;
  482.  
  483. PROC Print data= ViewLoc.F04_Eng_2_Met;
  484. title "Example F_5  Making a perm View This is a Permanent view so has a two part name";
  485. run;
  486. title "";
  487.  
  488.  
  489.  
  490.  
  491.  
  492. /*******************************************************************************************/
  493. /** Section G - 7 OF 11) FUZZY MERGING */
  494. /*******************************************************************************************/
  495. /* Misspelling of names is very common
  496.    We have purchased physican information from two different sources (two conferences)
  497.     and want to find peopel who attended both conferences.
  498.     We will match on name and other characteristics  */
  499.  
  500. /*Example G_1  Fuzzy merging is common when you have to match up names and addresses*/
  501. Data G01_Doctors_2010;
  502. Infile datalines truncover firstobs=4;
  503. input @1  name $Char15.
  504.       @17 City $Char6.
  505.       @25 BrdCert $Char3.; /*BrdCert = board certified*/
  506. datalines;
  507. Name    CITY  Board Cert.
  508.          1         2         3
  509. 123456789012345678901234567890
  510. Dr. Sebastian   Paris   YES  
  511. Dr. O'Banion    Dublin  YES  
  512. Dr. Jaramillo   Madrid  YES  
  513. Dr. Mafume      Tokyo   YES  
  514. Dr. Lu          Boston   NO
  515. ;run;
  516.  
  517. Data G02_Doctors_2011;
  518. Infile datalines truncover firstobs=1;
  519. input @1  name $Char15.
  520.      @17 City $Char6.
  521.      @25 BrdCert $Char3.;
  522. datalines;
  523. Dr. Sebastian   Paris   YES
  524. Dr. O'Banionn   Dublin  YES
  525. Dr. Jaramillo   Madr d  YES
  526. Dr. Mafumee     T kyo   yES
  527. Dr. Lu          Boston   NO
  528. ;run;
  529.  
  530. Proc SQL;
  531.   title "Example G_1  Fuzzy merging";
  532.   select    ( (O.name=N.name)        *9
  533.               +(O.City=N.City)       *5
  534.               +(O.BrdCert=N.BrdCert) *3  /*BrdCert = board certified*/
  535.              ) as score
  536.         /* O stands for Old=2010 and N stands for New=2011*/
  537.             ,O.name as Old_N , O.city as Old_C , O.BrdCert as OBC
  538.             ,N.name as New_N , N.city as New_C , N.BrdCert as NBC
  539.  
  540.  
  541.         from G01_Doctors_2010 as O , G02_Doctors_2011 as N
  542.         having score >= 2   order by score desc ,O.name  ;
  543. title "";
  544. quit;
  545.  
  546. /*Example G_2  use a format to make the Fuzzy merging results more readable*/
  547. Proc format ;
  548. value Mtching
  549. 17="Name & City & Board"
  550. 12="Name & Board"
  551. 8="City & Board"
  552. 9="Name"
  553. 5="City"
  554. 3="Board only"
  555. 0="no Match";run;
  556.  
  557. Proc SQL;  
  558.   title "Example G_2  Fuzzy merging w format";
  559. select ((O.name=N.name)*9
  560.          +(O.City=N.City)*5
  561.         +(O.BrdCert=N.BrdCert)*3)
  562.             as score format=Mtching.
  563.         /* stands for Old-2010 and N stands for New=2011*/
  564.         ,O.name as ON , O.city as OC ,O.BrdCert as OBC
  565.         ,N.name as NN , N.city as NC ,N.BrdCert as NBC
  566.     from G01_Doctors_2010 as O , G02_Doctors_2011 as N
  567.     having score >= 2   order by score desc ,O.name;
  568. quit;
  569. title "";
  570.  
  571. /*******************************************************************************************/
  572. /** Section H - 8 OF 11) COALESCING  **/
  573. /*******************************************************************************************/
  574. /*Example H_1 missing values protagate and make report look bad
  575.  Using a coalesce to replace missings with zeros*/
  576. Proc SQL;
  577. create table H01_Nm_money
  578. ( name char(4)
  579.  ,balance  num );
  580.  
  581. insert into H01_Nm_money
  582.  values('russ' ,  .  )
  583.  values('joe' ,10000)
  584.  values('Chi' ,60000)
  585. ;
  586.  
  587. Proc SQL;/*INTENTIONAL ERROR*/
  588.   create table H02_interest1 as
  589.   select
  590.     name
  591.     , coalesce(balance,0) as balance2
  592.     , balance2*.05 as interest1 /*INTENTIONAL ERROR*/
  593.  
  594.   From H01_Nm_money;
  595. quit;/*INTENTIONAL ERROR*/
  596.  
  597. /*Example H_2 missing values propagate and make report look bad
  598.  Using a Calculated to fix problem*/
  599. Proc SQL;
  600.   create table H02_interest3 as
  601.   select
  602.     name
  603.     , coalesce(balance,0) as balance2
  604.     , calculated balance2*.05 as interest
  605.  
  606.   From H01_Nm_money;
  607. quit;
  608.  
  609. PROC Print data=H02_interest3;
  610.   title "Example H_1 Using a coalesce to replace missings with zeros";
  611. run;
  612. title "" ;
  613.  
  614. /*Example H_3 A Coalescing issue Russ is spelled differently and the coalesce does not work*/
  615. PROC SQL;
  616.   create table H05_Nm_job_Mismatch
  617.   ( name char(4)
  618.    ,job  char(5) );
  619.  
  620. insert into H05_Nm_job_Mismatch
  621.   values('Russ','Geek')
  622.   values('Joe' ,'Prgmr')
  623.   values('Chi' ,'Mgr.')
  624. ;
  625.  
  626. PROC SQL;
  627.   create table H06_Nm_Time
  628.   ( name char(4)
  629.    ,Time_W_Co num );
  630.  
  631. insert into H06_Nm_Time
  632.   values('russ',6)
  633.   values('Chi' ,8)
  634. ;
  635.  
  636. Proc SQL;
  637.  Create table H07_name_Job_Time_Mismatch as
  638.  Select
  639.     coalesce (J.name,T.name) as name
  640.     ,j.job as job
  641.     ,coalesce(T.Time_W_Co, 0) as time_w_co
  642.  
  643.   From
  644.      H05_Nm_job_Mismatch as J
  645.        left join
  646.      H06_Nm_Time as T
  647.   On j.name=t.name;
  648.     ;
  649.  
  650. Proc print data=H07_name_Job_Time_Mismatch;
  651.   title "Example H_2 A Coalescing issue Russ is spelled differently and the coalesce does not work";
  652. run;
  653.  
  654. /************************************************************/
  655. /*Example H_4 Using a coalesce to get the most recent data*/
  656. /*We have three Years of donation data and want most recent info (people have an ID*/
  657. Data H08_Yr2006;
  658.  infile datalines truncover
  659.   firstobs=3;
  660.   input @1 ID
  661.         @5 Name   $char6.
  662.         @15 state $char6.;  
  663. Datalines;
  664. ID  Name      State  
  665. 12345678901234567890
  666. 001 Robert    TN2006
  667. 002 Calvin    NH2006
  668. 005 Carl      NJ2006
  669. 007 Earl      NY2006
  670. 008 Ell       DE2006
  671. 025 Ted       WI2006
  672. ;
  673. run;
  674.  
  675. data H09_Yr2005;
  676. infile datalines truncover
  677.   firstobs=3;
  678. input @1 ID
  679.       @5 Name   $char6.
  680.       @15 state $char6.;  
  681. Datalines;
  682. ID      Name        State  
  683. 12345678901234567890
  684. 001 Bob       PA2005
  685. 002 Cal       NH2005
  686. 005 Carl      NJ2005
  687. 006 Errol     CA2005
  688. 020 Sue       NJ2005
  689. ;
  690. run;
  691.  
  692. data H10_Yr2004;
  693. infile datalines truncover
  694.      firstobs=3;
  695. input @1 ID
  696.       @5 Name $char6.
  697.       @15 state $char6.;  
  698. Datalines;
  699. ID      Name        State  
  700. 12345678901234567890
  701. 001 Bob       PA2004
  702. 005 Carl      NJ2004
  703. 010 Fan       DE2004
  704. 011 Mike      PA2004
  705. ;
  706.  
  707. Proc SQL;
  708. create table H11_current as
  709.  select
  710.   coalesce(six.ID   ,Five.ID    ,Four.ID)    as Coalesced_ID
  711.  ,coalesce(six.name ,Five.name  ,Four.name)  as Most_recent_name
  712.  ,coalesce(six.State,Five.State ,Four.State) as Most_recent_Add
  713.  
  714. from H08_yr2006 as six
  715.   full join
  716.      H09_yr2005 as five on six.id=five.ID
  717.   full join
  718.      H10_yr2004 as four on (four.ID=six.ID or four.id=five.id)
  719.    ORDER BY Coalesced_ID;
  720.  
  721. PROC Print data=H11_Current;
  722.   title "Example H_3 Using a coalesce to get the most recent data";
  723. run;
  724. title "";
  725.  
  726. /*******************************************************************************************/
  727. /** Section I - 9 OF 11) FINDING DUPLICATES **/
  728. /*******************************************************************************************/
  729. /* This is a very useful, and fairly simple, bit of SAS code */
  730. /*Example I_1  Flexible way of finding and a easily understood reporting on duplicates*/
  731. Data I01_DataW_duplicates;
  732. infile datalines truncover firstobs=4  ;
  733. input @1  name   $Char6.
  734.       @9  Sex    $char1.
  735.       @13 Age         2.
  736.       @17 height    4.1
  737.       @25 Weight    5.1
  738.       ;
  739. datalines ;
  740. Name  Sex  Age  Height Weight
  741.          1         2         3         4         5
  742. 12345678901234567890123456789012345678901234567890
  743. Jane    F    .   59.8    84.5
  744. Alfred  M   14   69.0   112.5
  745. Carol   F   14   62.8   102.5
  746. Fred    M   12   57.3    83.0
  747. Jane    F   12     .       .5
  748. Alfred  M   99   69.0   112.5
  749. Louise  F   12   56.3    77.0
  750. Jane    F   12   59.8    84.5
  751. ;
  752. run;
  753.  
  754. PROC SQL;
  755. title "Example I_1  Flexible way of finding and a easily understood reporting on duplicates";
  756.  select I01_in.Number_Of_Dupes
  757.        ,I01_Out.*
  758.  from  
  759.   I01_DataW_duplicates as I01_Out
  760.    inner join
  761.   (select name, sex, count(*) as Number_Of_Dupes
  762.     from I01_DataW_duplicates
  763.     group by name, sex
  764.       having Number_Of_Dupes >1)  as I01_in
  765.       on I01_in.name=I01_Out.name
  766.        and I01_in.sex=I01_out.sex
  767.    order by Number_Of_Dupes desc , name, sex;
  768. title "";
  769.  
  770. /*******************************************************************************************/
  771. /** Section J - 10 of 11) REFLEXIVE JOINS **/
  772. /*******************************************************************************************/
  773. /*This is the classic example of using a reflexive join to find a person's boss*/
  774. /*Example J_1  The common reflexive join*/
  775. PROC SQL;
  776. /*Here is the data set - the ida is to */
  777. create table J01_employees
  778.           (EmpNo    num
  779.            ,job    char(15)
  780.            ,name   char(15)
  781.            ,SupervisorEmpNo num
  782.           );
  783. insert into J01_employees
  784. values(1,  "1_Pres"            ,"Goodnight" ,.)
  785. values(4,  "2_V.P. Sales"      ,"Kurd"      ,1)
  786. values(6,  '2_V.P. R&D'        ,"Church"    ,1)
  787. values(8,  "2_CFO"             ,"Lee"       ,1)
  788. values(14, "3_Salesman"        ,"Wang"      ,4)
  789. values(18, "3_Salesman"        ,"Rama"      ,4)
  790. values(26, "3_Chemist"         ,"Levin"     ,6)
  791. values(28, "3_Metalurgist"     ,"Klien"     ,6)
  792. values(31, "3_Acntg. Mgr"      ,"Dowd"      ,8)
  793. values(36, "3_Acntg. Mgr"      ,"Shu"       ,8)
  794. ;
  795.  
  796. Proc SQl;
  797. Select empl.EmpNo
  798.      , empl.job
  799.      , empl.name
  800.      , Rpt2.name as supervisor
  801.      , Rpt2.job as supv_job
  802. from J01_employees as empl
  803.    inner join
  804.      J01_employees as Rpt2
  805.      on empl.supervisorEmpNo=Rpt2.EmpNo
  806. order by supv_job;
  807.  
  808.  
  809. /*Example J_2  Finding connecting flights to get you home********************/
  810. proc SQl;
  811. Title  "Example J_2  Finding connecting flights to get you from  LA home to Phila";
  812. Title2 "Use a reflexive join to find the fastest epath through a network";
  813. create table J02_Flights
  814.   (origin           Char(3)
  815.   ,flight           num
  816.   ,Destination      char(3)
  817.   ,time         num);
  818. insert into J02_Flights
  819. values("SFO",111,"CHI",240)   /*San Fran to Chicago*/
  820. values("LAX",111,"CHI",210)   /*LA to to All Chicago*/
  821. values("LAX",121,"NOH",220)   /*LA to Just  O'Hare*/
  822. values("LAX",131,"CAK",266)   /*LA to Akron */
  823. values("CHI",241,"PHL",145)   /*All Chicago to Philadelphia*/
  824. values("NOH",201,"PHL",167)   /*O'Hare to Phila*/
  825. values("CAK",201,"PHL",145)   /*Akron to Phila*/
  826. values("CAK",201,"EWK",145);  /*Akron to Newark*/
  827. ;
  828.  
  829. proc SQL;
  830. select  wc.origin as WCStart            /*West Coast Start airport*/
  831.          ,  wc.flight as WCFlight       /*West Coast Flight*/
  832.            , wc.time as WCTime          /*West Coast time*/
  833.               , wc.Destination as WCEnd /*West Coast ending airport*/
  834.        , "->" as spacer label="#"
  835.        ,ec.origin as ReStart                /*East coast Start Airport*/
  836.            ,ec.flight as ECFlight           /*East Coast Flight*/
  837.               ,ec.time as ECTime            /*East Coast time*/
  838.                   ,ec.Destination as ECEnd  /*East Coast ending airport*/
  839.      ,  (ec.time+wc.time) as TotalTime      /*flying time*/
  840.   from J02_Flights as wc  inner join  J02_Flights as ec
  841.   on wc.Destination=ec.origin and WC.origin="LAX" and EC.Destination="PHL"
  842.   order by totalTime desc;
  843.   quit;
  844. title "";
  845.  
  846.  
  847. /*******************************************************************************************/
  848. /** Section K 11 of 11) USE SQL DICTIONARY TABLES TO DOCUMENT YOUR DATA SETS IN A HYPERLINKED EXCEL WORKBOOK*/
  849. /*******************************************************************************************/
  850. /********ONLY WORKS FOR SAS 9.4 tm3 - tm4 and tm5 need slightly different coding *********************/
  851. %MACRO Check_by_VDG_V94_TM3(LibName=SASHelp /*<-- only use upper case letters*/
  852.                  );
  853. /*excel limit 1,048,576 rows by 16,384 columns*/
  854. %local lp Libname FileList DateList NObsList SizeList Lenlist NOfVarslist;
  855. %local            ThisFile ThisDate ThisNObs ThisSize ThisLen ThisNVar;
  856. %let Libname=%UpCase(&LibName);
  857. %put Libname=&Libname;
  858.  
  859. ods _all_ close;
  860. %let path2lib = %sysfunc(pathname(&LibName));
  861. ODS Excel File="C:\HOW\Lavery\Contents_of_&Libname..xlsx "
  862.            nogtitle nogfootnote style=HTMLBlue ;
  863. ODS Excel options(embedded_titles='yes' embedded_footnotes='yes');
  864.  
  865. Proc SQL noprint /*inobs=10*/;
  866. select memname , modate, nobs, filesize, obslen , NVar
  867.   into  :filelist separated by " "
  868.         ,:DateList separated by " "
  869.         ,:NObsList separated by " "
  870.         ,:SizeList separated by " "
  871.         ,:Lenlist separated by " "
  872.         ,:NOfVarslist separated by " "
  873.  from Dictionary.tables
  874.  /*Below will eliminate views and graphic data types*/
  875.  where libname="&Libname" & MEMTYPE="DATA" and typemem ="DATA" and nobs GE 0;
  876. %put filelist  = &filelist   ;
  877. %put DateList  = &DateList   ;
  878. %put NObsList  = &NObsList   ;
  879. %put SizeList  = &SizeList   ;
  880. %put Lenlist   = &Lenlist    ;
  881. %put NOfVarslist=&NOfVarslist;
  882.  
  883.  /*this is the list of all the tables and goes on the first tab*/
  884. ods Excel options(sheet_name="List_of_tables_in_lib" );
  885. Proc report data=sashelp.vtable /*(obs=10)*/ nowd;
  886. title "Tables in this library (&Libname) and workbook";
  887. title2 "S=sorted ** SK= sorted with no duplicate key values ** SR - sorted with no duplicate records";
  888. title3 "YES | CHAR= compresses (variable-length records) by SAS using RLE (Run Length Encoding). Compresses repeated comsiecutive characters.";
  889.  
  890. title4 "Binary=obs. compresses (variable-length records) by SAS using RLE (Run Length Encoding).";
  891.  Column  ('Click MemName to see desired data' libname memname MEMTYPE modate typemem nobs filesize obslen NVar
  892.                                               indxtype  sorttype  sortchar compress pcompress);
  893.  compute memname;
  894.      urlstring="#'"||strip(memname)||"'!A1";
  895.     call define(_col_,'url',urlstring);
  896.     call define(_col_,'style', 'style=[textdecoration=underline]');
  897.  endcomp;
  898.  where libname="&Libname" & MEMTYPE="DATA" and typemem ="DATA" and nobs GE 0;
  899. run;quit;
  900.  
  901. /***/
  902. ods Excel options(sheet_name="List_of_indexes_lib" );
  903. Proc Report data=sashelp.vindex(obs=10) ;
  904. title "Indeces in this workbook";
  905. where libname="&Libname" & MEMTYPE="DATA" ;
  906. run;quit;
  907.  
  908.  
  909. /*Title    link="#'List_of_tables_in_lib'!A1" '(Click to return to list of tables (first tab))';*/
  910. /*Footnote link="#'List_of_tables_in_lib'!A1" '(Click to return to list of tables (first tab))';*/
  911.  
  912. %let lp=1;
  913.  %do %while(%scan(&filelist,&Lp) NE);
  914.     %let ThisFile = %scan(&filelist,&Lp);
  915.     %let ThisDate = %scan(&DateList,&Lp);
  916.     %let ThisNObs = %scan(&NObsList,&Lp);
  917.     %let ThisSize = %scan(&SizeList,&Lp);
  918.     %let ThisLen  = %scan(&Lenlist,&Lp);
  919.     %let ThisNVar = %scan(&NOfVarslist,&Lp);
  920.  
  921.     ods excel options(sheet_interval='table');
  922.     ods exclude all;
  923.        data _null_;
  924.        declare odsout obj();
  925.        run;
  926.  
  927.    ods select all;
  928.    ods excel options(sheet_interval='none' sheet_name="&ThisFile" );
  929.  
  930.  
  931.      title "&Libname &ThisFile: rows=&ThisNObs NVars=&ThisNVar ModDate=&ThisDate Size=&ThisSize Obslen=&ThisLen";
  932.      Title    link="#'List_of_tables_in_lib'!A1" '(Click to return to list of tables (first tab))';
  933.      Footnote link="#'List_of_tables_in_lib'!A1" '(Click to return to list of tables (first tab))';
  934.      *footnote2 "&libname is: &path2lib and workbook" ;
  935.     Proc Report data=sashelp.VColumn nowd;
  936.        Column
  937.          libname memname memtype name type length npos varnum label format informat
  938.          idxusage sortedby xtype notnull precision scale transcode diagnostic ;
  939.        where libname="&Libname" & MemName="&ThisFile";
  940.        run;quit;
  941.  
  942.        title "&Libname &ThisFile: rows=&ThisNObs NVars=&ThisNVar :: SHOW ten obs" ;
  943.        Proc print data=&Libname..&ThisFile(obs=10) ;
  944.        run;quit;
  945.        title "";
  946.  
  947.      %let Lp = %eval(&Lp+1);
  948. %end;
  949. ods Excel Close;
  950. %MEND Check_by_VDG_V94_TM3;
  951.  
  952. %Check_by_VDG_V94_TM3(Libname=SASHELP /*<-- only use upper case letters*/
  953.            );
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement