Advertisement
anchormodeling

Polymorphic Graph Queries in SQL Server

Jun 25th, 2018
9,825
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
T-SQL 12.77 KB | None | 0 0
  1. use Hierarchy;
  2. go
  3.  
  4. /*
  5.  
  6.     This is an example showing a use case of polymorphic queries over a network
  7.     graph solved by some HIERARCHYID and string manipulation trickery.
  8.  
  9.     If you are not interested in how the test data is set up, skip directly
  10.     to the "BUILD THE HIERARCHY" section.
  11.  
  12.     2018-06-25  Lars Rönnbäck     CREATED
  13.  
  14. */
  15. SET NOCOUNT ON;
  16.  
  17. -- determine some boundaries
  18. declare @numberOfDevices int = 100;
  19. declare @maxIterations   tinyint = 4;
  20. declare @minPing         smallint = 2;
  21. declare @maxPing         smallint = 100;
  22.  
  23. ---------------------------- SET UP THE TEST DATA ----------------------------
  24.  
  25. if OBJECT_ID('Relationship',   'U') is not null drop table Relationship;
  26. if OBJECT_ID('Device',         'U') is not null drop table Device;
  27. if OBJECT_ID('DeviceType',     'U') is not null drop table DeviceType;
  28.  
  29. create table DeviceType (
  30.     DeviceType_Id tinyint identity(1,1) not null,
  31.     DeviceType varchar(42) not null,
  32.     Connector bit not null,       -- used to connect non-connectors
  33.     Connections tinyint not null, -- may connect to so many devices
  34.     Commonality tinyint not null, -- relative commonality among devices
  35.     constraint pk_DeviceType primary key (
  36.         DeviceType_Id
  37.     ),
  38.     constraint uq_DeviceType unique (
  39.         DeviceType
  40.     )
  41. );
  42.  
  43. insert into DeviceType
  44. (DeviceType, Connector, Connections, Commonality) values
  45. ('Computer', 0,         1,           8), -- 8 times more common
  46. ('Switch',   0,         4,           1),
  47. ('Router',   0,         4,           1),
  48. ('Wireless', 1,         4,           1),
  49. ('Wired',    1,         2,           2); -- twice as common
  50.  
  51. create table Device (
  52.     Device_Id int not null,
  53.     DeviceType_Id tinyint not null
  54.         references DeviceType(DeviceType_Id),
  55.     constraint pk_Device primary key (
  56.         Device_Id
  57.     )
  58. );
  59.    
  60. declare @totalCommonality int = (
  61.     select sum(Commonality) from DeviceType
  62. );
  63. DECLARE @rows int;
  64.  
  65. -- generate random devices
  66. with deviceGenerator as (
  67.     select
  68.         1 as Device_Id,
  69.         cast(rand(checksum(newid())) * @totalCommonality as int) as Commonality
  70.     union all
  71.     select
  72.         dg.Device_Id + 1 as Device_Id,
  73.         cast(rand(checksum(newid())) * @totalCommonality as int) as Commonality
  74.     from
  75.         deviceGenerator dg
  76.     where
  77.         dg.Device_Id < @numberOfDevices
  78. )
  79. insert into Device (
  80.     Device_Id,
  81.     DeviceType_Id
  82. )
  83. select
  84.     Device_Id,
  85.     DeviceType_Id
  86. from
  87.     deviceGenerator g
  88. join (
  89.     select
  90.         DeviceType_Id,
  91.         sum(Commonality) over (order by DeviceType_Id) - Commonality as Commonality_From,
  92.         sum(Commonality) over (order by DeviceType_Id) - 1 as Commonality_To
  93.     from
  94.         DeviceType
  95. ) dt
  96. on
  97.     g.Commonality between dt.Commonality_From and dt.Commonality_To
  98. option (maxrecursion 0);
  99.  
  100. set @rows = @@ROWCOUNT;
  101. print 'Number of devices: ' + cast(@rows as varchar(10));
  102.  
  103. -- select top 100 * from Device;
  104. -- print some statistics of which devices were created
  105. select
  106.     dt.DeviceType_Id,
  107.     dt.DeviceType,
  108.     count(*) as NumberOfDevices
  109. from   
  110.     Device d
  111. join
  112.     DeviceType dt
  113. on
  114.     dt.DeviceType_Id = d.DeviceType_Id
  115. group by
  116.     dt.DeviceType_Id,
  117.     dt.DeviceType;
  118.  
  119. -- source devices should connect to target devices
  120. if OBJECT_ID('tempdb..#sources') is not null
  121. drop table #sources;
  122.  
  123. select
  124.      dt.Device_Id,
  125.      dtt.Connections
  126. into #sources
  127. from Device dt
  128. join DeviceType dtt
  129.   on dtt.DeviceType_Id = dt.DeviceType_Id
  130.  and dtt.Connector = 1;
  131.  
  132. set @rows = @@ROWCOUNT;
  133. print 'Number of sources: ' + cast(@rows as varchar(10));
  134.  
  135. -- and these are the targets
  136. if OBJECT_ID('tempdb..#targets') is not null
  137. drop table #targets;
  138.  
  139. select
  140.      dt.Device_Id,
  141.      dtt.Connections
  142. into #targets
  143. from Device dt
  144. join DeviceType dtt
  145.   on dtt.DeviceType_Id = dt.DeviceType_Id
  146.  and dtt.Connector = 0;
  147.  
  148. set @rows = @@ROWCOUNT;
  149. print 'Number of targets: ' + cast(@rows as varchar(10));
  150.  
  151. if OBJECT_ID('tempdb..#network') is not null
  152. drop table #network;
  153.  
  154. create table #network (
  155.     Device_Id_Source int,
  156.     Connections_Source tinyint,
  157.     Device_Id_Target int,
  158.     Connections_Target tinyint,
  159.     Iteration smallint,
  160.     Ping smallint
  161. );
  162.  
  163. DECLARE @iteration smallint = 1;
  164. DECLARE @continue bit = 1;
  165.  
  166. -- this loop will build a network, with some stopping conditions
  167. while(@continue = 1)
  168. begin
  169.     print 'Iteration: ' + cast(@iteration as varchar(10));
  170.     with network as (
  171.         select
  172.              ds.Device_Id as Device_Id_Source,
  173.              ds.Connections - count(dtr.Device_Id) over (
  174.                  partition by ds.Device_Id
  175.              ) as Connections_Source,
  176.              dtr.Device_Id as Device_Id_Target,
  177.              dtr.Connections as Connections_Target,
  178.              @iteration as Iteration,
  179.              cast(rand(checksum(newid())) * (@maxPing - @minPing) + @minPing as smallint) as Ping    
  180.         from #sources ds
  181.         cross apply (
  182.             select
  183.                  dt.Device_Id,
  184.                  dt.Connections,
  185.                  row_number() over (
  186.                     order by -- force different random values (salt with Device_Id)
  187.                         rand(checksum(ds.Device_Id) + checksum(newid()))
  188.                  ) as Random_Order
  189.             from #targets dt
  190.         ) dtr
  191.         where
  192.             dtr.Random_Order <= ds.Connections
  193.     )
  194.     insert into #network (
  195.         Device_Id_Source,
  196.         Connections_Source,
  197.         Device_Id_Target,
  198.         Connections_Target,
  199.         Iteration,
  200.         Ping
  201.     )
  202.     select distinct
  203.         Device_Id_Source,
  204.         Connections_Source + Removals as Connections_Source,
  205.         Device_Id_Target,
  206.         Connections_Target,
  207.         Iteration,
  208.         Ping
  209.     from (
  210.         select
  211.             *,
  212.             count(case when TargetCounter > Connections_Target then 1 end)
  213.             over (partition by Device_Id_Source) as Removals
  214.         from (
  215.             select
  216.                 *,
  217.                 row_number() over (
  218.                     partition by
  219.                         Device_Id_Target
  220.                     order by
  221.                         rand(checksum(newid()))
  222.                 ) as TargetCounter
  223.             from
  224.                 network
  225.         ) n
  226.     ) n
  227.     where
  228.         TargetCounter <= Connections_Target;
  229.    
  230.     set @rows = @@ROWCOUNT;
  231.     print 'Network growth: ' + cast(@rows as varchar(10));
  232.    
  233.     truncate table #sources;
  234.  
  235.     insert into #sources (
  236.         Device_Id,
  237.         Connections
  238.     )
  239.     select distinct
  240.         Device_Id_Source,
  241.         Connections_Source 
  242.     from
  243.         #network
  244.     where
  245.         Iteration = @iteration
  246.     and
  247.         Connections_Source > 0;
  248.  
  249.     set @rows = @@ROWCOUNT;
  250.     if(@rows = 0) set @continue = 0;
  251.  
  252.     delete t
  253.     from
  254.         #targets t
  255.     join (
  256.         select
  257.             Device_Id_Target,
  258.             count(*) as Connections
  259.         from
  260.             #network
  261.         group by
  262.             Device_Id_Target
  263.     ) x
  264.     on
  265.         t.Device_Id = x.Device_Id_Target
  266.     and
  267.         t.Connections <= x.Connections;
  268.    
  269.     set @rows = (select count(*) from #targets);
  270.     if(@rows = 0) set @continue = 0;
  271.  
  272.     set @iteration = @iteration + 1;
  273.     if(@iteration > @maxIterations) set @continue = 0;
  274. end
  275.  
  276. create table Relationship (
  277.     Device_Id_Source int not null references Device(Device_Id),
  278.     Device_Id_Target int not null references Device(Device_Id),
  279.     Ping int not null,
  280.     constraint pk_Relationship primary key (
  281.         Device_Id_Source,
  282.         Device_Id_Target
  283.     )
  284. )
  285.  
  286. -- given the network, build a traditional hierarchy (parent-child)
  287. insert into Relationship (
  288.     Device_Id_Source,
  289.     Device_Id_Target,
  290.     Ping
  291. )
  292. select
  293.     Device_Id_Source,
  294.     Device_Id_Target,
  295.     min(Ping)
  296. from (
  297.     select
  298.         Device_Id_Source,
  299.         Device_Id_Target,
  300.         Ping
  301.     from
  302.         #network
  303.     union all -- both ways
  304.     select
  305.         Device_Id_Target,
  306.         Device_Id_Source,
  307.         Ping
  308.     from
  309.         #network
  310. ) n
  311. group by
  312.     Device_Id_Source,
  313.     Device_Id_Target;
  314.  
  315. -- select * from Relationship;
  316. ------------------------------------------------------------------------------
  317.  
  318. /*
  319.  
  320.     The HIERARCHYID data type expects a path described as /node/node/node/.
  321.     This textual representation is compacted into as compact binary form
  322.     as possibly by the SQL Server internals.
  323.  
  324.     Three columns will be created:
  325.     DevicePath containing the Device_Id:s of the nodes.
  326.     TypePath containing the DeviceType_Id:s of the nodes.
  327.     PingPath containing the Ping between two adjacent nodes.
  328.  
  329.     Note that it is sufficient to store the longest path /1/2/3/4/ and
  330.     none of its sub-paths /1/2/3/, /2/3/4/, /1/2/, /3/4/, and /2/3/.
  331.  
  332. */
  333.  
  334. ----------------------------- BUILD THE HIERARCHY ----------------------------
  335. IF OBJECT_ID('Hierarchy') IS NOT NULL
  336. DROP TABLE Hierarchy;
  337.  
  338. CREATE TABLE Hierarchy (
  339.     DevicePath HIERARCHYID not null,
  340.     TypePath HIERARCHYID not null,
  341.     PingPath HIERARCHYID not null
  342. );
  343.  
  344. --- recursively iterate through the relationships
  345. with traverser as (
  346.     select 
  347.         r.Device_Id_Target as Device_Id_Next,
  348.         cast(
  349.             '/' +
  350.             cast(r.Device_Id_Source as varchar(10)) +
  351.             '/' +
  352.             cast(r.Device_Id_Target as varchar(10)) +
  353.             '/'
  354.         as hierarchyid) as DevicePath,
  355.         cast(
  356.             '/' +
  357.             cast(ds.DeviceType_Id as varchar(10)) +
  358.             '/' +
  359.             cast(dt.DeviceType_Id as varchar(10)) +
  360.             '/'
  361.         as hierarchyid) as TypePath,
  362.         cast(
  363.             '/0/' + -- first device has 0 ping to itself
  364.             cast(r.Ping as varchar(10)) +
  365.             '/'
  366.         as hierarchyid) as PingPath
  367.     from   
  368.         Relationship r
  369.     join
  370.         Device ds
  371.     on
  372.         ds.Device_Id = r.Device_Id_Source
  373.     join
  374.         Device dt
  375.     on
  376.         dt.Device_Id = r.Device_Id_Target
  377.     union all
  378.     select 
  379.         r.Device_Id_Target as Device_Id_Next,
  380.         cast(
  381.             t.DevicePath.ToString() + -- append
  382.             cast(r.Device_Id_Target as varchar(10)) +
  383.             '/'
  384.         as hierarchyid) as DevicePath,
  385.         cast(
  386.             t.TypePath.ToString() + -- append
  387.             cast(dt.DeviceType_Id as varchar(10)) +
  388.             '/'
  389.         as hierarchyid) as TypePath,
  390.         cast(
  391.             t.PingPath.ToString() + -- append
  392.             cast(r.Ping as varchar(10)) +
  393.             '/'
  394.         as hierarchyid) as PingPath
  395.     from   
  396.         traverser t
  397.     join
  398.         Relationship r
  399.     on
  400.         r.Device_Id_Source = t.Device_Id_Next
  401.     and -- avoid circular references
  402.         PATINDEX(
  403.             '%/' +
  404.             cast(r.Device_Id_Target as varchar(10)) +
  405.             '/%',
  406.             t.DevicePath.ToString()
  407.         ) = 0
  408.     join
  409.         Device ds
  410.     on
  411.         ds.Device_Id = r.Device_Id_Source
  412.     join
  413.         Device dt
  414.     on
  415.         dt.Device_Id = r.Device_Id_Target
  416. )
  417. insert into Hierarchy (
  418.     DevicePath,
  419.     TypePath,
  420.     PingPath
  421. )
  422. select
  423.     DevicePath,
  424.     TypePath,
  425.     PingPath
  426. from
  427.     traverser;
  428.  
  429. -- reduce redundant paths (keep longest, remove sub-paths)
  430. DELETE Hierarchy
  431. WHERE DevicePath IN (
  432.     SELECT DISTINCT
  433.         hx.DevicePath
  434.     FROM
  435.         Hierarchy h
  436.     JOIN
  437.         Hierarchy hx
  438.     ON
  439.         hx.DevicePath <> h.DevicePath
  440.     AND
  441.         PATINDEX(
  442.             '%' +
  443.             hx.DevicePath.ToString() +
  444.             '%',
  445.             h.DevicePath.ToString()
  446.         ) > 0
  447. );
  448.  
  449. /*
  450.  
  451.     A polymorphic query is one in which you want to find any two nodes having
  452.     certain properties and that are connected in some way through a path.
  453.     Note that graph tables in SQL Server 2017 do not yet support such
  454.     queries.
  455.  
  456.     1. How many computers in the network are at some point connected
  457.        through wireless?
  458.  
  459.     2. List these paths, along with the number of hops between the
  460.        computers and the total ping. Taking the minimal ping between
  461.        two computers will actually find you the most efficient path,
  462.        similar to the traveling salesman problem.
  463.  
  464.     3. Find all Device_Id:s of the wireless connectors found in
  465.        the paths from the previous question.
  466.  
  467.     All these queries are solved using search in and manipulation
  468.     of the string representations of the HIERARCHYID. While the string
  469.     queries may not be optimal from a performance perspective it is
  470.     hard to find a more compact way to store the network in a way
  471.     that allows for ad-hoc polymorphic queries.
  472.  
  473. */
  474.  
  475. ----------------------------- QUERYING THE DATA ----------------------------
  476.  
  477.  -- count how many computers are at some point connected through wireless
  478. SELECT
  479.     count(*) as Computers
  480. FROM
  481.     Hierarchy h
  482. WHERE
  483.     -- pattern search in the DeviceType_Id path
  484.     PATINDEX('%1%4%1%', h.TypePath.ToString()) > 0;
  485.  
  486. -- count the number of hops between computers at some point connected through wireless
  487. -- and find the total ping between them
  488. SELECT
  489.     DevicePath.ToString() as DevicePath,
  490.     TypePath.ToString() as TypePath,
  491.     -- account for inital and ending / by subtracting 2
  492.     len(DevicePath.ToString()) - len(REPLACE(DevicePath.ToString(), '/', '')) - 2 as Hops,
  493.     PingPath.ToString() as PingPath,
  494.     -- use the string_split function to get all values
  495.     (
  496.         select sum(cast([value] as int))
  497.         from string_split(PingPath.ToString(), '/')
  498.     ) as TotalPing
  499. FROM
  500.     Hierarchy h
  501. WHERE
  502.     -- pattern search in the DeviceType_Id path
  503.     PATINDEX('%1%4%1%', h.TypePath.ToString()) > 0;
  504.  
  505. -- find the Device_Id:s of the wireless connectors that have computers on both sides
  506. SELECT
  507.     h.DevicePath.ToString() as DevicePath,
  508.     h.TypePath.ToString() as TypePath,
  509.     cast(d.Device_Id as int) as Device_Id
  510. FROM
  511.     Hierarchy h
  512. CROSS APPLY (
  513.     -- use string_split to get the indexes of the wireless connectors
  514.     select
  515.         [value] as DeviceType,
  516.         row_number() over (order by (select 1)) as idx
  517.     from
  518.         string_split(h.TypePath.ToString(), '/')
  519. ) t
  520. CROSS APPLY (
  521.     -- use string_split again to find the corresponding Device_Id:s
  522.     select
  523.         [value] as Device_Id,
  524.         row_number() over (order by (select 1)) as idx
  525.     from
  526.         string_split(h.DevicePath.ToString(), '/')
  527. ) d    
  528. where
  529.     t.DeviceType = 4 -- wireless
  530. and
  531.     d.idx = t.idx
  532. and
  533.     -- pattern search in the DeviceType_Id path
  534.     PATINDEX('%1%4%1%', h.TypePath.ToString()) > 0;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement