Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- use Hierarchy;
- go
- /*
- This is an example showing a use case of polymorphic queries over a network
- graph solved by some HIERARCHYID and string manipulation trickery.
- If you are not interested in how the test data is set up, skip directly
- to the "BUILD THE HIERARCHY" section.
- 2018-06-25 Lars Rönnbäck CREATED
- */
- SET NOCOUNT ON;
- -- determine some boundaries
- declare @numberOfDevices int = 100;
- declare @maxIterations tinyint = 4;
- declare @minPing smallint = 2;
- declare @maxPing smallint = 100;
- ---------------------------- SET UP THE TEST DATA ----------------------------
- if OBJECT_ID('Relationship', 'U') is not null drop table Relationship;
- if OBJECT_ID('Device', 'U') is not null drop table Device;
- if OBJECT_ID('DeviceType', 'U') is not null drop table DeviceType;
- create table DeviceType (
- DeviceType_Id tinyint identity(1,1) not null,
- DeviceType varchar(42) not null,
- Connector bit not null, -- used to connect non-connectors
- Connections tinyint not null, -- may connect to so many devices
- Commonality tinyint not null, -- relative commonality among devices
- constraint pk_DeviceType primary key (
- DeviceType_Id
- ),
- constraint uq_DeviceType unique (
- DeviceType
- )
- );
- insert into DeviceType
- (DeviceType, Connector, Connections, Commonality) values
- ('Computer', 0, 1, 8), -- 8 times more common
- ('Switch', 0, 4, 1),
- ('Router', 0, 4, 1),
- ('Wireless', 1, 4, 1),
- ('Wired', 1, 2, 2); -- twice as common
- create table Device (
- Device_Id int not null,
- DeviceType_Id tinyint not null
- references DeviceType(DeviceType_Id),
- constraint pk_Device primary key (
- Device_Id
- )
- );
- declare @totalCommonality int = (
- select sum(Commonality) from DeviceType
- );
- DECLARE @rows int;
- -- generate random devices
- with deviceGenerator as (
- select
- 1 as Device_Id,
- cast(rand(checksum(newid())) * @totalCommonality as int) as Commonality
- union all
- select
- dg.Device_Id + 1 as Device_Id,
- cast(rand(checksum(newid())) * @totalCommonality as int) as Commonality
- from
- deviceGenerator dg
- where
- dg.Device_Id < @numberOfDevices
- )
- insert into Device (
- Device_Id,
- DeviceType_Id
- )
- select
- Device_Id,
- DeviceType_Id
- from
- deviceGenerator g
- join (
- select
- DeviceType_Id,
- sum(Commonality) over (order by DeviceType_Id) - Commonality as Commonality_From,
- sum(Commonality) over (order by DeviceType_Id) - 1 as Commonality_To
- from
- DeviceType
- ) dt
- on
- g.Commonality between dt.Commonality_From and dt.Commonality_To
- option (maxrecursion 0);
- set @rows = @@ROWCOUNT;
- print 'Number of devices: ' + cast(@rows as varchar(10));
- -- select top 100 * from Device;
- -- print some statistics of which devices were created
- select
- dt.DeviceType_Id,
- dt.DeviceType,
- count(*) as NumberOfDevices
- from
- Device d
- join
- DeviceType dt
- on
- dt.DeviceType_Id = d.DeviceType_Id
- group by
- dt.DeviceType_Id,
- dt.DeviceType;
- -- source devices should connect to target devices
- if OBJECT_ID('tempdb..#sources') is not null
- drop table #sources;
- select
- dt.Device_Id,
- dtt.Connections
- into #sources
- from Device dt
- join DeviceType dtt
- on dtt.DeviceType_Id = dt.DeviceType_Id
- and dtt.Connector = 1;
- set @rows = @@ROWCOUNT;
- print 'Number of sources: ' + cast(@rows as varchar(10));
- -- and these are the targets
- if OBJECT_ID('tempdb..#targets') is not null
- drop table #targets;
- select
- dt.Device_Id,
- dtt.Connections
- into #targets
- from Device dt
- join DeviceType dtt
- on dtt.DeviceType_Id = dt.DeviceType_Id
- and dtt.Connector = 0;
- set @rows = @@ROWCOUNT;
- print 'Number of targets: ' + cast(@rows as varchar(10));
- if OBJECT_ID('tempdb..#network') is not null
- drop table #network;
- create table #network (
- Device_Id_Source int,
- Connections_Source tinyint,
- Device_Id_Target int,
- Connections_Target tinyint,
- Iteration smallint,
- Ping smallint
- );
- DECLARE @iteration smallint = 1;
- DECLARE @continue bit = 1;
- -- this loop will build a network, with some stopping conditions
- while(@continue = 1)
- begin
- print 'Iteration: ' + cast(@iteration as varchar(10));
- with network as (
- select
- ds.Device_Id as Device_Id_Source,
- ds.Connections - count(dtr.Device_Id) over (
- partition by ds.Device_Id
- ) as Connections_Source,
- dtr.Device_Id as Device_Id_Target,
- dtr.Connections as Connections_Target,
- @iteration as Iteration,
- cast(rand(checksum(newid())) * (@maxPing - @minPing) + @minPing as smallint) as Ping
- from #sources ds
- cross apply (
- select
- dt.Device_Id,
- dt.Connections,
- row_number() over (
- order by -- force different random values (salt with Device_Id)
- rand(checksum(ds.Device_Id) + checksum(newid()))
- ) as Random_Order
- from #targets dt
- ) dtr
- where
- dtr.Random_Order <= ds.Connections
- )
- insert into #network (
- Device_Id_Source,
- Connections_Source,
- Device_Id_Target,
- Connections_Target,
- Iteration,
- Ping
- )
- select distinct
- Device_Id_Source,
- Connections_Source + Removals as Connections_Source,
- Device_Id_Target,
- Connections_Target,
- Iteration,
- Ping
- from (
- select
- *,
- count(case when TargetCounter > Connections_Target then 1 end)
- over (partition by Device_Id_Source) as Removals
- from (
- select
- *,
- row_number() over (
- partition by
- Device_Id_Target
- order by
- rand(checksum(newid()))
- ) as TargetCounter
- from
- network
- ) n
- ) n
- where
- TargetCounter <= Connections_Target;
- set @rows = @@ROWCOUNT;
- print 'Network growth: ' + cast(@rows as varchar(10));
- truncate table #sources;
- insert into #sources (
- Device_Id,
- Connections
- )
- select distinct
- Device_Id_Source,
- Connections_Source
- from
- #network
- where
- Iteration = @iteration
- and
- Connections_Source > 0;
- set @rows = @@ROWCOUNT;
- if(@rows = 0) set @continue = 0;
- delete t
- from
- #targets t
- join (
- select
- Device_Id_Target,
- count(*) as Connections
- from
- #network
- group by
- Device_Id_Target
- ) x
- on
- t.Device_Id = x.Device_Id_Target
- and
- t.Connections <= x.Connections;
- set @rows = (select count(*) from #targets);
- if(@rows = 0) set @continue = 0;
- set @iteration = @iteration + 1;
- if(@iteration > @maxIterations) set @continue = 0;
- end
- create table Relationship (
- Device_Id_Source int not null references Device(Device_Id),
- Device_Id_Target int not null references Device(Device_Id),
- Ping int not null,
- constraint pk_Relationship primary key (
- Device_Id_Source,
- Device_Id_Target
- )
- )
- -- given the network, build a traditional hierarchy (parent-child)
- insert into Relationship (
- Device_Id_Source,
- Device_Id_Target,
- Ping
- )
- select
- Device_Id_Source,
- Device_Id_Target,
- min(Ping)
- from (
- select
- Device_Id_Source,
- Device_Id_Target,
- Ping
- from
- #network
- union all -- both ways
- select
- Device_Id_Target,
- Device_Id_Source,
- Ping
- from
- #network
- ) n
- group by
- Device_Id_Source,
- Device_Id_Target;
- -- select * from Relationship;
- ------------------------------------------------------------------------------
- /*
- The HIERARCHYID data type expects a path described as /node/node/node/.
- This textual representation is compacted into as compact binary form
- as possibly by the SQL Server internals.
- Three columns will be created:
- DevicePath containing the Device_Id:s of the nodes.
- TypePath containing the DeviceType_Id:s of the nodes.
- PingPath containing the Ping between two adjacent nodes.
- Note that it is sufficient to store the longest path /1/2/3/4/ and
- none of its sub-paths /1/2/3/, /2/3/4/, /1/2/, /3/4/, and /2/3/.
- */
- ----------------------------- BUILD THE HIERARCHY ----------------------------
- IF OBJECT_ID('Hierarchy') IS NOT NULL
- DROP TABLE Hierarchy;
- CREATE TABLE Hierarchy (
- DevicePath HIERARCHYID not null,
- TypePath HIERARCHYID not null,
- PingPath HIERARCHYID not null
- );
- --- recursively iterate through the relationships
- with traverser as (
- select
- r.Device_Id_Target as Device_Id_Next,
- cast(
- '/' +
- cast(r.Device_Id_Source as varchar(10)) +
- '/' +
- cast(r.Device_Id_Target as varchar(10)) +
- '/'
- as hierarchyid) as DevicePath,
- cast(
- '/' +
- cast(ds.DeviceType_Id as varchar(10)) +
- '/' +
- cast(dt.DeviceType_Id as varchar(10)) +
- '/'
- as hierarchyid) as TypePath,
- cast(
- '/0/' + -- first device has 0 ping to itself
- cast(r.Ping as varchar(10)) +
- '/'
- as hierarchyid) as PingPath
- from
- Relationship r
- join
- Device ds
- on
- ds.Device_Id = r.Device_Id_Source
- join
- Device dt
- on
- dt.Device_Id = r.Device_Id_Target
- union all
- select
- r.Device_Id_Target as Device_Id_Next,
- cast(
- t.DevicePath.ToString() + -- append
- cast(r.Device_Id_Target as varchar(10)) +
- '/'
- as hierarchyid) as DevicePath,
- cast(
- t.TypePath.ToString() + -- append
- cast(dt.DeviceType_Id as varchar(10)) +
- '/'
- as hierarchyid) as TypePath,
- cast(
- t.PingPath.ToString() + -- append
- cast(r.Ping as varchar(10)) +
- '/'
- as hierarchyid) as PingPath
- from
- traverser t
- join
- Relationship r
- on
- r.Device_Id_Source = t.Device_Id_Next
- and -- avoid circular references
- PATINDEX(
- '%/' +
- cast(r.Device_Id_Target as varchar(10)) +
- '/%',
- t.DevicePath.ToString()
- ) = 0
- join
- Device ds
- on
- ds.Device_Id = r.Device_Id_Source
- join
- Device dt
- on
- dt.Device_Id = r.Device_Id_Target
- )
- insert into Hierarchy (
- DevicePath,
- TypePath,
- PingPath
- )
- select
- DevicePath,
- TypePath,
- PingPath
- from
- traverser;
- -- reduce redundant paths (keep longest, remove sub-paths)
- DELETE Hierarchy
- WHERE DevicePath IN (
- SELECT DISTINCT
- hx.DevicePath
- FROM
- Hierarchy h
- JOIN
- Hierarchy hx
- ON
- hx.DevicePath <> h.DevicePath
- AND
- PATINDEX(
- '%' +
- hx.DevicePath.ToString() +
- '%',
- h.DevicePath.ToString()
- ) > 0
- );
- /*
- A polymorphic query is one in which you want to find any two nodes having
- certain properties and that are connected in some way through a path.
- Note that graph tables in SQL Server 2017 do not yet support such
- queries.
- 1. How many computers in the network are at some point connected
- through wireless?
- 2. List these paths, along with the number of hops between the
- computers and the total ping. Taking the minimal ping between
- two computers will actually find you the most efficient path,
- similar to the traveling salesman problem.
- 3. Find all Device_Id:s of the wireless connectors found in
- the paths from the previous question.
- All these queries are solved using search in and manipulation
- of the string representations of the HIERARCHYID. While the string
- queries may not be optimal from a performance perspective it is
- hard to find a more compact way to store the network in a way
- that allows for ad-hoc polymorphic queries.
- */
- ----------------------------- QUERYING THE DATA ----------------------------
- -- count how many computers are at some point connected through wireless
- SELECT
- count(*) as Computers
- FROM
- Hierarchy h
- WHERE
- -- pattern search in the DeviceType_Id path
- PATINDEX('%1%4%1%', h.TypePath.ToString()) > 0;
- -- count the number of hops between computers at some point connected through wireless
- -- and find the total ping between them
- SELECT
- DevicePath.ToString() as DevicePath,
- TypePath.ToString() as TypePath,
- -- account for inital and ending / by subtracting 2
- len(DevicePath.ToString()) - len(REPLACE(DevicePath.ToString(), '/', '')) - 2 as Hops,
- PingPath.ToString() as PingPath,
- -- use the string_split function to get all values
- (
- select sum(cast([value] as int))
- from string_split(PingPath.ToString(), '/')
- ) as TotalPing
- FROM
- Hierarchy h
- WHERE
- -- pattern search in the DeviceType_Id path
- PATINDEX('%1%4%1%', h.TypePath.ToString()) > 0;
- -- find the Device_Id:s of the wireless connectors that have computers on both sides
- SELECT
- h.DevicePath.ToString() as DevicePath,
- h.TypePath.ToString() as TypePath,
- cast(d.Device_Id as int) as Device_Id
- FROM
- Hierarchy h
- CROSS APPLY (
- -- use string_split to get the indexes of the wireless connectors
- select
- [value] as DeviceType,
- row_number() over (order by (select 1)) as idx
- from
- string_split(h.TypePath.ToString(), '/')
- ) t
- CROSS APPLY (
- -- use string_split again to find the corresponding Device_Id:s
- select
- [value] as Device_Id,
- row_number() over (order by (select 1)) as idx
- from
- string_split(h.DevicePath.ToString(), '/')
- ) d
- where
- t.DeviceType = 4 -- wireless
- and
- d.idx = t.idx
- and
- -- pattern search in the DeviceType_Id path
- PATINDEX('%1%4%1%', h.TypePath.ToString()) > 0;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement