Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- combine1
- Stefan Gustafsson, Acando
- Uses ideas borrowed from Mikael Eriksson and Daniel Hutmacher
- */
- truncate table dbo.Customers
- set nocount on
- declare @t0 datetime = getdate()
- declare @t1 datetime
- declare @ms int
- declare @rc int
- if object_id('tempdb..#ColStoreTable') is not null DROP TABLE #ColStoreTable;
- CREATE TABLE #ColStoreTable (
- col tinyint NOT NULL
- );
- CREATE CLUSTERED COLUMNSTORE INDEX ColStoreIndex ON #ColStoreTable;
- INSERT INTO #ColStoreTable VALUES (0);
- ALTER INDEX ColStoreIndex ON #ColStoreTable REORGANIZE;
- -- Build edge list
- ;with
- p as (
- select PurchaseID, FirstName, LastName, Email, StreetAddress, ZipCode, CreditCard
- from dbo.PurchasesBig
- )
- , cte1 as (
- select
- PurchaseID,
- Key1 = (Firstname+'/'+CreditCard) collate finnish_swedish_bin,
- Key2 = (Firstname+'/'+Email) collate finnish_swedish_bin,
- Key3 = (Firstname+'/'+LastName+'/'+StreetAddress+'/'+cast(ZipCode as varchar(10))) collate finnish_swedish_bin
- from p
- )
- , k1 as (
- select Key1, MIN(PurchaseID) as node
- from cte1
- group by Key1
- )
- , k2 as (
- select Key2, MIN(PurchaseID) as node
- from cte1
- group by Key2
- )
- , k3 as (
- select Key3, MIN(PurchaseID)as node
- from cte1
- group by Key3
- )
- ,cte2 as (
- select
- a.PurchaseID,
- k1.node as n1,
- k2.node as n2,
- k3.node as n3
- from cte1 a
- inner hash join k1 on k1.Key1 = a.Key1
- inner hash join k2 on k2.Key2 = a.Key2
- inner hash join k3 on k3.Key3 = a.Key3
- )
- , cte3 as (
- select
- s = PurchaseID,
- t = case
- when n1 < n2 then
- case when n1 < n3 then n1 else n3 end
- else
- case when n2 < n3 then n2 else n3 end
- end
- from cte2
- )
- insert into dbo.Customers with (tablockx)
- (PurchaseID, CustomerID)
- select s,t
- from cte3
- LEFT JOIN #ColStoreTable ON 1=0
- option (hash group, hash join)
- set @rc = @@rowcount
- set @ms = datediff(ms, @t0, getdate())
- RAISERROR ('step1 duration: %d ms rc=%d',0,0,@ms,@rc) WITH NOWAIT
- while @rc > 0
- begin
- set @t1 = getdate()
- update c
- set c.CustomerID = p.CustomerID
- from dbo.Customers c
- join dbo.Customers p on
- p.PurchaseID <> p.CustomerID and
- c.CustomerID = p.PurchaseID
- set @rc = @@rowcount
- set @ms = datediff(ms, @t1, getdate())
- RAISERROR ('After update: %d ms rc=%d',0,0,@ms, @rc) WITH NOWAIT
- end
- set @ms = datediff(ms, @t0, getdate())
- RAISERROR ('total duration: %d ms',0,0,@ms) WITH NOWAIT
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement