Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- -- Prevent unwanted resultsets back to client
- SET NOCOUNT ON;
- -- Prepare staging table
- CREATE TABLE #Customers
- (
- FirstName VARCHAR(50) NOT NULL,
- LastName VARCHAR(50) NULL,
- StreetAddress VARCHAR(50) NULL,
- ZipCode INT NULL,
- Email VARCHAR(50) NULL,
- CreditCard CHAR(19) NULL,
- CustomerID INT NOT NULL
- );
- /*
- http://sqlmag.com/sql-server/what-you-need-know-about-batch-mode-window-aggregate-operator-sql-server-2016-part-1
- */
- CREATE NONCLUSTERED COLUMNSTORE INDEX NCI_Purchases ON dbo.Purchases(PurchaseID) WHERE PurchaseID = -1 AND PurchaseID = -2;
- -- Empty this solutions table
- TRUNCATE TABLE dbo.Customers;
- -- Populate staging table with earliest PurchaseID for all unique three possible matching cases
- -- About 2 seconds
- WITH ctePreaggregate(FirstName, LastName, StreetAddress, ZipCode, Email, CreditCard, CustomerID)
- AS (
- SELECT p.FirstName,
- p.LastName,
- p.StreetAddress,
- p.ZipCode,
- p.Email,
- p.CreditCard,
- MIN(p.PurchaseID) AS CustomerID
- FROM dbo.Purchases AS p WITH (TABLOCK)
- GROUP BY GROUPING SETS
- (
- (
- FirstName,
- Email
- ),
- (
- FirstName,
- CreditCard
- ),
- (
- FirstName,
- LastName,
- StreetAddress,
- ZipCode
- )
- )
- )
- INSERT #Customers WITH (TABLOCK)
- (
- FirstName,
- LastName,
- StreetAddress,
- ZipCode,
- Email,
- CreditCard,
- CustomerID
- )
- SELECT MAX(c.FirstName) AS FirstName,
- MAX(c.LastName) AS LastName,
- MAX(c.StreetAddress) AS StreetAddress,
- MAX(c.ZipCode) AS ZipCode,
- MAX(c.Email) AS Email,
- MAX(c.CreditCard) AS CreditCard,
- c.CustomerID
- FROM ctePreaggregate AS c
- GROUP BY c.CustomerID;
- -- Populate Customer table with all purchases and earliest CustomerID
- -- About 3 seconds
- INSERT dbo.Customers WITH (TABLOCK)
- (
- PurchaseID,
- CustomerID
- )
- SELECT p.PurchaseID,
- MIN( CASE
- WHEN e.CustomerID <= x.CustomerID AND e.CustomerID <= cc.CustomerID THEN e.CustomerID
- WHEN x.CustomerID <= e.CustomerID AND x.CustomerID <= cc.CustomerID THEN x.CustomerID
- ELSE cc.CustomerID
- END) AS CustomerID
- FROM dbo.Purchases AS p WITH (TABLOCK)
- INNER JOIN #Customers AS e WITH (TABLOCK) ON e.FirstName = p.FirstName
- AND e.Email = p.Email
- INNER JOIN #Customers AS x WITH (TABLOCK) ON x.FirstName = p.FirstName
- AND x.LastName = p.LastName
- AND x.StreetAddress = p.StreetAddress
- AND x.ZipCode = p.ZipCode
- INNER JOIN #Customers AS cc WITH (TABLOCK) ON cc.FirstName = p.FirstName
- AND cc.CreditCard = p.CreditCard
- GROUP BY p.PurchaseID;
- -- Iterate the Network Connected Graph
- -- About 1 second
- WHILE @@ROWCOUNT > 0
- UPDATE f
- SET f.OriginalCustomerID = f.NewCustomerID
- FROM (
- SELECT c1.CustomerID AS OriginalCustomerID,
- c2.CustomerID AS NewCustomerID
- FROM dbo.Customers AS c1
- INNER JOIN dbo.Customers AS c2 ON c2.PurchaseID = c1.CustomerID
- AND c2.CustomerID < c2.PurchaseID
- ) AS f;
- -- Clean up
- DROP INDEX NCI_Purchases ON dbo.Purchases;
- DROP TABLE #Customers;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement