Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- Denna lösning ger varje kund ett CustomerID som är kundens lägsta PurchaseID.
- Det går att vinna pyttelite tid på att inte välja lägsta PurchaseID,
- men den lilla tidsförlusten bjuder jag på i estetikens namn.
- Vi har tre ekvivalensregler:
- - FirstName + CreditCard
- - FirstName + Email
- - FirstName + LastName + StreetAddress + ZipCode
- Lösningen består av tre huvudsteg:
- 1) Varje kombination i varje ekvivalensregel tilldelas ett tal som är det lägsta PurchaseID där kombinationen förekommer.
- 2) I tabellen Rules läggs alla PurchaseID och deras tre värden från föregående steg.
- 3) I tabellen Clusters kopplas sedan de olika värdena från Steg 1 samman via förekomst på samma rad i Rules.
- Mer kommentarer i koden.
- */
- SET NOCOUNT ON;
- -- DROP
- -- Tables
- IF OBJECT_ID('dbo.Rules', 'U') IS NOT NULL
- DROP TABLE dbo.Rules;
- IF OBJECT_ID('dbo.Clusters', 'U') IS NOT NULL
- DROP TABLE dbo.Clusters;
- TRUNCATE TABLE dbo.Customers;
- -- Cursor
- IF CURSOR_STATUS('global','outer_cursor') >= -1
- DEALLOCATE outer_cursor;
- -- CREATE
- -- Tables
- CREATE TABLE dbo.Rules (
- PurchaseID int NOT NULL
- ,ClusterR1 int NOT NULL
- ,ClusterR2 int NOT NULL
- ,ClusterR3 int NOT NULL
- );
- CREATE TABLE dbo.Clusters (
- ClusterID int NOT NULL PRIMARY KEY
- ,ClusterOnRow int NOT NULL
- ,Cluster varchar(MAX) NOT NULL
- );
- /*
- I mina tidigare lösningar lade jag tre ickeklustrade index på Purchases, ett för varje ekvivalensregel.
- Eftersom uppslag bara sker på dessa kolumner i det inledande steget (GROUP BY), har dessa index slopats.
- */
- -- STEG 1-2
- WITH Rule1 AS (
- SELECT
- MIN(PurchaseID) AS Cluster
- ,FirstName
- ,CreditCard
- FROM dbo.Purchases
- GROUP BY FirstName, CreditCard
- )
- , Rule2 AS (
- SELECT
- MIN(PurchaseID) AS Cluster
- ,FirstName
- ,Email
- FROM dbo.Purchases
- GROUP BY FirstName, Email
- )
- , Rule3 AS (
- SELECT
- MIN(PurchaseID) AS Cluster
- ,FirstName
- ,LastName
- ,StreetAddress
- ,ZipCode
- FROM dbo.Purchases
- GROUP BY FirstName, LastName, StreetAddress, ZipCode
- )
- INSERT INTO dbo.Rules
- WITH (TABLOCK)
- SELECT
- p.PurchaseID
- ,r1.Cluster AS ClusterR1
- ,r2.Cluster AS ClusterR2
- ,r3.Cluster AS ClusterR3
- FROM dbo.Purchases AS p
- JOIN Rule1 AS r1 ON r1.FirstName = p.FirstName AND r1.CreditCard = p.CreditCard
- JOIN Rule2 AS r2 ON r2.FirstName = p.FirstName AND r2.Email = p.Email
- JOIN Rule3 AS r3 ON r3.FirstName = p.FirstName AND r3.LastName = p.LastName AND r3.StreetAddress = p.StreetAddress AND r3.ZipCode = p.ZipCode;
- DECLARE @maxid int;
- SELECT @maxid = MAX(PurchaseID) FROM dbo.Purchases;
- DECLARE @itemlength int;
- SELECT @itemlength = LEN(CAST(@maxid AS varchar));
- DECLARE @cl1 int;
- DECLARE @cl2 int;
- DECLARE @cl3 int;
- DECLARE @clusterOnRow1 int;
- DECLARE @clusterOnRow2 int;
- DECLARE @cluster1 varchar(MAX);
- DECLARE @cluster2 varchar(MAX);
- DECLARE @cluster3 varchar(MAX);
- DECLARE @cluster4 varchar(MAX);
- DECLARE @i int;
- -- STEG 3
- /*
- Detta är den huvudsakliga algoritmen, som kopplar ihop kombinationer från de olika
- ekvivalensreglerna med varandra.
- Jag betraktar detta som ett grafproblem. Varje unikt värde (PurchaseID) som genererats
- i STEG 1 är en nod. När detta steg inleds finns inga kanter.
- Detta steg kommer sedan loopa igenom alla unika kombinationer från tabellen Rules. De
- motsvarande noderna kommer kopplas ihop med kanter, så att hopkopplade noder anses
- motsvara samma kund.
- Hopkopplingen av noder sker på ett bestämt vis. De bildar alltid träd med maxdjup 1 (stjärnor),
- där en nod är rot, och alla andra är löv. Detta åstadkoms med hjälp av tabellen Clusters.
- Clusters innehåller en rad för varje startnod (unikt PurchaseID från STEG 1). Förutom
- nyckelkolumnen ClusterID (int) finns kolumnen ClusterOnRow (int) som pekar på den rad i
- samma tabell som är rot i det träd där raden ingår. Från början är ClusterOnRow = ClusterID.
- Sist finns kolumnen Cluster, som är en s.k. Adjacency List i form av en varchar(MAX).
- Kolumnen Cluster är bara intressant på de rader där ClusterOnRow = ClusterID, vilket är
- rotnoderna. Den innehåller en konkatenering av samtliga noder i trädet, med det lägsta
- numret först. Om det högsta förekommande PurchaseID är sexsiffrigt kommer alla noder
- motsvaras av en varchar(6). Klustret 3-4497-98 blir då '000003004497000098'.
- */
- WITH IDS AS (
- SELECT DISTINCT ClusterR1 AS ClusterID FROM dbo.Rules
- UNION
- SELECT DISTINCT ClusterR2 AS ClusterID FROM dbo.Rules
- UNION
- SELECT DISTINCT ClusterR3 AS ClusterID FROM dbo.Rules
- )
- INSERT INTO dbo.Clusters
- WITH (TABLOCK)
- SELECT
- ClusterID = ClusterID
- ,ClusterOnRow = ClusterID
- ,Cluster = FORMAT(ClusterID, 'd' + CAST(@itemlength AS varchar))
- FROM IDS;
- DECLARE outer_cursor CURSOR FOR
- SELECT DISTINCT
- ClusterR1
- ,ClusterR2
- ,ClusterR3
- FROM dbo.Rules;
- OPEN outer_cursor;
- FETCH NEXT FROM outer_cursor
- INTO @cl1, @cl2, @cl3;
- /*
- I koden nedan görs skillnad på tre typer av noder:
- * Eget kluster - noden är ensam.
- * Rotnod - har lövnoder under sig.
- * Lövnod - har en rotnod över sig.
- Av dessa tre nodtyper uppkommer sex olika kombinationer som måste tas hänsyn till:
- A) Två egna kluster
- B) Eget kluster + rotnod
- C) Eget kluster + lövnod
- D) Två rotnoder
- E) Rotnod + lövnod
- F) Två lövnoder
- */
- WHILE @@FETCH_STATUS = 0
- BEGIN
- IF @cl1 <> @cl2
- BEGIN
- SELECT
- @clusterOnRow1 = ClusterOnRow
- ,@cluster1 = Cluster
- FROM dbo.Clusters
- WHERE ClusterID = @cl1;
- SELECT
- @clusterOnRow2 = ClusterOnRow
- ,@cluster2 = Cluster
- FROM dbo.Clusters
- WHERE ClusterID = @cl2;
- IF @clusterOnRow1 <> @clusterOnRow2
- BEGIN
- IF @clusterOnRow1 = @cl1 -- A B C D E
- BEGIN
- IF @clusterOnRow2 = @cl2 -- A B D
- BEGIN
- IF LEN(@cluster1) = @itemlength -- A B
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @cl2
- WHERE ClusterID = @cl1;
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster1, @itemlength) < LEFT (@cluster2, @itemlength) THEN @cluster1 + @cluster2
- ELSE @cluster2 + @cluster1
- END
- WHERE ClusterID = @cl2;
- END -- A B
- ELSE -- B D
- BEGIN
- IF LEN(@cluster2) = @itemlength -- B
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @cl1
- WHERE ClusterID = @cl2;
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster1, @itemlength) < LEFT (@cluster2, @itemlength) THEN @cluster1 + @cluster2
- ELSE @cluster2 + @cluster1
- END
- WHERE ClusterID = @cl1;
- END -- B
- ELSE -- D
- BEGIN
- SET @i = 1;
- IF LEN(@cluster1) <= LEN(@cluster2)
- BEGIN
- WHILE @i * @itemlength <= LEN(@cluster1)
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @cl2
- WHERE ClusterID = CAST(SUBSTRING(@cluster1,(@i - 1) * @itemlength + 1,@itemlength) AS int);
- SET @i = @i + 1;
- END
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster1, @itemlength) < LEFT (@cluster2, @itemlength) THEN @cluster1 + @cluster2
- ELSE @cluster2 + @cluster1
- END
- WHERE ClusterID = @cl2;
- END
- ELSE
- BEGIN
- WHILE @i * @itemlength <= LEN(@cluster2)
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @cl1
- WHERE ClusterID = CAST(SUBSTRING(@cluster2,(@i - 1) * @itemlength + 1,@itemlength) AS int);
- SET @i = @i + 1;
- END
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster1, @itemlength) < LEFT (@cluster2, @itemlength) THEN @cluster1 + @cluster2
- ELSE @cluster2 + @cluster1
- END
- WHERE ClusterID = @cl1;
- END
- END -- D
- END -- B D
- END -- A B D
- ELSE -- C E
- BEGIN
- IF LEN(@cluster1) = @itemlength -- C
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @clusterOnRow2
- WHERE ClusterID = @cl1;
- SELECT @cluster3 = Cluster
- FROM dbo.Clusters
- WHERE ClusterID = @clusterOnRow2;
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster1, @itemlength) < LEFT (@cluster3, @itemlength) THEN @cluster1 + @cluster3
- ELSE @cluster3 + @cluster1
- END
- WHERE ClusterID = @clusterOnRow2;
- END -- C
- ELSE -- E
- BEGIN
- SET @i = 1;
- SELECT @cluster3 = Cluster
- FROM dbo.Clusters
- WHERE ClusterID = @clusterOnRow2;
- IF LEN(@cluster1) <= LEN(@cluster3)
- BEGIN
- WHILE @i * @itemlength <= LEN(@cluster1)
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @clusterOnRow2
- WHERE ClusterID = CAST(SUBSTRING(@cluster1,(@i - 1) * @itemlength + 1,@itemlength) AS int);
- SET @i = @i + 1;
- END
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster1, @itemlength) < LEFT (@cluster3, @itemlength) THEN @cluster1 + @cluster3
- ELSE @cluster3 + @cluster1
- END
- WHERE ClusterID = @clusterOnRow2;
- END
- ELSE
- BEGIN
- WHILE @i * @itemlength <= LEN(@cluster3)
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @cl1
- WHERE ClusterID = CAST(SUBSTRING(@cluster3,(@i - 1) * @itemlength + 1,@itemlength) AS int);
- SET @i = @i + 1;
- END
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster1, @itemlength) < LEFT (@cluster3, @itemlength) THEN @cluster1 + @cluster3
- ELSE @cluster3 + @cluster1
- END
- WHERE ClusterID = @cl1;
- END
- END -- E
- END -- C E
- END -- A B C D E
- ELSE -- C E F
- BEGIN
- IF @clusterOnRow2 = @cl2 -- C E
- BEGIN
- IF LEN(@cluster2) = @itemlength -- C
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @clusterOnRow1
- WHERE ClusterID = @cl2;
- SELECT @cluster3 = Cluster
- FROM dbo.Clusters
- WHERE ClusterID = @clusterOnRow1;
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster2, @itemlength) < LEFT (@cluster3, @itemlength) THEN @cluster2 + @cluster3
- ELSE @cluster3 + @cluster2
- END
- WHERE ClusterID = @clusterOnRow1;
- END -- C
- ELSE -- E
- BEGIN
- SET @i = 1;
- SELECT @cluster3 = Cluster
- FROM dbo.Clusters
- WHERE ClusterID = @clusterOnRow1;
- IF LEN(@cluster2) <= LEN(@cluster3)
- BEGIN
- WHILE @i * @itemlength <= LEN(@cluster2)
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @clusterOnRow1
- WHERE ClusterID = CAST(SUBSTRING(@cluster2,(@i - 1) * @itemlength + 1,@itemlength) AS int);
- SET @i = @i + 1;
- END
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster2, @itemlength) < LEFT (@cluster3, @itemlength) THEN @cluster2 + @cluster3
- ELSE @cluster3 + @cluster2
- END
- WHERE ClusterID = @clusterOnRow1;
- END
- ELSE
- BEGIN
- WHILE @i * @itemlength <= LEN(@cluster3)
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @cl2
- WHERE ClusterID = CAST(SUBSTRING(@cluster3,(@i - 1) * @itemlength + 1,@itemlength) AS int);
- SET @i = @i + 1;
- END
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster2, @itemlength) < LEFT (@cluster3, @itemlength) THEN @cluster2 + @cluster3
- ELSE @cluster3 + @cluster2
- END
- WHERE ClusterID = @cl2;
- END
- END -- E
- END -- C E
- ELSE -- F
- BEGIN
- SELECT @cluster3 = Cluster
- FROM dbo.Clusters
- WHERE ClusterID = @clusterOnRow1;
- SELECT @cluster4 = Cluster
- FROM dbo.Clusters
- WHERE ClusterID = @clusterOnRow2;
- SET @i = 1;
- IF LEN(@cluster3) <= LEN(@cluster4)
- BEGIN
- WHILE @i * @itemlength <= LEN(@cluster3)
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @clusterOnRow2
- WHERE ClusterID = CAST(SUBSTRING(@cluster3,(@i - 1) * @itemlength + 1,@itemlength) AS int);
- SET @i = @i + 1;
- END
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster3, @itemlength) < LEFT (@cluster4, @itemlength) THEN @cluster3 + @cluster4
- ELSE @cluster4 + @cluster3
- END
- WHERE ClusterID = @clusterOnRow2;
- END
- ELSE
- BEGIN
- WHILE @i * @itemlength <= LEN(@cluster4)
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @clusterOnRow1
- WHERE ClusterID = CAST(SUBSTRING(@cluster4,(@i - 1) * @itemlength + 1,@itemlength) AS int);
- SET @i = @i + 1;
- END
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster3, @itemlength) < LEFT (@cluster4, @itemlength) THEN @cluster3 + @cluster4
- ELSE @cluster4 + @cluster3
- END
- WHERE ClusterID = @clusterOnRow1;
- END
- END -- F
- END -- C E F
- END
- END
- IF @cl1 <> @cl3 AND @cl2 <> @cl3
- BEGIN
- SELECT
- @clusterOnRow1 = ClusterOnRow
- ,@cluster1 = Cluster
- FROM dbo.Clusters
- WHERE ClusterID = @cl1;
- SELECT
- @clusterOnRow2 = ClusterOnRow
- ,@cluster2 = Cluster
- FROM dbo.Clusters
- WHERE ClusterID = @cl3;
- IF @clusterOnRow1 <> @clusterOnRow2
- BEGIN
- IF @clusterOnRow1 = @cl1 -- A B C D E
- BEGIN
- IF @clusterOnRow2 = @cl3 -- A B D
- BEGIN
- IF LEN(@cluster1) = @itemlength -- A B
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @cl3
- WHERE ClusterID = @cl1;
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster1, @itemlength) < LEFT (@cluster2, @itemlength) THEN @cluster1 + @cluster2
- ELSE @cluster2 + @cluster1
- END
- WHERE ClusterID = @cl3;
- END -- A B
- ELSE -- B D
- BEGIN
- IF LEN(@cluster2) = @itemlength -- B
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @cl1
- WHERE ClusterID = @cl3;
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster1, @itemlength) < LEFT (@cluster2, @itemlength) THEN @cluster1 + @cluster2
- ELSE @cluster2 + @cluster1
- END
- WHERE ClusterID = @cl1;
- END -- B
- ELSE -- D
- BEGIN
- SET @i = 1;
- IF LEN(@cluster1) <= LEN(@cluster2)
- BEGIN
- WHILE @i * @itemlength <= LEN(@cluster1)
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @cl3
- WHERE ClusterID = CAST(SUBSTRING(@cluster1,(@i - 1) * @itemlength + 1,@itemlength) AS int);
- SET @i = @i + 1;
- END
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster1, @itemlength) < LEFT (@cluster2, @itemlength) THEN @cluster1 + @cluster2
- ELSE @cluster2 + @cluster1
- END
- WHERE ClusterID = @cl3;
- END
- ELSE
- BEGIN
- WHILE @i * @itemlength <= LEN(@cluster2)
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @cl1
- WHERE ClusterID = CAST(SUBSTRING(@cluster2,(@i - 1) * @itemlength + 1,@itemlength) AS int);
- SET @i = @i + 1;
- END
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster1, @itemlength) < LEFT (@cluster2, @itemlength) THEN @cluster1 + @cluster2
- ELSE @cluster2 + @cluster1
- END
- WHERE ClusterID = @cl1;
- END
- END -- D
- END -- B D
- END -- A B D
- ELSE -- C E
- BEGIN
- IF LEN(@cluster1) = @itemlength -- C
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @clusterOnRow2
- WHERE ClusterID = @cl1;
- SELECT @cluster3 = Cluster
- FROM dbo.Clusters
- WHERE ClusterID = @clusterOnRow2;
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster1, @itemlength) < LEFT (@cluster3, @itemlength) THEN @cluster1 + @cluster3
- ELSE @cluster3 + @cluster1
- END
- WHERE ClusterID = @clusterOnRow2;
- END -- C
- ELSE -- E
- BEGIN
- SET @i = 1;
- SELECT @cluster3 = Cluster
- FROM dbo.Clusters
- WHERE ClusterID = @clusterOnRow2;
- IF LEN(@cluster1) <= LEN(@cluster3)
- BEGIN
- WHILE @i * @itemlength <= LEN(@cluster1)
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @clusterOnRow2
- WHERE ClusterID = CAST(SUBSTRING(@cluster1,(@i - 1) * @itemlength + 1,@itemlength) AS int);
- SET @i = @i + 1;
- END
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster1, @itemlength) < LEFT (@cluster3, @itemlength) THEN @cluster1 + @cluster3
- ELSE @cluster3 + @cluster1
- END
- WHERE ClusterID = @clusterOnRow2;
- END
- ELSE
- BEGIN
- WHILE @i * @itemlength <= LEN(@cluster3)
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @cl1
- WHERE ClusterID = CAST(SUBSTRING(@cluster3,(@i - 1) * @itemlength + 1,@itemlength) AS int);
- SET @i = @i + 1;
- END
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster1, @itemlength) < LEFT (@cluster3, @itemlength) THEN @cluster1 + @cluster3
- ELSE @cluster3 + @cluster1
- END
- WHERE ClusterID = @cl1;
- END
- END -- E
- END -- C E
- END -- A B C D E
- ELSE -- C E F
- BEGIN
- IF @clusterOnRow2 = @cl3 -- C E
- BEGIN
- IF LEN(@cluster2) = @itemlength -- C
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @clusterOnRow1
- WHERE ClusterID = @cl3;
- SELECT @cluster3 = Cluster
- FROM dbo.Clusters
- WHERE ClusterID = @clusterOnRow1;
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster2, @itemlength) < LEFT (@cluster3, @itemlength) THEN @cluster2 + @cluster3
- ELSE @cluster3 + @cluster2
- END
- WHERE ClusterID = @clusterOnRow1;
- END -- C
- ELSE -- E
- BEGIN
- SET @i = 1;
- SELECT @cluster3 = Cluster
- FROM dbo.Clusters
- WHERE ClusterID = @clusterOnRow1;
- IF LEN(@cluster2) <= LEN(@cluster3)
- BEGIN
- WHILE @i * @itemlength <= LEN(@cluster2)
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @clusterOnRow1
- WHERE ClusterID = CAST(SUBSTRING(@cluster2,(@i - 1) * @itemlength + 1,@itemlength) AS int);
- SET @i = @i + 1;
- END
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster2, @itemlength) < LEFT (@cluster3, @itemlength) THEN @cluster2 + @cluster3
- ELSE @cluster3 + @cluster2
- END
- WHERE ClusterID = @clusterOnRow1;
- END
- ELSE
- BEGIN
- WHILE @i * @itemlength <= LEN(@cluster3)
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @cl3
- WHERE ClusterID = CAST(SUBSTRING(@cluster3,(@i - 1) * @itemlength + 1,@itemlength) AS int);
- SET @i = @i + 1;
- END
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster2, @itemlength) < LEFT (@cluster3, @itemlength) THEN @cluster2 + @cluster3
- ELSE @cluster3 + @cluster2
- END
- WHERE ClusterID = @cl3;
- END
- END -- E
- END -- C E
- ELSE -- F
- BEGIN
- SELECT @cluster3 = Cluster
- FROM dbo.Clusters
- WHERE ClusterID = @clusterOnRow1;
- SELECT @cluster4 = Cluster
- FROM dbo.Clusters
- WHERE ClusterID = @clusterOnRow2;
- SET @i = 1;
- IF LEN(@cluster3) <= LEN(@cluster4)
- BEGIN
- WHILE @i * @itemlength <= LEN(@cluster3)
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @clusterOnRow2
- WHERE ClusterID = CAST(SUBSTRING(@cluster3,(@i - 1) * @itemlength + 1,@itemlength) AS int);
- SET @i = @i + 1;
- END
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster3, @itemlength) < LEFT (@cluster4, @itemlength) THEN @cluster3 + @cluster4
- ELSE @cluster4 + @cluster3
- END
- WHERE ClusterID = @clusterOnRow2;
- END
- ELSE
- BEGIN
- WHILE @i * @itemlength <= LEN(@cluster4)
- BEGIN
- UPDATE dbo.Clusters
- SET ClusterOnRow = @clusterOnRow1
- WHERE ClusterID = CAST(SUBSTRING(@cluster4,(@i - 1) * @itemlength + 1,@itemlength) AS int);
- SET @i = @i + 1;
- END
- UPDATE dbo.Clusters
- SET Cluster = CASE
- WHEN LEFT(@cluster3, @itemlength) < LEFT (@cluster4, @itemlength) THEN @cluster3 + @cluster4
- ELSE @cluster4 + @cluster3
- END
- WHERE ClusterID = @clusterOnRow1;
- END
- END -- F
- END -- C E F
- END
- END
- FETCH NEXT FROM outer_cursor
- INTO @cl1, @cl2, @cl3;
- END
- CLOSE outer_cursor;
- DEALLOCATE outer_cursor;
- INSERT INTO dbo.Customers
- WITH (TABLOCK)
- SELECT
- PurchaseID = r.PurchaseID
- ,CustomerID = CAST(LEFT(c2.Cluster, @itemlength) AS int)
- FROM dbo.Rules AS r
- JOIN dbo.Clusters AS c1 ON r.ClusterR1 = c1.ClusterID
- JOIN dbo.Clusters AS c2 ON c1.ClusterOnRow = c2.ClusterID
- SET NOCOUNT OFF;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement