Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ----------------------------- TEST PARAMETERS -------------------------------
- /*
- Different methods to find the rows in effect by Lars Rönnbäck, 2022.
- This script is in the Public Domain.
- You need to set up a database with a size of 10GB to hold ~100 million
- To run a suite of tests from 1k to 100M:
- EXEC performance_test 1000; -- 1k
- EXEC performance_test 10000; -- 10k
- EXEC performance_test 100000; -- 100k
- EXEC performance_test 1000000; -- 1M
- EXEC performance_test 10000000; -- 10M
- EXEC performance_test 100000000; -- 100M
- HISTORY:
- 2021-01-21 CREATED
- 2021-01-22 Added additional functions
- 2021-01-23 Rewrote as stored procedure + bug fixes and improvements
- Added clustered columnstore index test
- */
- create or alter proc performance_test (
- -- approximately how many attribute rows you will have in the attribute table
- @approximateAttributeRows int = 1000,
- -- how many versions you will maximally have for an attribute value
- @maxVersions int = 100,
- -- 30 is a statistical "magic" number for statistical significance
- @runs int = 30,
- -- controls whether sp_updatestats is run after table creation
- @stat bit = 1,
- -- checks that output from all functions are identical
- -- (this is very slow so run it only on a small test)
- @verification bit = 0
- )
- as
- begin
- set nocount on;
- --=======================================================================--
- drop table if exists #func;
- create table #func (
- id int identity(1, 1) not null primary key,
- func varchar(555)
- );
- insert into #func (func)
- values
- ('p_anchor_cte_row_number'),
- ('p_anchor_first_value'),
- ('p_anchor_left_join_max'),
- ('p_anchor_outer_apply_top_1'),
- ('p_anchor_row_number'),
- ('p_anchor_top_1_subselect'),
- ('p_anchor_with_ties');
- select * from #func;
- -- truncate table [dbo].[result];
- if OBJECT_ID('[dbo].[result]') is null
- begin
- CREATE TABLE [dbo].[result](
- [func] [varchar](555) NOT NULL,
- [median_duration_ms] [float] NOT NULL,
- [average_duration_ms] [bigint] NOT NULL,
- [deviation_duration_ms] [float] NOT NULL,
- [minimum_duration_ms] [bigint] NOT NULL,
- [maximum_duration_ms] [bigint] NOT NULL,
- [approximateAttributeRows] [bigint] NOT NULL,
- [maxVersions] [int] NOT NULL,
- [runs] [int] NOT NULL,
- [cold] [bit] NOT NULL,
- [stat] [bit] NOT NULL,
- [columnar] [bit] NOT NULL,
- [version] varchar(555) NOT NULL,
- [tested_at] datetime NOT NULL default GETDATE()
- );
- end
- ---------------------------- TEST BENCH SETUP -------------------------------
- drop table if exists attribute_one;
- drop table if exists attribute_two;
- drop table if exists anchor;
- create table anchor (
- id int not null primary key
- );
- with idGen as (
- select 1 as id
- union all
- select id + 1
- from idGen
- where id < round(1E0 * @approximateAttributeRows / 25, 0)
- )
- insert into anchor (id)
- select id
- from idGen
- option (MAXRECURSION 0);
- drop table if exists #version;
- select top (@maxVersions) *
- into #version
- from anchor
- order by id asc;
- alter table #version add primary key (id);
- create table attribute_one (
- id int not null foreign key references anchor(id),
- val varchar(max),
- since date not null,
- primary key (
- id asc,
- since desc
- )
- );
- insert into attribute_one (id, val, since)
- select
- a.id,
- 'ID: ' + cast(a.id as varchar(10)) + ' VERSION: ' + cast(v.id as varchar(10)),
- dateadd(day, v.id - a.id % @maxVersions, '2020-01-01')
- from anchor a
- join #version v
- on v.id <= rand(checksum(a.id)) * (a.id % @maxVersions);
- create table attribute_two (
- id int not null foreign key references anchor(id),
- val varchar(max),
- since date not null,
- unique (
- id asc,
- since desc
- )
- );
- create clustered columnstore index cc_attribute_two on attribute_two;
- insert into attribute_two (id, val, since)
- select
- a.id,
- 'ID: ' + cast(a.id as varchar(10)) + ' VERSION: ' + cast(v.id as varchar(10)),
- dateadd(day, v.id - a.id % @maxVersions, '2020-01-01')
- from anchor a
- join #version v
- on v.id <= rand(checksum(a.id)) * (a.id % @maxVersions);
- --------------------------------- TESTING ------------------------------------
- declare @at char(10) = '2019-12-01';
- drop table if exists #timing;
- create table #timing (
- func varchar(555) not null,
- cold bit not null,
- columnar bit not null,
- iteration int not null,
- start_time datetime2(7) not null,
- end_time datetime2(7) not null,
- duration_ms as datediff(ms, start_time, end_time),
- primary key (
- func,
- cold,
- columnar,
- iteration
- )
- );
- drop table if exists #result;
- select top 0 id, val_one, since_one
- into #result
- from p_anchor_row_number(@at);
- if (@verification = 1)
- begin
- drop table if exists #failed_verification;
- select top 0
- a.id,
- a1.val as val_one,
- a1.since as since_one,
- a2.val as val_two,
- a2.since as since_two,
- CAST(null as varchar(555)) as failing_functions
- into #failed_verification
- from anchor a
- join attribute_one a1
- on a1.id = a.id
- join attribute_two a2
- on a2.id = a.id;
- declare @VERIFICATION_SQL varchar(max);
- set @VERIFICATION_SQL = (
- select '
- insert into #failed_verification
- select *, ''' + f1.func + ' x ' + f2.func + '''
- from (
- select * from ' + f1.func + '(DEFAULT)
- except
- select * from ' + f2.func + '(DEFAULT)
- ) x
- ' as [text()]
- from #func f1
- cross join #func f2
- where f1.id <> f2.id
- for xml path (''), type
- ).value('.', 'varchar(max)');
- EXEC(@VERIFICATION_SQL);
- select * from #failed_verification;
- end
- select (
- select '
- SET SHOWPLAN_XML ON;;
- GO
- ' + (
- select '
- select id, val_one, since_one
- from ' + f.func + '(''' + @at + ''');
- select id, val_two, since_two
- from ' + f.func + '(''' + @at + ''');
- ' as [text()]
- from #func f
- for xml path (''), type
- ).value('.', 'varchar(max)') + '
- GO
- SET SHOWPLAN_XML OFF;
- GO
- ' for xml path(''), type
- ) as [Run the following separately if you want to collect execution plans];
- declare @id int;
- declare @func varchar(555);
- declare @SQL varchar(max);
- declare @iter int;
- declare @start datetime2(7);
- declare @end datetime2(7);
- declare @db_id int;
- -------------- COLD TEST (clustered index) ------------------
- set @id = (select min(id) from #func);
- while(@id is not null)
- begin
- set @func = (
- select func
- from #func
- where id = @id
- );
- set @SQL = '
- select id, val_one, since_one
- into #result
- from ' + @func + '(''' + @at + ''')
- OPTION (OPTIMIZE FOR UNKNOWN);
- ';
- set @iter = 0;
- while (@iter < @runs)
- begin
- truncate table #result;
- -- cold
- CHECKPOINT;
- DBCC DROPCLEANBUFFERS;
- DBCC FREESYSTEMCACHE ('ALL');
- DBCC FREEPROCCACHE WITH NO_INFOMSGS;
- SET @db_id = DB_ID();
- DBCC FLUSHPROCINDB (@db_id);
- set @start = SYSDATETIME()
- EXEC(@SQL);
- set @end = SYSDATETIME();
- insert into #timing (
- func,
- cold,
- columnar,
- iteration,
- start_time,
- end_time
- )
- values (
- @func,
- 1,
- 0,
- @iter,
- @start,
- @end
- );
- set @iter = @iter + 1;
- end
- set @id = (
- select id
- from #func
- where id = @id + 1
- );
- end
- -------------- COLD TEST (columnstore) ------------------
- set @id = (select min(id) from #func);
- while(@id is not null)
- begin
- set @func = (
- select func
- from #func
- where id = @id
- );
- set @SQL = '
- select id, val_two, since_two
- into #result
- from ' + @func + '(''' + @at + ''')
- OPTION (OPTIMIZE FOR UNKNOWN);
- ';
- set @iter = 0;
- while (@iter < @runs)
- begin
- truncate table #result;
- -- cold
- CHECKPOINT;
- DBCC DROPCLEANBUFFERS;
- DBCC FREESYSTEMCACHE ('ALL');
- DBCC FREEPROCCACHE WITH NO_INFOMSGS;
- SET @db_id = DB_ID();
- DBCC FLUSHPROCINDB (@db_id);
- set @start = SYSDATETIME()
- EXEC(@SQL);
- set @end = SYSDATETIME();
- insert into #timing (
- func,
- cold,
- columnar,
- iteration,
- start_time,
- end_time
- )
- values (
- @func,
- 1,
- 1,
- @iter,
- @start,
- @end
- );
- set @iter = @iter + 1;
- end
- set @id = (
- select id
- from #func
- where id = @id + 1
- );
- end
- -- update statistics before warm tests
- if @stat = 1
- begin
- EXEC sp_updatestats
- end
- -------------- WARM TEST (clustered index) ------------------
- set @id = (select min(id) from #func);
- while(@id is not null)
- begin
- set @func = (
- select func
- from #func
- where id = @id
- );
- set @SQL = '
- select id, val_one, since_one
- into #result
- from ' + @func + '(''' + @at + ''');
- ';
- set @iter = 0;
- while (@iter < @runs)
- begin
- truncate table #result;
- -- warm
- set @start = SYSDATETIME()
- EXEC(@SQL);
- set @end = SYSDATETIME();
- insert into #timing (
- func,
- cold,
- columnar,
- iteration,
- start_time,
- end_time
- )
- values (
- @func,
- 0,
- 0,
- @iter,
- @start,
- @end
- );
- set @iter = @iter + 1;
- end
- set @id = (
- select id
- from #func
- where id = @id + 1
- );
- end
- -------------- WARM TEST (columnstore) ------------------
- set @id = (select min(id) from #func);
- while(@id is not null)
- begin
- set @func = (
- select func
- from #func
- where id = @id
- );
- set @SQL = '
- select id, val_two, since_two
- into #result
- from ' + @func + '(''' + @at + ''');
- ';
- set @iter = 0;
- while (@iter < @runs)
- begin
- truncate table #result;
- -- warm
- set @start = SYSDATETIME()
- EXEC(@SQL);
- set @end = SYSDATETIME();
- insert into #timing (
- func,
- cold,
- columnar,
- iteration,
- start_time,
- end_time
- )
- values (
- @func,
- 0,
- 1,
- @iter,
- @start,
- @end
- );
- set @iter = @iter + 1;
- end
- set @id = (
- select id
- from #func
- where id = @id + 1
- );
- end
- insert into result (
- [func],
- [median_duration_ms],
- [average_duration_ms],
- [deviation_duration_ms],
- [minimum_duration_ms],
- [maximum_duration_ms],
- [approximateAttributeRows],
- [maxVersions],
- [runs],
- [cold],
- [columnar],
- [stat],
- [version]
- )
- select
- [func],
- [median_duration_ms],
- [average_duration_ms],
- [deviation_duration_ms],
- [minimum_duration_ms],
- [maximum_duration_ms],
- @approximateAttributeRows,
- @maxVersions,
- @runs,
- [cold],
- [columnar],
- @stat,
- @@VERSION
- from (
- select
- func,
- cold,
- columnar,
- MAX(median_duration_ms) as median_duration_ms,
- AVG(duration_ms) as average_duration_ms,
- STDEVP(duration_ms) as deviation_duration_ms,
- MIN(duration_ms) as minimum_duration_ms,
- MAX(duration_ms) as maximum_duration_ms
- from (
- select
- *,
- median_duration_ms = PERCENTILE_CONT(0.5)
- within group (order by duration_ms)
- over (partition by func, cold, columnar)
- from
- #timing
- ) t
- group by func, cold, columnar
- ) f
- order by 2;
- select * from result;
- end
- go
- ------------------------------------- FUNCTIONS ------------------------------------------
- /* Note that some functions need to use a group by "trick" to retain table elimination */
- drop function if exists p_anchor_row_number;
- go
- create function p_anchor_row_number (
- @at date = '9999-12-31'
- )
- returns table as return
- select
- a.*,
- a1.val as val_one,
- a1.since as since_one,
- a2.val as val_two,
- a2.since as since_two
- from anchor a
- left join (
- select
- id,
- max(val) as val,
- max(since) as since
- from (
- select
- *,
- row_number() over (partition by id order by since desc) as recency
- from
- attribute_one
- where
- since <= @at
- ) one
- where one.recency = 1
- group by (id)
- ) a1
- on a1.id = a.id
- left join (
- select
- id,
- max(val) as val,
- max(since) as since
- from (
- select
- *,
- row_number() over (partition by id order by since desc) as recency
- from
- attribute_two
- where
- since <= @at
- ) two
- where two.recency = 1
- group by id
- ) a2
- on a2.id = a.id;
- go
- --select top 100 * from p_anchor_row_number(DEFAULT);
- --select top 100 id from p_anchor_row_number(DEFAULT);
- drop function if exists p_anchor_first_value;
- go
- create function p_anchor_first_value (
- @at date = '9999-12-31'
- )
- returns table as return
- select
- a.*,
- a1.val as val_one,
- a1.since as since_one,
- a2.val as val_two,
- a2.since as since_two
- from anchor a
- outer apply (
- select top 1
- first_value(val) over (partition by id order by since desc) as val,
- first_value(since) over (partition by id order by since desc) as since
- from attribute_one
- where id = a.id and since <= @at
- ) a1
- outer apply (
- select top 1
- first_value(val) over (partition by id order by since desc) as val,
- first_value(since) over (partition by id order by since desc) as since
- from attribute_two
- where id = a.id and since <= @at
- ) a2
- go
- --select top 100 * from p_anchor_first_value(DEFAULT);
- --select top 100 id from p_anchor_first_value(DEFAULT);
- drop function if exists p_anchor_top_1_subselect;
- go
- create function p_anchor_top_1_subselect (
- @at date = '9999-12-31'
- )
- returns table as return
- select
- a.*,
- a1.val as val_one,
- a1.since as since_one,
- a2.val as val_two,
- a2.since as since_two
- from anchor a
- left join attribute_one a1
- on a1.id = a.id
- and a1.since = (
- select top 1
- sub.since
- from attribute_one sub
- where sub.id = a.id
- and sub.since <= @at
- )
- left join attribute_two a2
- on a2.id = a.id
- and a2.since = (
- select top 1
- sub.since
- from attribute_two sub
- where sub.id = a.id
- and sub.since <= @at
- )
- go
- --select top 100 * from p_anchor_top_1_subselect(DEFAULT);
- --select top 100 id from p_anchor_top_1_subselect(DEFAULT);
- drop function if exists p_anchor_outer_apply_top_1;
- go
- create function p_anchor_outer_apply_top_1 (
- @at date = '9999-12-31'
- )
- returns table as return
- select
- a.*,
- a1.val as val_one,
- a1.since as since_one,
- a2.val as val_two,
- a2.since as since_two
- from anchor a
- outer apply (
- select top 1
- *
- from attribute_one sub
- where sub.id = a.id
- and sub.since <= @at
- order by sub.since desc
- ) a1
- outer apply (
- select top 1
- *
- from attribute_two sub
- where sub.id = a.id
- and sub.since <= @at
- order by sub.since desc
- ) a2
- go
- --select top 100 * from p_anchor_outer_apply_top_1(DEFAULT);
- --select top 100 id from p_anchor_outer_apply_top_1(DEFAULT);
- drop function if exists p_anchor_with_ties;
- go
- create function p_anchor_with_ties (
- @at date = '9999-12-31'
- )
- returns table as return
- select
- a.*,
- a1.val as val_one,
- a1.since as since_one,
- a2.val as val_two,
- a2.since as since_two
- from anchor a
- left join (
- select
- id,
- max(val) as val,
- max(since) as since
- from (
- select top 1 with ties
- *
- from attribute_one
- where since <= @at
- order by row_number() over (partition by id order by since desc)
- ) one
- group by id
- ) a1
- on a1.id = a.id
- left join (
- select
- id,
- max(val) as val,
- max(since) as since
- from (
- select top 1 with ties
- *
- from attribute_two
- where since <= @at
- order by row_number() over (partition by id order by since desc)
- ) two
- group by id
- ) a2
- on a2.id = a.id
- go
- --select top 100 * from p_anchor_with_ties(DEFAULT);
- --select top 100 id from p_anchor_with_ties(DEFAULT);
- drop function if exists p_anchor_cte_row_number;
- go
- create function p_anchor_cte_row_number (
- @at date = '9999-12-31'
- )
- returns table as return
- with one as (
- select
- id,
- max(val) as val,
- max(since) as since
- from (
- select
- *,
- row_number() over (partition by id order by since desc) as recency
- from
- attribute_one
- where
- since <= @at
- ) one
- where one.recency = 1
- group by (id)
- ),
- two as (
- select
- id,
- max(val) as val,
- max(since) as since
- from (
- select
- *,
- row_number() over (partition by id order by since desc) as recency
- from
- attribute_two
- where
- since <= @at
- ) two
- where two.recency = 1
- group by id
- )
- select
- a.*,
- a1.val as val_one,
- a1.since as since_one,
- a2.val as val_two,
- a2.since as since_two
- from anchor a
- left join one a1
- on a1.id = a.id
- left join two a2
- on a2.id = a.id
- go
- --select top 100 * from p_anchor_cte_row_number(DEFAULT);
- --select top 100 id from p_anchor_cte_row_number(DEFAULT);
- drop function if exists p_anchor_left_join_max;
- go
- create function p_anchor_left_join_max (
- @at date = '9999-12-31'
- )
- returns table as return
- select
- a.*,
- a1.val as val_one,
- a1.since as since_one,
- a2.val as val_two,
- a2.since as since_two
- from anchor a
- left join (
- select
- id,
- MAX(since) as since
- from attribute_one
- where since <= @at
- group by id
- ) one_max
- on one_max.id = a.id
- left join attribute_one a1
- on a1.id = one_max.id
- and a1.since = one_max.since
- left join (
- select
- id,
- MAX(since) as since
- from attribute_two
- where since <= @at
- group by id
- ) two_max
- on two_max.id = a.id
- left join attribute_two a2
- on a2.id = one_max.id
- and a2.since = one_max.since
- go
- --select top 100 * from p_anchor_left_join_max(DEFAULT);
- --select top 100 id from p_anchor_left_join_max(DEFAULT);
Add Comment
Please, Sign In to add comment