----------------------------- TEST PARAMETERS -------------------------------
/*
Different methods to find the rows in effect by Lars Rönnbäck, 2022.
This script is in the Public Domain.
You need to set up a database with a size of 10GB to hold ~100 million
To run a suite of tests from 1k to 100M:
EXEC performance_test 1000; -- 1k
EXEC performance_test 10000; -- 10k
EXEC performance_test 100000; -- 100k
EXEC performance_test 1000000; -- 1M
EXEC performance_test 10000000; -- 10M
EXEC performance_test 100000000; -- 100M
HISTORY:
2021-01-21 CREATED
2021-01-22 Added additional functions
2021-01-23 Rewrote as stored procedure + bug fixes and improvements
Added clustered columnstore index test
*/
create or alter proc performance_test (
-- approximately how many attribute rows you will have in the attribute table
@approximateAttributeRows int = 1000,
-- how many versions you will maximally have for an attribute value
@maxVersions int = 100,
-- 30 is a statistical "magic" number for statistical significance
@runs int = 30,
-- controls whether sp_updatestats is run after table creation
@stat bit = 1,
-- checks that output from all functions are identical
-- (this is very slow so run it only on a small test)
@verification bit = 0
)
as
begin
set nocount on;
--=======================================================================--
drop table if exists #func;
create table #func (
id int identity(1, 1) not null primary key,
func varchar(555)
);
insert into #func (func)
values
('p_anchor_cte_row_number'),
('p_anchor_first_value'),
('p_anchor_left_join_max'),
('p_anchor_outer_apply_top_1'),
('p_anchor_row_number'),
('p_anchor_top_1_subselect'),
('p_anchor_with_ties');
select * from #func;
-- truncate table [dbo].[result];
if OBJECT_ID('[dbo].[result]') is null
begin
CREATE TABLE [dbo].[result](
[func] [varchar](555) NOT NULL,
[median_duration_ms] [float] NOT NULL,
[average_duration_ms] [bigint] NOT NULL,
[deviation_duration_ms] [float] NOT NULL,
[minimum_duration_ms] [bigint] NOT NULL,
[maximum_duration_ms] [bigint] NOT NULL,
[approximateAttributeRows] [bigint] NOT NULL,
[maxVersions] [int] NOT NULL,
[runs] [int] NOT NULL,
[cold] [bit] NOT NULL,
[stat] [bit] NOT NULL,
[columnar] [bit] NOT NULL,
[version] varchar(555) NOT NULL,
[tested_at] datetime NOT NULL default GETDATE()
);
end
---------------------------- TEST BENCH SETUP -------------------------------
drop table if exists attribute_one;
drop table if exists attribute_two;
drop table if exists anchor;
create table anchor (
id int not null primary key
);
with idGen as (
select 1 as id
union all
select id + 1
from idGen
where id < round(1E0 * @approximateAttributeRows / 25, 0)
)
insert into anchor (id)
select id
from idGen
option (MAXRECURSION 0);
drop table if exists #version;
select top (@maxVersions) *
into #version
from anchor
order by id asc;
alter table #version add primary key (id);
create table attribute_one (
id int not null foreign key references anchor(id),
val varchar(max),
since date not null,
primary key (
id asc,
since desc
)
);
insert into attribute_one (id, val, since)
select
a.id,
'ID: ' + cast(a.id as varchar(10)) + ' VERSION: ' + cast(v.id as varchar(10)),
dateadd(day, v.id - a.id % @maxVersions, '2020-01-01')
from anchor a
join #version v
on v.id <= rand(checksum(a.id)) * (a.id % @maxVersions);
create table attribute_two (
id int not null foreign key references anchor(id),
val varchar(max),
since date not null,
unique (
id asc,
since desc
)
);
create clustered columnstore index cc_attribute_two on attribute_two;
insert into attribute_two (id, val, since)
select
a.id,
'ID: ' + cast(a.id as varchar(10)) + ' VERSION: ' + cast(v.id as varchar(10)),
dateadd(day, v.id - a.id % @maxVersions, '2020-01-01')
from anchor a
join #version v
on v.id <= rand(checksum(a.id)) * (a.id % @maxVersions);
--------------------------------- TESTING ------------------------------------
declare @at char(10) = '2019-12-01';
drop table if exists #timing;
create table #timing (
func varchar(555) not null,
cold bit not null,
columnar bit not null,
iteration int not null,
start_time datetime2(7) not null,
end_time datetime2(7) not null,
duration_ms as datediff(ms, start_time, end_time),
primary key (
func,
cold,
columnar,
iteration
)
);
drop table if exists #result;
select top 0 id, val_one, since_one
into #result
from p_anchor_row_number(@at);
if (@verification = 1)
begin
drop table if exists #failed_verification;
select top 0
a.id,
a1.val as val_one,
a1.since as since_one,
a2.val as val_two,
a2.since as since_two,
CAST(null as varchar(555)) as failing_functions
into #failed_verification
from anchor a
join attribute_one a1
on a1.id = a.id
join attribute_two a2
on a2.id = a.id;
declare @VERIFICATION_SQL varchar(max);
set @VERIFICATION_SQL = (
select '
insert into #failed_verification
select *, ''' + f1.func + ' x ' + f2.func + '''
from (
select * from ' + f1.func + '(DEFAULT)
except
select * from ' + f2.func + '(DEFAULT)
) x
' as [text()]
from #func f1
cross join #func f2
where f1.id <> f2.id
for xml path (''), type
).value('.', 'varchar(max)');
EXEC(@VERIFICATION_SQL);
select * from #failed_verification;
end
select (
select '
SET SHOWPLAN_XML ON;;
GO
' + (
select '
select id, val_one, since_one
from ' + f.func + '(''' + @at + ''');
select id, val_two, since_two
from ' + f.func + '(''' + @at + ''');
' as [text()]
from #func f
for xml path (''), type
).value('.', 'varchar(max)') + '
GO
SET SHOWPLAN_XML OFF;
GO
' for xml path(''), type
) as [Run the following separately if you want to collect execution plans];
declare @id int;
declare @func varchar(555);
declare @SQL varchar(max);
declare @iter int;
declare @start datetime2(7);
declare @end datetime2(7);
declare @db_id int;
-------------- COLD TEST (clustered index) ------------------
set @id = (select min(id) from #func);
while(@id is not null)
begin
set @func = (
select func
from #func
where id = @id
);
set @SQL = '
select id, val_one, since_one
into #result
from ' + @func + '(''' + @at + ''')
OPTION (OPTIMIZE FOR UNKNOWN);
';
set @iter = 0;
while (@iter < @runs)
begin
truncate table #result;
-- cold
CHECKPOINT;
DBCC DROPCLEANBUFFERS;
DBCC FREESYSTEMCACHE ('ALL');
DBCC FREEPROCCACHE WITH NO_INFOMSGS;
SET @db_id = DB_ID();
DBCC FLUSHPROCINDB (@db_id);
set @start = SYSDATETIME()
EXEC(@SQL);
set @end = SYSDATETIME();
insert into #timing (
func,
cold,
columnar,
iteration,
start_time,
end_time
)
values (
@func,
1,
0,
@iter,
@start,
@end
);
set @iter = @iter + 1;
end
set @id = (
select id
from #func
where id = @id + 1
);
end
-------------- COLD TEST (columnstore) ------------------
set @id = (select min(id) from #func);
while(@id is not null)
begin
set @func = (
select func
from #func
where id = @id
);
set @SQL = '
select id, val_two, since_two
into #result
from ' + @func + '(''' + @at + ''')
OPTION (OPTIMIZE FOR UNKNOWN);
';
set @iter = 0;
while (@iter < @runs)
begin
truncate table #result;
-- cold
CHECKPOINT;
DBCC DROPCLEANBUFFERS;
DBCC FREESYSTEMCACHE ('ALL');
DBCC FREEPROCCACHE WITH NO_INFOMSGS;
SET @db_id = DB_ID();
DBCC FLUSHPROCINDB (@db_id);
set @start = SYSDATETIME()
EXEC(@SQL);
set @end = SYSDATETIME();
insert into #timing (
func,
cold,
columnar,
iteration,
start_time,
end_time
)
values (
@func,
1,
1,
@iter,
@start,
@end
);
set @iter = @iter + 1;
end
set @id = (
select id
from #func
where id = @id + 1
);
end
-- update statistics before warm tests
if @stat = 1
begin
EXEC sp_updatestats
end
-------------- WARM TEST (clustered index) ------------------
set @id = (select min(id) from #func);
while(@id is not null)
begin
set @func = (
select func
from #func
where id = @id
);
set @SQL = '
select id, val_one, since_one
into #result
from ' + @func + '(''' + @at + ''');
';
set @iter = 0;
while (@iter < @runs)
begin
truncate table #result;
-- warm
set @start = SYSDATETIME()
EXEC(@SQL);
set @end = SYSDATETIME();
insert into #timing (
func,
cold,
columnar,
iteration,
start_time,
end_time
)
values (
@func,
0,
0,
@iter,
@start,
@end
);
set @iter = @iter + 1;
end
set @id = (
select id
from #func
where id = @id + 1
);
end
-------------- WARM TEST (columnstore) ------------------
set @id = (select min(id) from #func);
while(@id is not null)
begin
set @func = (
select func
from #func
where id = @id
);
set @SQL = '
select id, val_two, since_two
into #result
from ' + @func + '(''' + @at + ''');
';
set @iter = 0;
while (@iter < @runs)
begin
truncate table #result;
-- warm
set @start = SYSDATETIME()
EXEC(@SQL);
set @end = SYSDATETIME();
insert into #timing (
func,
cold,
columnar,
iteration,
start_time,
end_time
)
values (
@func,
0,
1,
@iter,
@start,
@end
);
set @iter = @iter + 1;
end
set @id = (
select id
from #func
where id = @id + 1
);
end
insert into result (
[func],
[median_duration_ms],
[average_duration_ms],
[deviation_duration_ms],
[minimum_duration_ms],
[maximum_duration_ms],
[approximateAttributeRows],
[maxVersions],
[runs],
[cold],
[columnar],
[stat],
[version]
)
select
[func],
[median_duration_ms],
[average_duration_ms],
[deviation_duration_ms],
[minimum_duration_ms],
[maximum_duration_ms],
@approximateAttributeRows,
@maxVersions,
@runs,
[cold],
[columnar],
@stat,
@@VERSION
from (
select
func,
cold,
columnar,
MAX(median_duration_ms) as median_duration_ms,
AVG(duration_ms) as average_duration_ms,
STDEVP(duration_ms) as deviation_duration_ms,
MIN(duration_ms) as minimum_duration_ms,
MAX(duration_ms) as maximum_duration_ms
from (
select
*,
median_duration_ms = PERCENTILE_CONT(0.5)
within group (order by duration_ms)
over (partition by func, cold, columnar)
from
#timing
) t
group by func, cold, columnar
) f
order by 2;
select * from result;
end
go
------------------------------------- FUNCTIONS ------------------------------------------
/* Note that some functions need to use a group by "trick" to retain table elimination */
drop function if exists p_anchor_row_number;
go
create function p_anchor_row_number (
@at date = '9999-12-31'
)
returns table as return
select
a.*,
a1.val as val_one,
a1.since as since_one,
a2.val as val_two,
a2.since as since_two
from anchor a
left join (
select
id,
max(val) as val,
max(since) as since
from (
select
*,
row_number() over (partition by id order by since desc) as recency
from
attribute_one
where
since <= @at
) one
where one.recency = 1
group by (id)
) a1
on a1.id = a.id
left join (
select
id,
max(val) as val,
max(since) as since
from (
select
*,
row_number() over (partition by id order by since desc) as recency
from
attribute_two
where
since <= @at
) two
where two.recency = 1
group by id
) a2
on a2.id = a.id;
go
--select top 100 * from p_anchor_row_number(DEFAULT);
--select top 100 id from p_anchor_row_number(DEFAULT);
drop function if exists p_anchor_first_value;
go
create function p_anchor_first_value (
@at date = '9999-12-31'
)
returns table as return
select
a.*,
a1.val as val_one,
a1.since as since_one,
a2.val as val_two,
a2.since as since_two
from anchor a
outer apply (
select top 1
first_value(val) over (partition by id order by since desc) as val,
first_value(since) over (partition by id order by since desc) as since
from attribute_one
where id = a.id and since <= @at
) a1
outer apply (
select top 1
first_value(val) over (partition by id order by since desc) as val,
first_value(since) over (partition by id order by since desc) as since
from attribute_two
where id = a.id and since <= @at
) a2
go
--select top 100 * from p_anchor_first_value(DEFAULT);
--select top 100 id from p_anchor_first_value(DEFAULT);
drop function if exists p_anchor_top_1_subselect;
go
create function p_anchor_top_1_subselect (
@at date = '9999-12-31'
)
returns table as return
select
a.*,
a1.val as val_one,
a1.since as since_one,
a2.val as val_two,
a2.since as since_two
from anchor a
left join attribute_one a1
on a1.id = a.id
and a1.since = (
select top 1
sub.since
from attribute_one sub
where sub.id = a.id
and sub.since <= @at
)
left join attribute_two a2
on a2.id = a.id
and a2.since = (
select top 1
sub.since
from attribute_two sub
where sub.id = a.id
and sub.since <= @at
)
go
--select top 100 * from p_anchor_top_1_subselect(DEFAULT);
--select top 100 id from p_anchor_top_1_subselect(DEFAULT);
drop function if exists p_anchor_outer_apply_top_1;
go
create function p_anchor_outer_apply_top_1 (
@at date = '9999-12-31'
)
returns table as return
select
a.*,
a1.val as val_one,
a1.since as since_one,
a2.val as val_two,
a2.since as since_two
from anchor a
outer apply (
select top 1
*
from attribute_one sub
where sub.id = a.id
and sub.since <= @at
order by sub.since desc
) a1
outer apply (
select top 1
*
from attribute_two sub
where sub.id = a.id
and sub.since <= @at
order by sub.since desc
) a2
go
--select top 100 * from p_anchor_outer_apply_top_1(DEFAULT);
--select top 100 id from p_anchor_outer_apply_top_1(DEFAULT);
drop function if exists p_anchor_with_ties;
go
create function p_anchor_with_ties (
@at date = '9999-12-31'
)
returns table as return
select
a.*,
a1.val as val_one,
a1.since as since_one,
a2.val as val_two,
a2.since as since_two
from anchor a
left join (
select
id,
max(val) as val,
max(since) as since
from (
select top 1 with ties
*
from attribute_one
where since <= @at
order by row_number() over (partition by id order by since desc)
) one
group by id
) a1
on a1.id = a.id
left join (
select
id,
max(val) as val,
max(since) as since
from (
select top 1 with ties
*
from attribute_two
where since <= @at
order by row_number() over (partition by id order by since desc)
) two
group by id
) a2
on a2.id = a.id
go
--select top 100 * from p_anchor_with_ties(DEFAULT);
--select top 100 id from p_anchor_with_ties(DEFAULT);
drop function if exists p_anchor_cte_row_number;
go
create function p_anchor_cte_row_number (
@at date = '9999-12-31'
)
returns table as return
with one as (
select
id,
max(val) as val,
max(since) as since
from (
select
*,
row_number() over (partition by id order by since desc) as recency
from
attribute_one
where
since <= @at
) one
where one.recency = 1
group by (id)
),
two as (
select
id,
max(val) as val,
max(since) as since
from (
select
*,
row_number() over (partition by id order by since desc) as recency
from
attribute_two
where
since <= @at
) two
where two.recency = 1
group by id
)
select
a.*,
a1.val as val_one,
a1.since as since_one,
a2.val as val_two,
a2.since as since_two
from anchor a
left join one a1
on a1.id = a.id
left join two a2
on a2.id = a.id
go
--select top 100 * from p_anchor_cte_row_number(DEFAULT);
--select top 100 id from p_anchor_cte_row_number(DEFAULT);
drop function if exists p_anchor_left_join_max;
go
create function p_anchor_left_join_max (
@at date = '9999-12-31'
)
returns table as return
select
a.*,
a1.val as val_one,
a1.since as since_one,
a2.val as val_two,
a2.since as since_two
from anchor a
left join (
select
id,
MAX(since) as since
from attribute_one
where since <= @at
group by id
) one_max
on one_max.id = a.id
left join attribute_one a1
on a1.id = one_max.id
and a1.since = one_max.since
left join (
select
id,
MAX(since) as since
from attribute_two
where since <= @at
group by id
) two_max
on two_max.id = a.id
left join attribute_two a2
on a2.id = one_max.id
and a2.since = one_max.since
go
--select top 100 * from p_anchor_left_join_max(DEFAULT);
--select top 100 id from p_anchor_left_join_max(DEFAULT);