Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- drop table parent_tb cascade;
- create table parent_tb as
- select id, md5(random()::text) descr
- from generate_series(1::bigint,11228::bigint) as a(id);
- alter table parent_tb add primary key (id);
- create index idx_parent_tb_desc on parent_tb(descr);
- drop table child_tb;
- create table child_tb (
- id_tenant bigint,
- id bigint,
- descr text,
- id_ref bigint references parent_tb(id) null,
- primary key (id_tenant, id)
- ) partition by list(id_tenant);
- /* code generator
- select format('create table child_tb_ten_%1$s partition of child_tb for values in (%1$L);',id_tenant)
- from generate_series(1,35) as a(id_tenant); \gexec
- */
- create table child_tb_ten_1 partition of child_tb for values in ('1');
- create table child_tb_ten_2 partition of child_tb for values in ('2');
- create table child_tb_ten_3 partition of child_tb for values in ('3');
- create table child_tb_ten_4 partition of child_tb for values in ('4');
- create table child_tb_ten_5 partition of child_tb for values in ('5');
- create table child_tb_ten_6 partition of child_tb for values in ('6');
- create table child_tb_ten_7 partition of child_tb for values in ('7');
- create table child_tb_ten_8 partition of child_tb for values in ('8');
- create table child_tb_ten_9 partition of child_tb for values in ('9');
- create table child_tb_ten_10 partition of child_tb for values in ('10');
- create table child_tb_ten_11 partition of child_tb for values in ('11');
- create table child_tb_ten_12 partition of child_tb for values in ('12');
- create table child_tb_ten_13 partition of child_tb for values in ('13');
- create table child_tb_ten_14 partition of child_tb for values in ('14');
- create table child_tb_ten_15 partition of child_tb for values in ('15');
- create table child_tb_ten_16 partition of child_tb for values in ('16');
- create table child_tb_ten_17 partition of child_tb for values in ('17');
- create table child_tb_ten_18 partition of child_tb for values in ('18');
- create table child_tb_ten_19 partition of child_tb for values in ('19');
- create table child_tb_ten_20 partition of child_tb for values in ('20');
- create table child_tb_ten_21 partition of child_tb for values in ('21');
- create table child_tb_ten_22 partition of child_tb for values in ('22');
- create table child_tb_ten_23 partition of child_tb for values in ('23');
- create table child_tb_ten_24 partition of child_tb for values in ('24');
- create table child_tb_ten_25 partition of child_tb for values in ('25');
- create table child_tb_ten_26 partition of child_tb for values in ('26');
- create table child_tb_ten_27 partition of child_tb for values in ('27');
- create table child_tb_ten_28 partition of child_tb for values in ('28');
- create table child_tb_ten_29 partition of child_tb for values in ('29');
- create table child_tb_ten_30 partition of child_tb for values in ('30');
- create table child_tb_ten_31 partition of child_tb for values in ('31');
- create table child_tb_ten_32 partition of child_tb for values in ('32');
- create table child_tb_ten_33 partition of child_tb for values in ('33');
- create table child_tb_ten_34 partition of child_tb for values in ('34');
- create table child_tb_ten_35 partition of child_tb for values in ('35');
- /*
- * code generator
- select
- 'insert into child_tb(id_tenant, id, descr, id_ref)
- select id_tenant::bigint,
- row_number() over (order by 1) as id,
- md5(random()::text) as descr,
- dense_rank() over (order by id_tenant, id_ref) id_ref
- from ('||
- string_agg(
- format('select %L as id_tenant, %L id_ref from generate_series(1,%L::bigint)', id_tenant, id_ref, n_rows),
- E'\nunion all\n')
- ||') as x'
- from
- (
- <omissis>
- ) x;
- */
- insert into child_tb(id_tenant, id, descr, id_ref);
- select id_tenant::bigint,
- row_number() over (order by 1) as id,
- md5(random()::text) as descr,
- dense_rank() over (order by id_tenant, id_ref) id_ref
- from (select '1' as id_tenant, '61' id_ref from generate_series(1,'7486'::bigint)
- union all
- select '2' as id_tenant, '484' id_ref from generate_series(1,'17102'::bigint)
- union all
- select '3' as id_tenant, '4' id_ref from generate_series(1,'72'::bigint)
- union all
- select '3' as id_tenant, '5' id_ref from generate_series(1,'3977'::bigint)
- union all
- select '3' as id_tenant, '6' id_ref from generate_series(1,'8'::bigint)
- union all
- select '3' as id_tenant, '7' id_ref from generate_series(1,'29'::bigint)
- union all
- select '4' as id_tenant, '759' id_ref from generate_series(1,'70'::bigint)
- union all
- select '4' as id_tenant, '761' id_ref from generate_series(1,'59'::bigint)
- union all
- select '4' as id_tenant, '762' id_ref from generate_series(1,'380'::bigint)
- union all
- select '4' as id_tenant, '764' id_ref from generate_series(1,'72'::bigint)
- union all
- select '4' as id_tenant, '765' id_ref from generate_series(1,'19'::bigint)
- union all
- select '4' as id_tenant, '766' id_ref from generate_series(1,'151'::bigint)
- union all
- select '4' as id_tenant, '767' id_ref from generate_series(1,'16'::bigint)
- union all
- select '4' as id_tenant, '768' id_ref from generate_series(1,'3790'::bigint)
- union all
- select '4' as id_tenant, '769' id_ref from generate_series(1,'37'::bigint)
- union all
- select '4' as id_tenant, '9524' id_ref from generate_series(1,'1011'::bigint)
- union all
- select '5' as id_tenant, '99' id_ref from generate_series(1,'342'::bigint)
- union all
- select '6' as id_tenant, '222' id_ref from generate_series(1,'475759'::bigint)
- union all
- select '7' as id_tenant, '505' id_ref from generate_series(1,'233'::bigint)
- union all
- select '7' as id_tenant, '506' id_ref from generate_series(1,'14'::bigint)
- union all
- select '7' as id_tenant, '507' id_ref from generate_series(1,'1394'::bigint)
- union all
- select '7' as id_tenant, '508' id_ref from generate_series(1,'98'::bigint)
- union all
- select '7' as id_tenant, '6455' id_ref from generate_series(1,'56'::bigint)
- union all
- select '7' as id_tenant, '6461' id_ref from generate_series(1,'67'::bigint)
- union all
- select '8' as id_tenant, '335' id_ref from generate_series(1,'3385'::bigint)
- union all
- select '9' as id_tenant, '11894' id_ref from generate_series(1,'1022'::bigint)
- union all
- select '12' as id_tenant, '25' id_ref from generate_series(1,'2132'::bigint)
- union all
- select '13' as id_tenant, '708' id_ref from generate_series(1,'16975'::bigint)
- union all
- select '14' as id_tenant, '715' id_ref from generate_series(1,'18352'::bigint)
- union all
- select '15' as id_tenant, '572' id_ref from generate_series(1,'876'::bigint)
- union all
- select '16' as id_tenant, '511' id_ref from generate_series(1,'183'::bigint)
- union all
- select '16' as id_tenant, '523' id_ref from generate_series(1,'2364'::bigint)
- union all
- select '16' as id_tenant, '536' id_ref from generate_series(1,'4437'::bigint)
- union all
- select '16' as id_tenant, '12287' id_ref from generate_series(1,'102'::bigint)
- union all
- select '17' as id_tenant, '347' id_ref from generate_series(1,'1247'::bigint)
- union all
- select '17' as id_tenant, '382' id_ref from generate_series(1,'53'::bigint)
- union all
- select '17' as id_tenant, '431' id_ref from generate_series(1,'364'::bigint)
- union all
- select '17' as id_tenant, '10754' id_ref from generate_series(1,'126'::bigint)
- union all
- select '18' as id_tenant, '713' id_ref from generate_series(1,'1260'::bigint)
- union all
- select '18' as id_tenant, '7731' id_ref from generate_series(1,'2096'::bigint)
- union all
- select '19' as id_tenant, '501' id_ref from generate_series(1,'723'::bigint)
- union all
- select '20' as id_tenant, '477' id_ref from generate_series(1,'194'::bigint)
- union all
- select '21' as id_tenant, '654' id_ref from generate_series(1,'4350'::bigint)
- union all
- select '21' as id_tenant, '12290' id_ref from generate_series(1,'62'::bigint)
- union all
- select '21' as id_tenant, '12291' id_ref from generate_series(1,'66'::bigint)
- union all
- select '21' as id_tenant, '12292' id_ref from generate_series(1,'19'::bigint)
- union all
- select '22' as id_tenant, '671' id_ref from generate_series(1,'2546'::bigint)
- union all
- select '23' as id_tenant, '494' id_ref from generate_series(1,'515'::bigint)
- union all
- select '23' as id_tenant, '10920' id_ref from generate_series(1,'8'::bigint)
- union all
- select '24' as id_tenant, '614' id_ref from generate_series(1,'1619'::bigint)
- union all
- select '25' as id_tenant, '79' id_ref from generate_series(1,'2341'::bigint)
- union all
- select '26' as id_tenant, '9525' id_ref from generate_series(1,'216'::bigint)
- union all
- select '27' as id_tenant, '703' id_ref from generate_series(1,'9305'::bigint)
- union all
- select '28' as id_tenant, '36' id_ref from generate_series(1,'3302'::bigint)
- union all
- select '29' as id_tenant, '678' id_ref from generate_series(1,'703'::bigint)
- union all
- select '30' as id_tenant, '612' id_ref from generate_series(1,'1212'::bigint)
- union all
- select '31' as id_tenant, '668' id_ref from generate_series(1,'881'::bigint)
- union all
- select '32' as id_tenant, '329' id_ref from generate_series(1,'1201'::bigint)
- union all
- select '33' as id_tenant, '561' id_ref from generate_series(1,'14'::bigint)
- union all
- select '34' as id_tenant, '470' id_ref from generate_series(1,'419'::bigint)
- union all
- select '35' as id_tenant, '11962' id_ref from generate_series(1,'252'::bigint)) as x
- ;
- set default_statistics_target=10000;
- vacuum analyze parent_tb;
- vacuum analyze child_tb;
- --the tenant=6 is clearly an outlier at a global level
- select count(1) n_keys, id_tenant, count(distinct id_ref) n_le, array_agg(distinct (id_ref,p.descr))
- from child_tb c join parent_tb p on (c.id_ref=p.id)
- group by id_tenant
- order by 1 desc;
- --..and the statistics correctly recognized it: the first mcv and mcf are related to tenant 6 and retains the 80% of the data
- select
- (most_common_vals::text::numeric[])[1] mcv_1, (most_common_freqs::text::numeric[])[1] mcf_1,
- (most_common_vals::text::numeric[])[2] mcv_2, (most_common_freqs::text::numeric[])[2] mcf_2,
- *
- from pg_stats where tablename in ('child_tb','child_tb_ten_6');
- --(1) Let's continue querying data from tenant=6
- select distinct c.id_ref,p.descr
- from child_tb c join parent_tb p on (c.id_ref=p.id)
- where id_tenant=6;
- --First attempt: access by ID
- --Result OK: The optimizer correctly recognize the number of rows resulting from the join
- explain analyze
- select * from child_tb c join parent_tb p on (c.id_ref=p.id)
- where p.id=53 --fill with the appropriate value at point (1)
- and c.id_tenant=6;
- /*
- Nested Loop (cost=0.29..150517.94 rows=475759 width=98) (actual time=5.209..93.730 rows=475759 loops=1)
- -> Index Scan using parent_tb_pkey on parent_tb p (cost=0.29..4.44 rows=1 width=41) (actual time=0.027..0.031 rows=1 loops=1)
- Index Cond: (id = 53)
- -> Seq Scan on child_tb_ten_6 c (cost=0.00..79149.65 rows=475759 width=57) (actual time=5.170..58.785 rows=475759 loops=1)
- Filter: ((id_ref = 53) AND (id_tenant = 6))
- Planning Time: 0.130 ms
- JIT:
- Functions: 7
- Options: Inlining false, Optimization false, Expressions true, Deforming true
- Timing: Generation 0.976 ms, Inlining 0.000 ms, Optimization 0.389 ms, Emission 4.530 ms, Total 5.895 ms
- Execution Time: 109.378 ms
- */
- --Second attempt: access by Parent's descriptive field.
- --Result KO: The optimizer underestimates the resulting records out of the join by 1000x!
- explain analyze
- select * from child_tb c join parent_tb p on (c.id_ref=p.id)
- where p.descr='6ea8c6d951f3c8371662509ff8a5e37e' --fill with the appropriate value at point (1)
- and c.id_tenant=6;
- /*
- Gather (cost=1004.60..37169.40 rows=42 width=98) (actual time=0.495..195.430 rows=475759 loops=1)
- Workers Planned: 2
- Workers Launched: 0
- -> Hash Join (cost=4.60..36165.20 rows=18 width=98) (actual time=0.047..160.268 rows=475759 loops=1)
- Hash Cond: (c.id_ref = p.id)
- -> Parallel Seq Scan on child_tb_ten_6 c (cost=0.00..35637.52 rows=198233 width=57) (actual time=0.009..60.365 rows=475759 loops=1)
- Filter: (id_tenant = 6)
- -> Hash (cost=4.44..4.44 rows=1 width=41) (actual time=0.031..0.033 rows=1 loops=1)
- Buckets: 1024 Batches: 1 Memory Usage: 9kB
- -> Index Scan using idx_parent_tb_desc on parent_tb p (cost=0.29..4.44 rows=1 width=41) (actual time=0.027..0.028 rows=1 loops=1)
- Index Cond: (descr = '6ea8c6d951f3c8371662509ff8a5e37e'::text)
- Planning Time: 0.898 ms
- Execution Time: 213.586 ms
- */
- /*
- * Third attempt: let's see if what happens once we "obfuscate" the Parent accessing value with a materialized CTE
- * Result OK: The optimizer (correcly) assumes a flat data distribution dividing the total number of rows of the
- * Tenant 6 partition to the the global number of distint id_refs:
- select
- (select count(1) from child_tb where id_tenant=6) --total number of rows in child table per tenant=6.
- /
- (select count(distinct id_ref) from child_tb) --number of distinct id_ref in child table. 61 id_refs
- --475759 rows /
- --61 id_refs =
- --7799
- ;
- */
- explain analyze
- with par as materialized (
- select *
- from parent_tb
- where descr='6ea8c6d951f3c8371662509ff8a5e37e' --fill with the appropriate value at point (1)
- )
- select * from child_tb c join par p on (c.id_ref=p.id)
- where c.id_tenant=6;
- /*
- Hash Join (cost=4.90..80919.09 rows=7799 width=97) (actual time=0.051..125.731 rows=475759 loops=1)
- Hash Cond: (c.id_ref = p.id)
- CTE par
- -> Index Scan using idx_parent_tb_desc on parent_tb (cost=0.29..4.44 rows=1 width=41) (actual time=0.028..0.029 rows=1 loops=1)
- Index Cond: (descr = '6ea8c6d951f3c8371662509ff8a5e37e'::text)
- -> Seq Scan on child_tb_ten_6 c (cost=0.00..77960.25 rows=475759 width=57) (actual time=0.013..45.615 rows=475759 loops=1)
- Filter: (id_tenant = 6)
- -> Hash (cost=0.30..0.30 rows=1 width=40) (actual time=0.033..0.034 rows=1 loops=1)
- Buckets: 1024 Batches: 1 Memory Usage: 9kB
- -> CTE Scan on par p (cost=0.00..0.30 rows=1 width=40) (actual time=0.029..0.030 rows=1 loops=1)
- Planning Time: 0.155 ms
- Execution Time: 140.000 ms
- */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement