Advertisement
Guest User

Why does the PG's Explain Analyze estimate the number of rows wrongly despite the correct stats?

a guest
Jul 6th, 2022
156
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. drop table parent_tb cascade;
  2.  
  3. create table parent_tb as
  4. select id, md5(random()::text) descr
  5. from generate_series(1::bigint,11228::bigint) as a(id);
  6.  
  7. alter table parent_tb add primary key (id);
  8. create index idx_parent_tb_desc on parent_tb(descr);
  9.  
  10. drop table child_tb;
  11. create table child_tb (
  12.     id_tenant bigint,
  13.     id bigint,
  14.     descr text,
  15.     id_ref bigint references parent_tb(id) null,
  16.     primary key (id_tenant, id)
  17. ) partition by list(id_tenant);
  18.  
  19. /* code generator
  20.     select format('create table child_tb_ten_%1$s partition of child_tb for values in (%1$L);',id_tenant)
  21.     from generate_series(1,35) as a(id_tenant); \gexec
  22. */
  23.  
  24. create table child_tb_ten_1 partition of child_tb for values in ('1');
  25. create table child_tb_ten_2 partition of child_tb for values in ('2');
  26. create table child_tb_ten_3 partition of child_tb for values in ('3');
  27. create table child_tb_ten_4 partition of child_tb for values in ('4');
  28. create table child_tb_ten_5 partition of child_tb for values in ('5');
  29. create table child_tb_ten_6 partition of child_tb for values in ('6');
  30. create table child_tb_ten_7 partition of child_tb for values in ('7');
  31. create table child_tb_ten_8 partition of child_tb for values in ('8');
  32. create table child_tb_ten_9 partition of child_tb for values in ('9');
  33. create table child_tb_ten_10 partition of child_tb for values in ('10');
  34. create table child_tb_ten_11 partition of child_tb for values in ('11');
  35. create table child_tb_ten_12 partition of child_tb for values in ('12');
  36. create table child_tb_ten_13 partition of child_tb for values in ('13');
  37. create table child_tb_ten_14 partition of child_tb for values in ('14');
  38. create table child_tb_ten_15 partition of child_tb for values in ('15');
  39. create table child_tb_ten_16 partition of child_tb for values in ('16');
  40. create table child_tb_ten_17 partition of child_tb for values in ('17');
  41. create table child_tb_ten_18 partition of child_tb for values in ('18');
  42. create table child_tb_ten_19 partition of child_tb for values in ('19');
  43. create table child_tb_ten_20 partition of child_tb for values in ('20');
  44. create table child_tb_ten_21 partition of child_tb for values in ('21');
  45. create table child_tb_ten_22 partition of child_tb for values in ('22');
  46. create table child_tb_ten_23 partition of child_tb for values in ('23');
  47. create table child_tb_ten_24 partition of child_tb for values in ('24');
  48. create table child_tb_ten_25 partition of child_tb for values in ('25');
  49. create table child_tb_ten_26 partition of child_tb for values in ('26');
  50. create table child_tb_ten_27 partition of child_tb for values in ('27');
  51. create table child_tb_ten_28 partition of child_tb for values in ('28');
  52. create table child_tb_ten_29 partition of child_tb for values in ('29');
  53. create table child_tb_ten_30 partition of child_tb for values in ('30');
  54. create table child_tb_ten_31 partition of child_tb for values in ('31');
  55. create table child_tb_ten_32 partition of child_tb for values in ('32');
  56. create table child_tb_ten_33 partition of child_tb for values in ('33');
  57. create table child_tb_ten_34 partition of child_tb for values in ('34');
  58. create table child_tb_ten_35 partition of child_tb for values in ('35');
  59.  
  60. /*
  61.  * code generator
  62.     select
  63.     'insert into child_tb(id_tenant, id, descr, id_ref)
  64.     select id_tenant::bigint,
  65.         row_number() over (order by 1) as id,
  66.         md5(random()::text) as descr,
  67.         dense_rank() over (order by id_tenant, id_ref) id_ref
  68.     from ('||
  69.     string_agg(
  70.         format('select %L as id_tenant, %L id_ref from generate_series(1,%L::bigint)', id_tenant, id_ref, n_rows),
  71.         E'\nunion all\n')
  72.     ||') as x'
  73.     from
  74.     (
  75.         <omissis>
  76.     ) x;
  77. */
  78.  
  79. insert into child_tb(id_tenant, id, descr, id_ref);
  80.     select id_tenant::bigint,
  81.         row_number() over (order by 1) as id,
  82.         md5(random()::text) as descr,
  83.         dense_rank() over (order by id_tenant, id_ref) id_ref
  84.     from (select '1' as id_tenant, '61' id_ref from generate_series(1,'7486'::bigint)
  85. union all
  86. select '2' as id_tenant, '484' id_ref from generate_series(1,'17102'::bigint)
  87. union all
  88. select '3' as id_tenant, '4' id_ref from generate_series(1,'72'::bigint)
  89. union all
  90. select '3' as id_tenant, '5' id_ref from generate_series(1,'3977'::bigint)
  91. union all
  92. select '3' as id_tenant, '6' id_ref from generate_series(1,'8'::bigint)
  93. union all
  94. select '3' as id_tenant, '7' id_ref from generate_series(1,'29'::bigint)
  95. union all
  96. select '4' as id_tenant, '759' id_ref from generate_series(1,'70'::bigint)
  97. union all
  98. select '4' as id_tenant, '761' id_ref from generate_series(1,'59'::bigint)
  99. union all
  100. select '4' as id_tenant, '762' id_ref from generate_series(1,'380'::bigint)
  101. union all
  102. select '4' as id_tenant, '764' id_ref from generate_series(1,'72'::bigint)
  103. union all
  104. select '4' as id_tenant, '765' id_ref from generate_series(1,'19'::bigint)
  105. union all
  106. select '4' as id_tenant, '766' id_ref from generate_series(1,'151'::bigint)
  107. union all
  108. select '4' as id_tenant, '767' id_ref from generate_series(1,'16'::bigint)
  109. union all
  110. select '4' as id_tenant, '768' id_ref from generate_series(1,'3790'::bigint)
  111. union all
  112. select '4' as id_tenant, '769' id_ref from generate_series(1,'37'::bigint)
  113. union all
  114. select '4' as id_tenant, '9524' id_ref from generate_series(1,'1011'::bigint)
  115. union all
  116. select '5' as id_tenant, '99' id_ref from generate_series(1,'342'::bigint)
  117. union all
  118. select '6' as id_tenant, '222' id_ref from generate_series(1,'475759'::bigint)
  119. union all
  120. select '7' as id_tenant, '505' id_ref from generate_series(1,'233'::bigint)
  121. union all
  122. select '7' as id_tenant, '506' id_ref from generate_series(1,'14'::bigint)
  123. union all
  124. select '7' as id_tenant, '507' id_ref from generate_series(1,'1394'::bigint)
  125. union all
  126. select '7' as id_tenant, '508' id_ref from generate_series(1,'98'::bigint)
  127. union all
  128. select '7' as id_tenant, '6455' id_ref from generate_series(1,'56'::bigint)
  129. union all
  130. select '7' as id_tenant, '6461' id_ref from generate_series(1,'67'::bigint)
  131. union all
  132. select '8' as id_tenant, '335' id_ref from generate_series(1,'3385'::bigint)
  133. union all
  134. select '9' as id_tenant, '11894' id_ref from generate_series(1,'1022'::bigint)
  135. union all
  136. select '12' as id_tenant, '25' id_ref from generate_series(1,'2132'::bigint)
  137. union all
  138. select '13' as id_tenant, '708' id_ref from generate_series(1,'16975'::bigint)
  139. union all
  140. select '14' as id_tenant, '715' id_ref from generate_series(1,'18352'::bigint)
  141. union all
  142. select '15' as id_tenant, '572' id_ref from generate_series(1,'876'::bigint)
  143. union all
  144. select '16' as id_tenant, '511' id_ref from generate_series(1,'183'::bigint)
  145. union all
  146. select '16' as id_tenant, '523' id_ref from generate_series(1,'2364'::bigint)
  147. union all
  148. select '16' as id_tenant, '536' id_ref from generate_series(1,'4437'::bigint)
  149. union all
  150. select '16' as id_tenant, '12287' id_ref from generate_series(1,'102'::bigint)
  151. union all
  152. select '17' as id_tenant, '347' id_ref from generate_series(1,'1247'::bigint)
  153. union all
  154. select '17' as id_tenant, '382' id_ref from generate_series(1,'53'::bigint)
  155. union all
  156. select '17' as id_tenant, '431' id_ref from generate_series(1,'364'::bigint)
  157. union all
  158. select '17' as id_tenant, '10754' id_ref from generate_series(1,'126'::bigint)
  159. union all
  160. select '18' as id_tenant, '713' id_ref from generate_series(1,'1260'::bigint)
  161. union all
  162. select '18' as id_tenant, '7731' id_ref from generate_series(1,'2096'::bigint)
  163. union all
  164. select '19' as id_tenant, '501' id_ref from generate_series(1,'723'::bigint)
  165. union all
  166. select '20' as id_tenant, '477' id_ref from generate_series(1,'194'::bigint)
  167. union all
  168. select '21' as id_tenant, '654' id_ref from generate_series(1,'4350'::bigint)
  169. union all
  170. select '21' as id_tenant, '12290' id_ref from generate_series(1,'62'::bigint)
  171. union all
  172. select '21' as id_tenant, '12291' id_ref from generate_series(1,'66'::bigint)
  173. union all
  174. select '21' as id_tenant, '12292' id_ref from generate_series(1,'19'::bigint)
  175. union all
  176. select '22' as id_tenant, '671' id_ref from generate_series(1,'2546'::bigint)
  177. union all
  178. select '23' as id_tenant, '494' id_ref from generate_series(1,'515'::bigint)
  179. union all
  180. select '23' as id_tenant, '10920' id_ref from generate_series(1,'8'::bigint)
  181. union all
  182. select '24' as id_tenant, '614' id_ref from generate_series(1,'1619'::bigint)
  183. union all
  184. select '25' as id_tenant, '79' id_ref from generate_series(1,'2341'::bigint)
  185. union all
  186. select '26' as id_tenant, '9525' id_ref from generate_series(1,'216'::bigint)
  187. union all
  188. select '27' as id_tenant, '703' id_ref from generate_series(1,'9305'::bigint)
  189. union all
  190. select '28' as id_tenant, '36' id_ref from generate_series(1,'3302'::bigint)
  191. union all
  192. select '29' as id_tenant, '678' id_ref from generate_series(1,'703'::bigint)
  193. union all
  194. select '30' as id_tenant, '612' id_ref from generate_series(1,'1212'::bigint)
  195. union all
  196. select '31' as id_tenant, '668' id_ref from generate_series(1,'881'::bigint)
  197. union all
  198. select '32' as id_tenant, '329' id_ref from generate_series(1,'1201'::bigint)
  199. union all
  200. select '33' as id_tenant, '561' id_ref from generate_series(1,'14'::bigint)
  201. union all
  202. select '34' as id_tenant, '470' id_ref from generate_series(1,'419'::bigint)
  203. union all
  204. select '35' as id_tenant, '11962' id_ref from generate_series(1,'252'::bigint)) as x
  205. ;
  206.  
  207. set default_statistics_target=10000;
  208. vacuum analyze parent_tb;
  209. vacuum analyze child_tb;
  210.  
  211. --the tenant=6 is clearly an outlier at a global level
  212. select count(1) n_keys, id_tenant, count(distinct id_ref) n_le, array_agg(distinct (id_ref,p.descr))
  213. from child_tb c join parent_tb p on (c.id_ref=p.id)
  214. group by id_tenant
  215. order by 1 desc;
  216.  
  217. --..and the statistics correctly recognized it: the first mcv and mcf are related to tenant 6 and retains the 80% of the data
  218. select
  219.     (most_common_vals::text::numeric[])[1] mcv_1, (most_common_freqs::text::numeric[])[1] mcf_1,
  220.     (most_common_vals::text::numeric[])[2] mcv_2, (most_common_freqs::text::numeric[])[2] mcf_2,
  221.     *
  222. from pg_stats where tablename in ('child_tb','child_tb_ten_6');
  223.  
  224. --(1) Let's continue querying data from tenant=6
  225. select distinct c.id_ref,p.descr
  226. from child_tb c join parent_tb p on (c.id_ref=p.id)
  227. where id_tenant=6;
  228.  
  229.  
  230.  
  231. --First attempt: access by ID
  232. --Result OK: The optimizer correctly recognize the number of rows resulting from the join
  233. explain analyze
  234. select * from child_tb c join parent_tb p on (c.id_ref=p.id)
  235. where p.id=53 --fill with the appropriate value at point (1)
  236. and c.id_tenant=6;
  237.  
  238. /*
  239. Nested Loop  (cost=0.29..150517.94 rows=475759 width=98) (actual time=5.209..93.730 rows=475759 loops=1)
  240.   ->  Index Scan using parent_tb_pkey on parent_tb p  (cost=0.29..4.44 rows=1 width=41) (actual time=0.027..0.031 rows=1 loops=1)
  241.         Index Cond: (id = 53)
  242.   ->  Seq Scan on child_tb_ten_6 c  (cost=0.00..79149.65 rows=475759 width=57) (actual time=5.170..58.785 rows=475759 loops=1)
  243.         Filter: ((id_ref = 53) AND (id_tenant = 6))
  244. Planning Time: 0.130 ms
  245. JIT:
  246.   Functions: 7
  247.   Options: Inlining false, Optimization false, Expressions true, Deforming true
  248.   Timing: Generation 0.976 ms, Inlining 0.000 ms, Optimization 0.389 ms, Emission 4.530 ms, Total 5.895 ms
  249. Execution Time: 109.378 ms
  250. */
  251.  
  252. --Second attempt: access by Parent's descriptive field.
  253. --Result KO: The optimizer underestimates the resulting records out of the join by 1000x!
  254. explain analyze
  255. select * from child_tb c join parent_tb p on (c.id_ref=p.id)
  256. where p.descr='6ea8c6d951f3c8371662509ff8a5e37e' --fill with the appropriate value at point (1)
  257. and c.id_tenant=6;
  258.  
  259. /*
  260. Gather  (cost=1004.60..37169.40 rows=42 width=98) (actual time=0.495..195.430 rows=475759 loops=1)
  261.   Workers Planned: 2
  262.   Workers Launched: 0
  263.   ->  Hash Join  (cost=4.60..36165.20 rows=18 width=98) (actual time=0.047..160.268 rows=475759 loops=1)
  264.         Hash Cond: (c.id_ref = p.id)
  265.         ->  Parallel Seq Scan on child_tb_ten_6 c  (cost=0.00..35637.52 rows=198233 width=57) (actual time=0.009..60.365 rows=475759 loops=1)
  266.               Filter: (id_tenant = 6)
  267.         ->  Hash  (cost=4.44..4.44 rows=1 width=41) (actual time=0.031..0.033 rows=1 loops=1)
  268.               Buckets: 1024  Batches: 1  Memory Usage: 9kB
  269.               ->  Index Scan using idx_parent_tb_desc on parent_tb p  (cost=0.29..4.44 rows=1 width=41) (actual time=0.027..0.028 rows=1 loops=1)
  270.                     Index Cond: (descr = '6ea8c6d951f3c8371662509ff8a5e37e'::text)
  271. Planning Time: 0.898 ms
  272. Execution Time: 213.586 ms
  273. */
  274.  
  275. /*
  276.  * Third attempt: let's see if what happens once we "obfuscate" the Parent accessing value with a materialized CTE
  277.  * Result OK: The optimizer (correcly) assumes a flat data distribution dividing the total number of rows of the
  278.  *            Tenant 6 partition to the the global number of distint id_refs:
  279.  
  280.     select
  281.         (select count(1) from child_tb where id_tenant=6) --total number of rows in child table per tenant=6.
  282.         /
  283.         (select count(distinct id_ref) from child_tb) --number of distinct id_ref in child table. 61 id_refs
  284.     --475759 rows   /
  285.     --61 id_refs    =
  286.     --7799         
  287.     ;
  288. */
  289.  
  290. explain analyze
  291. with par as materialized (
  292.     select *
  293.     from parent_tb
  294.     where descr='6ea8c6d951f3c8371662509ff8a5e37e' --fill with the appropriate value at point (1)
  295. )
  296. select * from child_tb c join par p on (c.id_ref=p.id)
  297. where c.id_tenant=6;
  298.  
  299.  
  300. /*
  301. Hash Join  (cost=4.90..80919.09 rows=7799 width=97) (actual time=0.051..125.731 rows=475759 loops=1)
  302.   Hash Cond: (c.id_ref = p.id)
  303.   CTE par
  304.     ->  Index Scan using idx_parent_tb_desc on parent_tb  (cost=0.29..4.44 rows=1 width=41) (actual time=0.028..0.029 rows=1 loops=1)
  305.           Index Cond: (descr = '6ea8c6d951f3c8371662509ff8a5e37e'::text)
  306.   ->  Seq Scan on child_tb_ten_6 c  (cost=0.00..77960.25 rows=475759 width=57) (actual time=0.013..45.615 rows=475759 loops=1)
  307.         Filter: (id_tenant = 6)
  308.   ->  Hash  (cost=0.30..0.30 rows=1 width=40) (actual time=0.033..0.034 rows=1 loops=1)
  309.         Buckets: 1024  Batches: 1  Memory Usage: 9kB
  310.         ->  CTE Scan on par p  (cost=0.00..0.30 rows=1 width=40) (actual time=0.029..0.030 rows=1 loops=1)
  311. Planning Time: 0.155 ms
  312. Execution Time: 140.000 ms
  313. */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement