Advertisement
anchormodeling

PostgreSQL Table Elimination

Mar 27th, 2011
928
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. -------------------------------------------------------------------------------
  2. -- Test run on PostgreSQL 9.1.2 on 2012-02-21
  3. --
  4. -- CONCLUSION: Max in a sub-select seems to be the best approach in general.
  5. -- It is the only approach that enables table elimination. For particular
  6. -- queries using a row_number() = 1 can be faster, but it is not in general.
  7. --
  8. -- Clustering gave no performance boost whatsoever, likely because it does not
  9. -- rearrange the actual table data in PostgreSQL.
  10. -------------------------------------------------------------------------------
  11.  
  12. DROP VIEW IF EXISTS rowGenerator;
  13. DROP TABLE IF EXISTS ANAT2_AnchorAttribute2 CASCADE;
  14. DROP TABLE IF EXISTS ANAT1_AnchorAttribute1 CASCADE;
  15. DROP TABLE IF EXISTS AN_Anchor CASCADE;
  16.  
  17. CREATE TABLE IF NOT EXISTS AN_Anchor (
  18.     AN_ID int NOT NULL,
  19.     CONSTRAINT pk_AN_Anchor PRIMARY KEY(AN_ID)
  20. );
  21. --ALTER TABLE AN_Anchor CLUSTER ON pk_AN_Anchor;
  22.  
  23. CREATE TABLE IF NOT EXISTS ANAT1_AnchorAttribute1 (
  24.     AN_ID int NOT NULL,
  25.     ANAT1_AnchorAttribute1Value char(10) NOT NULL,
  26.     CONSTRAINT pk_ANAT1_AnchorAttribute1
  27.         PRIMARY KEY(AN_ID),
  28.     CONSTRAINT fk_ANAT1_AnchorAttribute1
  29.         FOREIGN KEY(AN_ID) REFERENCES AN_Anchor(AN_ID)
  30. );
  31. --ALTER TABLE ANAT1_AnchorAttribute1 CLUSTER ON pk_ANAT1_AnchorAttribute1;
  32.  
  33. CREATE TABLE IF NOT EXISTS ANAT2_AnchorAttribute2 (
  34.     AN_ID int NOT NULL,
  35.     ANAT2_AnchorAttribute2Value char(10) NOT NULL,
  36.     ANAT2_FromDate date NOT NULL,
  37.     CONSTRAINT pk_ANAT2_AnchorAttribute2
  38.         PRIMARY KEY(AN_ID, ANAT2_FromDate),
  39.     CONSTRAINT fk_ANAT2_AnchorAttribute2
  40.         FOREIGN KEY(AN_ID) REFERENCES AN_Anchor(AN_ID)
  41. );
  42. --ALTER TABLE ANAT2_AnchorAttribute2 CLUSTER ON pk_ANAT2_AnchorAttribute2;
  43.  
  44. -------------------------------------------------------------------------------
  45. -- The following will be used to generate som sample data.
  46. -- Anchor:  1 million rows
  47. -- Attribute1:  1 million rows (three values)
  48. -- Attribute2:  3 million rows (three values at three different changing times)
  49. -- It takes approximately 5 minutes to populate tables
  50. -------------------------------------------------------------------------------
  51.  
  52. CREATE VIEW rowGenerator
  53. AS
  54. WITH RECURSIVE gen(n) AS (
  55.     VALUES (1)
  56. UNION ALL
  57.     SELECT n+1 FROM gen WHERE n < 1000000
  58. )
  59. SELECT
  60.     n
  61. FROM
  62.     gen;
  63.  
  64. INSERT INTO AN_Anchor
  65. SELECT
  66.     n
  67. FROM
  68.     rowGenerator;
  69.  
  70. INSERT INTO ANAT1_AnchorAttribute1
  71. SELECT
  72.     n,
  73.     CASE n%3
  74.         WHEN 0 THEN 'Red'
  75.         WHEN 1 THEN 'Green'
  76.         WHEN 2 THEN 'Blue'
  77.     END
  78. FROM
  79.     rowGenerator;
  80.  
  81. INSERT INTO ANAT2_AnchorAttribute2
  82. SELECT
  83.     n,
  84.     'Car',
  85.     to_date('20120101','yyyymmdd')
  86. FROM
  87.     rowGenerator;
  88.  
  89. INSERT INTO ANAT2_AnchorAttribute2
  90. SELECT
  91.     n,
  92.     'Bus',
  93.     to_date('20120102','yyyymmdd')
  94. FROM
  95.     rowGenerator;
  96.  
  97. INSERT INTO ANAT2_AnchorAttribute2
  98. SELECT
  99.     n,
  100.     'Train',
  101.     to_date('20120103','yyyymmdd')
  102. FROM
  103.     rowGenerator;  
  104.  
  105.  
  106. -------------------------------------------------------------------------------
  107. -- Refresh the clustered indexes (1 minute).
  108. -- Update statistics (20 seconds).
  109. -------------------------------------------------------------------------------
  110. /*
  111. CLUSTER; -- needs to be run separately
  112.  
  113. VACUUM ANALYZE;
  114. */
  115. -------------------------------------------------------------------------------
  116. -- Looking at some rows from the data.
  117. -------------------------------------------------------------------------------
  118.  
  119. SELECT * FROM AN_Anchor ORDER BY AN_ID LIMIT 10;
  120. SELECT * FROM ANAT1_AnchorAttribute1 ORDER BY AN_ID LIMIT 10;
  121. SELECT * FROM ANAT2_AnchorAttribute2 ORDER BY AN_ID LIMIT 10;
  122.  
  123. -------------------------------------------------------------------------------
  124. -- Testing which approach will be the best for finding latest data.
  125. -------------------------------------------------------------------------------
  126.  
  127. -- explain
  128. SELECT
  129.     count(*)
  130. FROM (
  131.     SELECT DISTINCT
  132.         AN_ID,
  133.         first_value(ANAT2_AnchorAttribute2Value) OVER w
  134.             AS ANAT2_AnchorAttribute2Value,
  135.         first_value(ANAT2_FromDate) OVER w
  136.             AS ANAT2_FromDate
  137.     FROM
  138.         ANAT2_AnchorAttribute2
  139.     WINDOW
  140.         w AS (PARTITION BY AN_ID ORDER BY ANAT2_FromDate DESC)
  141. ) x;
  142. -- takes 16 - 30 seconds (inconsistent)
  143.  
  144. -- explain
  145. SELECT
  146.     count(*)
  147. FROM (
  148.     SELECT
  149.         AN_ID,
  150.         ANAT2_AnchorAttribute2Value,
  151.         ANAT2_FromDate
  152.     FROM
  153.         ANAT2_AnchorAttribute2 ANAT2
  154.     WHERE
  155.         ANAT2_FromDate = (
  156.             SELECT
  157.                 max(sub.ANAT2_FromDate)
  158.             FROM
  159.                 ANAT2_AnchorAttribute2 sub
  160.             WHERE
  161.                 sub.AN_ID = ANAT2.AN_ID
  162.         )
  163. ) x;
  164. -- takes 17 seconds (consistent)
  165.  
  166. -- explain
  167. SELECT
  168.     count(*)
  169. FROM (
  170.     SELECT
  171.         ANAT2.AN_ID,
  172.         ANAT2.ANAT2_AnchorAttribute2Value,
  173.         ANAT2.ANAT2_FromDate
  174.     FROM
  175.         ANAT2_AnchorAttribute2 ANAT2
  176.     JOIN (
  177.         SELECT
  178.             AN_ID,
  179.             max(ANAT2_FromDate) AS ANAT2_FromDate
  180.         FROM
  181.             ANAT2_AnchorAttribute2
  182.         GROUP BY
  183.             AN_ID
  184.     ) sub
  185.     ON
  186.         sub.AN_ID = ANAT2.AN_ID
  187.     AND
  188.         sub.ANAT2_FromDate = ANAT2.ANAT2_FromDate
  189. ) x;
  190. -- takes 13 seconds (consistent)
  191.  
  192. -- explain
  193. SELECT
  194.     count(*)
  195. FROM (
  196.     SELECT
  197.         AN_ID,
  198.         ANAT2_AnchorAttribute2Value,
  199.         ANAT2_FromDate,
  200.         row_number() OVER (
  201.             PARTITION BY
  202.                 AN_ID
  203.             ORDER BY
  204.                 ANAT2_FromDate DESC
  205.         ) AS rowNumber
  206.     FROM
  207.         ANAT2_AnchorAttribute2
  208. ) x
  209. WHERE
  210.     rowNumber = 1;
  211. -- takes 7 - 11 seconds (inconsistent)
  212.  
  213. -------------------------------------------------------------------------------
  214. -- Create the latest view for the anchor using the best approach found above.
  215. -------------------------------------------------------------------------------
  216.  
  217. CREATE OR REPLACE VIEW lAN_Anchor_Alt1
  218. AS
  219. SELECT
  220.     AN.AN_ID,
  221.     ANAT1.ANAT1_AnchorAttribute1Value,
  222.     ANAT2.ANAT2_AnchorAttribute2Value,
  223.     ANAT2.ANAT2_FromDate
  224. FROM
  225.     AN_Anchor AN
  226. LEFT JOIN
  227.     ANAT1_AnchorAttribute1 ANAT1
  228. ON
  229.     ANAT1.AN_ID = AN.AN_ID
  230. LEFT JOIN (
  231.     SELECT
  232.         AN_ID,
  233.         ANAT2_AnchorAttribute2Value,
  234.         ANAT2_FromDate,
  235.         row_number() OVER (
  236.             PARTITION BY
  237.                 AN_ID
  238.             ORDER BY
  239.                 ANAT2_FromDate DESC
  240.         ) AS rowNumber
  241.     FROM
  242.         ANAT2_AnchorAttribute2
  243. ) ANAT2
  244. ON
  245.     ANAT2.rowNumber = 1
  246. AND
  247.     ANAT2.AN_ID = AN.AN_ID;
  248.    
  249.  
  250. SELECT
  251.     max(ANAT1_AnchorAttribute1Value)
  252. FROM
  253.     lAN_Anchor_Alt1
  254. WHERE
  255.     ANAT2_AnchorAttribute2Value = 'Train';
  256. -- takes 20 seconds to run (consistent)
  257.  
  258. SELECT
  259.     max(ANAT1_AnchorAttribute1Value)
  260. FROM
  261.     lAN_Anchor_Alt1;
  262. -- takes 12 -15 seconds to run (inconsistent)
  263.  
  264. -------------------------------------------------------------------------------
  265. -- Create the latest view for the anchor using the original approach.
  266. -------------------------------------------------------------------------------
  267.  
  268. CREATE OR REPLACE VIEW lAN_Anchor_Alt2
  269. AS
  270. SELECT
  271.     AN.AN_ID,
  272.     ANAT1.ANAT1_AnchorAttribute1Value,
  273.     ANAT2.ANAT2_AnchorAttribute2Value,
  274.     ANAT2.ANAT2_FromDate
  275. FROM
  276.     AN_Anchor AN
  277. LEFT JOIN
  278.     ANAT1_AnchorAttribute1 ANAT1
  279. ON
  280.     ANAT1.AN_ID = AN.AN_ID
  281. LEFT JOIN
  282.     ANAT2_AnchorAttribute2 ANAT2
  283. ON
  284.     ANAT2.AN_ID = AN.AN_ID
  285. AND
  286.     ANAT2.ANAT2_FromDate = (
  287.         SELECT
  288.             max(sub.ANAT2_FromDate)
  289.         FROM
  290.             ANAT2_AnchorAttribute2 sub
  291.         WHERE
  292.             sub.AN_ID = AN.AN_ID
  293.     );
  294.  
  295. SELECT
  296.     max(ANAT1_AnchorAttribute1Value)
  297. FROM
  298.     lAN_Anchor_Alt2
  299. WHERE
  300.     ANAT2_AnchorAttribute2Value = 'Train';
  301. -- takes 16 seconds to run (consistent)
  302.  
  303. SELECT
  304.     max(ANAT1_AnchorAttribute1Value)
  305. FROM
  306.     lAN_Anchor_Alt2;
  307. -- takes 2 seconds to run (consistent)
  308.  
  309. -------------------------------------------------------------------------------
  310. -- Create the latest view for the anchor using the group by approach.
  311. -------------------------------------------------------------------------------
  312.  
  313. CREATE OR REPLACE VIEW lAN_Anchor_Alt3
  314. AS
  315. SELECT
  316.     AN.AN_ID,
  317.     ANAT1.ANAT1_AnchorAttribute1Value,
  318.     ANAT2.ANAT2_AnchorAttribute2Value,
  319.     ANAT2.ANAT2_FromDate
  320. FROM
  321.     AN_Anchor AN
  322. LEFT JOIN
  323.     ANAT1_AnchorAttribute1 ANAT1
  324. ON
  325.     ANAT1.AN_ID = AN.AN_ID
  326. LEFT JOIN (
  327.     SELECT
  328.         aANAT2.AN_ID,
  329.         aANAT2.ANAT2_AnchorAttribute2Value,
  330.         aANAT2.ANAT2_FromDate
  331.     FROM
  332.         ANAT2_AnchorAttribute2 aANAT2
  333.     JOIN (
  334.         SELECT
  335.             AN_ID,
  336.             max(ANAT2_FromDate) AS ANAT2_FromDate
  337.         FROM
  338.             ANAT2_AnchorAttribute2
  339.         GROUP BY
  340.             AN_ID
  341.     ) bANAT2
  342.     ON
  343.         bANAT2.AN_ID = aANAT2.AN_ID
  344.     AND
  345.         bANAT2.ANAT2_FromDate = aANAT2.ANAT2_FromDate
  346. ) ANAT2
  347. ON
  348.     ANAT2.AN_ID = AN.AN_ID;
  349.  
  350. SELECT
  351.     max(ANAT1_AnchorAttribute1Value)
  352. FROM
  353.     lAN_Anchor_Alt3
  354. WHERE
  355.     ANAT2_AnchorAttribute2Value = 'Train';
  356. -- takes 19 seconds to run (consistent)
  357.  
  358. SELECT
  359.     max(ANAT1_AnchorAttribute1Value)
  360. FROM
  361.     lAN_Anchor_Alt3;
  362. -- takes 16 seconds to run (consistent)
  363.  
  364. -------------------------------------------------------------------------------
  365. -- Compare some explains when selecting all columns.
  366. -------------------------------------------------------------------------------
  367.  
  368. explain select * from lAN_Anchor_Alt1;
  369. /*
  370. "Merge Left Join  (cost=556437.96..733458.71 rows=1000000 width=30)"
  371. "  Merge Cond: (an.an_id = anat2.an_id)"
  372. "  ->  Merge Left Join  (cost=1.49..76797.25 rows=1000000 width=15)"
  373. "        Merge Cond: (an.an_id = anat1.an_id)"
  374. "        ->  Index Scan using pk_an_anchor on an_anchor an  (cost=0.00..30408.36 rows=1000000 width=4)"
  375. "        ->  Index Scan using pk_anat1_anchorattribute1 on anat1_anchorattribute1 anat1  (cost=0.00..31389.36 rows=1000000 width=15)"
  376. "  ->  Materialize  (cost=556436.47..653973.97 rows=15000 width=19)"
  377. "        ->  Subquery Scan on anat2  (cost=556436.47..653936.47 rows=15000 width=19)"
  378. "              Filter: (anat2.rownumber = 1)"
  379. "              ->  WindowAgg  (cost=556436.47..616436.47 rows=3000000 width=19)"
  380. "                    ->  Sort  (cost=556436.47..563936.47 rows=3000000 width=19)"
  381. "                          Sort Key: anat2_anchorattribute2.an_id, anat2_anchorattribute2.anat2_fromdate"
  382. "                          ->  Seq Scan on anat2_anchorattribute2  (cost=0.00..49109.00 rows=3000000 width=19)"
  383. */
  384.  
  385. explain select * from lAN_Anchor_Alt2;
  386. /*
  387. "Merge Left Join  (cost=556437.96..11943249.83 rows=1000000 width=30)"
  388. "  Merge Cond: (an.an_id = anat2.an_id)"
  389. "  Join Filter: (anat2.anat2_fromdate = (SubPlan 2))"
  390. "  ->  Merge Left Join  (cost=1.49..76797.25 rows=1000000 width=15)"
  391. "        Merge Cond: (an.an_id = anat1.an_id)"
  392. "        ->  Index Scan using pk_an_anchor on an_anchor an  (cost=0.00..30408.36 rows=1000000 width=4)"
  393. "        ->  Index Scan using pk_anat1_anchorattribute1 on anat1_anchorattribute1 anat1  (cost=0.00..31389.36 rows=1000000 width=15)"
  394. "  ->  Materialize  (cost=556436.47..571436.47 rows=3000000 width=19)"
  395. "        ->  Sort  (cost=556436.47..563936.47 rows=3000000 width=19)"
  396. "              Sort Key: anat2.an_id"
  397. "              ->  Seq Scan on anat2_anchorattribute2 anat2  (cost=0.00..49109.00 rows=3000000 width=19)"
  398. "  SubPlan 2"
  399. "    ->  Result  (cost=4.05..4.06 rows=1 width=0)"
  400. "          InitPlan 1 (returns $1)"
  401. "            ->  Limit  (cost=0.00..4.05 rows=1 width=4)"
  402. "                  ->  Index Scan Backward using pk_anat2_anchorattribute2 on anat2_anchorattribute2 sub  (cost=0.00..12.14 rows=3 width=4)"
  403. "                        Index Cond: ((an_id = an.an_id) AND (anat2_fromdate IS NOT NULL))"
  404. */
  405.  
  406. explain select * from lAN_Anchor_Alt3;
  407. /*
  408. "Merge Left Join  (cost=1051344.42..1237691.33 rows=1000000 width=30)"
  409. "  Merge Cond: (an.an_id = aanat2.an_id)"
  410. "  ->  Merge Left Join  (cost=1.49..76797.25 rows=1000000 width=15)"
  411. "        Merge Cond: (an.an_id = anat1.an_id)"
  412. "        ->  Index Scan using pk_an_anchor on an_anchor an  (cost=0.00..30408.36 rows=1000000 width=4)"
  413. "        ->  Index Scan using pk_anat1_anchorattribute1 on anat1_anchorattribute1 anat1  (cost=0.00..31389.36 rows=1000000 width=15)"
  414. "  ->  Materialize  (cost=1051342.93..1158217.92 rows=15000 width=19)"
  415. "        ->  Merge Join  (cost=1051342.93..1158180.42 rows=15000 width=19)"
  416. "              Merge Cond: (anat2_anchorattribute2.an_id = aanat2.an_id)"
  417. "              Join Filter: (aanat2.anat2_fromdate = (max(anat2_anchorattribute2.anat2_fromdate)))"
  418. "              ->  GroupAggregate  (cost=494906.47..528223.13 rows=1081666 width=8)"
  419. "                    ->  Sort  (cost=494906.47..502406.47 rows=3000000 width=8)"
  420. "                          Sort Key: anat2_anchorattribute2.an_id"
  421. "                          ->  Seq Scan on anat2_anchorattribute2  (cost=0.00..49109.00 rows=3000000 width=8)"
  422. "              ->  Materialize  (cost=556436.47..571436.47 rows=3000000 width=19)"
  423. "                    ->  Sort  (cost=556436.47..563936.47 rows=3000000 width=19)"
  424. "                          Sort Key: aanat2.an_id"
  425. "                          ->  Seq Scan on anat2_anchorattribute2 aanat2  (cost=0.00..49109.00 rows=3000000 width=19)"
  426. */
  427.  
  428. -------------------------------------------------------------------------------
  429. -- Verify table elimination by selecting only one column.
  430. -------------------------------------------------------------------------------
  431.  
  432. explain select ANAT1_AnchorAttribute1Value from lAN_Anchor_Alt1;
  433. -- NOTE THAT THERE IS _NO_ TABLE ELIMINIATION OF THE SECOND ATTRIBUTE!
  434. /*
  435. "Merge Left Join  (cost=556437.96..733458.71 rows=1000000 width=11)"
  436. "  Merge Cond: (an.an_id = anat2.an_id)"
  437. "  ->  Merge Left Join  (cost=1.49..76797.25 rows=1000000 width=15)"
  438. "        Merge Cond: (an.an_id = anat1.an_id)"
  439. "        ->  Index Scan using pk_an_anchor on an_anchor an  (cost=0.00..30408.36 rows=1000000 width=4)"
  440. "        ->  Index Scan using pk_anat1_anchorattribute1 on anat1_anchorattribute1 anat1  (cost=0.00..31389.36 rows=1000000 width=15)"
  441. "  ->  Materialize  (cost=556436.47..653973.97 rows=15000 width=4)"
  442. "        ->  Subquery Scan on anat2  (cost=556436.47..653936.47 rows=15000 width=4)"
  443. "              Filter: (anat2.rownumber = 1)"
  444. "              ->  WindowAgg  (cost=556436.47..616436.47 rows=3000000 width=19)"
  445. "                    ->  Sort  (cost=556436.47..563936.47 rows=3000000 width=19)"
  446. "                          Sort Key: anat2_anchorattribute2.an_id, anat2_anchorattribute2.anat2_fromdate"
  447. "                          ->  Seq Scan on anat2_anchorattribute2  (cost=0.00..49109.00 rows=3000000 width=19)"
  448. */
  449.  
  450. explain select ANAT1_AnchorAttribute1Value from lAN_Anchor_Alt2;
  451. -- NOTE THAT THERE _IS_ TABLE ELIMINIATION OF THE SECOND ATTRIBUTE!
  452. /*
  453. "Merge Left Join  (cost=1.49..76797.25 rows=1000000 width=11)"
  454. "  Merge Cond: (an.an_id = anat1.an_id)"
  455. "  ->  Index Scan using pk_an_anchor on an_anchor an  (cost=0.00..30408.36 rows=1000000 width=4)"
  456. "  ->  Index Scan using pk_anat1_anchorattribute1 on anat1_anchorattribute1 anat1  (cost=0.00..31389.36 rows=1000000 width=15)"
  457. */
  458.  
  459. explain select ANAT1_AnchorAttribute1Value from lAN_Anchor_Alt3;
  460. -- NOTE THAT THERE IS _NO_ TABLE ELIMINIATION OF THE SECOND ATTRIBUTE!
  461. /*
  462. "Merge Left Join  (cost=989814.42..1176161.33 rows=1000000 width=11)"
  463. "  Merge Cond: (an.an_id = aanat2.an_id)"
  464. "  ->  Merge Left Join  (cost=1.49..76797.25 rows=1000000 width=15)"
  465. "        Merge Cond: (an.an_id = anat1.an_id)"
  466. "        ->  Index Scan using pk_an_anchor on an_anchor an  (cost=0.00..30408.36 rows=1000000 width=4)"
  467. "        ->  Index Scan using pk_anat1_anchorattribute1 on anat1_anchorattribute1 anat1  (cost=0.00..31389.36 rows=1000000 width=15)"
  468. "  ->  Materialize  (cost=989812.93..1096687.92 rows=15000 width=4)"
  469. "        ->  Merge Join  (cost=989812.93..1096650.42 rows=15000 width=4)"
  470. "              Merge Cond: (anat2_anchorattribute2.an_id = aanat2.an_id)"
  471. "              Join Filter: (aanat2.anat2_fromdate = (max(anat2_anchorattribute2.anat2_fromdate)))"
  472. "              ->  GroupAggregate  (cost=494906.47..528223.13 rows=1081666 width=8)"
  473. "                    ->  Sort  (cost=494906.47..502406.47 rows=3000000 width=8)"
  474. "                          Sort Key: anat2_anchorattribute2.an_id"
  475. "                          ->  Seq Scan on anat2_anchorattribute2  (cost=0.00..49109.00 rows=3000000 width=8)"
  476. "              ->  Materialize  (cost=494906.47..509906.47 rows=3000000 width=8)"
  477. "                    ->  Sort  (cost=494906.47..502406.47 rows=3000000 width=8)"
  478. "                          Sort Key: aanat2.an_id"
  479. "                          ->  Seq Scan on anat2_anchorattribute2 aanat2  (cost=0.00..49109.00 rows=3000000 width=8)"
  480. */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement