Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- -------------------------------------------------------------------------------
- -- Test run on PostgreSQL 9.1.2 on 2012-02-21
- --
- -- CONCLUSION: Max in a sub-select seems to be the best approach in general.
- -- It is the only approach that enables table elimination. For particular
- -- queries using a row_number() = 1 can be faster, but it is not in general.
- --
- -- Clustering gave no performance boost whatsoever, likely because it does not
- -- rearrange the actual table data in PostgreSQL.
- -------------------------------------------------------------------------------
- DROP VIEW IF EXISTS rowGenerator;
- DROP TABLE IF EXISTS ANAT2_AnchorAttribute2 CASCADE;
- DROP TABLE IF EXISTS ANAT1_AnchorAttribute1 CASCADE;
- DROP TABLE IF EXISTS AN_Anchor CASCADE;
- CREATE TABLE IF NOT EXISTS AN_Anchor (
- AN_ID int NOT NULL,
- CONSTRAINT pk_AN_Anchor PRIMARY KEY(AN_ID)
- );
- --ALTER TABLE AN_Anchor CLUSTER ON pk_AN_Anchor;
- CREATE TABLE IF NOT EXISTS ANAT1_AnchorAttribute1 (
- AN_ID int NOT NULL,
- ANAT1_AnchorAttribute1Value char(10) NOT NULL,
- CONSTRAINT pk_ANAT1_AnchorAttribute1
- PRIMARY KEY(AN_ID),
- CONSTRAINT fk_ANAT1_AnchorAttribute1
- FOREIGN KEY(AN_ID) REFERENCES AN_Anchor(AN_ID)
- );
- --ALTER TABLE ANAT1_AnchorAttribute1 CLUSTER ON pk_ANAT1_AnchorAttribute1;
- CREATE TABLE IF NOT EXISTS ANAT2_AnchorAttribute2 (
- AN_ID int NOT NULL,
- ANAT2_AnchorAttribute2Value char(10) NOT NULL,
- ANAT2_FromDate date NOT NULL,
- CONSTRAINT pk_ANAT2_AnchorAttribute2
- PRIMARY KEY(AN_ID, ANAT2_FromDate),
- CONSTRAINT fk_ANAT2_AnchorAttribute2
- FOREIGN KEY(AN_ID) REFERENCES AN_Anchor(AN_ID)
- );
- --ALTER TABLE ANAT2_AnchorAttribute2 CLUSTER ON pk_ANAT2_AnchorAttribute2;
- -------------------------------------------------------------------------------
- -- The following will be used to generate som sample data.
- -- Anchor: 1 million rows
- -- Attribute1: 1 million rows (three values)
- -- Attribute2: 3 million rows (three values at three different changing times)
- -- It takes approximately 5 minutes to populate tables
- -------------------------------------------------------------------------------
- CREATE VIEW rowGenerator
- AS
- WITH RECURSIVE gen(n) AS (
- VALUES (1)
- UNION ALL
- SELECT n+1 FROM gen WHERE n < 1000000
- )
- SELECT
- n
- FROM
- gen;
- INSERT INTO AN_Anchor
- SELECT
- n
- FROM
- rowGenerator;
- INSERT INTO ANAT1_AnchorAttribute1
- SELECT
- n,
- CASE n%3
- WHEN 0 THEN 'Red'
- WHEN 1 THEN 'Green'
- WHEN 2 THEN 'Blue'
- END
- FROM
- rowGenerator;
- INSERT INTO ANAT2_AnchorAttribute2
- SELECT
- n,
- 'Car',
- to_date('20120101','yyyymmdd')
- FROM
- rowGenerator;
- INSERT INTO ANAT2_AnchorAttribute2
- SELECT
- n,
- 'Bus',
- to_date('20120102','yyyymmdd')
- FROM
- rowGenerator;
- INSERT INTO ANAT2_AnchorAttribute2
- SELECT
- n,
- 'Train',
- to_date('20120103','yyyymmdd')
- FROM
- rowGenerator;
- -------------------------------------------------------------------------------
- -- Refresh the clustered indexes (1 minute).
- -- Update statistics (20 seconds).
- -------------------------------------------------------------------------------
- /*
- CLUSTER; -- needs to be run separately
- VACUUM ANALYZE;
- */
- -------------------------------------------------------------------------------
- -- Looking at some rows from the data.
- -------------------------------------------------------------------------------
- SELECT * FROM AN_Anchor ORDER BY AN_ID LIMIT 10;
- SELECT * FROM ANAT1_AnchorAttribute1 ORDER BY AN_ID LIMIT 10;
- SELECT * FROM ANAT2_AnchorAttribute2 ORDER BY AN_ID LIMIT 10;
- -------------------------------------------------------------------------------
- -- Testing which approach will be the best for finding latest data.
- -------------------------------------------------------------------------------
- -- explain
- SELECT
- count(*)
- FROM (
- SELECT DISTINCT
- AN_ID,
- first_value(ANAT2_AnchorAttribute2Value) OVER w
- AS ANAT2_AnchorAttribute2Value,
- first_value(ANAT2_FromDate) OVER w
- AS ANAT2_FromDate
- FROM
- ANAT2_AnchorAttribute2
- WINDOW
- w AS (PARTITION BY AN_ID ORDER BY ANAT2_FromDate DESC)
- ) x;
- -- takes 16 - 30 seconds (inconsistent)
- -- explain
- SELECT
- count(*)
- FROM (
- SELECT
- AN_ID,
- ANAT2_AnchorAttribute2Value,
- ANAT2_FromDate
- FROM
- ANAT2_AnchorAttribute2 ANAT2
- WHERE
- ANAT2_FromDate = (
- SELECT
- max(sub.ANAT2_FromDate)
- FROM
- ANAT2_AnchorAttribute2 sub
- WHERE
- sub.AN_ID = ANAT2.AN_ID
- )
- ) x;
- -- takes 17 seconds (consistent)
- -- explain
- SELECT
- count(*)
- FROM (
- SELECT
- ANAT2.AN_ID,
- ANAT2.ANAT2_AnchorAttribute2Value,
- ANAT2.ANAT2_FromDate
- FROM
- ANAT2_AnchorAttribute2 ANAT2
- JOIN (
- SELECT
- AN_ID,
- max(ANAT2_FromDate) AS ANAT2_FromDate
- FROM
- ANAT2_AnchorAttribute2
- GROUP BY
- AN_ID
- ) sub
- ON
- sub.AN_ID = ANAT2.AN_ID
- AND
- sub.ANAT2_FromDate = ANAT2.ANAT2_FromDate
- ) x;
- -- takes 13 seconds (consistent)
- -- explain
- SELECT
- count(*)
- FROM (
- SELECT
- AN_ID,
- ANAT2_AnchorAttribute2Value,
- ANAT2_FromDate,
- row_number() OVER (
- PARTITION BY
- AN_ID
- ORDER BY
- ANAT2_FromDate DESC
- ) AS rowNumber
- FROM
- ANAT2_AnchorAttribute2
- ) x
- WHERE
- rowNumber = 1;
- -- takes 7 - 11 seconds (inconsistent)
- -------------------------------------------------------------------------------
- -- Create the latest view for the anchor using the best approach found above.
- -------------------------------------------------------------------------------
- CREATE OR REPLACE VIEW lAN_Anchor_Alt1
- AS
- SELECT
- AN.AN_ID,
- ANAT1.ANAT1_AnchorAttribute1Value,
- ANAT2.ANAT2_AnchorAttribute2Value,
- ANAT2.ANAT2_FromDate
- FROM
- AN_Anchor AN
- LEFT JOIN
- ANAT1_AnchorAttribute1 ANAT1
- ON
- ANAT1.AN_ID = AN.AN_ID
- LEFT JOIN (
- SELECT
- AN_ID,
- ANAT2_AnchorAttribute2Value,
- ANAT2_FromDate,
- row_number() OVER (
- PARTITION BY
- AN_ID
- ORDER BY
- ANAT2_FromDate DESC
- ) AS rowNumber
- FROM
- ANAT2_AnchorAttribute2
- ) ANAT2
- ON
- ANAT2.rowNumber = 1
- AND
- ANAT2.AN_ID = AN.AN_ID;
- SELECT
- max(ANAT1_AnchorAttribute1Value)
- FROM
- lAN_Anchor_Alt1
- WHERE
- ANAT2_AnchorAttribute2Value = 'Train';
- -- takes 20 seconds to run (consistent)
- SELECT
- max(ANAT1_AnchorAttribute1Value)
- FROM
- lAN_Anchor_Alt1;
- -- takes 12 -15 seconds to run (inconsistent)
- -------------------------------------------------------------------------------
- -- Create the latest view for the anchor using the original approach.
- -------------------------------------------------------------------------------
- CREATE OR REPLACE VIEW lAN_Anchor_Alt2
- AS
- SELECT
- AN.AN_ID,
- ANAT1.ANAT1_AnchorAttribute1Value,
- ANAT2.ANAT2_AnchorAttribute2Value,
- ANAT2.ANAT2_FromDate
- FROM
- AN_Anchor AN
- LEFT JOIN
- ANAT1_AnchorAttribute1 ANAT1
- ON
- ANAT1.AN_ID = AN.AN_ID
- LEFT JOIN
- ANAT2_AnchorAttribute2 ANAT2
- ON
- ANAT2.AN_ID = AN.AN_ID
- AND
- ANAT2.ANAT2_FromDate = (
- SELECT
- max(sub.ANAT2_FromDate)
- FROM
- ANAT2_AnchorAttribute2 sub
- WHERE
- sub.AN_ID = AN.AN_ID
- );
- SELECT
- max(ANAT1_AnchorAttribute1Value)
- FROM
- lAN_Anchor_Alt2
- WHERE
- ANAT2_AnchorAttribute2Value = 'Train';
- -- takes 16 seconds to run (consistent)
- SELECT
- max(ANAT1_AnchorAttribute1Value)
- FROM
- lAN_Anchor_Alt2;
- -- takes 2 seconds to run (consistent)
- -------------------------------------------------------------------------------
- -- Create the latest view for the anchor using the group by approach.
- -------------------------------------------------------------------------------
- CREATE OR REPLACE VIEW lAN_Anchor_Alt3
- AS
- SELECT
- AN.AN_ID,
- ANAT1.ANAT1_AnchorAttribute1Value,
- ANAT2.ANAT2_AnchorAttribute2Value,
- ANAT2.ANAT2_FromDate
- FROM
- AN_Anchor AN
- LEFT JOIN
- ANAT1_AnchorAttribute1 ANAT1
- ON
- ANAT1.AN_ID = AN.AN_ID
- LEFT JOIN (
- SELECT
- aANAT2.AN_ID,
- aANAT2.ANAT2_AnchorAttribute2Value,
- aANAT2.ANAT2_FromDate
- FROM
- ANAT2_AnchorAttribute2 aANAT2
- JOIN (
- SELECT
- AN_ID,
- max(ANAT2_FromDate) AS ANAT2_FromDate
- FROM
- ANAT2_AnchorAttribute2
- GROUP BY
- AN_ID
- ) bANAT2
- ON
- bANAT2.AN_ID = aANAT2.AN_ID
- AND
- bANAT2.ANAT2_FromDate = aANAT2.ANAT2_FromDate
- ) ANAT2
- ON
- ANAT2.AN_ID = AN.AN_ID;
- SELECT
- max(ANAT1_AnchorAttribute1Value)
- FROM
- lAN_Anchor_Alt3
- WHERE
- ANAT2_AnchorAttribute2Value = 'Train';
- -- takes 19 seconds to run (consistent)
- SELECT
- max(ANAT1_AnchorAttribute1Value)
- FROM
- lAN_Anchor_Alt3;
- -- takes 16 seconds to run (consistent)
- -------------------------------------------------------------------------------
- -- Compare some explains when selecting all columns.
- -------------------------------------------------------------------------------
- explain select * from lAN_Anchor_Alt1;
- /*
- "Merge Left Join (cost=556437.96..733458.71 rows=1000000 width=30)"
- " Merge Cond: (an.an_id = anat2.an_id)"
- " -> Merge Left Join (cost=1.49..76797.25 rows=1000000 width=15)"
- " Merge Cond: (an.an_id = anat1.an_id)"
- " -> Index Scan using pk_an_anchor on an_anchor an (cost=0.00..30408.36 rows=1000000 width=4)"
- " -> Index Scan using pk_anat1_anchorattribute1 on anat1_anchorattribute1 anat1 (cost=0.00..31389.36 rows=1000000 width=15)"
- " -> Materialize (cost=556436.47..653973.97 rows=15000 width=19)"
- " -> Subquery Scan on anat2 (cost=556436.47..653936.47 rows=15000 width=19)"
- " Filter: (anat2.rownumber = 1)"
- " -> WindowAgg (cost=556436.47..616436.47 rows=3000000 width=19)"
- " -> Sort (cost=556436.47..563936.47 rows=3000000 width=19)"
- " Sort Key: anat2_anchorattribute2.an_id, anat2_anchorattribute2.anat2_fromdate"
- " -> Seq Scan on anat2_anchorattribute2 (cost=0.00..49109.00 rows=3000000 width=19)"
- */
- explain select * from lAN_Anchor_Alt2;
- /*
- "Merge Left Join (cost=556437.96..11943249.83 rows=1000000 width=30)"
- " Merge Cond: (an.an_id = anat2.an_id)"
- " Join Filter: (anat2.anat2_fromdate = (SubPlan 2))"
- " -> Merge Left Join (cost=1.49..76797.25 rows=1000000 width=15)"
- " Merge Cond: (an.an_id = anat1.an_id)"
- " -> Index Scan using pk_an_anchor on an_anchor an (cost=0.00..30408.36 rows=1000000 width=4)"
- " -> Index Scan using pk_anat1_anchorattribute1 on anat1_anchorattribute1 anat1 (cost=0.00..31389.36 rows=1000000 width=15)"
- " -> Materialize (cost=556436.47..571436.47 rows=3000000 width=19)"
- " -> Sort (cost=556436.47..563936.47 rows=3000000 width=19)"
- " Sort Key: anat2.an_id"
- " -> Seq Scan on anat2_anchorattribute2 anat2 (cost=0.00..49109.00 rows=3000000 width=19)"
- " SubPlan 2"
- " -> Result (cost=4.05..4.06 rows=1 width=0)"
- " InitPlan 1 (returns $1)"
- " -> Limit (cost=0.00..4.05 rows=1 width=4)"
- " -> Index Scan Backward using pk_anat2_anchorattribute2 on anat2_anchorattribute2 sub (cost=0.00..12.14 rows=3 width=4)"
- " Index Cond: ((an_id = an.an_id) AND (anat2_fromdate IS NOT NULL))"
- */
- explain select * from lAN_Anchor_Alt3;
- /*
- "Merge Left Join (cost=1051344.42..1237691.33 rows=1000000 width=30)"
- " Merge Cond: (an.an_id = aanat2.an_id)"
- " -> Merge Left Join (cost=1.49..76797.25 rows=1000000 width=15)"
- " Merge Cond: (an.an_id = anat1.an_id)"
- " -> Index Scan using pk_an_anchor on an_anchor an (cost=0.00..30408.36 rows=1000000 width=4)"
- " -> Index Scan using pk_anat1_anchorattribute1 on anat1_anchorattribute1 anat1 (cost=0.00..31389.36 rows=1000000 width=15)"
- " -> Materialize (cost=1051342.93..1158217.92 rows=15000 width=19)"
- " -> Merge Join (cost=1051342.93..1158180.42 rows=15000 width=19)"
- " Merge Cond: (anat2_anchorattribute2.an_id = aanat2.an_id)"
- " Join Filter: (aanat2.anat2_fromdate = (max(anat2_anchorattribute2.anat2_fromdate)))"
- " -> GroupAggregate (cost=494906.47..528223.13 rows=1081666 width=8)"
- " -> Sort (cost=494906.47..502406.47 rows=3000000 width=8)"
- " Sort Key: anat2_anchorattribute2.an_id"
- " -> Seq Scan on anat2_anchorattribute2 (cost=0.00..49109.00 rows=3000000 width=8)"
- " -> Materialize (cost=556436.47..571436.47 rows=3000000 width=19)"
- " -> Sort (cost=556436.47..563936.47 rows=3000000 width=19)"
- " Sort Key: aanat2.an_id"
- " -> Seq Scan on anat2_anchorattribute2 aanat2 (cost=0.00..49109.00 rows=3000000 width=19)"
- */
- -------------------------------------------------------------------------------
- -- Verify table elimination by selecting only one column.
- -------------------------------------------------------------------------------
- explain select ANAT1_AnchorAttribute1Value from lAN_Anchor_Alt1;
- -- NOTE THAT THERE IS _NO_ TABLE ELIMINIATION OF THE SECOND ATTRIBUTE!
- /*
- "Merge Left Join (cost=556437.96..733458.71 rows=1000000 width=11)"
- " Merge Cond: (an.an_id = anat2.an_id)"
- " -> Merge Left Join (cost=1.49..76797.25 rows=1000000 width=15)"
- " Merge Cond: (an.an_id = anat1.an_id)"
- " -> Index Scan using pk_an_anchor on an_anchor an (cost=0.00..30408.36 rows=1000000 width=4)"
- " -> Index Scan using pk_anat1_anchorattribute1 on anat1_anchorattribute1 anat1 (cost=0.00..31389.36 rows=1000000 width=15)"
- " -> Materialize (cost=556436.47..653973.97 rows=15000 width=4)"
- " -> Subquery Scan on anat2 (cost=556436.47..653936.47 rows=15000 width=4)"
- " Filter: (anat2.rownumber = 1)"
- " -> WindowAgg (cost=556436.47..616436.47 rows=3000000 width=19)"
- " -> Sort (cost=556436.47..563936.47 rows=3000000 width=19)"
- " Sort Key: anat2_anchorattribute2.an_id, anat2_anchorattribute2.anat2_fromdate"
- " -> Seq Scan on anat2_anchorattribute2 (cost=0.00..49109.00 rows=3000000 width=19)"
- */
- explain select ANAT1_AnchorAttribute1Value from lAN_Anchor_Alt2;
- -- NOTE THAT THERE _IS_ TABLE ELIMINIATION OF THE SECOND ATTRIBUTE!
- /*
- "Merge Left Join (cost=1.49..76797.25 rows=1000000 width=11)"
- " Merge Cond: (an.an_id = anat1.an_id)"
- " -> Index Scan using pk_an_anchor on an_anchor an (cost=0.00..30408.36 rows=1000000 width=4)"
- " -> Index Scan using pk_anat1_anchorattribute1 on anat1_anchorattribute1 anat1 (cost=0.00..31389.36 rows=1000000 width=15)"
- */
- explain select ANAT1_AnchorAttribute1Value from lAN_Anchor_Alt3;
- -- NOTE THAT THERE IS _NO_ TABLE ELIMINIATION OF THE SECOND ATTRIBUTE!
- /*
- "Merge Left Join (cost=989814.42..1176161.33 rows=1000000 width=11)"
- " Merge Cond: (an.an_id = aanat2.an_id)"
- " -> Merge Left Join (cost=1.49..76797.25 rows=1000000 width=15)"
- " Merge Cond: (an.an_id = anat1.an_id)"
- " -> Index Scan using pk_an_anchor on an_anchor an (cost=0.00..30408.36 rows=1000000 width=4)"
- " -> Index Scan using pk_anat1_anchorattribute1 on anat1_anchorattribute1 anat1 (cost=0.00..31389.36 rows=1000000 width=15)"
- " -> Materialize (cost=989812.93..1096687.92 rows=15000 width=4)"
- " -> Merge Join (cost=989812.93..1096650.42 rows=15000 width=4)"
- " Merge Cond: (anat2_anchorattribute2.an_id = aanat2.an_id)"
- " Join Filter: (aanat2.anat2_fromdate = (max(anat2_anchorattribute2.anat2_fromdate)))"
- " -> GroupAggregate (cost=494906.47..528223.13 rows=1081666 width=8)"
- " -> Sort (cost=494906.47..502406.47 rows=3000000 width=8)"
- " Sort Key: anat2_anchorattribute2.an_id"
- " -> Seq Scan on anat2_anchorattribute2 (cost=0.00..49109.00 rows=3000000 width=8)"
- " -> Materialize (cost=494906.47..509906.47 rows=3000000 width=8)"
- " -> Sort (cost=494906.47..502406.47 rows=3000000 width=8)"
- " Sort Key: aanat2.an_id"
- " -> Seq Scan on anat2_anchorattribute2 aanat2 (cost=0.00..49109.00 rows=3000000 width=8)"
- */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement