Untitled

-- Intel(R) Core(TM) i5-3570K CPU @ 3.40GHz
-- 16 GB RAM
-- SSD OCZ Vertex 2
-- Linux black 3.12.8-031208-generic #201401151952 SMP Thu Jan 16 00:53:39 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux
-- Postgres 9.3.4

$ createdb aggtest
$ psql aggtest
psql (9.3.4)
Type "help" for help.

aggtest=# create table randomdata (created_on timestamp without time zone, value double precision);
CREATE TABLE
aggtest=# copy randomdata from '/home/prook/Downloads/randomData.csv' with csv header;
COPY 50000000

-- Import trval asi 2 minuty (od oka, nemereno)

-- Test A: 33.55 s

aggtest=# explain analyze verbose select extract(year from created_on), extract(doy from created_on), count(*), avg(value), min(value), max(value) from randomdata group by extract(year from created_on), extract(doy from created_on) order by extract(year from created_on), extract(doy from created_on);
                                                                  QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------
 Sort  (cost=1705421.15..1705421.65 rows=200 width=16) (actual time=33555.000..33555.013 rows=367 loops=1)
   Output: (date_part('year'::text, created_on)), (date_part('doy'::text, created_on)), (count(*)), (avg(value)), (min(value)), (max(value))
   Sort Key: (date_part('year'::text, randomdata.created_on)), (date_part('doy'::text, randomdata.created_on))
   Sort Method: quicksort  Memory: 53kB
   ->  HashAggregate  (cost=1705410.01..1705413.51 rows=200 width=16) (actual time=33554.760..33554.805 rows=367 loops=1)
         Output: (date_part('year'::text, created_on)), (date_part('doy'::text, created_on)), count(*), avg(value), min(value), max(value)
         ->  Seq Scan on public.randomdata  (cost=0.00..987840.50 rows=47837967 width=16) (actual time=0.045..17491.283 rows=50000000 loops=1)
               Output: date_part('year'::text, created_on), date_part('doy'::text, created_on), value
 Total runtime: 33555.169 ms
(9 rows)

-- Test B (bez indexu): 4.5 s

aggtest=# explain analyze verbose select extract(year from created_on), extract(doy from created_on), extract(hour from created_on), count(*), avg(value), min(value), max(value) from randomdata where created_on between '2012-07-16 00:00:00' and '2012-07-16 01:00:00' group by extract(year from created_on), extract(doy from created_on), extract(hour from created_on) order by extract(year from created_on), extract(doy from created_on), extract(hour from created_on);
                                                                                       QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Sort  (cost=1026521.03..1026521.03 rows=1 width=16) (actual time=4545.137..4545.138 rows=2 loops=1)
   Output: (date_part('year'::text, created_on)), (date_part('doy'::text, created_on)), (date_part('hour'::text, created_on)), (count(*)), (avg(value)), (min(value)), (max(value))
   Sort Key: (date_part('year'::text, randomdata.created_on)), (date_part('doy'::text, randomdata.created_on)), (date_part('hour'::text, randomdata.created_on))
   Sort Method: quicksort  Memory: 25kB
   ->  HashAggregate  (cost=1026521.00..1026521.02 rows=1 width=16) (actual time=4545.128..4545.128 rows=2 loops=1)
         Output: (date_part('year'::text, created_on)), (date_part('doy'::text, created_on)), (date_part('hour'::text, created_on)), count(*), avg(value), min(value), max(value)
         ->  Seq Scan on public.randomdata  (cost=0.00..1022146.00 rows=250000 width=16) (actual time=3.485..4541.415 rows=5662 loops=1)
               Output: date_part('year'::text, created_on), date_part('doy'::text, created_on), date_part('hour'::text, created_on), value
               Filter: ((randomdata.created_on >= '2012-07-16 00:00:00'::timestamp without time zone) AND (randomdata.created_on <= '2012-07-16 01:00:00'::timestamp without time zone))
               Rows Removed by Filter: 49994338
 Total runtime: 4545.198 ms
(11 rows)

-- Test B (s indexem na created_on, prvni spusteni): 0.06 s

aggtest=# create index created_on_idx on randomdata (created_on);
CREATE INDEX
aggtest=# explain analyze verbose select extract(year from created_on), extract(doy from created_on), extract(hour from created_on), count(*), avg(value), min(value), max(value) from randomdata where created_on between '2012-07-16 00:00:00' and '2012-07-16 01:00:00' group by extract(year from created_on), extract(doy from created_on), extract(hour from created_on) order by extract(year from created_on), extract(doy from created_on), extract(hour from created_on);
                                                                                            QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Sort  (cost=291230.09..291230.09 rows=1 width=16) (actual time=59.960..59.961 rows=2 loops=1)
   Output: (date_part('year'::text, created_on)), (date_part('doy'::text, created_on)), (date_part('hour'::text, created_on)), (count(*)), (avg(value)), (min(value)), (max(value))
   Sort Key: (date_part('year'::text, randomdata.created_on)), (date_part('doy'::text, randomdata.created_on)), (date_part('hour'::text, randomdata.created_on))
   Sort Method: quicksort  Memory: 25kB
   ->  HashAggregate  (cost=291230.06..291230.08 rows=1 width=16) (actual time=59.950..59.950 rows=2 loops=1)
         Output: (date_part('year'::text, created_on)), (date_part('doy'::text, created_on)), (date_part('hour'::text, created_on)), count(*), avg(value), min(value), max(value)
         ->  Bitmap Heap Scan on public.randomdata  (cost=5307.06..286855.06 rows=250000 width=16) (actual time=2.731..54.738 rows=5662 loops=1)
               Output: date_part('year'::text, created_on), date_part('doy'::text, created_on), date_part('hour'::text, created_on), value
               Recheck Cond: ((randomdata.created_on >= '2012-07-16 00:00:00'::timestamp without time zone) AND (randomdata.created_on <= '2012-07-16 01:00:00'::timestamp without time zone))
               ->  Bitmap Index Scan on created_on_idx  (cost=0.00..5244.56 rows=250000 width=0) (actual time=1.415..1.415 rows=5662 loops=1)
                     Index Cond: ((randomdata.created_on >= '2012-07-16 00:00:00'::timestamp without time zone) AND (randomdata.created_on <= '2012-07-16 01:00:00'::timestamp without time zone))
 Total runtime: 60.044 ms
(12 rows)

-- Test B (s indexem na created_on, dalsi spusteni): 0.015 s

aggtest=# explain analyze verbose select extract(year from created_on), extract(doy from created_on), extract(hour from created_on), count(*), avg(value), min(value), max(value) from randomdata where created_on between '2012-07-16 00:00:00' and '2012-07-16 01:00:00' group by extract(year from created_on), extract(doy from created_on), extract(hour from created_on) order by extract(year from created_on), extract(doy from created_on), extract(hour from created_on);
                                                                                            QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Sort  (cost=291230.09..291230.09 rows=1 width=16) (actual time=15.922..15.923 rows=2 loops=1)
   Output: (date_part('year'::text, created_on)), (date_part('doy'::text, created_on)), (date_part('hour'::text, created_on)), (count(*)), (avg(value)), (min(value)), (max(value))
   Sort Key: (date_part('year'::text, randomdata.created_on)), (date_part('doy'::text, randomdata.created_on)), (date_part('hour'::text, randomdata.created_on))
   Sort Method: quicksort  Memory: 25kB
   ->  HashAggregate  (cost=291230.06..291230.08 rows=1 width=16) (actual time=15.913..15.915 rows=2 loops=1)
         Output: (date_part('year'::text, created_on)), (date_part('doy'::text, created_on)), (date_part('hour'::text, created_on)), count(*), avg(value), min(value), max(value)
         ->  Bitmap Heap Scan on public.randomdata  (cost=5307.06..286855.06 rows=250000 width=16) (actual time=2.395..12.231 rows=5662 loops=1)
               Output: date_part('year'::text, created_on), date_part('doy'::text, created_on), date_part('hour'::text, created_on), value
               Recheck Cond: ((randomdata.created_on >= '2012-07-16 00:00:00'::timestamp without time zone) AND (randomdata.created_on <= '2012-07-16 01:00:00'::timestamp without time zone))
               ->  Bitmap Index Scan on created_on_idx  (cost=0.00..5244.56 rows=250000 width=0) (actual time=1.207..1.207 rows=5662 loops=1)
                     Index Cond: ((randomdata.created_on >= '2012-07-16 00:00:00'::timestamp without time zone) AND (randomdata.created_on <= '2012-07-16 01:00:00'::timestamp without time zone))
 Total runtime: 15.992 ms
(12 rows)