Advertisement
bdill

Untitled

Mar 19th, 2025 (edited)
137
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
SQL 2.79 KB | Software | 0 0
  1. -- Install DuckDB command line from
  2. -- https://duckdb.org/docs/installation/
  3.  
  4. -- Download the data set:
  5. -- https://www.stats.govt.nz/large-datasets/csv-files-for-download/
  6. -- https://www3.stats.govt.nz/2018census/Age-sex-by-ethnic-group-grouped-total-responses-census-usually-resident-population-counts-2006-2013-2018-Censuses-RC-TA-SA2-DHB.zip
  7.  
  8. -- Run DuckDB from the command line or in a GUI that supports DuckDB (ex: DBeaver Community edition)
  9. -- https://dbeaver.io/download/
  10. -- ================================================================================
  11.  
  12. SET VARIABLE path = "C:\Users\bdill\Downloads\NZ-census-2006-2013-2018\";
  13. -- ================================================================================
  14. CREATE OR REPLACE TABLE fact_nz AS    SELECT * FROM read_csv( CONCAT(getvariable('path'), 'Data8277.csv') );   -- 837 MB in 4.5 seconds
  15. CREATE OR REPLACE TABLE dim_age AS    SELECT * FROM read_csv( CONCAT(getvariable('path'), 'DimenLookupAge8277.csv') );  
  16. CREATE OR REPLACE TABLE dim_area AS   SELECT * FROM read_csv( CONCAT(getvariable('path'), 'DimenLookupArea8277.csv') );  
  17. CREATE OR REPLACE TABLE dim_ethnic AS SELECT * FROM read_csv( CONCAT(getvariable('path'), 'DimenLookupEthnic8277.csv') );  
  18. CREATE OR REPLACE TABLE dim_sex AS    SELECT * FROM read_csv( CONCAT(getvariable('path'), 'DimenLookupSex8277.csv') );   --1,2,9
  19. CREATE OR REPLACE TABLE dim_year AS   SELECT * FROM read_csv( CONCAT(getvariable('path'), 'DimenLookupYear8277.csv') );  
  20.  
  21. -- ================================================================================
  22. -- convert the 800+ MB csv to parquet just to see how fast it is and how much compression we get.
  23. COPY (SELECT * FROM fact_nz) -- 837 MB -> 75 MB IN 2.1 sec
  24. TO "C:\Users\bdill\Downloads\NZ-census-2006-2013-2018\Data8777.parquet" (FORMAT parquet);
  25.  
  26. -- ================================================================================
  27. -- SELECT counts for all areas, sex and ethnicities
  28. -- GROUP BY ages
  29. -- and PIVOT over the 3 years
  30. SELECT *
  31. FROM (
  32.     SELECT fact_nz.YEAR
  33.     --, fact_nz.age
  34.     , da.description AS age_desc
  35.     , da.SortOrder
  36.     --, fact_nz.ethnic
  37.     --, fact_nz.sex
  38.     --, fact_nz.area
  39.     , fact_nz.count AS n
  40.     FROM fact_nz
  41.     JOIN dim_age AS da ON da.code = fact_nz.age
  42.     WHERE area IN ('9999') -- 9999 = Total - New Zealand by Regional Council/SA2
  43.     AND sex = '9' -- 9 = Total people - sex
  44.     AND ethnic = '9999' -- 9999 = Total people
  45.     --AND age IN ('999999') = Total people - age group
  46.     --AND age IN ('1', '2', '3', '4') --
  47.     --AND age IN (SELECT code FROM dim_age WHERE LEN(code) = 2) -- double digit age codes - 5 year bins
  48.     AND age IN (SELECT code FROM dim_age WHERE LEN(code) = 3 AND code < '130') -- triple digit age codes - single year bins
  49. ) AS X
  50.     PIVOT (
  51.       MAX(N) FOR "YEAR" IN ('2006', '2013', '2018')
  52. ) AS PIV
  53. ORDER BY SortOrder
  54.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement