Advertisement
Guest User

link_pg_server.sql v0 - aka SERVICE keyword like in SPARQL

a guest
Feb 13th, 2019
243
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /* =================================================
  2. *
  3. * link_pg_server(...) (aka SERVICE keyword like in SPARQL)
  4. *
  5. * Linking open and remote (PostgreSQL) read-only
  6. * databases easily! Uses postgres_fdw the SQL/MED
  7. * implementation (Foreign Data Wrapper).
  8. * Tested with PostgreSQL 11.
  9. *
  10. * Goal is to achieve aka "SQL Endpoints" for linked data
  11. * similar to SPARQL.
  12. *
  13. * Caveats of this implementation:
  14. * - Must be superuser to create extension postgres_fdw.
  15. * - Joins are slow with large datasets.
  16. *
  17. * Credits: https://robots.thoughtbot.com/postgres-foreign-data-wrapper
  18. *
  19. * Below the original script:
  20.  
  21. -- Step 1: Preparation
  22. CREATE EXTENSION IF NOT EXISTS postgres_fdw;  -- Must be superuser
  23. DROP SERVER IF EXISTS gis_db_server CASCADE;
  24. CREATE SERVER gis_db_server  -- prepare remote DB server
  25.  FOREIGN DATA WRAPPER postgres_fdw
  26.  OPTIONS (host '152.96.80.44', port '8080', dbname 'gis_db');
  27. CREATE USER MAPPING FOR CURRENT_USER  -- Connect
  28.  SERVER gis_db_server
  29.  OPTIONS (user 'readonly', password 'Vainyils9');
  30.  
  31. -- Step 2: Import Schema
  32. DROP SCHEMA IF EXISTS gis_db_public CASCADE;
  33. CREATE SCHEMA gis_db_public;
  34. CREATE EXTENSION IF NOT EXISTS postgis;
  35. CREATE EXTENSION IF NOT EXISTS hstore;  -- needed by gis_db schema
  36. IMPORT FOREIGN SCHEMA public
  37.  FROM SERVER gis_db_server
  38.  INTO gis_db_public;
  39.  
  40. -- Step 3: Go!
  41. SELECT COUNT(*) FROM gis_db_public.osm_point;
  42. */
  43.  
  44. drop function if exists link_pg_server(text, text, text, text, text, text, text);
  45. create or replace function link_pg_server(_host text, _port text, _dbname text, _user text, _pw text, _server text, _schema text default 'public')
  46. returns text as $$
  47. declare
  48.  link_server_schema text := concat(_server, '_', _schema);
  49. begin
  50.  create extension if not exists postgres_fdw;
  51.  -- Server:
  52.  execute format('drop server if exists %I cascade', _server);
  53.  execute format('create server %I
  54. foreign data wrapper postgres_fdw
  55. options (host %L, port %L, dbname %L)', _server, _host, _port, _dbname);
  56.  execute format('create user mapping for current_user
  57. server %I
  58. options (user %L, password %L)', _server, _user, _pw);
  59.  -- Schema:
  60.  execute format('drop schema if exists %I cascade', link_server_schema);
  61.  execute format('create schema %I', link_server_schema);
  62.  execute format('import foreign schema %I
  63. from server %I
  64. into %I', _schema, _server, link_server_schema);
  65.  -- Done:
  66.  raise info 'server "%" created linking to database "%" and schema "%", now available as schema "%".', _server, _dbname, _schema, link_server_schema;
  67.  return link_server_schema;
  68. end;
  69. $$ language plpgsql;
  70.  
  71.  
  72. -- Link to remote DB (default schema=public):
  73. create extension if not exists postgis; -- Needed by gis_db
  74. create extension if not exists hstore; -- Needed by gis_db
  75. --select link_pg_server('152.96.80.44', '8080', 'gis_db', 'readonly', 'Vainyils9', 'gis_db_server');
  76. select link_pg_server('152.96.80.41', '8080', 'gis_db', 'readonly', 'Vainyils9', 'gis_db_server');
  77.  
  78. -- Test/Demo "count":
  79. select count(*) from gis_db_server_public.osm_point;
  80. -- "43460156"
  81.  
  82. -- Cleanup:
  83. --drop server if exists gis_db_server cascade;
  84. --drop schema if exists gis_db_server_public cascade;
  85. --drop function if exists link_pg_server(text, text, text, text, text, text, text);
  86.  
  87. -- Test/Demo "filter":
  88. select point.*
  89. from gis_db_server_public.osm_point as point
  90. where osm_id in (4066310784, 1398864548);
  91.  
  92. -- Test/Demo "Join Tables":
  93. /*
  94. -- Approach 1: Slow since it downloads all remote data in order to do the join.
  95. with tmp (osm_id, name) as (
  96.  values (4066310784, 'Tierpark Chur'), (1398864548, 'Tierpark Aletsch')
  97. )
  98. select point.*
  99. from gis_db_server_public.osm_point as point
  100. left join tmp on tmp.osm_id=point.osm_id;
  101. */
  102. -- Approach 2 and solution: Send a list of ids to fetch over to
  103. -- the remote as an array. Feasible for thousands of ids, but not millions.
  104. -- Planner doesn't have any kind of join plan that would do that automatically.
  105. with tmp (osm_id, name) as (
  106.  values (4066310784, 'Tierpark Chur'), (1398864548, 'Tierpark Aletsch')
  107. )
  108. select point.*
  109. from gis_db_server_public.osm_point as point
  110. -- Replacing "left join tmp on tmp.osm_id=point.osm_id" with "ANY":
  111. where point.osm_id = any(array(select osm_id from tmp));
  112.  
  113. --
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement