Advertisement
Guest User

ogr2ogr.rb

a guest
Oct 15th, 2016
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Ruby 7.46 KB | None | 0 0
  1. # ogr2ogr.rb
  2. # encoding: utf-8
  3. require 'open3'
  4. require_relative './shp_helper'
  5.  
  module CartoDB
    module Importer2
      # Builds and runs an `ogr2ogr` shell command that loads a file into
      # PostgreSQL, then exposes predicates that classify the captured output.
      #
      # NOTE(review): file path, layer, table name and connection settings are
      # interpolated verbatim into a single command string that is handed to
      # Open3.capture3. Nothing here escapes them, so callers are presumably
      # expected to pass shell-safe values -- confirm against the call sites.
      class Ogr2ogr
        ENCODING  = 'UTF-8'.freeze
        SCHEMA    = 'cdb_importer'.freeze

        OUTPUT_FORMAT_OPTION  = '-f PostgreSQL'.freeze
        PG_COPY_OPTION        = 'PG_USE_COPY=YES'.freeze
        NEW_LAYER_TYPE_OPTION = '-nlt PROMOTE_TO_MULTI'.freeze
        OSM_INDEXING_OPTION   = 'OSM_USE_CUSTOM_INDEXING=NO'.freeze
        APPEND_MODE_OPTION    = '-append'.freeze

        # This is a shell command, not a path: it is executed (see
        # #executable_path) and its stripped stdout is used as the binary.
        DEFAULT_BINARY = 'which ogr2ogr2.1'.freeze

        # Column names offered to ogr2ogr as Y (latitude) / X (longitude)
        # candidates when CSV guessing is enabled.
        LATITUDE_POSSIBLE_NAMES   = %w{ latitude lat latitudedecimal
          latitud lati decimallatitude decimallat point_latitude }.freeze
        LONGITUDE_POSSIBLE_NAMES  = %w{ longitude lon lng
          longitudedecimal longitud long decimallongitude decimallong point_longitude }.freeze

        # Value used for the PostgreSQL statement_timeout set around the import.
        DEFAULT_TIMEOUT = '1h'.freeze

        attr_accessor :append_mode, :filepath, :csv_guessing, :overwrite, :encoding, :shape_encoding,
                      :shape_coordinate_system
        attr_reader   :exit_code, :command_output

        # @param table_name [String] target table, created under SCHEMA
        # @param filepath   [String] path of the file handed to ogr2ogr
        # @param pg_options [#with_indifferent_access] connection settings; the
        #   keys read are :host, :port (or :direct_port), :username, :database
        #   and :password
        # @param layer      [String, nil] optional layer name appended after the file
        # @param options    [Hash] optional overrides, see #set_default_properties
        def initialize(table_name, filepath, pg_options, layer=nil, options={})
          self.filepath   = filepath
          self.pg_options = pg_options.with_indifferent_access
          self.table_name = table_name
          self.layer      = layer
          self.options    = options
          self.command_output = ''
          self.exit_code = 0
          set_default_properties
        end

        # Resets the tunable properties to their defaults, honouring the
        # :ogr2ogr_binary, :ogr2ogr_csv_guessing, :quoted_fields_guessing,
        # :encoding and :shape_coordinate_system keys of the options hash.
        def set_default_properties
          self.append_mode = false
          self.overwrite = false
          self.ogr2ogr2_binary = options.fetch(:ogr2ogr_binary, DEFAULT_BINARY)
          self.csv_guessing = options.fetch(:ogr2ogr_csv_guessing, false)
          self.quoted_fields_guessing = options.fetch(:quoted_fields_guessing, true)
          self.encoding = options.fetch(:encoding, ENCODING)
          self.shape_encoding = ''
          self.shape_coordinate_system = options.fetch(:shape_coordinate_system, '')
        end

        # @return [String] full shell command (environment assignments first,
        #   then the binary and its arguments) for a fresh import
        def command_for_import
          "#{OSM_INDEXING_OPTION} #{PG_COPY_OPTION} #{client_encoding_option} #{shape_encoding_option} " +
          "#{executable_path} #{OUTPUT_FORMAT_OPTION} #{guessing_option} " +
          "#{postgres_options} #{projection_option} #{layer_creation_options} #{filepath} #{layer} " +
          "#{layer_name_option} #{new_layer_type_option} #{shape_coordinate_option} #{timeout_options} " +
          "#{overwrite_option}"
        end

        # @return [String] full shell command for appending to an existing table
        def command_for_append
          "#{OSM_INDEXING_OPTION} #{PG_COPY_OPTION} #{client_encoding_option} " +
          "#{executable_path} #{APPEND_MODE_OPTION} #{OUTPUT_FORMAT_OPTION} #{postgres_options} " +
          "#{projection_option} #{filepath} #{layer} #{layer_name_option} #{NEW_LAYER_TYPE_OPTION} "
        end

        # Executes the configured binary-lookup command in a subshell and
        # returns its stripped stdout. Runs on every call (no memoization).
        def executable_path
          `#{ogr2ogr2_binary}`.strip
        end

        # @return [String] the append or import command, depending on #append_mode
        def command
          append_mode ? command_for_append : command_for_import
        end

        # Runs the command and records its outcome on this object.
        #
        # stdout and stderr are concatenated and re-encoded to UTF-8, with
        # invalid or undefined bytes replaced by '?????'. The exit code is the
        # raw Process::Status#to_i value, not the plain exit status.
        #
        # @param use_append_mode [Boolean] run the append command instead of import
        # @return [Ogr2ogr] self, so predicates can be chained on the result
        def run(use_append_mode=false)
          @append_mode = use_append_mode
          stdout, stderr, status  = Open3.capture3(command)
          self.command_output     = (stdout + stderr).encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '?????')
          self.exit_code          = status.to_i
          self
        end

        # The predicates below inspect the captured output (and sometimes the
        # raw exit code). They return truthy/falsy values (a match index, nil
        # or false), not strict booleans.

        def generic_error?
          command_output =~ /ERROR 1:/i || command_output =~ /ERROR:/i
        end

        def encoding_error?
          command_output =~ /has no equivalent in encoding/i || command_output =~ /invalid byte sequence for encoding/i
        end

        def invalid_dates?
          command_output =~ /date\/time field value out of range/i
        end

        def duplicate_column?
          command_output =~ /column (.*) of relation (.*) already exists/i || command_output =~ /specified more than once/i
        end

        def invalid_geojson?
          command_output =~ /nrecognized GeoJSON/i
        end

        def too_many_columns?
          command_output =~ /tables can have at most 1600 columns/i
        end

        # 256 == 1 << 8; presumably exit status 1 in the raw wait-status
        # encoding returned by Process::Status#to_i -- platform dependent.
        def unsupported_format?
          exit_code == 256 && command_output =~ /Unable to open(.*)with the following drivers/i
        end

        # 35072 == 137 << 8; presumably status 137 (128 + 9, i.e. killed).
        def file_too_big?
          (exit_code == 256 && command_output =~ /calloc failed/i) ||
          (exit_code == 35072 && command_output =~ /Killed/i)
        end

        def statement_timeout?
          command_output =~ /canceling statement due to statement timeout/i
        end

        # 35584 == 139 << 8; presumably status 139 (128 + 11, i.e. SIGSEGV).
        def segfault_error?
          exit_code == 35584 && command_output =~ /Segmentation fault/i
        end

        def kml_style_missing?
          is_kml? && command_output =~/kml Style: No id/i
        end

        private

        attr_writer   :exit_code, :command_output
        attr_accessor :pg_options, :options, :table_name, :layer, :ogr2ogr2_binary, :quoted_fields_guessing

        def is_csv?
          !(filepath =~ /\.csv$/i).nil?
        end

        def is_kml?
          !(filepath =~ /\.kml$/i).nil?
        end

        def is_geojson?
          !(filepath =~ /\.geojson$/i).nil?
        end

        def is_shp?
          !(filepath =~ /\.shp$/i).nil?
        end

        # Anchored and case-insensitive, like the other extension helpers.
        def is_ods?
          !(filepath =~ /\.ods$/i).nil?
        end

        # Type/geometry autodetection flags; only emitted for CSV files when
        # CSV guessing is enabled, otherwise an empty string.
        def guessing_option
          if csv_guessing && is_csv?
            # Inverse of the selection: if I want guessing I must NOT leave quoted fields as string
            "-oo AUTODETECT_TYPE=YES -oo QUOTED_FIELDS_AS_STRING=#{quoted_fields_guessing ? 'NO' : 'YES' } " +
            "#{x_y_possible_names_option} -s_srs EPSG:4326 -t_srs EPSG:4326"
          else
            ''
          end
        end

        def x_y_possible_names_option
          "-oo X_POSSIBLE_NAMES=#{LONGITUDE_POSSIBLE_NAMES.join(',')} -oo Y_POSSIBLE_NAMES=#{LATITUDE_POSSIBLE_NAMES.join(',')}"
        end

        def new_layer_type_option
          # We don't want lat/long columns to generate multipoints in the wkb_geometry column that
          # can be afterwards chosen by the cartodbfication
          if csv_guessing && is_csv?
            ''
          else
            NEW_LAYER_TYPE_OPTION
          end
        end

        def overwrite_option
          overwrite ? "-overwrite" : ''
        end

        def client_encoding_option
          "PGCLIENTENCODING=#{encoding}"
        end

        def shape_encoding_option
          !shape_encoding.nil? && !shape_encoding.empty? ? "SHAPE_ENCODING=#{shape_encoding}" : ''
        end

        # Source SRS flag, or '' when no coordinate system is configured.
        # The value is coerced with #to_s because the accessor is public: a nil
        # or Integer SRID must not raise NoMethodError on #empty?.
        def shape_coordinate_option
          srid = shape_coordinate_system.to_s
          srid.empty? ? '' : "-s_srs EPSG:#{srid}"
        end

        def layer_name_option
          "-nln #{SCHEMA}.#{table_name}"
        end

        # Connection string argument. :direct_port takes precedence over :port
        # when present.
        #
        # @see http://www.gdal.org/drv_pg.html
        # @see http://www.gdal.org/drv_pg_advanced.html
        def postgres_options
          %Q{PG:"host=#{pg_options.fetch(:host)} }      +
          %Q{port=#{pg_options.fetch(:direct_port, pg_options.fetch(:port))} }          +
          %Q{user=#{pg_options.fetch(:username)} }          +
          %Q{dbname=#{pg_options.fetch(:database)} }    +
          %Q{password=#{pg_options.fetch(:password)}"}
          # 'schemas=#{SCHEMA},cartodb' param is no longer needed, let the DB build the proper one
        end

        def layer_creation_options
          # Dimension option, precision option
          "-lco DIM=2 -lco PRECISION=NO"
        end

        # Target SRS flag. Omitted (nil, which interpolates as '') for CSV and
        # ODS files.
        def projection_option
          is_csv? || is_ods? ? nil : '-t_srs EPSG:4326 '
        end

        # Wraps the import in a statement_timeout of DEFAULT_TIMEOUT and resets
        # it to the database default when the dataset is closed.
        def timeout_options
          # see http://www.gdal.org/ogr2ogr.html
          # see http://www.gdal.org/drv_pg.html
          %Q{-doo PRELUDE_STATEMENTS="SET statement_timeout TO '#{DEFAULT_TIMEOUT}'" } +
          %Q{-doo CLOSING_STATEMENTS='SET statement_timeout TO DEFAULT' } +
          %Q{-update}
        end
      end
    end
  end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement