Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # simple_ar_etl.rb
- # SimpleArEtl
# Minimal CSV -> ActiveRecord importer.
#
# Typical flow: +new+ with the target model class, +load+ a CSV file whose
# first row holds the column names, optionally +rename_field+ /
# +delete_field+ / +attach_to+ / +normalize_with_habtm+, then +save+.
#
# The model class is expected to answer the old-style ActiveRecord finder
# call <tt>find(:first, :conditions => [...])</tt> and to have an
# +external_id+ column used to recognise rows that were imported before.
class SimpleArEtl
  # FasterCSV became the standard library's CSV in Ruby 1.9; the gem is only
  # needed (and only usable) on older interpreters.
  if RUBY_VERSION < '1.9'
    require 'fastercsv'
    CSV_PARSER = FasterCSV
  else
    require 'csv'
    CSV_PARSER = CSV
  end

  # ar_class:: the model class rows are imported into.
  def initialize(ar_class)
    @ar_class = ar_class
  end

  # Reads the whole CSV at +source+ into memory and splits off the header row.
  # Returns the array of column names (empty for an empty file).
  def load(source)
    @source = source
    @rows = CSV_PARSER.read(@source)
    read_column_names
  end

  # Imports every loaded row: finds the record with a matching external id or
  # builds a new one, creates normalization link records, optionally attaches
  # the record to a parent object, and saves it with +save!+ (so validation
  # failures raise). Returns the array of rows.
  def save
    @normalized_column_names ||= []
    # The candidate records do not depend on the row, so fetch them once
    # instead of once per CSV line.
    normalized_records = habtm_configured? ? @normalized_klass.find(:all) : []

    @rows.each do |row|
      values = row_to_values(row)
      new_obj = find_or_build(values)

      # normalize field
      process_normalized_with_habtm(values, new_obj, normalized_records)

      if @attached_class
        attach_and_save(values, new_obj)
      else
        # we weren't attaching this to anything, just save it
        new_obj.save!
      end
    end
  end

  # Renames the column +old_name+ to +new_name+ (case-insensitive match).
  # As a side effect every column name is lower-cased.
  #
  # TODO: this should be able to be called before load as well (just cache
  # the changes to be made, and make them on import)
  def rename_field(old_name, new_name)
    target = old_name.downcase
    replacement = new_name.downcase
    @columns.map! do |column|
      next column if column.nil? # blank header cell: nothing to rename
      name = column.downcase
      name == target ? replacement : name
    end
  end

  # Removes the column +field_name+ from the header and from every row.
  # An exact match is preferred; otherwise the name is matched
  # case-insensitively, because +rename_field+ lower-cases the headers.
  # Returns the removed column name, or nil when no such column exists.
  #
  # TODO: this should be able to be called before load as well (just cache
  # the changes to be made, and make them on import)
  def delete_field(field_name)
    index_of = column_index(field_name)
    return nil unless index_of

    @rows.each { |row| row.delete_at(index_of) }
    @columns.delete_at(index_of)
  end

  # Makes +save+ append each imported record to a collection on a parent
  # object: the parent is the +klass+ record whose +klass_pk_field+ equals
  # the row's +fk_field+ value.
  #
  # TODO: this should support multiple attached classes
  def attach_to(klass, klass_pk_field, fk_field)
    @attached_class = klass
    @fk_field = fk_field
    @klass_pk_field = klass_pk_field
  end

  # Treats +normalized_column_names+ as flag columns rather than model
  # attributes. For every +normalized_klass+ record whose +comparison_field+
  # value names a column holding "1" in the row, +save+ creates a
  # +link_klass+ record joining it to the imported object.
  #
  # TODO: this should support multiple normalizations eventually
  def normalize_with_habtm(normalized_klass, link_klass, normalized_column_names, comparison_field)
    @normalized_klass = normalized_klass
    @link_klass = link_klass
    @normalized_column_names = normalized_column_names
    @comparison_field = comparison_field
  end

  # these methods are here only for backwards compatibility

  # Moves the value stored under +old_name+ in the hash +row+ to +new_name+.
  # The key's presence is tested, so nil/false values are moved too instead
  # of being dropped. Mutates and returns +row+.
  def self.rename_field(row, old_name, new_name)
    row[new_name] = row.delete(old_name) if row.has_key?(old_name)
    row
  end

  # Removes +field_name+ from the hash +row+. Mutates and returns +row+.
  def self.delete_field(row, field_name)
    row.delete(field_name)
    row
  end

  private

  # read the column names, and remove that row
  def read_column_names
    # An empty file has no header row; fall back to an empty column list so
    # later calls do not hit nil.
    @columns = @rows.shift || []
  end

  # True once normalize_with_habtm has supplied everything linking needs.
  def habtm_configured?
    @normalized_klass && @link_klass && @comparison_field
  end

  # Position of +field_name+ in @columns: exact match first, then a
  # case-insensitive one. nil when absent.
  def column_index(field_name)
    wanted = field_name.to_s.downcase
    @columns.index(field_name) ||
      @columns.index { |column| column && column.downcase == wanted }
  end

  # Builds the {lower-cased column name => cell} hash handed to ActiveRecord.
  # Cells without a header (row wider than the header, blank header cell)
  # are skipped.
  def row_to_values(row)
    values = {}
    row.each_with_index do |cell, i|
      column = @columns[i]
      values[column.downcase] = cell if column
    end
    values
  end

  # Returns the record already imported for this row, or a new unsaved one.
  def find_or_build(values)
    # The lookup runs against the external_id column, so prefer the value of
    # a column already renamed to "external_id"; a raw "id" column is the
    # fallback.
    external_id = values["external_id"] || values["id"]
    existing = @ar_class.find(:first, :conditions => ["external_id = ?", external_id])
    return existing if existing

    attributes = values.reject { |key, _value| @normalized_column_names.include?(key) }
    @ar_class.new(attributes)
  end

  # Appends +new_obj+ to the parent's collection and saves it. When no parent
  # matches the row's foreign key, this method does not save the record.
  def attach_and_save(values, new_obj)
    attached_object = @attached_class.find(:first, :conditions => ["#{@klass_pk_field} = ?", values[@fk_field]])
    return unless attached_object

    attached_collection = attached_object.send(new_obj.class.table_name.pluralize)
    attached_collection << new_obj
    new_obj.save!
  end

  # Creates one link record per normalized record flagged with "1" in the
  # row. +candidates+ lets +save+ pass in a list fetched once up front; when
  # omitted the records are loaded here.
  # NOTE(review): a link is created on every import, including for records
  # that already existed -- confirm duplicate links are acceptable.
  def process_normalized_with_habtm(values, new_obj, candidates = nil)
    return unless habtm_configured?

    (candidates || @normalized_klass.find(:all)).each do |norm|
      next unless values[norm.send(@comparison_field)] == "1"

      link_object = @link_klass.new
      # Setter names are derived from the lower-cased class names.
      link_object.send("#{new_obj.class.name.downcase}=", new_obj)
      link_object.send("#{norm.class.name.downcase}=", norm)
      link_object.save!
    end
  end
end
- # call to load/save etc
# Example import of people from a CSV file.
# NOTE(review): `path` and `Person` are not defined in this snippet; they must
# be supplied by the surrounding script.
importer = SimpleArEtl.new(Person)
importer.load(path)

# Ordered [csv_header, model_attribute] pairs, applied one after another.
[
  ["id", "external_id"],
  ["av_add", "av_address"],
  ["dob", "date_of_birth"],
  ["ppstart", "pp_start"],
  ["ppexpdate", "pp_exp_date"],
  ["warrantdat", "warrant_date"],
  ["warranttyp", "warrant_type"],
  ["suptype", "sup_type"],
  ["suptype2", "sup_type2"],
  ["istatus", "i_status"],
  ["lname", "last_name"],
  ["fname", "first_name"],
  ["resdate", "res_date"],
  ["addtype", "add_type"]
].each do |csv_header, model_attribute|
  importer.rename_field(csv_header, model_attribute)
end

importer.save
Add Comment
Please, Sign In to add comment