Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import psycopg2
- # --- SETUP ---
- # The only things that need to be filled in to run the script:
- # dbname, user, password
- # and
- # year, old data filename, category, empty string for later
- input_for_script = (2016, 'ongeval.csv', 'ongevallen', '')
- #tables = ["ongevallen_2016.csv", "ongevallen_2017.csv"]
- #change input to
- #old_data = ongeval.csv
- #data_to_add = [data1, data2]
- # Connect to an existing database
- conn = psycopg2.connect("dbname=rws_2011_2017 user=postgres password=Prandall19s!")
- # --- EXECUTION ---
- #This code adds a new year of RWS data to the old dataset.
- #Requirements: 1. all tables are located in the same schema (old years, new year to add, reference tables)
- # 2. the table will only join the attributes from the old table
- # 3. the names of the reference tables end in '.txt.csv' */
- # Open a cursor to perform database operations
- cur = conn.cursor()
- # Creates settings table. input it takes:
- # the year (to add),
- # the name of the old table (containing the previous years)
- # and category (partij/ongeval/slachtoffer/voertuig/etcetera)
- cur.execute("""
- DROP TABLE IF EXISTS _rws_settings;
- CREATE TABLE _rws_settings (
- year_to_add INT,
- old_data text,
- category text,
- year_filename text);
- """)
- cur.execute("""
- INSERT INTO _rws_settings
- (year_to_add, old_data, category, year_filename)
- VALUES
- (%s, %s, %s, %s);
- """,
- input_for_script)
- cur.execute("""
- UPDATE _rws_settings
- SET year_filename = CONCAT(_rws_settings.category, '_', _rws_settings.year_to_add, '.csv');
- """)
- # Creates lookup table _rws_datatypes, which states the column names and datatypes of old_data from _rws_settings
- cur.execute("""
- DROP TABLE IF EXISTS _rws_datatypes;
- CREATE TABLE _rws_datatypes AS
- SELECT column_name, data_type FROM INFORMATION_SCHEMA.columns
- WHERE table_name = (SELECT old_data FROM _rws_settings);
- """)
- # Creates lookup table _rws_references: It takes the columns ending in '_ID' or '_CODE' and the name of the reference table.
- # With this, it'll be possible to look up whether columns in the new table (year to add) have to be transformed into '_OMS'.
- cur.execute("""
- DROP TABLE IF EXISTS _rws_references;
- CREATE TABLE _rws_references AS
- SELECT column_name, table_name FROM INFORMATION_SCHEMA.columns
- WHERE table_name LIKE '%.txt.csv'
- AND (column_name LIKE '%\_ID' OR column_name LIKE '%\_CODE');
- """)
- # Selects all columns from new table that end in '_ID' or '_CODE', and outputs it into a Python list
- cur.execute("""
- DROP TABLE IF EXISTS _rws_columns_to_convert;
- CREATE TABLE _rws_columns_to_convert AS
- SELECT column_name FROM INFORMATION_SCHEMA.columns
- WHERE table_name LIKE (SELECT _rws_settings.year_filename FROM _rws_settings)
- AND (column_name LIKE '%\_ID' OR column_name LIKE '%\_CODE');
- """)
- #Makes a list on what columns need to be converted with a reference table
- cur.execute("""
- SELECT _rws_columns_to_convert."column_name" FROM _rws_columns_to_convert;
- """)
- list_to_convert = cur.fetchall()
- #remove '(' and '),'
- list_to_convert = [i[0] for i in list_to_convert]
- print('reference table name manual: ', cur.execute("""SELECT table_name from _rws_references WHERE column_name = 'AOL_ID'"""))
- # --- DIT IS WAT RUDY GEDAAN HEEFT OMG
- #Query voor de loop, selecteert de juiste reference table gebaseerd op het iterable item in de loop
- get_table_name_query = """
- SELECT table_name from _rws_references WHERE column_name = '{0}'
- """
- #Create tables (tables to convert/merge) ["ongevallen_2016.csv", "ongevallen_2017.csv"]
- #create all data table and fill it with the old data
- tables = ["ongevallen_2016.csv"]
- all_data = cur.execute("""
- DROP TABLE IF EXISTS _rws_all_temp;
- CREATE TABLE _rws_all_temp AS SELECT * FROM (SELECT _rws_settings.old_data FROM _rws_settings) AS derived_temp
- """)
- #function that converts _ID or _CODE into _OMS
- #0= data table (2016 for example) - table_to_convert
- #1= the reference table
- #2=item name (but the _OMS version, needs function turn_id_or_code_into_oms)
- #3=item name, iterable (_ID or _CODE)
- def run_rudy_query(table_to_convert, from_table, item_name, item_oms):
- rudy_query = """
- SELECT "{0}".*, oms_table."{3}"
- FROM "{0}"
- LEFT JOIN "{1}" as oms_table
- ON "{0}"."{2}" = oms_table."{2}"
- """
- finished_query = rudy_query.format(table_to_convert, from_table, item_name, item_oms)
- print('finished query: ', finished_query)
- cur.execute(finished_query)
- #changes extension _ID and _CODE into _OMS (for easy lookup in reference table)
- def turn_id_or_code_into_oms(name):
- if("_CODE" in name):
- return name.replace("_CODE", "_OMS")
- else:
- return name.replace("_ID", "_OMS")
- # Loop through all the tables and convertable items, find all reference tables and start function run_rudy_query
- for table in tables:
- for item in list_to_convert:
- #print(cur.execute(get_table_name_query.format(item)))
- print('ref table name automatic: ', cur.execute(get_table_name_query.format(item)))
- print('OMS:', turn_id_or_code_into_oms(item))
- from_table = cur.execute(get_table_name_query.format(item))
- #from_table = cur.fetchone()[0]
- run_rudy_query(table, from_table, item, turn_id_or_code_into_oms(item))
- #def update_employee(id, name, birthdate):
- # cur.execute("UPDATE employees SET name={0}, birthdate={1} WHERE ID = {2}".format(name, birthdate, id))
- # --- TOT HIER, OK?
- # --- THIS IS NOT WORKING, KEEPING IT HERE FOR REFERENCE
- # per column name to convert:
- # find reference file to use
- # use that file for a left join (create a new table the first time)
- #for item in list_to_convert:
- # print(item)
- # reference_table = (("""SELECT _rws_references."table_name" FROM _rws_references WHERE _rws_references."column_name" LIKE %s """), item)
- # rws_data = ("""SELECT _rws_settings.year_filename FROM _rws_settings""")
- # cur.execute("""DROP TABLE IF EXISTS _year_to_add_oms;
- # CREATE TABLE _year_to_add_oms AS
- # SELECT * FROM (SELECT _rws_settings.year_filename FROM _rws_settings) AS _derived_table1, %s
- # LEFT JOIN (SELECT _rws_settings.year_filename FROM _rws_settings) AS _derived_table2 ON
- # (SELECT * FROM (SELECT _rws_columns_to_convert.%s FROM _rws_columns_to_convert) AS derived_table5)
- # = (SELECT * FROM (SELECT _rws_references.%s FROM _rws_references) AS _derived_table4)""", reference_table, , rws_data, rws_data, item)
- # --- END OF NON-WORKING CODE
- # --- TO DO
- #change value from null-columns to correct datatype
- #check for and add missing columns
- #remove unnecessary columns
- #union old and new data
- #create index? aliases?
- # create linking tables (maybe other script)
- # --- FINISH UP RUNNING THE SCRIPT
- # Make the changes to the database persistent
- conn.commit()
- # Close communication with the database
- cur.close()
- conn.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement