Advertisement
Guest User

Untitled

a guest
Oct 11th, 2018
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.46 KB | None | 0 0
  1. import psycopg2
  2.  
  3. # --- SETUP ---
  4. # The only things that need to be filled in to run the script:
  5. # dbname, user, password
  6. # and
  7. # year, old data filename, category, empty string for later
  8. input_for_script = (2016, 'ongeval.csv', 'ongevallen', '')
  9.  
  10. #tables = ["ongevallen_2016.csv", "ongevallen_2017.csv"]
  11. #change input to
  12. #old_data = ongeval.csv
  13. #data_to_add = [data1, data2]
  14.  
  15. # Connect to an existing database
  16. conn = psycopg2.connect("dbname=rws_2011_2017 user=postgres password=Prandall19s!")
  17.  
  18. # --- EXECUTION ---
  19.  
  20. #This code adds a new year of RWS data to the old dataset.
  21. #Requirements: 1. all tables are located in the same schema (old years, new year to add, reference tables)
  22. #             2. the table will only join the attributes from the old table
  23. #             3. the names of the reference tables end in '.txt.csv' */
  24.  
  25. # Open a cursor to perform database operations
  26. cur = conn.cursor()
  27.  
  28. # Creates settings table. input it takes:
  29. #   the year (to add),
  30. #   the name of the old table (containing the previous years)
  31. #   and category (partij/ongeval/slachtoffer/voertuig/etcetera)
  32. cur.execute("""
  33. DROP TABLE IF EXISTS _rws_settings;
  34. CREATE TABLE _rws_settings (
  35.     year_to_add     INT,
  36.     old_data        text,
  37.     category        text,
  38.     year_filename   text);
  39. """)
  40.  
  41. cur.execute("""
  42. INSERT INTO _rws_settings
  43.     (year_to_add, old_data, category, year_filename)
  44. VALUES
  45.     (%s, %s, %s, %s);
  46.    """,
  47.     input_for_script)
  48.  
  49. cur.execute("""
  50. UPDATE _rws_settings
  51. SET year_filename = CONCAT(_rws_settings.category, '_', _rws_settings.year_to_add, '.csv');
  52. """)
  53.  
  54.  
  55. # Creates lookup table _rws_datatypes, which states the column names and datatypes of old_data from _rws_settings
  56. cur.execute("""
  57. DROP TABLE IF EXISTS _rws_datatypes;
  58. CREATE TABLE _rws_datatypes AS
  59. SELECT column_name, data_type FROM INFORMATION_SCHEMA.columns
  60. WHERE table_name = (SELECT old_data FROM _rws_settings);
  61. """)
  62.  
  63. # Creates lookup table _rws_references: It takes the columns ending in '_ID' or '_CODE' and the name of the reference table.
  64. # With this, it'll be possible to look up whether columns in the new table (year to add) have to be transformed into '_OMS'.
  65. cur.execute("""
  66. DROP TABLE IF EXISTS _rws_references;
  67. CREATE TABLE _rws_references AS
  68. SELECT column_name, table_name FROM INFORMATION_SCHEMA.columns
  69. WHERE table_name LIKE '%.txt.csv'
  70. AND (column_name LIKE '%\_ID' OR column_name LIKE '%\_CODE');
  71. """)
  72.    
  73. # Selects all columns from new table that end in '_ID' or '_CODE', and outputs it into a Python list
  74. cur.execute("""
  75. DROP TABLE IF EXISTS _rws_columns_to_convert;
  76. CREATE TABLE _rws_columns_to_convert AS
  77. SELECT column_name FROM INFORMATION_SCHEMA.columns
  78. WHERE table_name LIKE (SELECT _rws_settings.year_filename FROM _rws_settings)
  79. AND (column_name LIKE '%\_ID' OR column_name LIKE '%\_CODE');
  80. """)
  81.  
  82. #Makes a list on what columns need to be converted with a reference table
  83. cur.execute("""
  84. SELECT _rws_columns_to_convert."column_name" FROM _rws_columns_to_convert;
  85. """)
  86. list_to_convert = cur.fetchall()
  87. #remove '(' and '),'
  88. list_to_convert = [i[0] for i in list_to_convert]
  89.  
  90.  
  91. # --- DIT IS WAT RUDY GEDAAN HEEFT OMG
  92. #Query voor de loop, selecteert de juiste reference table gebaseerd op het iterable item in de loop
  93. get_table_name_query = """
  94. SELECT table_name from _rws_references WHERE column_name = '{0}'
  95. """
  96.  
  97. #Create tables (tables to convert/merge) ["ongevallen_2016.csv", "ongevallen_2017.csv"]
  98. #create all data table and fill it with the old data
  99. tables = ["ongevallen_2016.csv"]
  100. all_data = cur.execute("""
  101. DROP TABLE IF EXISTS _rws_all_temp;
  102. CREATE TABLE _rws_all_temp AS SELECT * FROM (SELECT _rws_settings.old_data FROM _rws_settings) AS derived_temp
  103. """)
  104.  
  105. #function that converts _ID or _CODE into _OMS
  106. #0= data table (2016 for example) - table_to_convert
  107. #1= the reference table
  108. #2=item name (but the _OMS version, needs function turn_id_or_code_into_oms)
  109. #3=item name, iterable (_ID or _CODE)
  110. def run_rudy_query(table_to_convert, from_table, item_name):
  111.     rudy_query = """
  112.    SELECT "{0}".*, oms_table."{2}"
  113.    FROM "{0}"
  114.    LEFT JOIN "{1}" as oms_table
  115.    ON "{0}"."{3}" = oms_table."{3}"
  116.    """
  117.    
  118.     finished_query = rudy_query.format(table_to_convert, from_table, turn_id_or_code_into_oms(item_name), item_name)
  119.     print(finished_query)
  120.     cur.execute(rudy_query)
  121.  
  122. #changes extension _ID and _CODE into _OMS (for easy lookup in reference table)
  123. def turn_id_or_code_into_oms(name):
  124.     if(name.find("_CODE")):
  125.         return name.replace("_CODE", "_OMS")
  126.     else:
  127.         return name.replace("_ID", "_OMS")
  128.  
  129. # Loop through all the tables and convertable items, find all reference tables and start function run_rudy_query
  130. for table in tables:
  131.     for item in list_to_convert:
  132.         print('ref table name: ', get_table_name_query.format(item))
  133.         from_table = cur.execute(get_table_name_query.format(item))
  134.         run_rudy_query(table, from_table, turn_id_or_code_into_oms(item))
  135.  
  136. #def update_employee(id, name, birthdate):
  137. #    cur.execute("UPDATE employees SET name={0}, birthdate={1} WHERE ID = {2}".format(name, birthdate, id))
  138.  
  139. # --- TOT HIER, OK?
  140.  
  141.  
  142. # --- THIS IS NOT WORKING, KEEPING IT HERE FOR REFERENCE
  143. # per column name to convert:
  144. # find reference file to use
  145. # use that file for a left join (create a new table the first time)
  146. #for item in list_to_convert:
  147. #    print(item)
  148. #    reference_table = (("""SELECT _rws_references."table_name" FROM _rws_references WHERE _rws_references."column_name" LIKE %s """), item)
  149. #    rws_data = ("""SELECT _rws_settings.year_filename FROM _rws_settings""")
  150.  
  151. #    cur.execute("""DROP TABLE IF EXISTS _year_to_add_oms;
  152. #    CREATE TABLE _year_to_add_oms AS
  153. #    SELECT * FROM (SELECT _rws_settings.year_filename FROM _rws_settings) AS _derived_table1, %s
  154. #    LEFT JOIN (SELECT _rws_settings.year_filename FROM _rws_settings) AS _derived_table2 ON
  155. #   (SELECT * FROM (SELECT _rws_columns_to_convert.%s FROM _rws_columns_to_convert) AS derived_table5)
  156. #   = (SELECT * FROM (SELECT _rws_references.%s FROM _rws_references) AS _derived_table4)""", reference_table, , rws_data, rws_data, item)
  157. # --- END OF NON-WORKING CODE
  158.  
  159. # --- TO DO
  160. #change value from null-columns to correct datatype
  161. #check for and add missing columns
  162. #remove unnecessary columns
  163. #union old and new data
  164. #create index? aliases?
  165. # create linking tables (maybe other script)
  166.  
  167. # --- FINISH UP RUNNING THE SCRIPT
  168. # Make the changes to the database persistent
  169. conn.commit()
  170.  
  171. # Close communication with the database
  172. cur.close()
  173. conn.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement