Advertisement
Guest User

Untitled

a guest
Oct 11th, 2018
110
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.77 KB | None | 0 0
  1. import psycopg2
  2.  
  3. # --- SETUP ---
  4. # The only things that need to be filled in to run the script:
  5. # dbname, user, password
  6. # and
  7. # year, old data filename, category, empty string for later
  8. input_for_script = (2016, 'ongeval.csv', 'ongevallen', '')
  9.  
  10. #tables = ["ongevallen_2016.csv", "ongevallen_2017.csv"]
  11. #change input to
  12. #old_data = ongeval.csv
  13. #data_to_add = [data1, data2]
  14.  
  15. # Connect to an existing database
  16. conn = psycopg2.connect("dbname=rws_2011_2017 user=postgres password=Prandall19s!")
  17.  
  18. # --- EXECUTION ---
  19.  
  20. #This code adds a new year of RWS data to the old dataset.
  21. #Requirements: 1. all tables are located in the same schema (old years, new year to add, reference tables)
  22. #             2. the table will only join the attributes from the old table
  23. #             3. the names of the reference tables end in '.txt.csv' */
  24.  
  25. # Open a cursor to perform database operations
  26. cur = conn.cursor()
  27.  
  28. # Creates settings table. input it takes:
  29. #   the year (to add),
  30. #   the name of the old table (containing the previous years)
  31. #   and category (partij/ongeval/slachtoffer/voertuig/etcetera)
  32. cur.execute("""
  33. DROP TABLE IF EXISTS _rws_settings;
  34. CREATE TABLE _rws_settings (
  35.     year_to_add     INT,
  36.     old_data        text,
  37.     category        text,
  38.     year_filename   text);
  39. """)
  40.  
  41. cur.execute("""
  42. INSERT INTO _rws_settings
  43.     (year_to_add, old_data, category, year_filename)
  44. VALUES
  45.     (%s, %s, %s, %s);
  46.    """,
  47.     input_for_script)
  48.  
  49. cur.execute("""
  50. UPDATE _rws_settings
  51. SET year_filename = CONCAT(_rws_settings.category, '_', _rws_settings.year_to_add, '.csv');
  52. """)
  53.  
  54.  
  55. # Creates lookup table _rws_datatypes, which states the column names and datatypes of old_data from _rws_settings
  56. cur.execute("""
  57. DROP TABLE IF EXISTS _rws_datatypes;
  58. CREATE TABLE _rws_datatypes AS
  59. SELECT column_name, data_type FROM INFORMATION_SCHEMA.columns
  60. WHERE table_name = (SELECT old_data FROM _rws_settings);
  61. """)
  62.  
  63. # Creates lookup table _rws_references: It takes the columns ending in '_ID' or '_CODE' and the name of the reference table.
  64. # With this, it'll be possible to look up whether columns in the new table (year to add) have to be transformed into '_OMS'.
  65. cur.execute("""
  66. DROP TABLE IF EXISTS _rws_references;
  67. CREATE TABLE _rws_references AS
  68. SELECT column_name, table_name FROM INFORMATION_SCHEMA.columns
  69. WHERE table_name LIKE '%.txt.csv'
  70. AND (column_name LIKE '%\_ID' OR column_name LIKE '%\_CODE');
  71. """)
  72.    
  73. # Selects all columns from new table that end in '_ID' or '_CODE', and outputs it into a Python list
  74. cur.execute("""
  75. DROP TABLE IF EXISTS _rws_columns_to_convert;
  76. CREATE TABLE _rws_columns_to_convert AS
  77. SELECT column_name FROM INFORMATION_SCHEMA.columns
  78. WHERE table_name LIKE (SELECT _rws_settings.year_filename FROM _rws_settings)
  79. AND (column_name LIKE '%\_ID' OR column_name LIKE '%\_CODE');
  80. """)
  81.  
  82. #Makes a list on what columns need to be converted with a reference table
  83. cur.execute("""
  84. SELECT _rws_columns_to_convert."column_name" FROM _rws_columns_to_convert;
  85. """)
  86. list_to_convert = cur.fetchall()
  87. #remove '(' and '),'
  88. list_to_convert = [i[0] for i in list_to_convert]
  89.  
  90. print('reference table name manual: ', cur.execute("""SELECT table_name from _rws_references WHERE column_name = 'AOL_ID'"""))
  91.  
  92.  
  93. # --- DIT IS WAT RUDY GEDAAN HEEFT OMG
  94. #Query voor de loop, selecteert de juiste reference table gebaseerd op het iterable item in de loop
  95. get_table_name_query = """
  96. SELECT table_name from _rws_references WHERE column_name = '{0}'
  97. """
  98.  
  99. #Create tables (tables to convert/merge) ["ongevallen_2016.csv", "ongevallen_2017.csv"]
  100. #create all data table and fill it with the old data
  101. tables = ["ongevallen_2016.csv"]
  102. all_data = cur.execute("""
  103. DROP TABLE IF EXISTS _rws_all_temp;
  104. CREATE TABLE _rws_all_temp AS SELECT * FROM (SELECT _rws_settings.old_data FROM _rws_settings) AS derived_temp
  105. """)
  106.  
  107. #function that converts _ID or _CODE into _OMS
  108. #0= data table (2016 for example) - table_to_convert
  109. #1= the reference table
  110. #2=item name (but the _OMS version, needs function turn_id_or_code_into_oms)
  111. #3=item name, iterable (_ID or _CODE)
  112. def run_rudy_query(table_to_convert, from_table, item_name, item_oms):
  113.     rudy_query = """
  114.    SELECT "{0}".*, oms_table."{3}"
  115.    FROM "{0}"
  116.    LEFT JOIN "{1}" as oms_table
  117.    ON "{0}"."{2}" = oms_table."{2}"
  118.    """
  119.    
  120.     finished_query = rudy_query.format(table_to_convert, from_table, item_name, item_oms)
  121.     print('finished query: ', finished_query)
  122.     cur.execute(finished_query)
  123.  
  124. #changes extension _ID and _CODE into _OMS (for easy lookup in reference table)
  125. def turn_id_or_code_into_oms(name):
  126.     if("_CODE" in name):
  127.         return name.replace("_CODE", "_OMS")
  128.     else:
  129.         return name.replace("_ID", "_OMS")
  130.  
  131. # Loop through all the tables and convertable items, find all reference tables and start function run_rudy_query
  132. for table in tables:
  133.     for item in list_to_convert:
  134.         #print(cur.execute(get_table_name_query.format(item)))
  135.         print('ref table name automatic: ', cur.execute(get_table_name_query.format(item)))
  136.         print('OMS:', turn_id_or_code_into_oms(item))
  137.         from_table = cur.execute(get_table_name_query.format(item))
  138.         #from_table = cur.fetchone()[0]
  139.         run_rudy_query(table, from_table, item, turn_id_or_code_into_oms(item))
  140.  
  141.  
  142. #def update_employee(id, name, birthdate):
  143. #    cur.execute("UPDATE employees SET name={0}, birthdate={1} WHERE ID = {2}".format(name, birthdate, id))
  144.  
  145. # --- TOT HIER, OK?
  146.  
  147.  
  148. # --- THIS IS NOT WORKING, KEEPING IT HERE FOR REFERENCE
  149. # per column name to convert:
  150. # find reference file to use
  151. # use that file for a left join (create a new table the first time)
  152. #for item in list_to_convert:
  153. #    print(item)
  154. #    reference_table = (("""SELECT _rws_references."table_name" FROM _rws_references WHERE _rws_references."column_name" LIKE %s """), item)
  155. #    rws_data = ("""SELECT _rws_settings.year_filename FROM _rws_settings""")
  156.  
  157. #    cur.execute("""DROP TABLE IF EXISTS _year_to_add_oms;
  158. #    CREATE TABLE _year_to_add_oms AS
  159. #    SELECT * FROM (SELECT _rws_settings.year_filename FROM _rws_settings) AS _derived_table1, %s
  160. #    LEFT JOIN (SELECT _rws_settings.year_filename FROM _rws_settings) AS _derived_table2 ON
  161. #   (SELECT * FROM (SELECT _rws_columns_to_convert.%s FROM _rws_columns_to_convert) AS derived_table5)
  162. #   = (SELECT * FROM (SELECT _rws_references.%s FROM _rws_references) AS _derived_table4)""", reference_table, , rws_data, rws_data, item)
  163. # --- END OF NON-WORKING CODE
  164.  
  165. # --- TO DO
  166. #change value from null-columns to correct datatype
  167. #check for and add missing columns
  168. #remove unnecessary columns
  169. #union old and new data
  170. #create index? aliases?
  171. # create linking tables (maybe other script)
  172.  
  173. # --- FINISH UP RUNNING THE SCRIPT
  174. # Make the changes to the database persistent
  175. conn.commit()
  176.  
  177. # Close communication with the database
  178. cur.close()
  179. conn.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement