Advertisement
Spiritreader

maestro-bootstrap

Apr 24th, 2019
193
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 7.39 KB | None | 0 0
  1. import psycopg2
  2. import json
  3. import os
  4. import zipfile
  5.  
  6. host = "192.168.178.36"
  7. user = "Sam"
  8. password = "postgres"
  9. database_name = "Maestro-concept"
  10. maestro_zip = "E:\\MAESTRO\\maestro-v1.0.0.zip"
  11. zip = zipfile.ZipFile(maestro_zip, 'r')
  12.  
  13. create_metadata_table_query = "CREATE TABLE metadata (" \
  14.                               "id SERIAL PRIMARY KEY," \
  15.                               "canonical_composer varchar," \
  16.                               "canonical_title varchar," \
  17.                               "split varchar," \
  18.                               "year int," \
  19.                               "midi_filename varchar UNIQUE," \
  20.                               "audio_filename varchar UNIQUE," \
  21.                               "duration float" \
  22.                               ")"
  23.  
  24. create_midi_table_query = "CREATE TABLE midi (" \
  25.                           "id SERIAL PRIMARY KEY," \
  26.                           "name varchar UNIQUE REFERENCES metadata(midi_filename) ON DELETE CASCADE," \
  27.                           "file bytea" \
  28.                           ")"
  29.  
  30. create_wav_table_query = "CREATE TABLE wav (" \
  31.                          "id SERIAL PRIMARY KEY," \
  32.                          "name varchar UNIQUE REFERENCES metadata(audio_filename) ON DELETE CASCADE," \
  33.                          "file oid" \
  34.                          ")"
  35.  
  36.  
  37. def database():
  38.     try:
  39.         print("connecting...")
  40.         conn = psycopg2.connect(user=user,
  41.                                 password=password,
  42.                                 host=host,
  43.                                 port="5432",
  44.                                 database=database_name)
  45.         print(conn)
  46.         cursor = conn.cursor()
  47.         '''
  48.        print("Initializing tables, prefilling metadata")
  49.        init(cursor, conn)
  50.        print("Adding midi files")
  51.        insert_midis("midi", cursor, conn)
  52.        print("Adding wave files as large objects")
  53.        insert_waves("wav", cursor, conn)
  54.        '''
  55.         test_read(conn, 36257)
  56.  
  57.     except psycopg2.Error as error:
  58.         print('Database: ', end='')
  59.         print(error)
  60.     else:
  61.         print("closing connection...")
  62.         if conn:
  63.             cursor.close()
  64.             conn.close()
  65.  
  66.  
  67. def test_read(conn, oid):
  68.     with open("E:/MAESTRO/test.wav", "wb") as file:
  69.         lobj = conn.lobject(oid, "wb")
  70.         lobj_bytes = lobj.read()
  71.         file.write(lobj_bytes)
  72.         file.close()
  73.         lobj.close()
  74.  
  75.  
  76. def init(cursor, conn):
  77.     """
  78.    Creates all tables relevant for the maestro dataset
  79.    :param cursor: database cursor object
  80.    :param conn: database connection object
  81.    """
  82.     cursor.execute(create_metadata_table_query)
  83.     cursor.execute(create_midi_table_query)
  84.     cursor.execute(create_wav_table_query)
  85.     conn.commit()
  86.     cursor.execute(
  87.         generate_insert_metadata_statement("maestro-v1.0.0/maestro-v1.0.0.json", "metadata"))
  88.     conn.commit()
  89.  
  90. def generate_insert_metadata_statement(file, table_name):
  91.     """
  92.    Parses the maestro json metadata file and translates it into a database table
  93.    :param file: file path to the zipped maestro dataset json
  94.    :param table_name: table to be inserted into
  95.    """
  96.     insert_header = "INSERT INTO {} (" \
  97.                     "canonical_composer," \
  98.                     "canonical_title," \
  99.                     "split," \
  100.                     "year," \
  101.                     "midi_filename," \
  102.                     "audio_filename," \
  103.                     "duration" \
  104.                     ") VALUES ".format(table_name)
  105.     values = ""
  106.     with zip.open(file, "r") as json_file:
  107.         data = json.load(json_file)
  108.         counter = 0
  109.         for entry in data:
  110.             counter += 1
  111.             value = ""
  112.             value += "'{0}'".format(entry["canonical_composer"].replace("'", "''")) + ","
  113.             value += "'{0}'".format(entry["canonical_title"].replace("'", "''")) + ","
  114.             value += "'{0}'".format(entry["split"]) + ","
  115.             value += str(entry["year"]) + ","
  116.             value += "'{0}'".format(entry["midi_filename"].replace("'", "''")) + ","
  117.             value += "'{0}'".format(entry["audio_filename"].replace("'", "''")) + ","
  118.             value += str(entry["duration"])
  119.             value = "({0})".format(value)
  120.             values += (value + ",")
  121.     values = values[:-1] + ";"
  122.     return insert_header + values
  123.  
  124.  
  125. def insert_midis(table_name, cursor, conn):
  126.     """
  127.    Inserts all midi files found in the maestro dataset into the database
  128.    :param table_name: table to be inserted into
  129.    :param cursor: database cursor
  130.    :param conn: database connection
  131.    """
  132.     music_dirs = []
  133.     for directory in zip.namelist():
  134.         if directory.endswith('/') and "maestro-v1.0.0" not in os.path.basename(os.path.normpath(directory)):
  135.             music_dirs.append(os.path.basename(os.path.normpath(directory)))
  136.     music_dirs.sort()
  137.     for music_dir in music_dirs:
  138.         music_dir_find_string = "/" + music_dir + "/"
  139.         for zip_info in zip.infolist():
  140.             filename = zip_info.filename
  141.             basename = os.path.basename(filename)
  142.             if ".midi" in filename and music_dir_find_string in filename:
  143.                 with zip.open(filename, "r") as midi:
  144.                     header = midi.read(4)
  145.                     if header != b"MThd":
  146.                         raise ValueError("Not a MIDI file!")
  147.                     midi.seek(0)
  148.                     statement = "INSERT INTO {} VALUES (DEFAULT, '{}', {});" \
  149.                         .format(table_name, music_dir + "/" + basename, psycopg2.Binary(midi.read()))
  150.                     cursor.execute(statement)
  151.                     conn.commit()
  152.                     print("Inserted midi: " + music_dir + "/" + basename)
  153.  
  154.  
  155. def insert_waves(table_name, cursor, conn):
  156.     """
  157.    Inserts all waveforms found in the maestro dataset into the database.
  158.    Adds wav files as large objects, object id is stored in the table.
  159.    Large objects will end up in the pg_largeobject table.
  160.    The references for those large objects according to their filename will be saved in
  161.    table_name
  162.    :param table_name: table to be inserted into
  163.    :param cursor: database cursor object
  164.    :param conn: database connection object
  165.    :return: list of insert statements for table_name
  166.    """
  167.     music_dirs = []
  168.     for directory in zip.namelist():
  169.         if directory.endswith('/') and "maestro-v1.0.0" not in os.path.basename(os.path.normpath(directory)):
  170.             music_dirs.append(os.path.basename(os.path.normpath(directory)))
  171.     music_dirs.sort()
  172.     for music_dir in music_dirs:
  173.         music_dir_find_string = "/" + music_dir + "/"
  174.         for zip_info in zip.infolist():
  175.             filename = zip_info.filename
  176.             basename = os.path.basename(filename)
  177.             if ".wav" in filename and music_dir_find_string in filename:
  178.                 with zip.open(filename, "r") as wav:
  179.                     lobj = conn.lobject(0, 'w', 0)
  180.                     lobj.write(wav.read())
  181.                     lobj.close()
  182.                     statement = "INSERT INTO {} VALUES (DEFAULT, '{}', {});" \
  183.                         .format(table_name, music_dir + "/" + basename, lobj.oid)
  184.                     cursor.execute(statement)
  185.                     conn.commit()
  186.                     print("Inserted large object: " + str(lobj.oid) + " for " + music_dir + "/" + basename)
  187.  
  188.  
  189. database()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement