Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import psycopg2
- import json
- import os
- import zipfile
- host = "192.168.178.36"
- user = "Sam"
- password = "postgres"
- database_name = "Maestro-concept"
- maestro_zip = "E:\\MAESTRO\\maestro-v1.0.0.zip"
- zip = zipfile.ZipFile(maestro_zip, 'r')
- create_metadata_table_query = "CREATE TABLE metadata (" \
- "id SERIAL PRIMARY KEY," \
- "canonical_composer varchar," \
- "canonical_title varchar," \
- "split varchar," \
- "year int," \
- "midi_filename varchar UNIQUE," \
- "audio_filename varchar UNIQUE," \
- "duration float" \
- ")"
- create_midi_table_query = "CREATE TABLE midi (" \
- "id SERIAL PRIMARY KEY," \
- "name varchar UNIQUE REFERENCES metadata(midi_filename) ON DELETE CASCADE," \
- "file bytea" \
- ")"
- create_wav_table_query = "CREATE TABLE wav (" \
- "id SERIAL PRIMARY KEY," \
- "name varchar UNIQUE REFERENCES metadata(audio_filename) ON DELETE CASCADE," \
- "file oid" \
- ")"
- def database():
- try:
- print("connecting...")
- conn = psycopg2.connect(user=user,
- password=password,
- host=host,
- port="5432",
- database=database_name)
- print(conn)
- cursor = conn.cursor()
- '''
- print("Initializing tables, prefilling metadata")
- init(cursor, conn)
- print("Adding midi files")
- insert_midis("midi", cursor, conn)
- print("Adding wave files as large objects")
- insert_waves("wav", cursor, conn)
- '''
- test_read(conn, 36257)
- except psycopg2.Error as error:
- print('Database: ', end='')
- print(error)
- else:
- print("closing connection...")
- if conn:
- cursor.close()
- conn.close()
- def test_read(conn, oid):
- with open("E:/MAESTRO/test.wav", "wb") as file:
- lobj = conn.lobject(oid, "wb")
- lobj_bytes = lobj.read()
- file.write(lobj_bytes)
- file.close()
- lobj.close()
- def init(cursor, conn):
- """
- Creates all tables relevant for the maestro dataset
- :param cursor: database cursor object
- :param conn: database connection object
- """
- cursor.execute(create_metadata_table_query)
- cursor.execute(create_midi_table_query)
- cursor.execute(create_wav_table_query)
- conn.commit()
- cursor.execute(
- generate_insert_metadata_statement("maestro-v1.0.0/maestro-v1.0.0.json", "metadata"))
- conn.commit()
- def generate_insert_metadata_statement(file, table_name):
- """
- Parses the maestro json metadata file and translates it into a database table
- :param file: file path to the zipped maestro dataset json
- :param table_name: table to be inserted into
- """
- insert_header = "INSERT INTO {} (" \
- "canonical_composer," \
- "canonical_title," \
- "split," \
- "year," \
- "midi_filename," \
- "audio_filename," \
- "duration" \
- ") VALUES ".format(table_name)
- values = ""
- with zip.open(file, "r") as json_file:
- data = json.load(json_file)
- counter = 0
- for entry in data:
- counter += 1
- value = ""
- value += "'{0}'".format(entry["canonical_composer"].replace("'", "''")) + ","
- value += "'{0}'".format(entry["canonical_title"].replace("'", "''")) + ","
- value += "'{0}'".format(entry["split"]) + ","
- value += str(entry["year"]) + ","
- value += "'{0}'".format(entry["midi_filename"].replace("'", "''")) + ","
- value += "'{0}'".format(entry["audio_filename"].replace("'", "''")) + ","
- value += str(entry["duration"])
- value = "({0})".format(value)
- values += (value + ",")
- values = values[:-1] + ";"
- return insert_header + values
- def insert_midis(table_name, cursor, conn):
- """
- Inserts all midi files found in the maestro dataset into the database
- :param table_name: table to be inserted into
- :param cursor: database cursor
- :param conn: database connection
- """
- music_dirs = []
- for directory in zip.namelist():
- if directory.endswith('/') and "maestro-v1.0.0" not in os.path.basename(os.path.normpath(directory)):
- music_dirs.append(os.path.basename(os.path.normpath(directory)))
- music_dirs.sort()
- for music_dir in music_dirs:
- music_dir_find_string = "/" + music_dir + "/"
- for zip_info in zip.infolist():
- filename = zip_info.filename
- basename = os.path.basename(filename)
- if ".midi" in filename and music_dir_find_string in filename:
- with zip.open(filename, "r") as midi:
- header = midi.read(4)
- if header != b"MThd":
- raise ValueError("Not a MIDI file!")
- midi.seek(0)
- statement = "INSERT INTO {} VALUES (DEFAULT, '{}', {});" \
- .format(table_name, music_dir + "/" + basename, psycopg2.Binary(midi.read()))
- cursor.execute(statement)
- conn.commit()
- print("Inserted midi: " + music_dir + "/" + basename)
- def insert_waves(table_name, cursor, conn):
- """
- Inserts all waveforms found in the maestro dataset into the database.
- Adds wav files as large objects, object id is stored in the table.
- Large objects will end up in the pg_largeobject table.
- The references for those large objects according to their filename will be saved in
- table_name
- :param table_name: table to be inserted into
- :param cursor: database cursor object
- :param conn: database connection object
- :return: list of insert statements for table_name
- """
- music_dirs = []
- for directory in zip.namelist():
- if directory.endswith('/') and "maestro-v1.0.0" not in os.path.basename(os.path.normpath(directory)):
- music_dirs.append(os.path.basename(os.path.normpath(directory)))
- music_dirs.sort()
- for music_dir in music_dirs:
- music_dir_find_string = "/" + music_dir + "/"
- for zip_info in zip.infolist():
- filename = zip_info.filename
- basename = os.path.basename(filename)
- if ".wav" in filename and music_dir_find_string in filename:
- with zip.open(filename, "r") as wav:
- lobj = conn.lobject(0, 'w', 0)
- lobj.write(wav.read())
- lobj.close()
- statement = "INSERT INTO {} VALUES (DEFAULT, '{}', {});" \
- .format(table_name, music_dir + "/" + basename, lobj.oid)
- cursor.execute(statement)
- conn.commit()
- print("Inserted large object: " + str(lobj.oid) + " for " + music_dir + "/" + basename)
- database()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement