Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pyodbc
- import csv
- SERVER = 'server-thibaultmarkey.database.windows.net'
- DATABASE = 'IMDBThuisopdracht1'
- USERNAME = 'thibaultmarkey'
- PASSWORD = 'Password1'
- DRIVER = '{ODBC Driver 13 for SQL Server}'
- def createFilm():
- createTable = """
- DROP TABLE IF EXISTS Film;
- CREATE TABLE Film(
- FilmId int IDENTITY(1,1) PRIMARY KEY,
- FilmTitle nvarchar(150) NOT NULL,
- IMDBLink nvarchar(150) NOT NULL,
- IMDBScore decimal(2, 1) NOT NULL
- );"""
- connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
- connection = pyodbc.connect(connectionString)
- cursor = connection.cursor()
- result = cursor.execute(createTable)
- cursor.commit()
- def createFilm_Acteurs():
- createTable = """
- DROP TABLE IF EXISTS Film_acteurs;
- CREATE TABLE Film_acteurs(
- FilmActeursId int IDENTITY(1,1) PRIMARY KEY,
- FilmId int NOT NULL,
- ActorId int NOT NULL
- );"""
- connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
- connection = pyodbc.connect(connectionString)
- cursor = connection.cursor()
- result = cursor.execute(createTable)
- cursor.commit()
- def createActeurs():
- createTable = """
- DROP TABLE IF EXISTS Acteurs;
- CREATE TABLE Acteurs(
- ActorId int IDENTITY(1,1) PRIMARY KEY,
- Actor_name nvarchar(150) NOT NULL
- );"""
- connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
- connection = pyodbc.connect(connectionString)
- cursor = connection.cursor()
- result = cursor.execute(createTable)
- cursor.commit()
- def createDirectors():
- createTable = """
- DROP TABLE IF EXISTS Directors;
- CREATE TABLE Directors(
- DirectorId int IDENTITY(1,1) PRIMARY KEY,
- Director_name nvarchar(150) NOT NULL
- );"""
- connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
- connection = pyodbc.connect(connectionString)
- cursor = connection.cursor()
- result = cursor.execute(createTable)
- cursor.commit()
- def createFilmgenres():
- createTable = """
- DROP TABLE IF EXISTS Filmgenres;
- CREATE TABLE Filmgenres(
- FilmgenreId int IDENTITY(1,1) PRIMARY KEY,
- FilmId int NOT NULL,
- GenreId int NOT NULL
- );"""
- connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
- connection = pyodbc.connect(connectionString)
- cursor = connection.cursor()
- result = cursor.execute(createTable)
- cursor.commit()
- def createGenres():
- createTable = """
- DROP TABLE IF EXISTS Genres;
- CREATE TABLE Genres(
- GenreId int IDENTITY(1,1) PRIMARY KEY,
- Genre_name nvarchar(150) NOT NULL
- );"""
- connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
- connection = pyodbc.connect(connectionString)
- cursor = connection.cursor()
- result = cursor.execute(createTable)
- cursor.commit()
- def readCsv(fileName):
- reader = csv.DictReader(open(fileName,encoding="utf8"), delimiter=",")
- counter = 1
- connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
- connection = pyodbc.connect(connectionString)
- cursor = connection.cursor()
- for line in reader:
- # print('{}-{}'.format(counter,line['Domain']))
- insertFilm = "INSERT INTO Film VALUES (?,?,?)"
- insertActeurs = "INSERT INTO Acteurs VALUES (?)"
- insertDirectors = "INSERT INTO Directors VALUES (?)"
- result1 = cursor.execute(insertFilm,
- line['movie_title'],
- line['movie_imdb_link'],
- line['imdb_score'])
- result2 = cursor.execute(insertActeurs,
- line['actor_1_name'])
- result2_2 = cursor.execute(insertActeurs,
- line['actor_2_name'])
- result2_3 = cursor.execute(insertActeurs,
- line['actor_3_name'])
- result3 = cursor.execute(insertDirectors,
- line['director_name'])
- cursor.commit()
- counter += 1
- def readCsv2(fileName):
- reader = csv.DictReader(open(fileName,encoding="utf8"))
- counter = 1
- connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
- connection = pyodbc.connect(connectionString)
- cursor = connection.cursor()
- for line in reader:
- insertGenres = "INSERT INTO Genres VALUES (?)"
- result4 = cursor.execute(insertGenres,
- line['genre_name'])
- cursor.commit()
- counter += 1
- def sortDirectors():
- sortTable = """
- DELETE FROM Directors
- FROM Directors o
- INNER JOIN ( SELECT Director_name
- FROM Directors
- GROUP BY Director_name
- HAVING COUNT(*) > 1
- ) f ON o.Director_name = f.Director_name
- LEFT OUTER JOIN ( SELECT [DirectorId] = MAX(DirectorId)
- FROM Directors
- GROUP BY Director_name
- HAVING COUNT(*) > 1
- ) g ON o.DirectorId = g.DirectorId
- WHERE g.DirectorId IS NULL
- """
- connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
- connection = pyodbc.connect(connectionString)
- cursor = connection.cursor()
- result = cursor.execute(sortTable)
- cursor.commit()
- def clearEmptyDirectors():
- clearNulls = """
- DELETE FROM Directors WHERE Director_name = '' or Director_name is null
- """
- connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
- connection = pyodbc.connect(connectionString)
- cursor = connection.cursor()
- result = cursor.execute(clearNulls)
- cursor.commit()
- def sortActors():
- sortTable = """
- DELETE FROM Acteurs
- FROM Acteurs o
- INNER JOIN ( SELECT Actor_name
- FROM Acteurs
- GROUP BY Actor_name
- HAVING COUNT(*) > 1
- ) f ON o.Actor_name = f.Actor_name
- LEFT OUTER JOIN ( SELECT [ActorId] = MAX(ActorId)
- FROM Acteurs
- GROUP BY Actor_name
- HAVING COUNT(*) > 1
- ) g ON o.ActorId = g.ActorId
- WHERE g.ActorId IS NULL
- """
- connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
- connection = pyodbc.connect(connectionString)
- cursor = connection.cursor()
- result = cursor.execute(sortTable)
- cursor.commit()
- def clearEmptyActors():
- clearNulls = """
- DELETE FROM Acteurs WHERE Actor_name = '' or Actor_name is null
- """
- connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
- connection = pyodbc.connect(connectionString)
- cursor = connection.cursor()
- result = cursor.execute(clearNulls)
- cursor.commit()
- createFilm()
- createFilm_Acteurs()
- createActeurs()
- createDirectors()
- createFilmgenres()
- createGenres()
- readCsv('movie_metadata.csv')
- readCsv2('movie_metadata_genres.csv')
- sortDirectors()
- clearEmptyDirectors()
- sortActors()
- clearEmptyActors()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement