Advertisement
Guest User

Untitled

a guest
Nov 3rd, 2017
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.37 KB | None | 0 0
  1. import pyodbc
  2. import csv
  3.  
  4. SERVER = 'server-thibaultmarkey.database.windows.net'
  5. DATABASE = 'IMDBThuisopdracht1'
  6. USERNAME = 'thibaultmarkey'
  7. PASSWORD = 'Password1'
  8. DRIVER = '{ODBC Driver 13 for SQL Server}'
  9.  
  10. def createFilm():
  11. createTable = """
  12. DROP TABLE IF EXISTS Film;
  13. CREATE TABLE Film(
  14. FilmId int IDENTITY(1,1) PRIMARY KEY,
  15. FilmTitle nvarchar(150) NOT NULL,
  16. IMDBLink nvarchar(150) NOT NULL,
  17. IMDBScore decimal(2, 1) NOT NULL
  18. );"""
  19. connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
  20. connection = pyodbc.connect(connectionString)
  21. cursor = connection.cursor()
  22. result = cursor.execute(createTable)
  23. cursor.commit()
  24.  
  25. def createFilm_Acteurs():
  26. createTable = """
  27. DROP TABLE IF EXISTS Film_acteurs;
  28. CREATE TABLE Film_acteurs(
  29. FilmActeursId int IDENTITY(1,1) PRIMARY KEY,
  30. FilmId int NOT NULL,
  31. ActorId int NOT NULL
  32. );"""
  33. connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
  34. connection = pyodbc.connect(connectionString)
  35. cursor = connection.cursor()
  36. result = cursor.execute(createTable)
  37. cursor.commit()
  38.  
  39. def createActeurs():
  40. createTable = """
  41. DROP TABLE IF EXISTS Acteurs;
  42. CREATE TABLE Acteurs(
  43. ActorId int IDENTITY(1,1) PRIMARY KEY,
  44. Actor_name nvarchar(150) NOT NULL
  45. );"""
  46. connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
  47. connection = pyodbc.connect(connectionString)
  48. cursor = connection.cursor()
  49. result = cursor.execute(createTable)
  50. cursor.commit()
  51.  
  52. def createDirectors():
  53. createTable = """
  54. DROP TABLE IF EXISTS Directors;
  55. CREATE TABLE Directors(
  56. DirectorId int IDENTITY(1,1) PRIMARY KEY,
  57. Director_name nvarchar(150) NOT NULL
  58. );"""
  59. connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
  60. connection = pyodbc.connect(connectionString)
  61. cursor = connection.cursor()
  62. result = cursor.execute(createTable)
  63. cursor.commit()
  64.  
  65. def createFilmgenres():
  66. createTable = """
  67. DROP TABLE IF EXISTS Filmgenres;
  68. CREATE TABLE Filmgenres(
  69. FilmgenreId int IDENTITY(1,1) PRIMARY KEY,
  70. FilmId int NOT NULL,
  71. GenreId int NOT NULL
  72. );"""
  73. connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
  74. connection = pyodbc.connect(connectionString)
  75. cursor = connection.cursor()
  76. result = cursor.execute(createTable)
  77. cursor.commit()
  78.  
  79. def createGenres():
  80. createTable = """
  81. DROP TABLE IF EXISTS Genres;
  82. CREATE TABLE Genres(
  83. GenreId int IDENTITY(1,1) PRIMARY KEY,
  84. Genre_name nvarchar(150) NOT NULL
  85. );"""
  86. connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
  87. connection = pyodbc.connect(connectionString)
  88. cursor = connection.cursor()
  89. result = cursor.execute(createTable)
  90. cursor.commit()
  91.  
  92. def readCsv(fileName):
  93. reader = csv.DictReader(open(fileName,encoding="utf8"), delimiter=",")
  94. counter = 1
  95.  
  96. connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
  97. connection = pyodbc.connect(connectionString)
  98. cursor = connection.cursor()
  99. for line in reader:
  100. # print('{}-{}'.format(counter,line['Domain']))
  101. insertFilm = "INSERT INTO Film VALUES (?,?,?)"
  102. insertActeurs = "INSERT INTO Acteurs VALUES (?)"
  103. insertDirectors = "INSERT INTO Directors VALUES (?)"
  104.  
  105. result1 = cursor.execute(insertFilm,
  106. line['movie_title'],
  107. line['movie_imdb_link'],
  108. line['imdb_score'])
  109.  
  110. result2 = cursor.execute(insertActeurs,
  111. line['actor_1_name'])
  112.  
  113. result2_2 = cursor.execute(insertActeurs,
  114. line['actor_2_name'])
  115.  
  116. result2_3 = cursor.execute(insertActeurs,
  117. line['actor_3_name'])
  118.  
  119. result3 = cursor.execute(insertDirectors,
  120. line['director_name'])
  121.  
  122. cursor.commit()
  123. counter += 1
  124.  
  125. def readCsv2(fileName):
  126. reader = csv.DictReader(open(fileName,encoding="utf8"))
  127. counter = 1
  128.  
  129. connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
  130. connection = pyodbc.connect(connectionString)
  131. cursor = connection.cursor()
  132. for line in reader:
  133.  
  134. insertGenres = "INSERT INTO Genres VALUES (?)"
  135.  
  136. result4 = cursor.execute(insertGenres,
  137. line['genre_name'])
  138.  
  139. cursor.commit()
  140. counter += 1
  141.  
  142. def sortDirectors():
  143. sortTable = """
  144. DELETE FROM Directors
  145. FROM Directors o
  146. INNER JOIN ( SELECT Director_name
  147. FROM Directors
  148. GROUP BY Director_name
  149. HAVING COUNT(*) > 1
  150. ) f ON o.Director_name = f.Director_name
  151. LEFT OUTER JOIN ( SELECT [DirectorId] = MAX(DirectorId)
  152. FROM Directors
  153. GROUP BY Director_name
  154. HAVING COUNT(*) > 1
  155. ) g ON o.DirectorId = g.DirectorId
  156. WHERE g.DirectorId IS NULL
  157. """
  158. connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
  159. connection = pyodbc.connect(connectionString)
  160. cursor = connection.cursor()
  161. result = cursor.execute(sortTable)
  162. cursor.commit()
  163.  
  164. def clearEmptyDirectors():
  165. clearNulls = """
  166. DELETE FROM Directors WHERE Director_name = '' or Director_name is null
  167. """
  168. connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
  169. connection = pyodbc.connect(connectionString)
  170. cursor = connection.cursor()
  171. result = cursor.execute(clearNulls)
  172. cursor.commit()
  173.  
  174. def sortActors():
  175. sortTable = """
  176. DELETE FROM Acteurs
  177. FROM Acteurs o
  178. INNER JOIN ( SELECT Actor_name
  179. FROM Acteurs
  180. GROUP BY Actor_name
  181. HAVING COUNT(*) > 1
  182. ) f ON o.Actor_name = f.Actor_name
  183. LEFT OUTER JOIN ( SELECT [ActorId] = MAX(ActorId)
  184. FROM Acteurs
  185. GROUP BY Actor_name
  186. HAVING COUNT(*) > 1
  187. ) g ON o.ActorId = g.ActorId
  188. WHERE g.ActorId IS NULL
  189. """
  190. connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
  191. connection = pyodbc.connect(connectionString)
  192. cursor = connection.cursor()
  193. result = cursor.execute(sortTable)
  194. cursor.commit()
  195.  
  196. def clearEmptyActors():
  197. clearNulls = """
  198. DELETE FROM Acteurs WHERE Actor_name = '' or Actor_name is null
  199. """
  200. connectionString = 'DRIVER=' + DRIVER + ';PORT=1433;SERVER=' + SERVER + ';PORT=1443;DATABASE=' + DATABASE + ';UID=' + USERNAME + ';PWD=' + PASSWORD
  201. connection = pyodbc.connect(connectionString)
  202. cursor = connection.cursor()
  203. result = cursor.execute(clearNulls)
  204. cursor.commit()
  205.  
  206. createFilm()
  207. createFilm_Acteurs()
  208. createActeurs()
  209. createDirectors()
  210. createFilmgenres()
  211. createGenres()
  212. readCsv('movie_metadata.csv')
  213. readCsv2('movie_metadata_genres.csv')
  214. sortDirectors()
  215. clearEmptyDirectors()
  216. sortActors()
  217. clearEmptyActors()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement