Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import psycopg2
- import numpy as np
- connection = psycopg2.connect(host = 'imperial.ckp3dl3vzxoh.eu-west-2.rds.amazonaws.com',
- database = 'dvdrental',
- user = 'imperial',
- password = 'imperial')
- cursor = connection.cursor()
- sql = 'SELECT * FROM film'
- cursor.execute(sql)
- cursor.fetchone()
- cursor.fetchall()
- films = cursor.fetchall()
- films
- films[0][1]
- cursor.close()
- connection.close()
- cursor.execute('CREATE DATABASE dogs')
- # Dogs
- connection = psycopg2.connect(host = 'imperial.ckp3dl3vzxoh.eu-west-2.rds.amazonaws.com',
- database = 'dogs',
- user = 'imperial',
- password = 'imperial')
- cursor = connection.cursor()
- sql = "INSERT INTO dogs VALUES (1, 'rover', 'Alsatian', 9)"
- cursor.execute(sql)
- connection.commit()
- sql = "INSERT INTO dogs VALUES (2, 'Fido', 'Great Dane', 9)"
- cursor.execute(sql)
- # Movies
- connection = psycopg2.connect(host = 'imperial.ckp3dl3vzxoh.eu-west-2.rds.amazonaws.com',
- database = 'movies',
- user = 'imperial',
- password = 'imperial')
- cursor = connection.cursor()
- sql = "SELECT budget, gross FROM movie "
- cursor.execute(sql)
- budget_and_gross = cursor.fetchall()
- connection.rollback()
- budget_and_gross
- budgets = [x[0] for x in budget_and_gross]
- grosses = [x[1] for x in budget_and_gross]
- [np.log(x) for x in budgets]
- import matplotlib.pyplot as plt
- %matplotlib inline
- plt.scatter(budgets, grosses)
- plt.xlabel('Budget')
- plt.ylabel('Gross')
- import numpy
- np.log(4)
- np.log
- type(grosses[0])
- np.log(budgets[0])
- log_budgets = []
- for x in budgets:
- if x is not None:
- log_budgets.append(np.log(x))
- else:
- log_budgets.append(0)
- log_grosses = []
- for x in grosses:
- if x is not None:
- log_grosses.append(np.log(x))
- else:
- log_grosses.append(0)
- %pdb
- np.log(None)
- plt.scatter(log_budgets, log_grosses)
- plt.xlabel('Budget')
- plt.ylabel('Gross')
- plt.plot(grosses, 'x')
- sql = "SELECT budget, gross FROM movie ORDER BY title_year "
- cursor.execute(sql)
- budget_and_gross_by_year = cursor.fetchall()
- budget_and_gross_by_year
- %pdb
- plt.plot([x[1] for x in budget_and_gross_by_year])
- NOTEBOOK 2
- #Template
- import numpy as np
- import scipy
- import matplotlib.pyplot as plt
- from pdb import set_trace
- from IPython.display import Image
- from time import sleep
- from matplotlib.image import imread
- %matplotlib inline
- %load_ext autoreload
- %autoreload 2
- from os import listdir
- from os.path import join
- import pandas
- import psycopg2
- #conn_str = "postgres://imperial:imperial@imperial.ckp3dl3vzxoh.eu-west-2.rds.amazonaws.com/dvdrental"
- conn_str= "host=imperial.ckp3dl3vzxoh.eu-west-2.rds.amazonaws.com user=imperial password=imperial dbname=dvdrental"
- conn = psycopg2.connect(conn_str)
- conn
- import pandas as pd
- df = pd.read_sql('select * from film', con=conn)
- df
- df = pd.read_sql('select * from actor', con=conn)
- df
- df.loc[199].first_name
- df.loc[189,'last_name']
- years =
- df['replacement_cost']
- df
- # Movies analysis
- conn_str= "host=imperial.ckp3dl3vzxoh.eu-west-2.rds.amazonaws.com user=imperial password=imperial dbname=movies"
- conn = psycopg2.connect(conn_str)
- import pandas as pd
- df = pd.read_sql('SELECT * FROM movie', con=conn)
- df
- query ="""
- SELECT title_year, AVG(gross) AS mean_gross
- FROM movie
- WHERE gross IS NOT NULL AND title_year IS NOT NULL
- GROUP BY title_year
- ORDER BY title_year
- """
- df = pd.read_sql(query, con=conn)
- df
- print query
- y
- x = df['title_year']
- y = df['mean_gross']
- plt.plot(x,y)
- plt.plot(y)
- query ="""
- SELECT budget, gross
- FROM movie
- WHERE budget < 10000000
- AND gross < 10000000
- """
- df = pd.read_sql(query, con=conn)
- df
- x = df['budget']
- y = df['gross']
- plt.scatter(x,y)
- plt.xlabel('Budget')
- plt.ylabel('Gross')
- query ="""
- SELECT imdb_score, cast_total_facebook_likes
- FROM movie
- WHERE num_user_for_reviews > 1000
- AND cast_total_facebook_likes < 100000
- """
- df = pd.read_sql(query, con=conn)
- df
- x = df['imdb_score']
- y = df['cast_total_facebook_likes']
- plt.scatter(x,y)
- plt.xlabel('IMDB score')
- plt.ylabel('Cast total Facebook likes')
- y = [np.log(p) for p in y]
- x = [np.log(p) for p in x]
- plt.hist(y)
- y
- x
- plt.hist(x)
- NOTEBOOK 3
- import sqlalchemy
- from sqlalchemy import create_engine, inspect
- db_uri = 'postgres://imperial:imperial@imperial.ckp3dl3vzxoh.eu-west-2.rds.amazonaws.com/dvdrental'
- engine = create_engine(db_uri)
- engine
- inspector = inspect(engine)
- print(inspector.get_table_names())
- result = engine.execute('SELECT * FROM film')
- result
- for r in result:
- print(r.description)
- fetched_result = engine.execute('SELECT * FROM film').fetchall()
- fetched_result[0].description
- fetched_result[1].title
- # Models - dogs
- engine = create_engine('postgres://imperial:imperial@imperial.ckp3dl3vzxoh.eu-west-2.rds.amazonaws.com/dogs')
- engine
- from sqlalchemy.orm import sessionmaker
- Session = sessionmaker()
- Session.configure(bind=engine)
- session = Session()
- session
- class Dog(Base):
- __tablename__ = 'dogs'
- id = Column(Integer, primary_key=True)
- name = Column(String)
- breed = Column(String)
- owner_id = Column(Integer)
- def __repr__(self):
- return self.name
- dogs = session.query(Dog).all()
- dogs
- new_dog = Dog(id=3, name='Bonzo', breed='Labrador', owner_id=1)
- new_dog
- session.add(new_dog)
- session.new
- session.commit()
- class Owner(Base):
- __tablename__ = "owners"
- id = Column(Integer, primary_key=True)
- name = Column(String)
- def __repr__(self):
- return self.name
- class Dog(Base):
- __tablename__ = 'dogs'
- id = Column(Integer, primary_key=True)
- name = Column(String)
- breed = Column(String)
- owner_id = Column(Integer, ForeignKey('owners.id'))
- owner = relationship('Owner', back_populates='dogs')
- def __repr__(self):
- return self.name
- class Owner(Base):
- __tablename__ = "owners"
- id = Column(Integer, primary_key=True)
- name = Column(String)
- dogs = relationship('Dog', back_populates='owner')
- def __repr__(self):
- return self.name
- owner = session.query(Owner).first()
- owner
- owner.dogs
- session.rollback()
- bonzo = session.query(Dog).get(3)
- bonzo
- bonzo.owner
- # Models - dvdrental
- import sqlalchemy
- from sqlalchemy import create_engine
- engine = create_engine('postgres://imperial:imperial@imperial.ckp3dl3vzxoh.eu-west-2.rds.amazonaws.com/dvdrental')
- engine
- from sqlalchemy.ext.declarative import declarative_base
- Base = declarative_base()
- from sqlalchemy import Column, Integer, String
- class Film(Base):
- __tablename__ = 'film'
- film_id = Column(Integer, primary_key=True)
- title = Column(String)
- def __repr__(self):
- return self.title
- from sqlalchemy.orm import sessionmaker
- Session = sessionmaker()
- Session.configure(bind=engine)
- session = Session()
- films = session.query(Film).all()
- session.rollback()
- films
- new_film = Film(film_id = 9999, title="First Light")
- session.add(new_film)
- session.dirty
- session.new
- session.commit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement