Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- Experiments to investigate and improve SQL database performance.
- By Braddock Gaskill, July 2012
- Runtime notes:
- All speeds performed on System76 Gazelle Professional Intel 8 core Core i7
- 2.3GHz under Linux Ubuntu 12.04
- = SQLAlchemy times:
- create_record1: Total time for 10000 records 73.0288729668 0.00730288829803s per record
- (^with commit after each customer)
- create_record2: Total time for 10000 records 2.14980816841 0.000214982199669s per record
- (customer only, no address)
- create_record1: Total time for 10000 records 2.57281708717 0.000257283115387s per record
- (^with NO commit after each customer - SQLAlchemy seems to handle that case!)
- create_record1: Total time for 10000 records 2.57225084305 0.000257225704193s per record
- (^with NO commit after each customer, AND PRAGMAs invoked upon connection)
- create_record1: Total time for 10000 records 15.5766570568 0.00155766689777s per record
- (^with NO commit after each customer, but added SELECT call after each insert)
- create_record1: Total time for 10000 records 20.3696157932 0.00203696208s per record
- (^with NO commit, added SELECT call after each insert, in-MEMORY Db, dbname='sqlite:///:memory:')
- create_record1: Total time for 10000 records 2.3189971447 0.000231900906563s per record
- (^with flush() called every 100 iterations)
- create_record3: Total time for 10000 records 2.10460805893 0.000210461497307s per record
- (^using SQLAlchemy insert(), one at a time)
- create_record4: Total time for 10000 records 0.0987010002136 9.87088680267e-06s per record
- (^Bulk insert, not using correct customer ids but only an incrementing counter)
- = Direct sqlite3 times:
- create_raw1: Total time for 10000 records 0.0553600788116 5.53691387177e-06s per record
- Wow! Straight customer insert, no commits until end
- create_raw1: Total time for 10000 records 39.1845419407 0.00391845560074s per record
- conn.commit() called after every customer insert
- create_raw2: Total time for 10000 records 0.0957889556885 9.57977771759e-06s per record
- ^customer and then address inserted using cursor.lastrowid
- """
- import time
- import sqlite3
- from sqlalchemy.ext.declarative import declarative_base
- from sqlalchemy import Column, Integer, String, Sequence, Float, ForeignKey, Table, Text, Boolean, Date, DateTime, create_engine, event, Unicode, UnicodeText
- from sqlalchemy.orm import relationship, backref, scoped_session, sessionmaker
- Base = declarative_base()
- DBSession = scoped_session(sessionmaker())
- engine = None
- def db_connection_setup(dbapi_con, con_record):
- """Things to do on every new DB connection"""
- # It is not clear that these PRAGMAs help
- #dbapi_con.execute("PRAGMA synchronous=OFF")
- #dbapi_con.execute("PRAGMA journal_mode=OFF")
- #dbapi_con.execute("PRAGMA cache_size=20000000")
- #print "connection",dbapi_con,con_record
- pass
- def init(dbname = 'sqlite:///sqlite.db', echo=False):
- global engine
- global DBSession
- if engine == None:
- engine = create_engine(dbname, echo=echo)
- event.listen(engine, 'connect', db_connection_setup)
- DBSession.configure(bind=engine, autoflush=False, expire_on_commit=False)
- def dropall():
- Base.metadata.drop_all(engine)
- def createall():
- Base.metadata.create_all(engine)
- def prep(dbname = 'sqlite:///sqlite.db', echo=False):
- init(dbname=dbname, echo=echo)
- dropall()
- createall()
- class Customer(Base):
- __tablename__ = "customer"
- id = Column(Integer, primary_key=True)
- name = Column(String(255))
- guid = Column(String(32))
- def __unicode__(self):
- return u'[Customer: ' + str(self.id) + ' ' + self.name + ']'
- def __str__(self):
- return unicode(self).encode('utf-8')
- class Address(Base):
- __tablename__ = "address"
- id = Column(Integer, primary_key=True)
- customer_id = Column(ForeignKey("customer.id"))
- customer = relationship("Customer")
- address = Column(String(255))
- guid = Column(String(32))
- def __unicode__(self):
- return u'[Address: ' + str(self.id) + ' for ' + str(self.customer_id) + ' is ' + self.address + ']'
- def __str__(self):
- return unicode(self).encode('utf-8')
- def create_records1(n):
- t0 = time.time()
- for i in range(n):
- if i % 1000 == 0:
- print i, str(time.time() - t0)
- if i % 100 == 0:
- DBSession.flush()
- customer = Customer()
- customer.name = 'NAME ' + str(i)
- DBSession.add(customer)
- #DBSession.commit()
- #c = DBSession.query(Customer).filter(Customer.name == 'NAME ' + str(i)).all()
- address = Address()
- address.address = 'ADDR ' + str(i)
- address.customer = customer
- DBSession.add(address)
- print "commit"
- DBSession.commit()
- print "create_record1: Total time for " + str(n) + " records " + str(time.time() - t0) + " " + str(float(time.time() - t0)/float(n)) + "s per record"
- def create_records2(n):
- t0 = time.time()
- for i in range(n):
- if i % 1000 == 0:
- print i, str(time.time() - t0)
- customer = Customer()
- customer.name = 'NAME ' + str(i)
- DBSession.add(customer)
- print "commit"
- DBSession.commit()
- print "create_record2: Total time for " + str(n) + " records " + str(time.time() - t0) + " " + str(float(time.time() - t0)/float(n)) + "s per record"
- def create_records3(n):
- t0 = time.time()
- for i in range(n):
- if i % 1000 == 0:
- print i, str(time.time() - t0)
- row = { 'name' : 'NAME ' + str(i)}
- ins = Customer.__table__.insert().values(row)
- cursor = DBSession.execute(ins)
- lastrowid = cursor.inserted_primary_key
- row = { 'address' : 'ADDR ' + str(i), 'customer_id' : lastrowid[0] }
- ins = Address.__table__.insert().values(row)
- cursor = DBSession.execute(ins)
- print "commit"
- DBSession.commit()
- print "create_record3: Total time for " + str(n) + " records " + str(time.time() - t0) + " " + str(float(time.time() - t0)/float(n)) + "s per record"
- def create_records4(n):
- t0 = time.time()
- rows = []
- # First add customers
- for i in range(n):
- if i % 1000 == 0:
- print i, str(time.time() - t0)
- row = { 'name' : 'NAME ' + str(i)}
- rows.append(row)
- ins = Customer.__table__.insert()
- cursor = DBSession.execute(ins, rows)
- # lastrowids does not work for multiple insert
- #lastrowids = cursor.inserted_primary_key
- rows = []
- for i in range(n):
- row = { 'address' : 'ADDR ' + str(i), 'customer_id' : i }
- rows.append(row)
- ins = Address.__table__.insert()
- cursor = DBSession.execute(ins, rows)
- print "commit"
- DBSession.commit()
- print "create_record4: Total time for " + str(n) + " records " + str(time.time() - t0) + " " + str(float(time.time() - t0)/float(n)) + "s per record"
- def raw_init(dbname = 'raw.db'):
- conn = sqlite3.connect(dbname)
- c = conn.cursor()
- c.execute("DROP TABLE IF EXISTS customer")
- c.execute("DROP TABLE IF EXISTS address")
- c.execute("CREATE TABLE customer (id INTEGER NOT NULL, name VARCHAR(255), guid VARCHAR(32), PRIMARY KEY(id))")
- c.execute("CREATE TABLE address (id INTEGER NOT NULL, address VARCHAR(255), guid VARCHAR(32), customer_id INTEGER, PRIMARY KEY(id), FOREIGN KEY(customer_id) REFERENCES customer (id))")
- conn.commit()
- c.close()
- return conn
- def create_raw1(n, dbname = 'raw.db'):
- conn = raw_init(dbname)
- c = conn.cursor()
- t0 = time.time()
- for i in range(n):
- if i % 1000 == 0:
- print i, str(time.time() - t0)
- row = ('NAME ' + str(i),)
- c.execute("INSERT INTO customer (name) VALUES (?)", row)
- print "commit"
- conn.commit()
- print "create_raw1: Total time for " + str(n) + " records " + str(time.time() - t0) + " " + str(float(time.time() - t0)/float(n)) + "s per record"
- def create_raw2(n, dbname = 'raw.db'):
- conn = raw_init(dbname)
- c = conn.cursor()
- t0 = time.time()
- for i in range(n):
- if i % 1000 == 0:
- print i, str(time.time() - t0)
- row = ('NAME ' + str(i),)
- c.execute("INSERT INTO customer (name) VALUES (?)", row)
- customer_id = c.lastrowid
- row = ('ADDR ' + str(i), customer_id, )
- c.execute("INSERT INTO address (address, customer_id) VALUES (?, ?)", row)
- print "commit"
- conn.commit()
- print "create_raw2: Total time for " + str(n) + " records " + str(time.time() - t0) + " " + str(float(time.time() - t0)/float(n)) + "s per record"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement