""" Experiments to investigate and improve SQL database performance. By Braddock Gaskill, July 2012 Runtime notes: All speeds performed on System76 Gazelle Professional Intel 8 core Core i7 2.3GHz under Linux Ubuntu 12.04 = SQLAlchemy times: create_record1: Total time for 10000 records 73.0288729668 0.00730288829803s per record (^with commit after each customer) create_record2: Total time for 10000 records 2.14980816841 0.000214982199669s per record (customer only, no address) create_record1: Total time for 10000 records 2.57281708717 0.000257283115387s per record (^with NO commit after each customer - SQLAlchemy seems to handle that case!) create_record1: Total time for 10000 records 2.57225084305 0.000257225704193s per record (^with NO commit after each customer, AND PRAGMA's invoked upon connection) create_record1: Total time for 10000 records 15.5766570568 0.00155766689777s per record (^with NO commit after each customer, but added SELECT call after each insert) create_record1: Total time for 10000 records 20.3696157932 0.00203696208s per record (^with NO commit, added SELECT call after each insert, in-MEMORY Db, dbname='sqlite:///:memory:') create_record1: Total time for 10000 records 2.3189971447 0.000231900906563s per record (^with flush() called every 100 iterations) create_record3: Total time for 10000 records 2.10460805893 0.000210461497307s per record (^using SQLAlchemy insert(), one at a time) create_record4: Total time for 10000 records 0.0987010002136 9.87088680267e-06s per record (^Bulk insert, not using correct customer ids but only an incrementing counter) = Direct sqlite3 times: create_raw1: Total time for 10000 records 0.0553600788116 5.53691387177e-06s per record Wow! Straight customer insert, no commits until end create_raw1: Total time for 10000 records 39.1845419407 0.00391845560074s per record conn.commit() called afterevery customer insert create_raw2: Total time for 10000 records 0.0957889556885 9.57977771759e-06s per record ^customer and then address inserted using cursor.lastrowid """ import time import sqlite3 from sqlalchemy.ext.declarative import declarative_base from sqlalchemy import Column, Integer, String, Sequence, Float, ForeignKey, Table, Text, Boolean, Date, DateTime, create_engine, event, Unicode, UnicodeText from sqlalchemy.orm import relationship, backref, scoped_session, sessionmaker Base = declarative_base() DBSession = scoped_session(sessionmaker()) engine = None def db_connection_setup(dbapi_con, con_record): """Things to do on every new DB connection""" # It is not clear that these PRAGMAs help #dbapi_con.execute("PRAGMA synchronous=OFF") #dbapi_con.execute("PRAGMA journal_mode=OFF") #dbapi_con.execute("PRAGMA cache_size=20000000") #print "connection",dbapi_con,con_record pass def init(dbname = 'sqlite:///sqlite.db', echo=False): global engine global DBSession if engine == None: engine = create_engine(dbname, echo=echo) event.listen(engine, 'connect', db_connection_setup) DBSession.configure(bind=engine, autoflush=False, expire_on_commit=False) def dropall(): Base.metadata.drop_all(engine) def createall(): Base.metadata.create_all(engine) def prep(dbname = 'sqlite:///sqlite.db', echo=False): init(dbname=dbname, echo=echo) dropall() createall() class Customer(Base): __tablename__ = "customer" id = Column(Integer, primary_key=True) name = Column(String(255)) guid = Column(String(32)) def __unicode__(self): return u'[Customer: ' + str(self.id) + ' ' + self.name + ']' def __str__(self): return unicode(self).encode('utf-8') class Address(Base): __tablename__ = "address" id = Column(Integer, primary_key=True) customer_id = Column(ForeignKey("customer.id")) customer = relationship("Customer") address = Column(String(255)) guid = Column(String(32)) def __unicode__(self): return u'[Address: ' + str(self.id) + ' for ' + str(self.customer_id) + ' is ' + self.address + ']' def __str__(self): return unicode(self).encode('utf-8') def create_records1(n): t0 = time.time() for i in range(n): if i % 1000 == 0: print i, str(time.time() - t0) if i % 100 == 0: DBSession.flush() customer = Customer() customer.name = 'NAME ' + str(i) DBSession.add(customer) #DBSession.commit() #c = DBSession.query(Customer).filter(Customer.name == 'NAME ' + str(i)).all() address = Address() address.address = 'ADDR ' + str(i) address.customer = customer DBSession.add(address) print "commit" DBSession.commit() print "create_record1: Total time for " + str(n) + " records " + str(time.time() - t0) + " " + str(float(time.time() - t0)/float(n)) + "s per record" def create_records2(n): t0 = time.time() for i in range(n): if i % 1000 == 0: print i, str(time.time() - t0) customer = Customer() customer.name = 'NAME ' + str(i) DBSession.add(customer) print "commit" DBSession.commit() print "create_record2: Total time for " + str(n) + " records " + str(time.time() - t0) + " " + str(float(time.time() - t0)/float(n)) + "s per record" def create_records3(n): t0 = time.time() for i in range(n): if i % 1000 == 0: print i, str(time.time() - t0) row = { 'name' : 'NAME ' + str(i)} ins = Customer.__table__.insert().values(row) cursor = DBSession.execute(ins) lastrowid = cursor.inserted_primary_key row = { 'address' : 'ADDR ' + str(i), 'customer_id' : lastrowid[0] } ins = Address.__table__.insert().values(row) cursor = DBSession.execute(ins) print "commit" DBSession.commit() print "create_record3: Total time for " + str(n) + " records " + str(time.time() - t0) + " " + str(float(time.time() - t0)/float(n)) + "s per record" def create_records4(n): t0 = time.time() rows = [] # First add customers for i in range(n): if i % 1000 == 0: print i, str(time.time() - t0) row = { 'name' : 'NAME ' + str(i)} rows.append(row) ins = Customer.__table__.insert() cursor = DBSession.execute(ins, rows) # lastrowids does not work for multiple insert #lastrowids = cursor.inserted_primary_key rows = [] for i in range(n): row = { 'address' : 'ADDR ' + str(i), 'customer_id' : i } rows.append(row) ins = Address.__table__.insert() cursor = DBSession.execute(ins, rows) print "commit" DBSession.commit() print "create_record4: Total time for " + str(n) + " records " + str(time.time() - t0) + " " + str(float(time.time() - t0)/float(n)) + "s per record" def raw_init(dbname = 'raw.db'): conn = sqlite3.connect(dbname) c = conn.cursor() c.execute("DROP TABLE IF EXISTS customer") c.execute("DROP TABLE IF EXISTS address") c.execute("CREATE TABLE customer (id INTEGER NOT NULL, name VARCHAR(255), guid VARCHAR(32), PRIMARY KEY(id))") c.execute("CREATE TABLE address (id INTEGER NOT NULL, address VARCHAR(255), guid VARCHAR(32), customer_id INTEGER, PRIMARY KEY(id), FOREIGN KEY(customer_id) REFERENCES customer (id))") conn.commit() c.close() return conn def create_raw1(n, dbname = 'raw.db'): conn = raw_init(dbname) c = conn.cursor() t0 = time.time() for i in range(n): if i % 1000 == 0: print i, str(time.time() - t0) row = ('NAME ' + str(i),) c.execute("INSERT INTO customer (name) VALUES (?)", row) print "commit" conn.commit() print "create_raw1: Total time for " + str(n) + " records " + str(time.time() - t0) + " " + str(float(time.time() - t0)/float(n)) + "s per record" def create_raw2(n, dbname = 'raw.db'): conn = raw_init(dbname) c = conn.cursor() t0 = time.time() for i in range(n): if i % 1000 == 0: print i, str(time.time() - t0) row = ('NAME ' + str(i),) c.execute("INSERT INTO customer (name) VALUES (?)", row) customer_id = c.lastrowid row = ('ADDR ' + str(i), customer_id, ) c.execute("INSERT INTO address (address, customer_id) VALUES (?, ?)", row) print "commit" conn.commit() print "create_raw2: Total time for " + str(n) + " records " + str(time.time() - t0) + " " + str(float(time.time() - t0)/float(n)) + "s per record"