Pastebin launched a little side project called VERYVIRAL.com, check it out ;-) Want more features on Pastebin? Sign Up, it's FREE!
Guest

SQLAlchemy testing

By: a guest on Jul 27th, 2012  |  syntax: Python  |  size: 8.56 KB  |  views: 74  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. """
  2. Experiments to investigate and improve SQL database performance.  
  3. By Braddock Gaskill, July 2012
  4.  
  5. Runtime notes:
  6.  
  7. All speeds performed on System76 Gazelle Professional Intel 8 core Core i7
  8. 2.3GHz under Linux Ubuntu 12.04
  9.  
  10.  
  11. = SQLAlchemy times:
  12.  
  13. create_record1: Total time for 10000 records 73.0288729668 0.00730288829803s per record
  14. (^with commit after each customer)
  15.  
  16. create_record2: Total time for 10000 records 2.14980816841 0.000214982199669s per record
  17. (customer only, no address)
  18.  
  19. create_record1: Total time for 10000 records 2.57281708717 0.000257283115387s per record
  20. (^with NO commit after each customer - SQLAlchemy seems to handle that case!)
  21.  
  22. create_record1: Total time for 10000 records 2.57225084305 0.000257225704193s per record
  23. (^with NO commit after each customer, AND PRAGMA's invoked upon connection)
  24.  
  25. create_record1: Total time for 10000 records 15.5766570568 0.00155766689777s per record
  26. (^with NO commit after each customer, but added SELECT call after each insert)
  27.  
  28. create_record1: Total time for 10000 records 20.3696157932 0.00203696208s per record
  29. (^with NO commit, added SELECT call after each insert, in-MEMORY Db, dbname='sqlite:///:memory:')
  30.  
  31. create_record1: Total time for 10000 records 2.3189971447 0.000231900906563s per record
  32. (^with flush() called every 100 iterations)
  33.  
  34. create_record3: Total time for 10000 records 2.10460805893 0.000210461497307s per record
  35. (^using SQLAlchemy insert(), one at a time)
  36.  
  37. create_record4: Total time for 10000 records 0.0987010002136 9.87088680267e-06s per record
  38. (^Bulk insert, not using correct customer ids but only an incrementing counter)
  39.  
  40.  
  41.  
  42. = Direct sqlite3 times:
  43.  
  44. create_raw1: Total time for 10000 records 0.0553600788116 5.53691387177e-06s per record
  45. Wow!  Straight customer insert, no commits until end
  46.  
  47. create_raw1: Total time for 10000 records 39.1845419407 0.00391845560074s per record
  48. conn.commit() called afterevery customer insert
  49.  
  50. create_raw2: Total time for 10000 records 0.0957889556885 9.57977771759e-06s per record
  51. ^customer and then address inserted using cursor.lastrowid
  52. """
  53.  
  54.  
  55. import time
  56. import sqlite3
  57.  
  58. from sqlalchemy.ext.declarative import declarative_base
  59. from sqlalchemy import Column, Integer, String, Sequence, Float, ForeignKey, Table, Text, Boolean, Date, DateTime, create_engine, event, Unicode, UnicodeText
  60. from sqlalchemy.orm import relationship, backref, scoped_session, sessionmaker
  61.  
  62. Base = declarative_base()
  63. DBSession = scoped_session(sessionmaker())
  64. engine = None
  65.  
  66. def db_connection_setup(dbapi_con, con_record):
  67.     """Things to do on every new DB connection"""
  68.     # It is not clear that these PRAGMAs help
  69.     #dbapi_con.execute("PRAGMA synchronous=OFF")
  70.     #dbapi_con.execute("PRAGMA journal_mode=OFF")
  71.     #dbapi_con.execute("PRAGMA cache_size=20000000")
  72.     #print "connection",dbapi_con,con_record
  73.     pass
  74.  
  75. def init(dbname = 'sqlite:///sqlite.db', echo=False):
  76.     global engine
  77.     global DBSession
  78.     if engine == None:
  79.         engine  = create_engine(dbname, echo=echo)
  80.         event.listen(engine, 'connect', db_connection_setup)
  81.         DBSession.configure(bind=engine, autoflush=False, expire_on_commit=False)
  82.  
  83. def dropall():
  84.     Base.metadata.drop_all(engine)
  85.  
  86. def createall():
  87.     Base.metadata.create_all(engine)
  88.  
  89. def prep(dbname = 'sqlite:///sqlite.db', echo=False):
  90.     init(dbname=dbname, echo=echo)
  91.     dropall()
  92.     createall()
  93.  
  94. class Customer(Base):
  95.     __tablename__ = "customer"
  96.     id = Column(Integer, primary_key=True)
  97.     name = Column(String(255))
  98.     guid = Column(String(32))
  99.  
  100.     def __unicode__(self):
  101.         return u'[Customer: ' + str(self.id) + ' ' + self.name + ']'
  102.  
  103.     def __str__(self):
  104.         return unicode(self).encode('utf-8')
  105.  
  106. class Address(Base):
  107.     __tablename__ = "address"
  108.     id = Column(Integer, primary_key=True)
  109.     customer_id = Column(ForeignKey("customer.id"))
  110.     customer = relationship("Customer")
  111.     address = Column(String(255))
  112.     guid = Column(String(32))
  113.  
  114.     def __unicode__(self):
  115.         return u'[Address: ' + str(self.id) + ' for ' + str(self.customer_id) + ' is ' + self.address + ']'
  116.  
  117.     def __str__(self):
  118.         return unicode(self).encode('utf-8')
  119.  
  120. def create_records1(n):
  121.     t0 = time.time()
  122.     for i in range(n):
  123.         if i % 1000 == 0:
  124.             print i, str(time.time() - t0)
  125.         if i % 100 == 0:
  126.             DBSession.flush()
  127.         customer = Customer()
  128.         customer.name = 'NAME ' + str(i)
  129.         DBSession.add(customer)
  130.         #DBSession.commit()
  131.         #c = DBSession.query(Customer).filter(Customer.name == 'NAME ' + str(i)).all()
  132.         address = Address()
  133.         address.address = 'ADDR ' + str(i)
  134.         address.customer = customer
  135.         DBSession.add(address)
  136.     print "commit"
  137.     DBSession.commit()
  138.     print "create_record1: Total time for " + str(n) + " records " + str(time.time() - t0) + " " + str(float(time.time() - t0)/float(n)) + "s per record"
  139.  
  140.  
  141. def create_records2(n):
  142.     t0 = time.time()
  143.     for i in range(n):
  144.         if i % 1000 == 0:
  145.             print i, str(time.time() - t0)
  146.         customer = Customer()
  147.         customer.name = 'NAME ' + str(i)
  148.         DBSession.add(customer)
  149.     print "commit"
  150.     DBSession.commit()
  151.     print "create_record2: Total time for " + str(n) + " records " + str(time.time() - t0) + " " + str(float(time.time() - t0)/float(n)) + "s per record"
  152.  
  153.  
  154. def create_records3(n):
  155.     t0 = time.time()
  156.     for i in range(n):
  157.         if i % 1000 == 0:
  158.             print i, str(time.time() - t0)
  159.         row = { 'name' : 'NAME ' + str(i)}
  160.         ins = Customer.__table__.insert().values(row)
  161.         cursor = DBSession.execute(ins)
  162.         lastrowid = cursor.inserted_primary_key
  163.         row = { 'address' : 'ADDR ' + str(i), 'customer_id' : lastrowid[0] }
  164.         ins = Address.__table__.insert().values(row)
  165.         cursor = DBSession.execute(ins)
  166.     print "commit"
  167.     DBSession.commit()
  168.     print "create_record3: Total time for " + str(n) + " records " + str(time.time() - t0) + " " + str(float(time.time() - t0)/float(n)) + "s per record"
  169.  
  170.  
  171. def create_records4(n):
  172.     t0 = time.time()
  173.     rows = []
  174.     # First add customers
  175.     for i in range(n):
  176.         if i % 1000 == 0:
  177.             print i, str(time.time() - t0)
  178.         row = { 'name' : 'NAME ' + str(i)}
  179.         rows.append(row)
  180.     ins = Customer.__table__.insert()
  181.     cursor = DBSession.execute(ins, rows)
  182.     # lastrowids does not work for multiple insert
  183.     #lastrowids = cursor.inserted_primary_key
  184.     rows = []
  185.     for i in range(n):
  186.         row = { 'address' : 'ADDR ' + str(i), 'customer_id' : i }
  187.         rows.append(row)
  188.     ins = Address.__table__.insert()
  189.     cursor = DBSession.execute(ins, rows)
  190.     print "commit"
  191.     DBSession.commit()
  192.     print "create_record4: Total time for " + str(n) + " records " + str(time.time() - t0) + " " + str(float(time.time() - t0)/float(n)) + "s per record"
  193.  
  194.  
  195. def raw_init(dbname = 'raw.db'):
  196.     conn = sqlite3.connect(dbname)
  197.     c = conn.cursor()
  198.     c.execute("DROP TABLE IF EXISTS customer")
  199.     c.execute("DROP TABLE IF EXISTS address")
  200.     c.execute("CREATE TABLE customer (id INTEGER NOT NULL, name VARCHAR(255), guid VARCHAR(32), PRIMARY KEY(id))")
  201.     c.execute("CREATE TABLE address (id INTEGER NOT NULL, address VARCHAR(255), guid VARCHAR(32), customer_id INTEGER, PRIMARY KEY(id), FOREIGN KEY(customer_id) REFERENCES customer (id))")
  202.     conn.commit()
  203.     c.close()
  204.     return conn
  205.  
  206. def create_raw1(n, dbname = 'raw.db'):
  207.     conn = raw_init(dbname)
  208.     c = conn.cursor()
  209.     t0 = time.time()
  210.     for i in range(n):
  211.         if i % 1000 == 0:
  212.             print i, str(time.time() - t0)
  213.         row = ('NAME ' + str(i),)
  214.         c.execute("INSERT INTO customer (name) VALUES (?)", row)
  215.     print "commit"
  216.     conn.commit()
  217.     print "create_raw1: Total time for " + str(n) + " records " + str(time.time() - t0) + " " + str(float(time.time() - t0)/float(n)) + "s per record"
  218.  
  219. def create_raw2(n, dbname = 'raw.db'):
  220.     conn = raw_init(dbname)
  221.     c = conn.cursor()
  222.     t0 = time.time()
  223.     for i in range(n):
  224.         if i % 1000 == 0:
  225.             print i, str(time.time() - t0)
  226.         row = ('NAME ' + str(i),)
  227.         c.execute("INSERT INTO customer (name) VALUES (?)", row)
  228.         customer_id = c.lastrowid
  229.         row = ('ADDR ' + str(i), customer_id, )
  230.         c.execute("INSERT INTO address (address, customer_id) VALUES (?, ?)", row)
  231.     print "commit"
  232.     conn.commit()
  233.     print "create_raw2: Total time for " + str(n) + " records " + str(time.time() - t0) + " " + str(float(time.time() - t0)/float(n)) + "s per record"