Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def prepare_cql(cql, params):
    """
    Substitute ``params`` into the ``cql`` statement text.

    Delegates to the ``prepare`` callable exposed by ``cql.cursor`` so the
    parameters are already replaced before the statement is added to a batch.

    :param cql: CQL statement text containing ``:name`` style placeholders.
    :param params: mapping of placeholder names to the values to substitute.
    :return: whatever ``cql.cursor.prepare`` returns for the given arguments.
    """
    # The ``cql`` argument shadows the driver package name inside this
    # function, so the driver piece is imported under a distinct alias.
    from cql import cursor as cql_cursor

    prepared = cql_cursor.prepare(cql, params)
    return prepared
def _execute_batch(cursor, statements, consistency):
    """Wrap ``statements`` in one BEGIN BATCH ... APPLY BATCH and run it.

    The batch text is always built (so dry runs still pay the string-building
    cost); it is only executed when ``cursor`` is not ``None``.
    """
    batch_cql = ('BEGIN BATCH USING CONSISTENCY ' + consistency + ' '
                 + ''.join(statements) + ' APPLY BATCH')
    if cursor is not None:
        cursor.execute(batch_cql)


def main(dry_run=False, batch_size=10, rows=10000, cols=1, consistency='ONE'):
    """Benchmark batched CQL inserts and print timing information.

    Generates ``rows`` INSERT statements, each with ``cols`` columns of random
    64-character uppercase values, groups them into batches of ``batch_size``
    statements and executes each batch. Start/end times and the run
    parameters are printed to stdout.

    :param dry_run: when true, statements are still generated and prepared but
        no database connection is opened and nothing is executed.
    :param batch_size: number of INSERT statements per batch.
    :param rows: total number of rows (INSERT statements) to generate.
    :param cols: number of generated columns per row.
    :param consistency: consistency level name placed in the BEGIN BATCH clause.
    """
    import datetime
    import random
    import string
    import time

    keyspace = 'test_case'
    cf = 'testing'

    conn = None
    cursor = None
    if not dry_run:
        # Imported under an alias so no local string can shadow the driver.
        import cql as cql_driver
        # NOTE(review): the connection selects ``keyspace`` before the
        # CREATE KEYSPACE below runs -- confirm the driver tolerates that.
        conn = cql_driver.connect(host='localhost', port=9160, keyspace=keyspace)
        cursor = conn.cursor()

    try:
        if cursor is not None:
            cursor.execute(
                'CREATE KEYSPACE :ks WITH strategy_class = :strat AND strategy_options:replication_factor = 1',
                {'ks': keyspace, 'strat': 'SimpleStrategy'})
            cursor.execute('CREATE COLUMNFAMILY :cf (KEY text PRIMARY KEY)', {'cf': cf})

        # The statement shape depends only on ``cols``; build it once.
        # Each slot carries its own leading ", " so cols == 0 leaves no
        # dangling ":c" / ":v" placeholder behind.
        name_slots = ''.join(', :c' + str(n) for n in range(cols))
        value_slots = ''.join(', :v' + str(n) for n in range(cols))
        insert_template = ('INSERT INTO :column_family (KEY' + name_slots
                           + ') VALUES (:key' + value_slots
                           + ') USING TIMESTAMP :ts ')

        start = datetime.datetime.now()
        print('Started: ' + str(start))

        pending = []
        for i in range(1, rows + 1):
            key = 'testAutoInsert' + str(i)
            params = {'ts': int(time.time() * 1e9), 'key': key, 'column_family': cf}
            for n in range(cols):
                params['c' + str(n)] = u'testColumn_' + str(n)
                params['v' + str(n)] = ''.join(
                    random.choice(string.ascii_uppercase) for _ in range(64))

            statement = prepare_cql(insert_template, params).strip()
            if not statement.endswith(';'):
                statement += ';'
            pending.append(statement)

            if i % batch_size == 0:
                _execute_batch(cursor, pending, consistency)
                pending = []
                print('Inserted Batch Number ' + str(i))

        # Flush the trailing partial batch; without this the last
        # ``rows % batch_size`` statements would never be executed.
        if pending:
            _execute_batch(cursor, pending, consistency)
            print('Inserted Batch Number ' + str(rows))

        end = datetime.datetime.now()
        print('Ended: ' + str(end))
        print('Batch Size: ' + str(batch_size))
        print('Rows Inserted: ' + str(rows))
        print('Columns Inserted per Row: ' + str(cols))
        print('Time Took:' + str(end - start))
    finally:
        # Release driver resources even if an insert fails part-way through.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
# Command-line entry point: parse the benchmark options and forward them to
# main() as keyword arguments (option names match main()'s parameter names).
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Script used to bench batch inserts for CQL')
    parser.add_argument('-d', '--dry_run', action='store_true', help='In this mode we do not connect to the DB or insert anything in the DB. Default: False')
    # Help text now states the real default (10), matching default= below.
    parser.add_argument('-b', '--batch_size', type=int, default=10, help='Size of the batch insert we do. DEFAULT: 10')
    parser.add_argument('-r', '--rows', type=int, default=10000, help='How many rows to insert. DEFAULT: 10000')
    parser.add_argument('-c', '--cols', type=int, default=1, help='How many columns per row to insert. DEFAULT: 1')
    parser.add_argument('-cs', '--consistency', type=str, default='ONE', help='Consistency level to use for the batch. DEFAULT: ONE')
    args = parser.parse_args()
    main(**vars(args))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement