Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- import sys
- # create mysqldump commands to create multiple sql files for a single (large) table
- # specify dump_file_count for desired number of files
- db_host = 'HOST'
- db_user = 'USER'
- db_name = 'DBNAME'
- db_pass = 'PASS'
- db_table = 'TABLE'
- table_id = 'id' # primary key column for this table
- dump_file_count = 10 # desired number of dump files
- # max_id = 0 means we query the DB and generate equal-size sql files
- # specify max ID to avoid MySQL query, but sql files may vary in size
- # it should be reasonably close to max PRIMARY KEY id but need not be exact
- max_id = 0
- try:
- import MySQLdb
- introspect_db = True
- except ModuleNotFoundError:
- introspect_db = False
- mysqldump = f'mysqldump --opt --order-by-primary --compress -h {db_host} -u {db_user} -p{db_pass}'
- if max_id:
- # there may be gaps in IDs so sql files may not have equal numbers of INSERTs
- chunk_size = int(max_id / dump_file_count)
- chunks = list(range(0, max_id, chunk_size))
- else:
- if not introspect_db:
- print("Unable to load python MySQLdb module")
- print("Either specify max_id or install the module")
- print("Possibly install with:")
- print("pip install --user mysqlclient ")
- sys.exit(1)
- # else
- db = MySQLdb.connect(host=db_host, db=db_name, user=db_user, passwd=db_pass).cursor()
- db.execute(f'SELECT {table_id} FROM {db_table} ORDER BY {table_id}')
- row_ids = [x[0] for x in db.fetchall()]
- row_count = len(row_ids)
- chunk_size = int(row_count / dump_file_count)
- chunks = [0]
- try:
- # put equal numbers of INSERTs in each file
- i = 1
- while True:
- chunks.append(row_ids[i * chunk_size])
- i += 1
- except IndexError:
- chunks.append(row_ids[-1])
- print('# %s rows inserted per file' % (chunk_size,))
- for i in range(len(chunks)):
- no_create = ''
- if i:
- no_create = '--skip-add-drop-table --no-create-info'
- file_count = str(i).zfill(4)
- start = chunks[i]
- try:
- end = chunks[i+1]
- id_range = f'-w"{db_table}.{table_id}>={start} AND {db_table}.{table_id}<{end}"'
- except IndexError:
- # include new rows inserted since our db query
- id_range = f'-w"{db_table}.{table_id}>={end}"'
- print(f'{mysqldump} {no_create} {id_range} -r {db_table}.{file_count}.sql {db_name} {db_table}')
Add Comment
Please, Sign In to add comment