Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # License: GPL 2
- # Disclaimer: This code is ad-hoc and extremly ugly, it might harm your eyes
- import sys
- import re
- import subprocess
- shard_mapping = {
- 's1': [
- 'db1052',
- 'db1067',
- 'db1080',
- 'db1083',
- 'db1089',
- 'db1099:3311',
- 'db1105:3311',
- 'db1106',
- 'db1114',
- 'db1119',
- ],
- 's2': [
- 'db1066',
- # 'db1054',
- 'db1074',
- 'db1076',
- 'db1090:3312',
- 'db1122',
- 'db1103:3312',
- 'db1105:3312',
- ],
- 's3': [
- 'db1075',
- 'db1077',
- 'db1078',
- 'db1123',
- ],
- 's4': [
- 'db1068',
- 'db1081',
- 'db1084',
- 'db1091',
- 'db1097:3314',
- 'db1103:3314',
- 'db1121',
- ],
- 's5': [
- 'db1070',
- 'db1082',
- 'db1096:3315',
- 'db1097:3315',
- 'db1100',
- 'db1110',
- 'db1113:3315',
- ],
- 's6': [
- 'db1061',
- 'db1085',
- 'db1088',
- 'db1093',
- 'db1096:3316',
- 'db1098:3316',
- 'db1113:3316',
- ],
- 's7': [
- 'db1062',
- 'db1079',
- 'db1086',
- 'db1090:3317',
- 'db1094',
- 'db1098:3317',
- 'db1101:3317',
- ],
- 's8': [
- 'db1071',
- 'db1087',
- 'db1092',
- 'db1099:3318',
- 'db1101:3318',
- 'db1104',
- 'db1109',
- ],
- }
- def parse_sql(sql):
- result = {}
- for table_chunk in sql.split('CREATE TABLE '):
- table_chunk = table_chunk.lower()
- table_chunk = re.sub(r'/\*.+?\*/', '', table_chunk)
- table_chunk = re.sub(r'\n\s*\-\-.*', '', table_chunk)
- table_chunk = re.sub(r'\n\s*\n', '\n', table_chunk)
- table_name = table_chunk.split('(')[0].strip()
- if not table_name or '\n' in table_name:
- continue
- if '(' not in table_chunk:
- continue
- indexes = {}
- for res in re.findall(r'create( +unique|) +index +(\S+?) +on +%s +\((.+?)\)\;' % table_name, table_chunk):
- indexes[res[1]] = {'unique': bool(res[0]), 'columns': res[2]}
- table_structure = re.split(r'create( +unique|) +index', '('.join(table_chunk.split('(')[1:]))[0]
- table_structure_real = {}
- pk = None
- for line in table_structure.split('\n'):
- line = line.strip()
- if not line or line.endswith(';'):
- continue
- # Why strip(',') doesn't work?
- if line.endswith(','):
- line = line[:-1]
- if line.startswith('primary key'):
- pk = line.split('(')[1].split(')')[0].replace(' ', '')
- continue
- line = re.sub(r' +', ' ', line).split('--')[0]
- if line.split(' ')[1].startswith('enum'):
- real_type = ' '.join(line.split(')')[0].split(' ')[1:]) + ')'
- real_type = real_type.replace('"', '\'').replace(' ','')
- else:
- real_type = line.split(' ')[1]
- if ' unsigned ' in line:
- line = line.replace(' unsigned ', ' ')
- real_type += ' unsigned'
- table_structure_real[line.split(' ')[0]] = {'type': real_type, 'config': ' '.join(line.split(' ')[2:])}
- result[table_name] = {'structure': table_structure_real, 'indexes': indexes}
- return result
- def compare_table_with_prod(host, table_name, expected_table_structure):
- verbose = '-v' in sys.argv
- port = None
- if host != 'localhost':
- if ':' in host:
- port = host.split(':')[1]
- host = host.split(':')[0]
- host += '.eqiad.wmnet'
- if verbose:
- print('Checking table' + table_name)
- sql_command = sys.argv[2]
- if port:
- sql_command += ' -P ' + port
- p = subprocess.Popen(sql_command + ' -h %s -e "DESC %s;"' % (host, table_name), stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None,shell=True)
- (output, _) = p.communicate(input='')
- res = output.decode('utf-8')
- fields_in_prod = []
- return_result = {'fields': res}
- for line in res.split('\n'):
- if not line or line.startswith('Field'):
- continue
- field_structure = line.lower().split('\t')
- fields_in_prod.append(field_structure[0])
- name = field_structure[0]
- if name not in expected_table_structure['structure']:
- print(host, table_name, name, 'field-mismatch-prod-extra')
- continue
- expected_type = expected_table_structure['structure'][name]['type'].replace('varchar', 'varbinary').replace('integer', 'int')
- if expected_type != field_structure[1].replace('varchar', 'varbinary'):
- actual_size = None
- if '(' in field_structure[1]:
- actual_size = field_structure[1].split('(')[1].split(')')[0]
- expected_size = None
- if '(' in expected_type:
- expected_size = expected_size.split('(')[1].split(')')[0]
- if actual_size and expected_size and actual_size != expected_size:
- print(host, table_name, name, 'field-size-mismatch', expected_size + ' ' + actual_size)
- if (field_structure[1] + expected_type).count(' unsigned') == 1:
- print(host, table_name, name, 'field-unsigned-mismatch', field_structure[1] + ' ' + expected_type)
- actual_type = field_structure[1].split('(')[0].split(' ')[0]
- expected_type = expected_type.split('(')[0].split(' ')[0]
- if actual_type != expected_type:
- print(host, table_name, name, 'field-type-mismatch', expected_type + ' ' + actual_type)
- expected_config = expected_table_structure['structure'][name]['config']
- if (field_structure[2] == 'no' and 'not null' not in expected_config) or (field_structure[2] == 'yes' and 'not null' in expected_config):
- print(host, table_name, name, 'field-null-mismatch')
- # if len(field_structure[4]) < 4:
- # default = ''
- #else:
- # default = field_structure[4]
- #if default == 'null' and field_structure[2] == 'no':
- # continue
- #print(default, expected_config)
- #if (default and 'default ' + default not in expected_config) or (not default and 'default ' in expected_config):
- # print(host, table_name, name, 'field-default-mismatch')
- #print(expected_config)
- for field in expected_table_structure['structure']:
- if field not in fields_in_prod:
- print(host, table_name, name, 'field-mismatch-codebase-extra')
- p = subprocess.Popen(sys.argv[2] + ' -h %s -e "SHOW INDEX FROM %s;"' % (host, table_name), stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None,shell=True)
- (output, _) = p.communicate(input='')
- res = output.decode('utf-8')
- return_result['indexes'] = res
- indexes = {}
- for line in res.split('\n'):
- if not line or line.startswith('Table'):
- continue
- index_structure = line.lower().split('\t')
- if index_structure[2] not in indexes:
- indexes[index_structure[2]] = {'unique': index_structure[1] == '0', 'columns': [index_structure[4]]}
- else:
- indexes[index_structure[2]]['columns'].append(index_structure[4])
- expected_indexes = expected_table_structure['indexes']
- for index in indexes:
- #clean up primaries later
- if index == 'primary':
- continue
- if index not in expected_indexes:
- if index == 'tmp1':
- print('wtf')
- print(host, table_name, index, 'index-mismatch-prod-extra')
- continue
- if indexes[index]['unique'] != expected_indexes[index]['unique']:
- print(host, table_name, index, 'index-uniqueness-mismatch')
- expected_columns = expected_indexes[index]['columns'].replace(' ', '')
- expected_columns = re.sub(r'\(.+?\)', '', expected_columns)
- if ','.join(indexes[index]['columns']) != expected_columns:
- print(host, table_name, index, 'index-columns-mismatch', ','.join(indexes[index]['columns']) + ' ' + expected_columns)
- for index in expected_indexes:
- if index not in indexes:
- print(host, table_name, index, 'index-mismatch-code-extra')
- return return_result
- def main():
- with open(sys.argv[1], 'r') as f:
- sql = f.read()
- sql_data = parse_sql(sql)
- final_result = {}
- hosts = ['localhost']
- if '-prod' in sys.argv:
- hosts = shard_mapping[sys.argv[3]]
- for host in hosts[1:]:
- final_result[host] = {}
- for table in sql_data:
- final_result[host][table] = compare_table_with_prod(host, table, sql_data[table])
- for host in hosts:
- for table in final_result[hosts[0]]:
- if final_result[host][table] != final_result[hosts[0]][table]:
- print(table, final_result[host][table], final_result[hosts[0]][table])
- main()
Add Comment
Please, Sign In to add comment