Guest User

Untitled

a guest
Jun 22nd, 2018
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.45 KB | None | 0 0
  1. # License: GPL 2
  2. # Disclaimer: This code is ad-hoc and extremly ugly, it might harm your eyes
  3. import sys
  4. import re
  5. import subprocess
  6.  
  7. shard_mapping = {
  8. 's1': [
  9. 'db1052',
  10. 'db1067',
  11. 'db1080',
  12. 'db1083',
  13. 'db1089',
  14. 'db1099:3311',
  15. 'db1105:3311',
  16. 'db1106',
  17. 'db1114',
  18. 'db1119',
  19. ],
  20. 's2': [
  21. 'db1066',
  22. # 'db1054',
  23. 'db1074',
  24. 'db1076',
  25. 'db1090:3312',
  26. 'db1122',
  27. 'db1103:3312',
  28. 'db1105:3312',
  29. ],
  30. 's3': [
  31. 'db1075',
  32. 'db1077',
  33. 'db1078',
  34. 'db1123',
  35. ],
  36. 's4': [
  37. 'db1068',
  38. 'db1081',
  39. 'db1084',
  40. 'db1091',
  41. 'db1097:3314',
  42. 'db1103:3314',
  43. 'db1121',
  44. ],
  45. 's5': [
  46. 'db1070',
  47. 'db1082',
  48. 'db1096:3315',
  49. 'db1097:3315',
  50. 'db1100',
  51. 'db1110',
  52. 'db1113:3315',
  53. ],
  54. 's6': [
  55. 'db1061',
  56. 'db1085',
  57. 'db1088',
  58. 'db1093',
  59. 'db1096:3316',
  60. 'db1098:3316',
  61. 'db1113:3316',
  62. ],
  63. 's7': [
  64. 'db1062',
  65. 'db1079',
  66. 'db1086',
  67. 'db1090:3317',
  68. 'db1094',
  69. 'db1098:3317',
  70. 'db1101:3317',
  71. ],
  72. 's8': [
  73. 'db1071',
  74. 'db1087',
  75. 'db1092',
  76. 'db1099:3318',
  77. 'db1101:3318',
  78. 'db1104',
  79. 'db1109',
  80. ],
  81. }
  82.  
  83. def parse_sql(sql):
  84. result = {}
  85. for table_chunk in sql.split('CREATE TABLE '):
  86. table_chunk = table_chunk.lower()
  87. table_chunk = re.sub(r'/\*.+?\*/', '', table_chunk)
  88. table_chunk = re.sub(r'\n\s*\-\-.*', '', table_chunk)
  89. table_chunk = re.sub(r'\n\s*\n', '\n', table_chunk)
  90. table_name = table_chunk.split('(')[0].strip()
  91. if not table_name or '\n' in table_name:
  92. continue
  93. if '(' not in table_chunk:
  94. continue
  95. indexes = {}
  96. for res in re.findall(r'create( +unique|) +index +(\S+?) +on +%s +\((.+?)\)\;' % table_name, table_chunk):
  97. indexes[res[1]] = {'unique': bool(res[0]), 'columns': res[2]}
  98. table_structure = re.split(r'create( +unique|) +index', '('.join(table_chunk.split('(')[1:]))[0]
  99. table_structure_real = {}
  100. pk = None
  101. for line in table_structure.split('\n'):
  102. line = line.strip()
  103. if not line or line.endswith(';'):
  104. continue
  105. # Why strip(',') doesn't work?
  106. if line.endswith(','):
  107. line = line[:-1]
  108. if line.startswith('primary key'):
  109. pk = line.split('(')[1].split(')')[0].replace(' ', '')
  110. continue
  111. line = re.sub(r' +', ' ', line).split('--')[0]
  112. if line.split(' ')[1].startswith('enum'):
  113. real_type = ' '.join(line.split(')')[0].split(' ')[1:]) + ')'
  114. real_type = real_type.replace('"', '\'').replace(' ','')
  115. else:
  116. real_type = line.split(' ')[1]
  117. if ' unsigned ' in line:
  118. line = line.replace(' unsigned ', ' ')
  119. real_type += ' unsigned'
  120. table_structure_real[line.split(' ')[0]] = {'type': real_type, 'config': ' '.join(line.split(' ')[2:])}
  121.  
  122. result[table_name] = {'structure': table_structure_real, 'indexes': indexes}
  123.  
  124. return result
  125.  
  126.  
  127. def compare_table_with_prod(host, table_name, expected_table_structure):
  128. verbose = '-v' in sys.argv
  129. port = None
  130. if host != 'localhost':
  131. if ':' in host:
  132. port = host.split(':')[1]
  133. host = host.split(':')[0]
  134. host += '.eqiad.wmnet'
  135. if verbose:
  136. print('Checking table' + table_name)
  137. sql_command = sys.argv[2]
  138. if port:
  139. sql_command += ' -P ' + port
  140. p = subprocess.Popen(sql_command + ' -h %s -e "DESC %s;"' % (host, table_name), stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None,shell=True)
  141. (output, _) = p.communicate(input='')
  142. res = output.decode('utf-8')
  143. fields_in_prod = []
  144. return_result = {'fields': res}
  145. for line in res.split('\n'):
  146. if not line or line.startswith('Field'):
  147. continue
  148. field_structure = line.lower().split('\t')
  149. fields_in_prod.append(field_structure[0])
  150. name = field_structure[0]
  151. if name not in expected_table_structure['structure']:
  152. print(host, table_name, name, 'field-mismatch-prod-extra')
  153. continue
  154. expected_type = expected_table_structure['structure'][name]['type'].replace('varchar', 'varbinary').replace('integer', 'int')
  155. if expected_type != field_structure[1].replace('varchar', 'varbinary'):
  156. actual_size = None
  157. if '(' in field_structure[1]:
  158. actual_size = field_structure[1].split('(')[1].split(')')[0]
  159.  
  160. expected_size = None
  161. if '(' in expected_type:
  162. expected_size = expected_size.split('(')[1].split(')')[0]
  163.  
  164. if actual_size and expected_size and actual_size != expected_size:
  165. print(host, table_name, name, 'field-size-mismatch', expected_size + ' ' + actual_size)
  166. if (field_structure[1] + expected_type).count(' unsigned') == 1:
  167. print(host, table_name, name, 'field-unsigned-mismatch', field_structure[1] + ' ' + expected_type)
  168. actual_type = field_structure[1].split('(')[0].split(' ')[0]
  169. expected_type = expected_type.split('(')[0].split(' ')[0]
  170. if actual_type != expected_type:
  171. print(host, table_name, name, 'field-type-mismatch', expected_type + ' ' + actual_type)
  172. expected_config = expected_table_structure['structure'][name]['config']
  173. if (field_structure[2] == 'no' and 'not null' not in expected_config) or (field_structure[2] == 'yes' and 'not null' in expected_config):
  174. print(host, table_name, name, 'field-null-mismatch')
  175.  
  176. # if len(field_structure[4]) < 4:
  177. # default = ''
  178. #else:
  179. # default = field_structure[4]
  180. #if default == 'null' and field_structure[2] == 'no':
  181. # continue
  182. #print(default, expected_config)
  183. #if (default and 'default ' + default not in expected_config) or (not default and 'default ' in expected_config):
  184. # print(host, table_name, name, 'field-default-mismatch')
  185. #print(expected_config)
  186. for field in expected_table_structure['structure']:
  187. if field not in fields_in_prod:
  188. print(host, table_name, name, 'field-mismatch-codebase-extra')
  189.  
  190. p = subprocess.Popen(sys.argv[2] + ' -h %s -e "SHOW INDEX FROM %s;"' % (host, table_name), stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None,shell=True)
  191. (output, _) = p.communicate(input='')
  192. res = output.decode('utf-8')
  193. return_result['indexes'] = res
  194. indexes = {}
  195. for line in res.split('\n'):
  196. if not line or line.startswith('Table'):
  197. continue
  198. index_structure = line.lower().split('\t')
  199.  
  200. if index_structure[2] not in indexes:
  201. indexes[index_structure[2]] = {'unique': index_structure[1] == '0', 'columns': [index_structure[4]]}
  202. else:
  203. indexes[index_structure[2]]['columns'].append(index_structure[4])
  204.  
  205. expected_indexes = expected_table_structure['indexes']
  206. for index in indexes:
  207. #clean up primaries later
  208. if index == 'primary':
  209. continue
  210. if index not in expected_indexes:
  211. if index == 'tmp1':
  212. print('wtf')
  213. print(host, table_name, index, 'index-mismatch-prod-extra')
  214. continue
  215. if indexes[index]['unique'] != expected_indexes[index]['unique']:
  216. print(host, table_name, index, 'index-uniqueness-mismatch')
  217. expected_columns = expected_indexes[index]['columns'].replace(' ', '')
  218. expected_columns = re.sub(r'\(.+?\)', '', expected_columns)
  219. if ','.join(indexes[index]['columns']) != expected_columns:
  220. print(host, table_name, index, 'index-columns-mismatch', ','.join(indexes[index]['columns']) + ' ' + expected_columns)
  221.  
  222. for index in expected_indexes:
  223. if index not in indexes:
  224. print(host, table_name, index, 'index-mismatch-code-extra')
  225.  
  226. return return_result
  227.  
  228. def main():
  229. with open(sys.argv[1], 'r') as f:
  230. sql = f.read()
  231. sql_data = parse_sql(sql)
  232. final_result = {}
  233. hosts = ['localhost']
  234. if '-prod' in sys.argv:
  235. hosts = shard_mapping[sys.argv[3]]
  236. for host in hosts[1:]:
  237. final_result[host] = {}
  238. for table in sql_data:
  239. final_result[host][table] = compare_table_with_prod(host, table, sql_data[table])
  240.  
  241. for host in hosts:
  242. for table in final_result[hosts[0]]:
  243. if final_result[host][table] != final_result[hosts[0]][table]:
  244. print(table, final_result[host][table], final_result[hosts[0]][table])
  245. main()
Add Comment
Please, Sign In to add comment