Guest User

Untitled

a guest
Jun 18th, 2018
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.57 KB | None | 0 0
  1. #!/usr/bin/python3
  2.  
  3. # Script to merge two or more wiki tables and merge duplicate entries based on 'key' columns in each table
  4.  
  5. # How to use:
  6. # wikimerge.py keys columns files...
  7.  
  8. # 'keys' is a comma-separated list of key columns, one for each table
  9. # This is the column from each table that will be joined on
  10. # Do not put spaces in the list. Use '1,2,3', not '1, 2, 3'
  11.  
  12. # 'columns' is a semicolon-separated list of table,column pairs specifying the output file columns
  13. # Don't put spaces in this list either. Use '1,2;0,1;2,4' not '1,2; 0,1; 2,4'
  14.  
  15. # files are two or more input files in "wiki table format". Just copy/paste all the lines from a wiki between {| and |}
  16.  
  17. import sys
  18.  
  19. def main(args):
  20.  
  21. keyString = args[0]
  22. keys = keyString.split(',')
  23. keys = list(map(int, keys))
  24.  
  25. outColsString = args[1]
  26. outColsTmp = outColsString.split(';')
  27. def toTuple(istr):
  28. p1,p2 = istr.split(',')
  29. return (int(p1), int(p2))
  30. outCols = list(map(toTuple, outColsTmp))
  31.  
  32. inNames = args[2:]
  33. infs = []
  34. for fn in inNames:
  35. infs.append(open(fn, "rt"))
  36.  
  37. tableColumns = []
  38. tables = []
  39.  
  40. for i in range(len(infs)):
  41. tableColumns.append([])
  42. tables.append(dict())
  43.  
  44. curEntry = dict()
  45. curCol = 0
  46. curRow = dict()
  47.  
  48. for i in range(len(infs)):
  49. for line in infs[i]:
  50. if len(line)==0: continue
  51. if line[0:2]=='{|': continue
  52. if line[0:2]=='|}': continue
  53. if line[0] == '!':
  54. tableColumns[i].append(line[2:-1])
  55. curCol += 1
  56. continue
  57. if line[0:2] == '|-':
  58. if len(curRow)>0:
  59. cr = tables[i].get(curRow[keys[i]], [])
  60. cr.append(curRow)
  61. tables[i][curRow[keys[i]]] = cr
  62. curRow = dict()
  63. curCol = 0
  64. continue
  65. if line[0] == '|':
  66. val = line[2:-1]
  67. curRow[curCol] = val
  68. curCol += 1
  69. continue
  70.  
  71. for inf in infs:
  72. inf.close()
  73.  
  74. newTable = dict()
  75.  
  76. print('{| class="wikitable sortable"')
  77. for oc in outCols:
  78. tab, col = oc
  79. print('! {}'.format(tableColumns[tab][col]))
  80. print('|-')
  81.  
  82. for k in tables[0]:
  83. for oc in outCols:
  84. tab, col = oc
  85.  
  86. vals = []
  87. if tables[tab].get(k):
  88. for v in tables[tab][k]:
  89. vals.append(v[col])
  90. print('| {}'.format(','.join(vals)))
  91. print('|-')
  92. print('|}')
  93.  
  94. if __name__=='__main__':
  95. main(sys.argv[1:])
Add Comment
Please, Sign In to add comment