Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python3
- # Script to merge two or more wiki tables and merge duplicate entries based on 'key' columns in each table
- # How to use:
- # wikimerge.py keys columns files...
- # 'keys' is a comma-separated list of key columns, one for each table
- # This is the column from each table that will be joined on
- # Do not put spaces in the list. Use '1,2,3', not '1, 2, 3'
- # 'columns' is a semicolon-separated list of table,column pairs specifying the output file columns
- # Don't put spaces in this list either. Use '1,2;0,1;2,4', not '1,2; 0,1; 2,4'
- # 'files' are two or more input files in "wiki table format". Just copy/paste all the lines from a wiki page between {| and |}
- import sys
def _parse_column_spec(spec):
    """Convert one 'table,column' output spec into an (int, int) tuple."""
    table_idx, col_idx = spec.split(',')
    return (int(table_idx), int(col_idx))


def _store_row(rows_by_key, row, key_col):
    """File a finished row under its key-column value; empty rows are ignored.

    Raises KeyError if a non-empty row has no cell in the key column.
    """
    if row:
        rows_by_key.setdefault(row[key_col], []).append(row)


def _parse_table(lines, key_col):
    """Parse wiki-table markup into (headers, rows grouped by key value).

    Only the one-cell-per-line form is understood: '! header', '| cell',
    '|-' as row separator.  The first two characters of a header/cell line
    are dropped as the marker, so 'a || b' inline cells are not split.

    Returns a list of header strings and a dict mapping each key-column
    value to the list of rows (dicts of column index -> cell text) that
    carry it, in file order.
    """
    headers = []
    rows_by_key = {}
    row = {}
    col = 0
    for raw in lines:
        # Strip only the newline (if any) so a final line without a trailing
        # newline does not lose its last character.
        line = raw.rstrip('\n')
        if not line:
            continue
        # '{|' opens the table and '|+' is its caption: neither holds data.
        if line.startswith(('{|', '|+')):
            continue
        # A row ends at the next row separator or at the table close; commit
        # it in both cases so the final row is not lost.
        if line.startswith(('|-', '|}')):
            _store_row(rows_by_key, row, key_col)
            row = {}
            col = 0
            continue
        if line[0] == '!':
            headers.append(line[2:])
            col += 1
            continue
        if line[0] == '|':
            row[col] = line[2:]
            col += 1
    # Input pasted without the closing '|}' still ends with a pending row.
    _store_row(rows_by_key, row, key_col)
    return headers, rows_by_key


def main(args):
    """Merge wiki tables on their key columns and print the merged table.

    args[0]  -- comma-separated key column index, one per input file.
    args[1]  -- semicolon-separated 'table,column' pairs selecting the
                output columns (both indices are zero-based).
    args[2:] -- paths of the input files in wiki table format.

    One output row is printed for every key found in the first table, in
    the order the keys first appear there.  For each output column, the
    values of all rows with that key in the referenced table are joined
    with ','; rows that are too short to have the column are skipped.
    Keys that occur only in later tables are not printed.

    Raises IndexError if fewer key columns than input files are given,
    and KeyError if a row lacks its table's key column.
    """
    keys = [int(k) for k in args[0].split(',')]
    out_cols = [_parse_column_spec(spec) for spec in args[1].split(';')]
    in_names = args[2:]

    if len(keys) < len(in_names):
        raise IndexError(
            'got {} key column(s) for {} input file(s)'.format(
                len(keys), len(in_names)))

    table_columns = []
    tables = []
    for key_col, name in zip(keys, in_names):
        # Each file is parsed with fresh state and closed even on error.
        with open(name, "rt") as inf:
            headers, rows_by_key = _parse_table(inf, key_col)
        table_columns.append(headers)
        tables.append(rows_by_key)

    print('{| class="wikitable sortable"')
    for tab, col in out_cols:
        print('! {}'.format(table_columns[tab][col]))
    print('|-')
    for key in tables[0]:
        for tab, col in out_cols:
            vals = [r[col] for r in tables[tab].get(key, []) if col in r]
            print('| {}'.format(','.join(vals)))
        print('|-')
    print('|}')
# Run the merge only when executed as a script; the program name is dropped
# so main() receives just: keys, columns, files...
if __name__ == "__main__":
    main(sys.argv[1:])
Add Comment
Please, Sign In to add comment