Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from sys import argv
- from tqdm import tqdm
- from itertools import product
# Separator names accepted by main(), mapped to the actual delimiter
# character.  Some delimiters can also be given literally (',', ';', '|').
sep_map = dict([
    ("tab", "\t"),
    ("comma", ","),
    (",", ","),
    ("semicolon", ";"),
    (";", ";"),
    ("space", " "),
    ("pipe", "|"),
    ("|", "|"),
])
def main(fpath, inner_sep, outer_sep='tab'):
    """
    Unstack multi-valued fields of a delimited text file.

    Each input line is split into fields on the outer separator, and each
    field is split into values on the inner separator.  One output line is
    written for every combination (Cartesian product) of the field values,
    e.g. with inner ``comma`` and outer ``tab`` the line ``a,b<TAB>c``
    becomes the two lines ``a<TAB>c`` and ``b<TAB>c``.

    The result is written next to the input file, with ``_unstacked``
    inserted before the file extension.  If either separator name is not a
    key of ``sep_map``, a message is written to stderr and nothing is
    processed.

    :param str fpath: path of the input file
    :param str inner_sep: ``sep_map`` key of the separator inside a field
    :param str outer_sep: ``sep_map`` key of the separator between fields
    """
    # Function-scope imports keep this block self-contained.
    import os
    import sys

    for role, name in (('outer', outer_sep), ('inner', inner_sep)):
        if name not in sep_map:
            sys.stderr.write(
                'Unknown {} separator {!r}; expected one of: {}\n'.format(
                    role, name, ', '.join(sorted(sep_map))))
            return

    # splitext copes with extensions of any length (or none), unlike
    # slicing off the last four characters.
    root, ext = os.path.splitext(fpath)
    out_fpath = root + '_unstacked' + ext

    outer_sep = sep_map[outer_sep]
    inner_sep = sep_map[inner_sep]

    def unstacked_lines(line):
        # Strip only the line terminator: a plain strip() would also eat
        # leading/trailing tabs or spaces and silently drop empty columns.
        fields = line.rstrip('\n').split(outer_sep)
        values_per_field = [field.split(inner_sep) for field in fields]
        rows = [outer_sep.join(combo) for combo in product(*values_per_field)]
        return '\n'.join(rows) + '\n'

    # Open the input first so a missing input file does not leave an empty
    # output file behind; both handles are closed on exit.
    with open(fpath, 'r') as infile, open(out_fpath, 'w') as outfile:
        for line in tqdm(infile):
            outfile.write(unstacked_lines(line))
if __name__ == "__main__":
    args = argv[1:]
    # main() needs the file path and the inner separator; the outer
    # separator is optional, so accept exactly two or three arguments.
    if 2 <= len(args) <= 3:
        main(*args)
    else:
        print('Usage: python {} FPATH INNER_SEP [OUTER_SEP]'.format(argv[0]))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement