Advertisement
Guest User

Untitled

a guest
Apr 29th, 2016
59
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.15 KB | None | 0 0
import os
from itertools import product
from sys import argv

from tqdm import tqdm
  4.  
  5. sep_map = {'tab': '\t',
  6. 'comma': ',',
  7. ',': ',',
  8. 'semicolon': ';',
  9. ';': ';',
  10. 'space': ' ',
  11. 'pipe': '|',
  12. '|': '|'}
  13.  
  14.  
  15. def main(fpath, inner_sep, outer_sep='tab'):
  16. """
  17. :param str fpath:
  18. :param str inner_sep:
  19. :param str outer_sep:
  20. """
  21. out_fpath = fpath[:-4] + '_unstacked'+fpath[-4:]
  22. if outer_sep not in sep_map:
  23. return
  24. if inner_sep not in sep_map:
  25. return
  26.  
  27. outer_sep = sep_map[outer_sep]
  28. inner_sep = sep_map[inner_sep]
  29.  
  30. def disaggregateded_lines(line):
  31. tokens = line.strip().split(outer_sep)
  32. subtokens = [token.split(inner_sep) for token in tokens]
  33. lines = '\n'.join([outer_sep.join(tpl) for tpl in product(*subtokens)])
  34. return lines + '\n'
  35.  
  36. with open(out_fpath, 'w') as outfile:
  37. for line in tqdm(open(fpath, 'r')):
  38. outfile.write(disaggregateded_lines(line))
  39.  
  40.  
  41. if __name__ == "__main__":
  42. args = argv[1:]
  43. if 0 < len(args) < 3:
  44. main(*args)
  45. else:
  46. print('Usage: python simple_unstacker.tsv')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement