Advertisement
gluk47

Experiments splitter

Apr 26th, 2018
212
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.68 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. # Пропустить первые HEADER строк в файле и вывести следующие BODY строк.
  3. # Повторять до готовности. Каждое BODY сохраняется в новый файл.
  4.  
  5. from sys import argv, stderr, exit
  6.  
  7. HEADER_LINES = 7
  8. BODY_LINES = 2500
  9. PROGRESS_STEP = 20  # print progress each N chunks
  10. #HEADER_LINES = 1
  11. #BODY_LINES = 3
  12. OUTFILE_NAME = '%s-{number}.txt' % argv[1]
  13.  
  14. if len(argv) < 2:
  15.     print('Usage: %s filename.\n'
  16.           '  The script will split file "filename" into files %s,\n'
  17.           '  skipping %s lines of header for each experiment\n'
  18.           '  and writing %s lines of text.' % (argv[0], OUTFILE_NAME, HEADER_LINES, BODY_LINES))
  19.     exit(0)
  20.  
  21. def word(count, base):
  22.     return '%s %s%s' % (count, base, ['', 's'][count != 1])
  23.  
  24. exp_no = 0
  25. with open(argv[1]) as inp:
  26.     while True:
  27.         i = -1
  28.         for i, _ in enumerate(inp):
  29.             if i >= HEADER_LINES - 1:
  30.                 break
  31.         if i < HEADER_LINES - 1:
  32.             break  # end of file reached
  33.        
  34.         exp_no += 1
  35.         with open(OUTFILE_NAME.format(number=exp_no), 'w') as out:
  36.             i = -1
  37.             for i, line in enumerate(inp):
  38.                 out.write(line)
  39.                 if i >= BODY_LINES - 1:
  40.                     break
  41.         if i < BODY_LINES - 1:
  42.             print('The last experiment is too short (%s only)' % word(i + 1, 'line'))
  43.             break  # end of file reached
  44.         if exp_no % PROGRESS_STEP == 0:
  45.             print('%s processed' % word(exp_no, 'experiment'), file=stderr)
  46.  
  47. print('\n%s processed' % word(exp_no, 'experiment'), file=stderr)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement