Advertisement
Python253

split_data_w_markers

Mar 1st, 2024 (edited)
666
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.63 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. # Filename: split_data_w_markers.py
  3. # Author: Jeoi Reqi
  4. # Splits a text file into numbered chunk files of a fixed number of lines; intended to be used along with the Data Marker Python script
  5.  
  6. import os
  7.  
  8. def split_file(input_file, output_directory, lines_per_chunk=100):
  9.     with open(input_file, 'r', encoding='utf-8') as file:
  10.         lines = file.readlines()
  11.  
  12.     total_lines = len(lines)
  13.     total_chunks = (total_lines // lines_per_chunk) + 1  # Adjust for the last chunk
  14.  
  15.     print(f"Total number of chunks: {total_chunks}")
  16.  
  17.     # Calculate the number of digits needed for naming files
  18.     num_digits = len(str(total_chunks))
  19.  
  20.     for i in range(total_chunks):
  21.         start_index = i * lines_per_chunk
  22.         end_index = min((i + 1) * lines_per_chunk, total_lines)
  23.  
  24.         chunk = lines[start_index:end_index]
  25.  
  26.         # Format the chunk number with leading zeros
  27.         chunk_number = str(i + 1).zfill(num_digits)
  28.  
  29.         output_file_path = os.path.join(output_directory, f'chunk_{chunk_number}.txt')
  30.  
  31.         with open(output_file_path, 'w', encoding='utf-8') as output:
  32.             output.writelines(chunk)
  33.  
  34.     print("File split complete.")
  35.  
  36. if __name__ == "__main__":
  37.     current_directory = os.getcwd()
  38.     input_file_path = os.path.join(current_directory, 'isoon_chat.txt')   # Edit Input Name To Your Desired File Name
  39.     output_directory_path = os.path.join(current_directory, 'split_isoon_chat_data_w_markers')  # Edit Output File Name To Your Desired File Name
  40.  
  41.     os.makedirs(output_directory_path, exist_ok=True)
  42.  
  43.     split_file(input_file_path, output_directory_path, lines_per_chunk=100)  # Split At The Markers
  44.  
  45.  
  46.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement