Advertisement
jack06215

[pandas] to_csv() progress bar

Jun 28th, 2020
145
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.43 KB | None | 0 0
  1. import pandas as pd
  2. from tqdm import tqdm
  3. from alive_progress import alive_bar
  4. import time
  5. from pathlib import Path
  6. import os
  7.  
  8.  
  9. def chunker(seq, size):
  10.     # from http://stackoverflow.com/a/434328
  11.     return (seq[pos:pos + size] for pos in range(0, len(seq), size))
  12.  
  13. def progress_1(df, filename):
  14.     chunksize = int(len(df) / 100) if len(df) >=100000 else len(df)
  15.     print('chunksize: ', chunksize)
  16.     if os.path.isfile(filename):
  17.         os.remove(filename)
  18.     with tqdm(total=len(df)) as pbar:
  19.         for i, cdf in enumerate(chunker(df, chunksize)):
  20.             mode = "w" if i == 0 else "a"
  21.             cdf.to_csv(filename + '.csv', index=False, header=False, mode=mode)
  22.             pbar.update(chunksize)
  23.  
  24. def progress_2(df, filename):
  25.     chunksize = int(len(df) / 100) if len(df) >=100000 else len(df)
  26.     items = range(0, len(df), chunksize)
  27.     if os.path.isfile(filename):
  28.         os.remove(filename)
  29.  
  30.     with alive_bar(total=len(items), bar='bubbles', spinner='dots_reverse') as bar:
  31.         for item, item2 in zip(items, enumerate(chunker(df, chunksize))):
  32.             mode = "w" if item2[0] == 0 else "a"
  33.             item2[1].to_csv(filename + '.csv', index=False, header=False, mode=mode)
  34.             bar()                      
  35.  
  36.  
  37. df = pd.DataFrame({'a': range(0, 100000)})
  38. # drive_path = str(Path(os.path.abspath(os.path.dirname(__file__))))
  39. drive_path = 'E:'
  40. progress_1(df, drive_path + '/out_f.csv')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement