Advertisement
Guest User

Untitled

a guest
Mar 15th, 2021
173
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.52 KB | None | 0 0
  1. import pyarrow as pa
  2. import time
  3. from pathlib import Path
  4. from tqdm.auto import tqdm
  5.  
  6. # This benchmark creates an arrow file of 1GB and evaluates the time needed to access examples via memory mapping
  7.  
  8. # define paths
  9. tmp_path = Path() / "tmp"
  10. tmp_path.mkdir(exist_ok=True)
  11. arrow_file_path = tmp_path / "test.arrow"
  12.  
  13. # write table
  14. schema = pa.schema({"text": pa.string()})
  15. writer = pa.RecordBatchFileWriter(arrow_file_path, schema=schema)
  16. table1MB = pa.Table.from_pydict({"text": ["a" * 1024] * 1024})
  17. for _ in tqdm(range(1024)):  # write 1GB
  18.     writer.write_table(table1MB)
  19. writer.close()
  20.  
  21. # memory map the table
  22. mmapped_table1GB = pa.ipc.open_file(pa.memory_map(str(arrow_file_path))).read_all()
  23.  
  24. # run benchmark
  25. n = 10
  26. repeats = 10
  27. for i in range(n):
  28.     i = int(i / n * len(mmapped_table1GB))
  29.     _start = time.time()
  30.     for _ in range(repeats):
  31.         mmapped_table1GB["text"][i]
  32.     average_microseconds = (time.time() - _start) * 1e6 / repeats
  33.     print(f"Time to access example at i={i/len(mmapped_table1GB)*100:.0f}%\t: {average_microseconds:.1f}μs")
  34.  
  35. # Results
  36. # Time to access example at i=0%    : 6.7μs
  37. # Time to access example at i=10%   : 7.2μs
  38. # Time to access example at i=20%   : 9.1μs
  39. # Time to access example at i=30%   : 11.4μs
  40. # Time to access example at i=40%   : 13.8μs
  41. # Time to access example at i=50%   : 16.2μs
  42. # Time to access example at i=60%   : 18.7μs
  43. # Time to access example at i=70%   : 21.1μs
  44. # Time to access example at i=80%   : 26.8μs
  45. # Time to access example at i=90%   : 25.2μs
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement