Guest User

Untitled

a guest
Mar 26th, 2021
145
0
Never
Not a member of Pastebin yet? Sign up; it unlocks many cool features!
Python 1.36 KB | None | 0 0
  1. import time
  2. import numpy as np
  3.  
  4. from dataclasses import dataclass
  5.  
  6.  
  7. @dataclass
  8. class RandIter:
  9.     low: int
  10.     high: int
  11.     size: int
  12.     seed: int
  13.  
  14.     def __post_init__(self):
  15.         rng = np.random.default_rng(self.seed)
  16.         self._sampled_values = rng.integers(low=self.low, high=self.high, size=self.size).tolist()
  17.  
  18.     def __iter__(self):
  19.         return iter(self._sampled_values)
  20.  
  21.     def __len__(self):
  22.         return self.size
  23.  
  24.  
  25. def bench(bc, keys, n):
  26.     for key in keys:
  27.         _start = time.time()
  28.         for _ in range(n):
  29.             bc[key]
  30.         print("Avg access time key={:<50}:\t{:.3f}ms".format(str(key), (time.time() - _start) / n * 1000))
  31.  
  32.  
  33. if __name__ == "__main__":
  34.     import os
  35.     os.environ["USE_TF"] = "0"
  36.     import datasets as ds
  37.     ds.logging.set_verbosity_error()
  38.  
  39.     bc = ds.load_dataset("bookcorpus", split="train")
  40.     # bc = ds.concatenate_datasets([bc] * 10)
  41.     n = 100
  42.     keys = [1, len(bc) - 1, range(len(bc) - 1024, len(bc)), RandIter(0, len(bc), 1024, 42)]
  43.  
  44.     print(f"Loaded dataset '{bc.info.builder_name}', len={len(bc)}, nbytes={bc.data.nbytes}\n")
  45.  
  46.     print("\n" + "=" * 25 + " Querying unshuffled bookcorpus " + "=" * 25 + "\n")
  47.     bench(bc, keys, n)
  48.  
  49.     print("\n" + "=" * 26 + " Querying shuffled bookcorpus " + "=" * 26 + "\n")
  50.     bc = bc.shuffle(42)
  51.     bench(bc, keys, n)
  52.  
Advertisement
Add Comment
Please, Sign In to add comment