Advertisement
Guest User

Untitled

a guest
Sep 20th, 2017
64
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.96 KB | None | 0 0
  1. import string
  2. import pandas as pd
  3. import numpy as np
  4. from distributed import Client
  5. from timeit import default_timer as time
  6.  
  7.  
  8. def main():
  9. client = Client()
  10. N = 1_000_000
  11. P = N // 5
  12.  
  13. df = pd.DataFrame({
  14. "A": np.random.randint(N),
  15. "B": np.random.uniform(N),
  16. "C": pd.DataFrame(np.random.choice(list(string.ascii_letters), (N, 10))).sum(1),
  17. "D": pd.Categorical(P * ['aaaaa'] + P * ['bbbbb'] + P * ['cccccc'] + P * ['ddddd'] + P * ['eeeee']),
  18. "E": pd.date_range(start="1900", periods=N, freq="ns")
  19. })
  20. t0 = time()
  21. f = client.scatter(df, broadcast=True)
  22. client.gather(f)
  23. t1 = time()
  24. print("Took {:.2f} seconds to scatter frame".format(t1 - t0))
  25.  
  26. for _, col in df.iteritems():
  27. t0 = time()
  28. f = client.scatter(col, broadcast=True)
  29. client.gather(f)
  30. t1 = time()
  31. print("Took {:.2f} seconds to scatter {}".format(t1 - t0, col.dtype))
  32.  
  33.  
  34. if __name__ == '__main__':
  35. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement