Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import string
- import pandas as pd
- import numpy as np
- from distributed import Client
- from timeit import default_timer as time
def main():
    """Benchmark scattering a pandas DataFrame and its columns to workers.

    Builds a one-million-row frame with one column per dtype of interest
    (integer, float, object/string, categorical, datetime), then measures
    the wall-clock time of a ``scatter(..., broadcast=True)`` followed by a
    ``gather`` round trip -- first for the whole frame, then for each
    column on its own. Results are printed to stdout; nothing is returned.
    """
    n_rows = 1_000_000
    # The categorical column is built from five equally sized runs.
    n_per_category = n_rows // 5

    # Use the client as a context manager so it (and any cluster it
    # started) is shut down even if the benchmark raises part-way through.
    with Client() as client:
        df = pd.DataFrame({
            # ``size=`` is required: without it NumPy returns one scalar,
            # which pandas would broadcast into a constant column.
            "A": np.random.randint(0, n_rows, size=n_rows),
            "B": np.random.uniform(size=n_rows),
            # Summing single-character columns row-wise concatenates them
            # into a 10-character random string per row.
            "C": pd.DataFrame(
                np.random.choice(list(string.ascii_letters), (n_rows, 10))
            ).sum(1),
            "D": pd.Categorical(
                n_per_category * ['aaaaa']
                + n_per_category * ['bbbbb']
                + n_per_category * ['cccccc']
                + n_per_category * ['ddddd']
                + n_per_category * ['eeeee']
            ),
            "E": pd.date_range(start="1900", periods=n_rows, freq="ns"),
        })

        # Whole-frame round trip.
        t0 = time()
        future = client.scatter(df, broadcast=True)
        client.gather(future)
        t1 = time()
        print("Took {:.2f} seconds to scatter frame".format(t1 - t0))

        # Per-column round trips, to see which dtype dominates the cost.
        # ``items()`` replaces ``iteritems()``, which pandas 2.0 removed.
        for _, col in df.items():
            t0 = time()
            future = client.scatter(col, broadcast=True)
            client.gather(future)
            t1 = time()
            print("Took {:.2f} seconds to scatter {}".format(t1 - t0, col.dtype))
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement