Guest User

Untitled

a guest
Feb 21st, 2018
62
0
Never
Not a member of Pastebin yet? Sign up; it unlocks many cool features!
text 0.65 KB | None | 0 0
  1. import multiprocessing
  2. import pandas as pd
  3. import numpy as np
  4. from multiprocessing import Pool
  5. num_partitions = 5
  6. num_cores = multiprocessing.cpu_count()
  7.  
  8. def parallelize_dataframe(df, func):
  9. a,b,c,d,e = np.array_split(df, num_partitions)
  10. pool = Pool(num_cores)
  11. df = pd.concat(pool.map(func, [a,b,c,d,e]))
  12. pool.close()
  13. pool.join()
  14. return df
  15.  
  16. def square(x):
  17. return x**2
  18.  
  19. def test_func(data):
  20. print("Process working on: ",data)
  21. data["square"] = data["col"].apply(square)
  22. return data
  23.  
  24. df = pd.DataFrame({'col': [0,1,2,3,4,5,6,7,8,9]})
  25.  
  26. if __name__ == '__main__':
  27. test = parallelize_dataframe(df, test_func)
  28. print(test)
Add Comment
Please sign in to add a comment.