Guest User

Untitled

a guest
Oct 16th, 2017
100
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.30 KB | None | 0 0
  1. def read_sql_query(query, chunk_size, cnxn):
  2. try:
  3. df = pd.read_sql_query(query, cnxn, index_col=['product_key'], chunksize=100000)
  4. return df
  5. except Exception as e:
  6. print(e)
  7.  
  8. def return_chunks_in_df(df, start_date, end_date):
  9. try:
  10.  
  11. sub_df = pd.DataFrame()
  12. for chunks in df:
  13. sub_df = pd.concat([sub_df, chunks.loc[(chunks['trans_date'] > start_date) & (chunks['trans_date'] < end_date)]], ignore_index=True)
  14. print(sub_df.info())
  15. return sub_df
  16. except Exception as e:
  17. print(e)
  18.  
  19. query = r"select * from sales_rollup where product_key in (select product_key from temp limit 10000)"
  20.  
  21. start_time = timeit.default_timer()
  22. df = read_sql_query(query, 100000, cnxn)
  23. print(df)
  24. print('time to chunk:' + str(timeit.default_timer() - start_time))
  25.  
  26. #scenario 1
  27. df = read_sql_query(query, 100000, cnxn)
  28. start_time = timeit.default_timer()
  29. sub_df1 = return_chunks_in_df(df, '2015-01-01', '2016-01-01')
  30. print('scenario1:' + str(timeit.default_timer() - start_time))
  31.  
  32. #scenario 2
  33. start_time = timeit.default_timer()
  34. sub_df1 = return_chunks_in_df(df, '2016-01-01', '2016-12-31')
  35. print('scenario1:' + str(timeit.default_timer() - start_time))
  36.  
  37. for chunks in df:
  38. print(chunks.info())
  39.  
  40. df = read_sql_query(query, 100000, cnxn)
Add Comment
Please, Sign In to add comment