Not a member of Pastebin yet? Sign up — it unlocks many cool features!
def read_sql_query(query, chunk_size, cnxn):
    """Execute *query* on *cnxn* and return a lazy iterator of DataFrame chunks.

    Parameters
    ----------
    query : str
        SQL text to execute.
    chunk_size : int
        Number of rows per DataFrame chunk yielded by pandas.
    cnxn :
        Open DB-API / SQLAlchemy connection (created elsewhere in the file).

    Returns
    -------
    Iterator of DataFrames indexed by ``product_key``, or ``None`` if the
    query fails (the error is printed, matching the script's best-effort style).
    """
    try:
        # Bug fix: the original ignored chunk_size and hard-coded 100000.
        return pd.read_sql_query(
            query,
            cnxn,
            index_col=['product_key'],
            chunksize=chunk_size,
        )
    except Exception as e:
        # Best-effort: report and return None rather than crash the script.
        print(e)
        return None
def return_chunks_in_df(df, start_date, end_date):
    """Filter an iterable of DataFrame chunks by date and combine the result.

    Keeps rows whose ``trans_date`` is strictly between *start_date* and
    *end_date* (exclusive on both ends, as in the original comparisons).

    Parameters
    ----------
    df : iterable of pandas.DataFrame
        Chunked result set, e.g. from ``read_sql_query``.
    start_date, end_date : str
        Date bounds, compared against the ``trans_date`` column.

    Returns
    -------
    pandas.DataFrame with a fresh integer index, or ``None`` on error.
    """
    try:
        # Perf fix: the original called pd.concat inside the loop, copying
        # the accumulated frame every iteration (quadratic in total rows).
        # Collect filtered pieces first and concatenate exactly once.
        pieces = [
            chunk.loc[(chunk['trans_date'] > start_date)
                      & (chunk['trans_date'] < end_date)]
            for chunk in df
        ]
        sub_df = pd.concat(pieces, ignore_index=True) if pieces else pd.DataFrame()
        # Print the summary once at the end, not per chunk.
        print(sub_df.info())
        return sub_df
    except Exception as e:
        print(e)
        return None
# Benchmark script: times query chunking and two date-range filter scenarios.
# NOTE(review): relies on `cnxn`, `pd`, and `timeit` being set up earlier in
# the file — confirm the connection is open before this point.
query = r"select * from sales_rollup where product_key in (select product_key from temp limit 10000)"

# Time building the chunk iterator. This is lazy — no rows are fetched yet,
# so this measures query submission, not data transfer.
start_time = timeit.default_timer()
df = read_sql_query(query, 100000, cnxn)
print(df)
print('time to chunk:' + str(timeit.default_timer() - start_time))

# scenario 1: filter calendar year 2015 (exclusive bounds).
df = read_sql_query(query, 100000, cnxn)
start_time = timeit.default_timer()
sub_df1 = return_chunks_in_df(df, '2015-01-01', '2016-01-01')
print('scenario1:' + str(timeit.default_timer() - start_time))

# scenario 2: filter 2016.
# Bug fix: the chunk iterator is exhausted by scenario 1, so the original
# reused a spent iterator here and silently processed zero rows. Re-run the
# query for a fresh iterator. Also fixed the mislabelled 'scenario1' print
# and the reuse of the sub_df1 name.
df = read_sql_query(query, 100000, cnxn)
start_time = timeit.default_timer()
sub_df2 = return_chunks_in_df(df, '2016-01-01', '2016-12-31')
print('scenario2:' + str(timeit.default_timer() - start_time))

# Inspect the chunks. Bug fix: the original iterated the spent iterator and
# only *then* re-read — get the fresh iterator first so the loop sees data.
df = read_sql_query(query, 100000, cnxn)
for chunk in df:
    print(chunk.info())
Add a comment
Please sign in to add a comment.