Not a member of Pastebin yet? Sign up — it unlocks many cool features!
def read_sql_query(query, chunk_size, cnxn):
    """Execute *query* on *cnxn* and return a lazy iterator of DataFrame chunks.

    Parameters
    ----------
    query : str
        SQL text to execute.
    chunk_size : int
        Number of rows per DataFrame chunk yielded by pandas.
    cnxn :
        Open DB-API / SQLAlchemy connection (created elsewhere in the file).

    Returns
    -------
    Iterator of DataFrames indexed by ``product_key``, or ``None`` if the
    query fails (the error is printed, matching the script's best-effort style).
    """
    try:
        # Bug fix: the original ignored chunk_size and hard-coded 100000.
        return pd.read_sql_query(
            query,
            cnxn,
            index_col=['product_key'],
            chunksize=chunk_size,
        )
    except Exception as e:
        # Best-effort: report and return None rather than crash the script.
        print(e)
        return None
def return_chunks_in_df(df, start_date, end_date):
    """Filter an iterable of DataFrame chunks by date and combine the result.

    Keeps rows whose ``trans_date`` is strictly between *start_date* and
    *end_date* (exclusive on both ends, as in the original comparisons).

    Parameters
    ----------
    df : iterable of pandas.DataFrame
        Chunked result set, e.g. from ``read_sql_query``.
    start_date, end_date : str
        Date bounds, compared against the ``trans_date`` column.

    Returns
    -------
    pandas.DataFrame with a fresh integer index, or ``None`` on error.
    """
    try:
        # Perf fix: the original called pd.concat inside the loop, copying
        # the accumulated frame every iteration (quadratic in total rows).
        # Collect filtered pieces first and concatenate exactly once.
        pieces = [
            chunk.loc[(chunk['trans_date'] > start_date)
                      & (chunk['trans_date'] < end_date)]
            for chunk in df
        ]
        sub_df = pd.concat(pieces, ignore_index=True) if pieces else pd.DataFrame()
        # Print the summary once at the end, not per chunk.
        print(sub_df.info())
        return sub_df
    except Exception as e:
        print(e)
        return None
# Benchmark script: times query chunking and two date-range filter scenarios.
# NOTE(review): relies on `cnxn`, `pd`, and `timeit` being set up earlier in
# the file — confirm the connection is open before this point.
query = r"select * from sales_rollup where product_key in (select product_key from temp limit 10000)"

# Time building the chunk iterator. This is lazy — no rows are fetched yet,
# so this measures query submission, not data transfer.
start_time = timeit.default_timer()
df = read_sql_query(query, 100000, cnxn)
print(df)
print('time to chunk:' + str(timeit.default_timer() - start_time))

# scenario 1: filter calendar year 2015 (exclusive bounds).
df = read_sql_query(query, 100000, cnxn)
start_time = timeit.default_timer()
sub_df1 = return_chunks_in_df(df, '2015-01-01', '2016-01-01')
print('scenario1:' + str(timeit.default_timer() - start_time))

# scenario 2: filter 2016.
# Bug fix: the chunk iterator is exhausted by scenario 1, so the original
# reused a spent iterator here and silently processed zero rows. Re-run the
# query for a fresh iterator. Also fixed the mislabelled 'scenario1' print
# and the reuse of the sub_df1 name.
df = read_sql_query(query, 100000, cnxn)
start_time = timeit.default_timer()
sub_df2 = return_chunks_in_df(df, '2016-01-01', '2016-12-31')
print('scenario2:' + str(timeit.default_timer() - start_time))

# Inspect the chunks. Bug fix: the original iterated the spent iterator and
# only *then* re-read — get the fresh iterator first so the loop sees data.
df = read_sql_query(query, 100000, cnxn)
for chunk in df:
    print(chunk.info())
Add a comment
Please sign in to add a comment.