Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import numpy as np
def reduce_mem_usage(df):
    """Shrink ``df`` by downcasting its numeric columns in place.

    Every column whose dtype is one of int16/int32/int64/float16/float32/
    float64 is cast to the narrowest dtype of the same kind whose
    representable range covers the column's minimum and maximum. Columns of
    any other dtype are left untouched.

    The memory usage after optimisation and the percentage saved are printed
    to stdout.

    Note: float columns may end up as float16 or float32. Those types cover
    the column's *range* but store fewer significant digits, so values can
    be rounded by the cast.

    Args:
        df: pandas DataFrame; its columns are reassigned in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    start_mem = df.memory_usage().sum() / 1024**2

    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if str(col_type)[:3] == 'int':
            # Bounds are inclusive: a value equal to a type's min/max still
            # fits in that type.
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            # Fall back to float64 when neither narrower type covers the
            # range (this is also the path taken when min/max are NaN or
            # infinite, since those comparisons are False).
            target = np.float64
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    target = candidate
                    break
            df[col] = df[col].astype(target)

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    # Guard the ratio so a frame that reports zero bytes does not divide by 0.
    saved_pct = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print('Decreased by {:.1f}%'.format(saved_pct))
    return df
# Date-stamped CSV files, in the order they are appended to the combined frame.
csv_days = [
    '02-14-2018', '02-15-2018', '02-16-2018', '02-20-2018', '02-21-2018',
    '02-22-2018', '02-23-2018', '02-28-2018', '03-01-2018', '03-02-2018',
]
csv_paths = ['./data/CSVs/{}.csv'.format(day) for day in csv_days]

# The first file is parsed with pandas' inferred dtypes; those dtypes are
# captured *before* downcasting and then forced onto every later file.
first_frame = pd.read_csv(csv_paths[0])
dtypes_of_0 = first_frame.dtypes.to_dict()

# Downcast each frame as soon as it is loaded so only the reduced copies are
# held while the remaining files are read.
dataframes = [reduce_mem_usage(first_frame)]
del first_frame
for path in csv_paths[1:]:
    frame = pd.read_csv(path, low_memory=False, dtype=dtypes_of_0, index_col=False)
    dataframes.append(reduce_mem_usage(frame))
    del frame

# One concat copies the data once; chaining pairwise concats would re-copy
# the accumulated frame on every step.
df = pd.concat(dataframes)

# The list holds the only remaining references to the per-file frames;
# dropping it is what actually lets their memory be reclaimed.
del dataframes
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement