Advertisement
Guest User

IDS-18 Data Loading

a guest
May 6th, 2021
233
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.73 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3.  
  4.  
  5. def reduce_mem_usage(df):
  6.     numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
  7.     start_mem = df.memory_usage().sum() / 1024**2
  8.     for col in df.columns:
  9.         col_type = df[col].dtypes
  10.         if col_type in numerics:
  11.             c_min = df[col].min()
  12.             c_max = df[col].max()
  13.             if str(col_type)[:3] == 'int':
  14.                 if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
  15.                     df[col] = df[col].astype(np.int8)
  16.                 elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
  17.                     df[col] = df[col].astype(np.int16)
  18.                 elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
  19.                     df[col] = df[col].astype(np.int32)
  20.                 elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
  21.                     df[col] = df[col].astype(np.int64)
  22.             else:
  23.                 if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
  24.                     df[col] = df[col].astype(np.float16)
  25.                 elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
  26.                     df[col] = df[col].astype(np.float32)
  27.                 else:
  28.                     df[col] = df[col].astype(np.float64)
  29.     end_mem = df.memory_usage().sum() / 1024**2
  30.     print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
  31.     print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
  32.     return df
  33.  
  34.  
  35. d0 = pd.read_csv('./data/CSVs/02-14-2018.csv')
  36. dtypes_of_0 = d0.dtypes.to_dict()
  37. d1 = pd.read_csv('./data/CSVs/02-15-2018.csv', low_memory=False,dtype=dtypes_of_0,index_col=False)
  38. d2 = pd.read_csv('./data/CSVs/02-16-2018.csv', low_memory=False,dtype=dtypes_of_0,index_col=False)
  39. d3 = pd.read_csv('./data/CSVs/02-20-2018.csv', low_memory=False,dtype=dtypes_of_0,index_col=False)
  40. d4 = pd.read_csv('./data/CSVs/02-21-2018.csv', low_memory=False,dtype=dtypes_of_0,index_col=False)
  41. d5 = pd.read_csv('./data/CSVs/02-22-2018.csv', low_memory=False,dtype=dtypes_of_0,index_col=False)
  42. d6 = pd.read_csv('./data/CSVs/02-23-2018.csv', low_memory=False,dtype=dtypes_of_0,index_col=False)
  43. d7 = pd.read_csv('./data/CSVs/02-28-2018.csv', low_memory=False,dtype=dtypes_of_0,index_col=False)
  44. d8 = pd.read_csv('./data/CSVs/03-01-2018.csv', low_memory=False,dtype=dtypes_of_0,index_col=False)
  45. d9 = pd.read_csv('./data/CSVs/03-02-2018.csv', low_memory=False,dtype=dtypes_of_0,index_col=False)
  46.  
  47. dataframes = [d0, d1, d2, d3, d4, d5, d6, d7, d8, d9]
  48.  
  49. for df in dataframes:
  50.     df = reduce_mem_usage(df)
  51.  
  52.  
  53. df = pd.concat([d0, d1])
  54. del d0, d1
  55.  
  56. df = pd.concat([df, d2])
  57. del d2
  58.  
  59. df = pd.concat([df, d3])
  60. del d3
  61.  
  62. df = pd.concat([df, d4])
  63. del d4
  64.  
  65. df = pd.concat([df, d5])
  66. del d5
  67.  
  68. df = pd.concat([df, d6])
  69. del d6
  70.  
  71. df = pd.concat([df, d7])
  72. del d7
  73.  
  74. df = pd.concat([df, d8])
  75. del d8
  76.  
  77. df = pd.concat([df, d9])
  78. del d9
  79.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement