Advertisement
Guest User

Untitled

a guest
Dec 12th, 2019
123
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.41 KB | None | 0 0
  1. """ Author: Johnny (Shaun) Lowis, for Bodeker Scientific.
  2. Uses a multivariate linear regression model to error-correct ODINs and ES642s,
  3. with the Woolston TEOM as the training set.
  4. """
  5.  
  6. import netCDF4 as nc
  7. import numpy as np
  8. import pandas as pd
  9. from users.sl_scripts.MAPM.TEOM.plot_hourly_means import plot_data
  10. from users.sl_scripts.Regression.multivariate_regress_sample import regress
  11. from users.sl_scripts.Regression.multivariate_regress_sample import error
  12. from sklearn import preprocessing
  13.  
  14.  
  15. class LeroyGression:
  16.  
  17. def __init__(self, X_train_fp, y_train_fp, X_test_fp, X_train_variables, X_test_variables,
  18. regressor=0, y_scaler=0, plot=False, error_out=False):
  19. self.X_train_fp = X_train_fp
  20. self.y_train_fp = y_train_fp
  21. self.X_test_fp = X_test_fp
  22. self.X_train_variables = X_train_variables
  23. self.X_test_variables = X_test_variables
  24. self.plot = plot
  25. self.error_out = error_out
  26. self.regressor = regressor
  27. self.y_scaler = y_scaler
  28. self._process_data()
  29. self._scale_data()
  30.  
  31. def _process_data(self):
  32. data_TEOM = nc.Dataset(self.y_train_fp)
  33. teom_time = nc.num2date(data_TEOM.variables["time"][:],
  34. data_TEOM.variables['time'].units)
  35. teom_pm = data_TEOM.variables['pm2.5'][:]
  36. data_TEOM.close()
  37.  
  38. data_642 = nc.Dataset(self.X_train_fp)
  39. ES642_time = nc.num2date(data_642.variables["time"][:], data_642.variables['time'].units)
  40.  
  41. time_values, teom_idxs, es642_idxs = np.intersect1d(teom_time, ES642_time, return_indices=True)
  42.  
  43. ES642_vars = process_vars(self.X_train_variables, data_642)
  44.  
  45. shifted_ES642_vars = []
  46.  
  47. for var in ES642_vars:
  48. shifted_var = var[es642_idxs]
  49. shifted_ES642_vars.append(shifted_var)
  50.  
  51. shifted_teom_time = teom_time[teom_idxs]
  52. shifted_teom_pm = teom_pm[teom_idxs]
  53.  
  54. shifted_ES642_vars_df = pd.DataFrame(shifted_ES642_vars)
  55. transposed_ES642_vars_df = shifted_ES642_vars_df.transpose()
  56.  
  57. shifted_teom_pm_df = pd.DataFrame(shifted_teom_pm)
  58.  
  59. return shifted_teom_time, transposed_ES642_vars_df, shifted_teom_pm_df
  60.  
  61. def _scale_data(self):
  62. shifted_teom_time, transposed_ES642_vars_df, shifted_teom_pm_df = self._process_data()
  63. x_scaler = preprocessing.StandardScaler()
  64. x_scaler.fit(transposed_ES642_vars_df)
  65. X_train = x_scaler.transform(transposed_ES642_vars_df)
  66.  
  67. self.y_scaler = preprocessing.StandardScaler()
  68. self.y_scaler.fit(shifted_teom_pm_df)
  69. y_train = self.y_scaler.transform(shifted_teom_pm_df)
  70.  
  71. y_pred, self.regressor = regress(X_train, y_train)
  72.  
  73. data_X_test = nc.Dataset(self.X_test_fp)
  74. X_test_vars = pd.DataFrame(process_vars(self.X_test_variables, data_X_test)).transpose()
  75. X_test = x_scaler.transform(X_test_vars)
  76.  
  77. y_pred, self.regressor = regress(X_test, y_train)
  78.  
  79. if self.plot is True:
  80. plot_data(shifted_teom_time, y_train, shifted_teom_time, y_pred)
  81. if self.error_out is True:
  82. error(y_train, y_pred)
  83.  
  84.  
  85. def process_vars(vars_in, data):
  86. outlist = []
  87.  
  88. for key in vars_in:
  89. if "**" in key.strip():
  90. key = key[:key.index("**")].strip()
  91. ES642_var = data.variables[key][:]
  92. outlist.append(ES642_var ** 2)
  93. else:
  94. ES642_var = data.variables[key][:]
  95. outlist.append(ES642_var)
  96.  
  97. return outlist
  98.  
  99.  
  100. def main():
  101. fp_TEOM = r"/mnt/storage/Scratch/Shaun/working_dir/MAPM/TEOM/Colocation_1/Raw/NetCDF" \
  102. r"/TEOM_Woolston_Colocation_1_raw.nc"
  103. fp_642_hours = r"/mnt/temp/Projects/MAPM/Data_Permanent/MAPM_campaign/ES642/Colocation_1/Averaged/NetCDF/ES" \
  104. r"-642_DM2_Christchurch2019_Colocation_1_averaged.nc"
  105. fp_642_minutes = r"/mnt/storage/Scratch/Shaun/working_dir/MAPM/ES642/Colocation_1/Raw/NetCDF/ES" \
  106. r"-642_DM2_Christchurch2019_Colocation_1_raw.nc"
  107.  
  108. keys = ['pm2.5', 'air_pressure', 'air_temperature', 'relative_humidity', 'pm2.5 ** 2']
  109. regress_class = LeroyGression(fp_642_hours, fp_TEOM, fp_642_minutes, keys, keys, plot=True, error_out=True)
  110. y_scaler, regressor = regress_class.y_scaler, regress_class.regressor
  111. print(y_scaler, regressor)
  112.  
  113.  
  114. if __name__ == '__main__':
  115. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement