Advertisement
Guest User

Untitled

a guest
Apr 9th, 2020
182
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.29 KB | None | 0 0
  1. def create_semester_data(data, semester, target):
  2.     #data after fds['dynamic_interm']
  3.     #semester - number of semester
  4.     #target - target name
  5.     #returns data sliced by semester and target as usual, can be taken by Preprocessor()
  6.     new_data = data.copy()
  7.     if target=='drop_probability':
  8.         new_data.dropna(subset=['semester_dropped_number'], inplace=True)
  9.         new_data['drop'] = (new_data['semester_dropped_number'] >= semester).astype(int)
  10.         #print(new_data['semester_dropped_number'].value_counts())
  11.         new_data = new_data.drop(['semester_dropped_number'], axis=1)
  12.     elif target=='debt_probability':
  13.         new_data.dropna(subset=['DEBTS_NUMBER_SEMESTER_'+str(semester)], inplace=True)
  14.         new_data['debt'] = (new_data['DEBTS_NUMBER_SEMESTER_'+str(semester)]).astype(bool).astype(int)
  15.         new_data = new_data.drop(['DEBTS_NUMBER_SEMESTER_'+str(semester)], axis=1)
  16.     elif target == 'mean_debt':
  17.         new_data.dropna(subset=['DEBTS_NUMBER_SEMESTER_'+str(semester)], inplace=True)
  18.         new_data['mean_debt'] = new_data['DEBTS_NUMBER_SEMESTER_'+str(semester)]
  19.         new_data = new_data.drop(['DEBTS_NUMBER_SEMESTER_'+str(semester)], axis=1)
  20.     elif target == 'mean_mark':
  21.         new_data.dropna(subset=['MARKS_MEAN_SEMESTER_'+str(semester)], inplace=True)
  22.         new_data['mean_mark'] = new_data['MARKS_MEAN_SEMESTER_'+str(semester)]
  23.         new_data = new_data.drop(['MARKS_MEAN_SEMESTER_'+str(semester)], axis=1)
  24.     elif target=='scopus_probability':
  25.         new_data.dropna(subset=['SCOPUS_PUBLICATIONS_NUMBER'], inplace=True)
  26.         new_data['scopus'] = (new_data['SCOPUS_PUBLICATIONS_NUMBER']).astype(bool).astype(int)
  27.         new_data = new_data.drop(['SCOPUS_PUBLICATIONS_NUMBER'], axis=1)
  28.     elif target=='publication_probability':
  29.         new_data.dropna(subset=['OTHER_PUBLICATIONS_NUMBER'], inplace=True)
  30.         new_data['publication'] = (new_data['OTHER_PUBLICATIONS_NUMBER']).astype(bool).astype(int)
  31.         new_data = new_data.drop(['OTHER_PUBLICATIONS_NUMBER'], axis=1)
  32.     semesters = np.arange(semester, 9)
  33.     invalid_features = set()
  34.     for elem in semesters:
  35.         invalid_features.update([name for name in data.columns if str(elem) in name])
  36.     features = set(new_data.columns)
  37.     return new_data[features-invalid_features]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement