# brute force script

# Source: recovered from a Pastebin paste dated May 10th, 2023
# (scrape boilerplate — view count / signup banner — removed).
# Brute-force feature selection for a linear model predicting `default_rate`.
#
# For every non-empty subset of the original features, and for every subset of
# those features' squared terms, fit an OLS model on the train split and keep
# the combination with the best R^2 on the held-out test split.  That is
# O(3^n_features) model fits, so this is only viable for a handful of columns.

import pandas as pd
from sklearn.linear_model import LinearRegression

# Columns excluded from modelling: identifier/free-text columns, unencoded
# categoricals, and columns deliberately not considered as predictors.
stupid = ['OPEID', 'name', 'city', 'state', 'region']
categorical = ['highest_degree', 'ownership', 'locale', 'hbcu', 'online_only']
not_consider = ['enrollment', 'net_price', 'avg_cost']

# NOTE(review): the line that loaded the dataframe was lost in the paste this
# script was recovered from (only the `.drop(...)` continuation survived).
# Restore the real input path before running.
df = (
    pd.read_csv('colleges.csv')  # TODO: confirm the original data source
    .drop(columns=stupid + categorical + not_consider)
)

# 80/20 random train/test split.  No fixed seed, so results vary per run.
train = df.sample(int(len(df) * 0.8))
test = df.drop(train.index)

target = 'default_rate'


def _with_squares(X):
    """Return X with a squared copy of every column appended as '<name>^2'."""
    squared = (X ** 2).rename(columns={c: c + '^2' for c in X.columns})
    return pd.concat([X, squared], axis=1)


X_train = train.drop(columns=[target])
og_feats = list(X_train.columns)
X_train = _with_squares(X_train)
X_test = _with_squares(test.drop(columns=[target]))

y_train = train[target]
y_test = test[target]

best_r2 = 0.0
best_things = None  # was `None, None` — only ever assigned a single list
for ss in range(1, 1 << len(og_feats)):
    # Features whose bit is set in `ss` form the linear part of the candidate.
    use = [f for v, f in enumerate(og_feats) if ss & (1 << v)]
    for deg2 in range(1 << len(use)):
        # Bits of `deg2` select which chosen features also get a ^2 term.
        this_use = use + [
            use[i] + '^2' for i in range(len(use)) if deg2 & (1 << i)
        ]

        regressor = LinearRegression()
        regressor.fit(X_train[this_use], y_train)

        # Score on the held-out split; keep the best feature combination.
        r2 = regressor.score(X_test[this_use], y_test)
        if r2 > best_r2:
            best_r2 = r2
            best_things = this_use

print(best_r2)
print(best_things)
51.