# Untitled

Dec 5th, 2020
664
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Print every column of a DataFrame instead of eliding the middle ones.
pd.set_option('display.max_columns', None)

# NOTE(review): the pasted listing skips its own line 6, which presumably
# loaded the dataset into `all` (e.g. with pd.read_csv) -- restore it here.
# Without it `all` is still the Python builtin and the next statement fails.
# The name also shadows that builtin; it is kept only because the rest of
# the script refers to it.

# Per-column "has missing values" flags. The result is discarded when this
# runs as a script; wrap it in print() if the report is wanted.
all.isnull().any()

# Forward-fill gaps with the previous row's value. DataFrame.ffill() is the
# supported spelling; fillna(method='ffill') is deprecated in recent pandas.
all = all.ffill()

# Number of rows after filling.
print(all.shape[0])

# Boolean row selector for the rows whose 'type' is "white".
mask = all['type'] == "white"
# NOTE(review): the pasted listing also skips its own line 13, which
# presumably derived the `red` and `white` frames used further down from
# `mask` -- confirm and restore.
print(all)
15.
# Predictor columns fed to the regression; 'quality' is the target.
_FEATURE_COLUMNS = [
    'fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
    'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
    'pH', 'sulphates', 'alcohol',
]
_TARGET_COLUMN = 'quality'


def _min_max_normalize(frame: pd.DataFrame) -> pd.DataFrame:
    """Return *frame* without its first column, rescaled column-wise.

    The first column is dropped (presumably the non-numeric ``type`` label --
    confirm against the loaded data), the remainder is cast to float and each
    column is mapped through ``(value - min) / (max - min)``.
    """
    numeric = frame.drop(frame.columns[0], axis='columns').astype('float')
    return (numeric - numeric.min()) / (numeric.max() - numeric.min())


def _report_random_splits(x_norm: pd.DataFrame, n_splits: int = 10,
                          show_predictions: bool = False) -> None:
    """Fit and score a linear regression on repeated random 70/30 splits.

    For each of *n_splits* rounds the rows are reshuffled, split into train
    and test parts, a ``LinearRegression`` is fitted on the train part and
    ``1 - mean absolute error`` on the test part is printed as a percentage.

    Args:
        x_norm: Normalised frame holding the feature columns and 'quality'.
        n_splits: Number of shuffle/split/fit rounds to run.
        show_predictions: When true, also print every "actual - predicted"
            pair of the test part.
    """
    for k in range(n_splits):
        print("Разбивка №", k+1, "")
        # sample(frac=1) reshuffles all rows, so each round gets a different
        # split even though train_test_split is called with a fixed seed.
        x_norm = x_norm.sample(frac=1)
        x = x_norm[_FEATURE_COLUMNS].values
        y = x_norm[_TARGET_COLUMN].values

        X_train, X_test, y_train, y_test = train_test_split(
            x, y, test_size=0.3, random_state=0)
        regressor = LinearRegression()
        regressor.fit(X_train, y_train)

        y_pred = regressor.predict(X_test)
        if show_predictions:
            for actual, predicted in zip(y_test, y_pred):
                print(actual, " - ", predicted)

        # Mean absolute error over the test part, vectorised.
        mistake = abs(y_test - y_pred).mean()
        good = 1 - mistake
        # `good` is a 0..1 fraction; scale it so the value matches the "%"
        # label that follows it in the output.
        print("Процент верного предсказания = ", float(good) * 100, "%\n")


print("\t\t\tОбщая выборка:")
x_norm = _min_max_normalize(all)
print(x_norm)
# Only the whole-sample run lists the individual predictions.
_report_random_splits(x_norm, show_predictions=True)

print("\t\t\tКрасное вино:")
_report_random_splits(_min_max_normalize(red))

print("\t\t\tБелое вино:")
_report_random_splits(_min_max_normalize(white))
RAW Paste Data