Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
# Show every column when printing DataFrames instead of eliding the middle ones.
pd.set_option('display.max_columns', None)

# Load the wine dataset; the first row of the CSV holds the column names.
wines = pd.read_csv('winequalityN.csv', header=0)

# Replace each missing value with the previous row's value in the same column.
# DataFrame.ffill() is the supported spelling of the deprecated
# fillna(method='ffill').
# NOTE: a forward fill cannot fill gaps in the very first row.
wines = wines.ffill()
print(wines.shape[0])

# Split on the 'type' column: rows labelled "white" versus every other row.
mask = wines['type'] == "white"
white, red = wines[mask], wines[~mask]
print(wines)

print("\t\t\tОбщая выборка:")
# Drop the first column (presumably the textual 'type' label - confirm against
# the CSV) so that the remaining columns can be cast to float.
df = wines.drop(wines.columns[0], axis='columns')
df = df.astype('float')
# Min-max scale every remaining column, the 'quality' target included.
x_norm = (df - df.min()) / (df.max() - df.min())
print(x_norm)
# Columns used as regression inputs and the column being predicted.
FEATURE_COLUMNS = [
    'fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
    'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
    'pH', 'sulphates', 'alcohol',
]
TARGET_COLUMN = 'quality'


def _min_max_scale(frame):
    """Return *frame* minus its first column, cast to float and min-max scaled.

    Args:
        frame: DataFrame whose first column is dropped before the cast.

    Returns:
        A DataFrame in which every remaining column is rescaled as
        ``(value - column_min) / (column_max - column_min)``.
    """
    numeric = frame.drop(frame.columns[0], axis='columns').astype('float')
    return (numeric - numeric.min()) / (numeric.max() - numeric.min())


def _evaluate_splits(scaled, n_splits=10, *, show_predictions=False):
    """Fit and score a linear regression on repeated random 70/30 splits.

    For each split the rows are reshuffled, a LinearRegression is fitted on
    the training part and the score ``(1 - mean absolute error) * 100`` on
    the test part is printed.

    Args:
        scaled: DataFrame containing FEATURE_COLUMNS and TARGET_COLUMN.
        n_splits: Number of shuffle/fit/score rounds to run.
        show_predictions: When true, also print every
            ``actual - predicted`` pair of the test part.
    """
    for k in range(n_splits):
        print("Разбивка №", k+1, "")
        # train_test_split is seeded, so this unseeded reshuffle is what makes
        # each round use a different train/test partition.
        scaled = scaled.sample(frac=1)
        x = scaled[FEATURE_COLUMNS].values
        y = scaled[TARGET_COLUMN].values
        X_train, X_test, y_train, y_test = train_test_split(
            x, y, test_size=0.3, random_state=0)

        regressor = LinearRegression()
        regressor.fit(X_train, y_train)
        y_pred = regressor.predict(X_test)

        if show_predictions:
            for actual, predicted in zip(y_test, y_pred):
                print(actual, " - ", predicted)

        mistake = mean_absolute_error(y_test, y_pred)
        # The label promises a percentage, so scale the fractional score by 100.
        good = (1 - mistake) * 100
        print("Процент верного предсказания = ", float(good), "%\n")


# General sample: x_norm was prepared above; this section alone also lists
# the individual predictions.
_evaluate_splits(x_norm, show_predictions=True)

print("\t\t\tКрасное вино:")
_evaluate_splits(_min_max_scale(red))

print("\t\t\tБелое вино:")
_evaluate_splits(_min_max_scale(white))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement