Aaaaa988

Untitled

Dec 5th, 2020
664
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import pandas as pd
  2. from sklearn.linear_model import LinearRegression
  3. from sklearn.model_selection import train_test_split
  4.  
  5. pd.set_option('display.max_columns', None)
  6. all = pd.read_csv('winequalityN.csv', header=0)
  7. all.isnull().any()
  8. all = all.fillna(method='ffill')
  9.  
  10. print(all.shape[0])
  11.  
  12. mask = all['type'] == "white"
  13. white, red = all[mask], all[~mask]
  14. print(all)
  15.  
  16. print("\t\t\tОбщая выборка:")
  17. df = all
  18.  
  19. df = df.drop(df.columns[0], axis='columns')
  20. df = df.astype('float')
  21. x_norm = (df - df.min()) / (df.max() - df.min())
  22. #x_norm = df
  23. print(x_norm)
  24.  
  25. for k in range(10):
  26.     print("Разбивка №", k+1, "")
  27.     x_norm = x_norm.sample(frac=1)
  28.     x = x_norm[['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates','alcohol']].values
  29.     y = x_norm['quality'].values
  30.  
  31.     X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)
  32.     regressor = LinearRegression()
  33.     regressor.fit(X_train, y_train)
  34.  
  35.     y_pred = regressor.predict(X_test)
  36.     summ = 0
  37.  
  38.     for i in range(y_pred.shape[0]):
  39.         print(y_test[i], " - ",  y_pred[i])
  40.         summ = summ + abs(y_test[i] - y_pred[i])
  41.  
  42.     mistake = summ / y_pred.shape[0]
  43.     good = 1 - mistake
  44.     print("Процент верного предсказания = ",float(good), "%\n")
  45.  
  46. print("\t\t\tКрасное вино:")
  47. df = red
  48.  
  49. df = df.drop(df.columns[0], axis='columns')
  50. df = df.astype('float')
  51. x_norm = (df - df.min()) / (df.max() - df.min())
  52.  
  53. for k in range(10):
  54.     print("Разбивка №", k+1, "")
  55.     x_norm = x_norm.sample(frac=1)
  56.     x = x_norm[['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates','alcohol']].values
  57.     y = x_norm['quality'].values
  58.  
  59.     X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)
  60.     regressor = LinearRegression()
  61.     regressor.fit(X_train, y_train)
  62.  
  63.     y_pred = regressor.predict(X_test)
  64.     summ = 0
  65.     for i in range(y_pred.shape[0]):
  66.         summ = summ + abs(y_test[i] - y_pred[i])
  67.  
  68.     mistake = summ / y_pred.shape[0]
  69.     good = 1 - mistake
  70.     print("Процент верного предсказания = ",float(good), "%\n")
  71.  
  72. print("\t\t\tБелое вино:")
  73. df = white
  74.  
  75. df = df.drop(df.columns[0], axis='columns')
  76. df = df.astype('float')
  77. x_norm = (df - df.min()) / (df.max() - df.min())
  78.  
  79. for k in range(10):
  80.     print("Разбивка №", k+1, "")
  81.     x_norm = x_norm.sample(frac=1)
  82.     x = x_norm[['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates','alcohol']].values
  83.     y = x_norm['quality'].values
  84.  
  85.     X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)
  86.     regressor = LinearRegression()
  87.     regressor.fit(X_train, y_train)
  88.  
  89.     y_pred = regressor.predict(X_test)
  90.     summ = 0
  91.     for i in range(y_pred.shape[0]):
  92.         summ = summ + abs(y_test[i] - y_pred[i])
  93.  
  94.     mistake = summ / y_pred.shape[0]
  95.     good = 1 - mistake
  96.     print("Процент верного предсказания = ",float(good), "%\n")
RAW Paste Data