Chans

Untitled

Feb 7th, 2019
116
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.01 KB | None | 0 0
  1. # import libraries
  2. import matplotlib.pyplot as plt
  3. import pandas as pd
  4. import seaborn as sns
  5. from sklearn.model_selection import  train_test_split
  6.  
  7. # Load the data
  8. FILE_PATH = './games.csv'
  9. games = pd.read_csv(FILE_PATH)
  10.  
  11. # Print the names of the columns & shapes in games
  12. print(games.columns)
  13. print(games.shape)
  14.  
  15. # Make a histogram of all the ratings in the average_rating column
  16. plt.hist(games["average_rating"])
  17. plt.show()
  18.  
  19. # Print the first row of all the games with zero scores
  20. print(games[games['average_rating'] == 0].iloc[0])
  21.  
  22. # Print the first row of games with scores grater than 0
  23. print(games[games['average_rating'] > 0].iloc[0])
  24.  
  25. # Remove any rows without user reviews
  26. games = games[games['users_rated'] > 0]
  27.  
  28. # Remove any rows with missing values
  29. games = games.dropna(axis=0)
  30.  
  31. # Make a histogram of all the average ratings
  32. plt.hist(games['average_rating'])
  33. plt.show()
  34.  
  35. # Correlation matrix
  36. corrmat = games.corr()
  37. fig = plt.figure(figsize=(12, 9))
  38.  
  39. # Plot using seaborn
  40. sns.heatmap(corrmat, vmax=.8, square=True)
  41. plt.show()
  42.  
  43. # Get all the columns from the dataframe
  44. columns = games.columns.tolist()
  45.  
  46. # Filter the columns to remove data we do not want
  47. columns = [c for c in columns if c not in ["bayes_average_rating", "average_rating", "type", "name", "id"]]
  48.  
  49. # Store the variable we`ll be predicting on
  50. target = "average_rating"
  51.  
  52. # Generate training and test datasets
  53. train_X, test_X, train_Y, test_Y = train_test_split(games[columns], games[target], train_size=0.8, test_size=0.2, random_state=1)
  54. # Print shapes
  55. print(train_X.shape)
  56. print(test_X.shape)
  57.  
  58. # Import linear regression model
  59. from sklearn.linear_model import LinearRegression
  60. from sklearn.metrics import mean_squared_error
  61.  
  62. # Initialize the model class
  63. LR = LinearRegression()
  64.  
  65. # Fit the model the training data
  66. LR.fit(train_X, train_Y)
  67.  
  68. # Generate prediction for the test set
  69. predictions = LR.predict(test_X)
  70.  
  71. # Compute error between our test prediction and actual values
  72. mean_squared_error(predictions, test_Y)
Advertisement
Add Comment
Please, Sign In to add comment