Chans

Untitled

Feb 7th, 2019
106
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.05 KB | None | 0 0
  1. # import libraries
  2. import matplotlib.pyplot as plt
  3. import pandas as pd
  4. import seaborn as sns
  5. from sklearn.model_selection import  train_test_split
  6.  
  7. # Load the data
  8. FILE_PATH = './games.csv'
  9. games = pd.read_csv(FILE_PATH)
  10.  
  11. # Print the names of the columns & shapes in games
  12. print(games.columns)
  13. print(games.shape)
  14.  
  15. # Make a histogram of all the ratings in the average_rating column
  16. plt.hist(games["average_rating"])
  17. plt.show()
  18.  
  19. # Print the first row of all the games with zero scores
  20. print(games[games['average_rating'] == 0].iloc[0])
  21.  
  22. # Print the first row of games with scores grater than 0
  23. print(games[games['average_rating'] > 0].iloc[0])
  24.  
  25. # Remove any rows without user reviews
  26. games = games[games['users_rated'] > 0]
  27.  
  28. # Remove any rows with missing values
  29. games = games.dropna(axis=0)
  30.  
  31. # Make a histogram of all the average ratings
  32. plt.hist(games['average_rating'])
  33. plt.show()
  34.  
  35. # Correlation matrix
  36. corrmat = games.corr()
  37. fig = plt.figure(figsize=(12, 9))
  38.  
  39. # Plot using seaborn
  40. sns.heatmap(corrmat, vmax=.8, square=True)
  41. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment