# Untitled

# import libraries
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import  train_test_split

# Read the games dataset from disk into a DataFrame
FILE_PATH = './games.csv'
games = pd.read_csv(FILE_PATH)

# Show the column labels, then the (rows, columns) shape, one per line
print(games.columns, games.shape, sep='\n')

# Plot how the values in the average_rating column are distributed
plt.hist(games['average_rating'])
plt.show()

# Show the first game whose average rating is exactly zero
print(games.loc[games['average_rating'].eq(0)].iloc[0])

# Show the first game whose average rating is greater than zero
print(games.loc[games['average_rating'].gt(0)].iloc[0])

# Keep only the games that at least one user has rated, then discard
# every remaining row that contains a missing value
games = games.loc[games['users_rated'].gt(0)].dropna(axis='index')

# Re-plot the distribution of average ratings on the cleaned data
plt.hist(games.loc[:, 'average_rating'])
plt.show()

# Correlation matrix over the numeric columns only.
# Since pandas 2.0, DataFrame.corr() no longer drops non-numeric columns
# silently; it raises if any column holds data that cannot be converted to
# float (e.g. text). Selecting the numeric and boolean columns explicitly
# keeps the old result and works on both older and newer pandas releases.
corrmat = games.select_dtypes(include=['number', 'bool']).corr()
fig = plt.figure(figsize=(12, 9))

# Plot using seaborn; vmax caps the colour scale at 0.8 and square=True
# draws every cell as a square
sns.heatmap(corrmat, vmax=.8, square=True)
plt.show()