Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Scatter plot of the pre-2002 rows of baseball.csv: the "W" column on the
# x axis against a per-team integer id on the y axis, with points coloured by
# the "Playoffs" column. The figure is saved to scatter-plot.png and shown.
import re
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn


def _version_tuple(version):
    """Return the leading numeric components of *version* as a tuple of ints.

    Parsing stops at the first dot-separated piece that does not start with a
    digit, and any non-digit suffix on a piece is ignored, e.g.
    ``"0.21.3"`` -> ``(0, 21, 3)``, ``"0.22rc1"`` -> ``(0, 22)``,
    ``"1.4.dev0"`` -> ``(1, 4)``.
    """
    parts = []
    for piece in version.split("."):
        match = re.match(r"\d+", piece)
        if match is None:
            break
        parts.append(int(match.group()))
    return tuple(parts)


# In our class, Python ≥3.6 is required
assert sys.version_info >= (3, 6), "Python 3.6 or newer is required"

# Scikit-Learn ≥0.21.3 is required.
# Compare numeric tuples, not strings: a plain string comparison is
# lexicographic, so e.g. "0.3.0" >= "0.21.3" would wrongly pass.
assert _version_tuple(sklearn.__version__) >= (0, 21, 3), (
    "Scikit-Learn 0.21.3 or newer is required"
)

baseball = pd.read_csv("baseball.csv")

# Keep only the rows with Year < 2002; .copy() so that adding a column below
# modifies an independent frame rather than a view of `baseball`.
moneyball = baseball.loc[baseball["Year"] < 2002].copy()

# Give every team an integer id, numbered in order of first appearance
# (approach suggested by Querini), so the team can be used as a y coordinate.
team_idx = {team: idx for idx, team in enumerate(moneyball["Team"].unique())}
moneyball["id_team"] = moneyball["Team"].map(team_idx)

# Point colours: red where Playoffs == 1, black otherwise.
col = np.where(moneyball["Playoffs"] == 1, "r", "k")

moneyball.plot(kind="scatter", x="W", y="id_team", c=col)
plt.tight_layout()
plt.savefig("scatter-plot.png", dpi=600)
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement