Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
import statsmodels.api as sm
import seaborn as sns
import pandas as pd

# Scatter-plot urbanization against income, overlay an ordinary least-squares
# fit, colour every point by its absolute residual and label the points that
# lie far from the fitted line.
#
# Before running the code below, create a DataFrame named ``df`` with three
# columns: "state", "urbanization" and "income".

# Rows whose absolute residual exceeds this value get a text label.
LABEL_THRESHOLD = 15

income = df["income"]
urbanization = df["urbanization"]

# statsmodels' OLS does not add an intercept by itself; without the constant
# column the fitted line would be forced through the origin.
design = sm.add_constant(income)
model = sm.OLS(urbanization, design).fit()
pred = model.predict(design)

# Absolute residuals drive both the point colours and the label filter.
res = abs(urbanization - pred)
df["res"] = res

fig, ax = plt.subplots(figsize=(15, 8))
points = ax.scatter(income, urbanization, c=res, cmap="inferno", s=60)
ax.tick_params(axis="both", which="major", labelsize=15)

# Iterate over the filtered rows themselves instead of positions 0..len(df)-1,
# so the labelling does not depend on df having a default integer index.
outliers = df.loc[df["res"] > LABEL_THRESHOLD, ["income", "urbanization", "state"]]
for x, y, state in outliers.itertuples(index=False, name=None):
    # Nudge the label one unit above the point so it does not cover the marker.
    ax.text(
        x,
        y + 1,
        state,
        horizontalalignment="left",
        size=15,
        color="blue",
        weight="semibold",
    )

# Fitted regression line in red.
ax.plot(income, pred, "r")

cbar = fig.colorbar(points, ax=ax)
cbar.set_label("Linear regression residuals", size=20)

ax.set_xlabel("annual income in 2015 (dollars)", size=20)
ax.set_ylabel("urbanization rate (percentage)", size=20)
ax.set_title("Urbanization and Annual Income by US States", size=30)

plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement