Advertisement
Guest User

Untitled

a guest
Jan 16th, 2018
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.08 KB | None | 0 0
  1. import numpy as np
  2. import matplotlib.pyplot as plt
  3. from sklearn import  linear_model
  4. import statsmodels.api as sm
  5. import seaborn as sns
  6. import pandas as pd
  7.  
  8. ### then create a dataframe with three columns: "state", "urbanization" and "income".
  9. ### Call this dataframe df
  10.  
  11. model = sm.OLS(urbanization, income).fit()
  12. pred = model.predict(income)
  13. res = abs(urbanization - pred)
  14. df['res'] = res
  15. plt.figure(figsize=(15,8))
  16. plt.scatter(df["income"], df["urbanization"],
  17.             c = res, cmap="inferno", s = 60)
  18. plt.tick_params(axis='both', which='major', labelsize=15)
  19. ax = plt.gca()
  20. for line in range(0,len(df)):
  21.     if df.res[line]>15:
  22.         plt.text(df.income[line], df.urbanization[line]+1, df.state[line], horizontalalignment='left', size=15, color='blue', weight='semibold')
  23. plt.plot(income, pred, 'r')
  24. cbar = plt.colorbar()
  25. cbar.set_label("Linear regression residuals", size = 20)
  26. plt.xlabel("annual income in 2015 (dollars)", size = 20)
  27. plt.ylabel("urbanization rate (percentage)", size = 20)
  28. plt.title("Urbanization and Annual Income by US States", size = 30)
  29. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement