Guest User

Untitled

a guest
Apr 7th, 2022
68
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.47 KB | None | 0 0
  1. import numpy as np
  2. from sklearn.decomposition import PCA
  3. import matplotlib.pyplot as plt
  4.  
  5. N_ABILITIES = 10
  6. MIN_G_LOADING = 0.4
  7. MAX_G_LOADING = 0.8
  8. G_TO_EDU_EFFECT = 0.5
  9. RELATIVE_EDU_TO_IQ_EFFECT = 0.25 # (relative to g)
  10. EDU_TO_IQ_EFFECT = RELATIVE_EDU_TO_IQ_EFFECT * np.mean([MIN_G_LOADING, MAX_G_LOADING])
  11. IQ_TO_OUTCOME_EFFECT = 0.3
  12. SAMPLE_SIZE = 20000
  13.  
  14. G_LOADINGS = np.linspace(MIN_G_LOADING, MAX_G_LOADING, N_ABILITIES)
  15. EDU_LOADINGS = EDU_TO_IQ_EFFECT * np.sqrt(1-G_LOADINGS**2)
  16.  
  17. # actual simulation
  18. g = np.random.normal(0, 1, SAMPLE_SIZE)
  19. edu = g * G_TO_EDU_EFFECT + np.random.normal(0, np.sqrt(1-G_TO_EDU_EFFECT**2), SAMPLE_SIZE)
  20. factors = g.reshape((-1, 1)) * G_LOADINGS + edu.reshape((-1, 1)) * EDU_LOADINGS
  21. factors += np.random.normal(0, 1, (SAMPLE_SIZE, N_ABILITIES)) * np.sqrt(1-np.var(factors, axis=0))
  22. iq = np.mean(factors, axis=1); iq = iq / np.std(iq)
  23. outcome = iq * IQ_TO_OUTCOME_EFFECT + np.random.normal(0, np.sqrt(1-IQ_TO_OUTCOME_EFFECT**2), SAMPLE_SIZE)
  24.  
  25. print(np.round(np.cov(np.concatenate([g.reshape((-1, 1)), edu.reshape((-1, 1)), factors, iq.reshape((-1, 1)), outcome.reshape((-1, 1))], axis=1).T), 2))
  26.  
  27. pca = PCA(1)
  28. pc1 = pca.fit_transform(factors)
  29. ESTIMATED_G_LOADINGS = np.corrcoef(np.concatenate([pc1.reshape((-1, 1)), factors], axis=1).T)[0, 1:]
  30. if ESTIMATED_G_LOADINGS[0] < 0:
  31. # probably PCA randomly inverted it
  32. ESTIMATED_G_LOADINGS *= -1
  33. # You can estimate g loadings from PCA
  34. # Plot shows perfectly positive correlation
  35. plt.subplot(131)
  36. plt.scatter(EDU_LOADINGS, G_LOADINGS)
  37. plt.xlabel("effect of education on factor")
  38. plt.ylabel("direct effect of g on factor")
  39. plt.title(f"r~{np.round(np.corrcoef(EDU_LOADINGS, G_LOADINGS)[0, 1], 2)}") # r~-0.99
  40. # Education is negatively g-loaded
  41. # (plot shows perfectly negative correlation)
  42. plt.subplot(132)
  43. plt.scatter(ESTIMATED_G_LOADINGS, G_LOADINGS)
  44. plt.xlabel("estimated g-loading according to pca")
  45. plt.ylabel("direct effect of g on factor")
  46. plt.title(f"r~{np.round(np.corrcoef(ESTIMATED_G_LOADINGS, G_LOADINGS)[0, 1], 2)}") # r~0.99
  47. # Outcome-factor correlations are g-loaded
  48. # Plot shows perfectly positive correlation
  49. OUTCOME_FACTOR_CORRELATIONS = np.corrcoef(np.concatenate([outcome.reshape((-1, 1)), factors], axis=1).T)[0, 1:]
  50. plt.subplot(133)
  51. plt.scatter(OUTCOME_FACTOR_CORRELATIONS, G_LOADINGS)
  52. plt.xlabel("outcome-factor correlations")
  53. plt.ylabel("direct effect of g on factor")
  54. plt.title(f"r~{np.round(np.corrcoef(OUTCOME_FACTOR_CORRELATIONS, G_LOADINGS)[0, 1], 2)}") # r~0.99
  55. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment