Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- from sklearn.decomposition import PCA
- import matplotlib.pyplot as plt
- N_ABILITIES = 10
- MIN_G_LOADING = 0.4
- MAX_G_LOADING = 0.8
- G_TO_EDU_EFFECT = 0.5
- RELATIVE_EDU_TO_IQ_EFFECT = 0.25 # (relative to g)
- EDU_TO_IQ_EFFECT = RELATIVE_EDU_TO_IQ_EFFECT * np.mean([MIN_G_LOADING, MAX_G_LOADING])
- IQ_TO_OUTCOME_EFFECT = 0.3
- SAMPLE_SIZE = 20000
- G_LOADINGS = np.linspace(MIN_G_LOADING, MAX_G_LOADING, N_ABILITIES)
- EDU_LOADINGS = EDU_TO_IQ_EFFECT * np.sqrt(1-G_LOADINGS**2)
- # actual simulation
- g = np.random.normal(0, 1, SAMPLE_SIZE)
- edu = g * G_TO_EDU_EFFECT + np.random.normal(0, np.sqrt(1-G_TO_EDU_EFFECT**2), SAMPLE_SIZE)
- factors = g.reshape((-1, 1)) * G_LOADINGS + edu.reshape((-1, 1)) * EDU_LOADINGS
- factors += np.random.normal(0, 1, (SAMPLE_SIZE, N_ABILITIES)) * np.sqrt(1-np.var(factors, axis=0))
- iq = np.mean(factors, axis=1); iq = iq / np.std(iq)
- outcome = iq * IQ_TO_OUTCOME_EFFECT + np.random.normal(0, np.sqrt(1-IQ_TO_OUTCOME_EFFECT**2), SAMPLE_SIZE)
- print(np.round(np.cov(np.concatenate([g.reshape((-1, 1)), edu.reshape((-1, 1)), factors, iq.reshape((-1, 1)), outcome.reshape((-1, 1))], axis=1).T), 2))
- pca = PCA(1)
- pc1 = pca.fit_transform(factors)
- ESTIMATED_G_LOADINGS = np.corrcoef(np.concatenate([pc1.reshape((-1, 1)), factors], axis=1).T)[0, 1:]
- if ESTIMATED_G_LOADINGS[0] < 0:
- # probably PCA randomly inverted it
- ESTIMATED_G_LOADINGS *= -1
- # You can estimate g loadings from PCA
- # Plot shows perfectly positive correlation
- plt.subplot(131)
- plt.scatter(EDU_LOADINGS, G_LOADINGS)
- plt.xlabel("effect of education on factor")
- plt.ylabel("direct effect of g on factor")
- plt.title(f"r~{np.round(np.corrcoef(EDU_LOADINGS, G_LOADINGS)[0, 1], 2)}") # r~-0.99
- # Education is negatively g-loaded
- # (plot shows perfectly negative correlation)
- plt.subplot(132)
- plt.scatter(ESTIMATED_G_LOADINGS, G_LOADINGS)
- plt.xlabel("estimated g-loading according to pca")
- plt.ylabel("direct effect of g on factor")
- plt.title(f"r~{np.round(np.corrcoef(ESTIMATED_G_LOADINGS, G_LOADINGS)[0, 1], 2)}") # r~0.99
- # Outcome-factor correlations are g-loaded
- # Plot shows perfectly positive correlation
- OUTCOME_FACTOR_CORRELATIONS = np.corrcoef(np.concatenate([outcome.reshape((-1, 1)), factors], axis=1).T)[0, 1:]
- plt.subplot(133)
- plt.scatter(OUTCOME_FACTOR_CORRELATIONS, G_LOADINGS)
- plt.xlabel("outcome-factor correlations")
- plt.ylabel("direct effect of g on factor")
- plt.title(f"r~{np.round(np.corrcoef(OUTCOME_FACTOR_CORRELATIONS, G_LOADINGS)[0, 1], 2)}") # r~0.99
- plt.show()
Advertisement
Add Comment
Please, Sign In to add comment