Advertisement
elena1234

How to find the correlation coefficient and p-value in Python

May 12th, 2022 (edited)
437
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import numpy as np
  2. import pandas as pd
  3. import matplotlib.pyplot as plt
  4. import seaborn as sns
  5. from scipy import stats
  6.  
  7. da = pd.read_csv("C:/Users/eli/Desktop/YtPruboBEemdqA7UJJ_tgg_63e179e3722f4ef783f58ff6e395feb7_nhanes_2015_2016.csv")
  8.  
  9. da["RIAGENDRx"] = da.RIAGENDR.replace({1: "Male", 2: "Female"})
  10. print(da.loc[da.RIAGENDRx=="Female", ["BMXLEG", "BMXARML"]].dropna().corr())
  11. print(da.loc[da.RIAGENDRx=="Male", ["BMXLEG", "BMXARML"]].dropna().corr())
  12.  
  13. da.BMXLEG.dropna(inplace=True)
  14. da.BMXARML.dropna(inplace=True)
  15. df=da.loc[:, ["BMXARML", "BMXLEG"]].fillna(0)
  16. da = da.corr()
  17.  
  18. ########################################################
  19.  
  20. # jointplot returns a jointgrid, which we need to assign to a variable in order to add an annotation
  21. # This line is almost like the original, but it seems that fill is needed explicitly now.
  22. # And most importantly, ".annotate" is not just deprecated. It's gone.
  23. jg = sns.jointplot(x='BMXLEG', y='BMXARML', data=da, kind='kde', fill=True)
  24.  
  25. # To get the correlation, we need to consider only the records with NA values for either measurement.
  26. da_no_nulls = da[['BMXLEG', 'BMXARML']].dropna()
  27. pearsonr, p = stats.pearsonr(da_no_nulls.BMXLEG, da_no_nulls.BMXARML)
  28. pearson_str = f'pearsonr = {pearsonr:.2f}; p = {p}'
  29.  
  30. # Placing the annotation somewhere readable requires that we find the max of the axes
  31. jg.ax_joint.text(
  32.     jg.ax_joint._axes.xaxis.get_data_interval()[1],
  33.     jg.ax_joint._axes.yaxis.get_data_interval()[1],
  34.     pearson_str,
  35.     horizontalalignment='right')
  36. plt.show()
Advertisement
Advertisement
Advertisement
RAW Paste Data Copied
Advertisement