Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- from sklearn.feature_selection import RFE
- import statsmodels.api as sm
- def runLogit():
- df = pd.read_excel('InputFile.xlsx', sheetname='InputToCode')
- field1 = df['field1']
- field2 = df['field2']
- field3 = df['field3']
- field4 = df['field4']
- field5 = df['field5']
- field6 = df['field6']
- field7 = df['field7']
- field8 = df['field8']
- field9 = df['field9']
- field10 = df['field10']
- field11e = df['field11']
- field12 = df['field12']
- field13 = df['field13']
- df = pd.DataFrame(
- {
- 'field1': field1,
- 'field2': field2,
- 'field3': field3,
- 'field4': field4,
- 'field5': field5,
- 'field6': field6,
- 'field7': feild7,
- 'field8': field8,
- 'field9': field9,
- 'field10': field10,
- 'field11': field11,
- 'field12': field12,
- 'field13': field13
- }
- )
- # Field1 is an Actual list of 1's and 0's in the input data set (which we are trying to predict through the Logit)
- y = df['field1'].values
- print (len(y))
- print (df.shape)
- logit_model = sm.Logit(y.astype(float), df.astype(float))
- result = logit_model.fit()
- print (result.summary())
- #==============================================================================
- # Initial call
- #==============================================================================
- runLogit()
- The warning I am getting is:
- Warning: Maximum number of iterations has been exceeded.
- Current function value: 0.296861
- Iterations: 35
- Traceback (most recent call last):
- File "<ipython-input-53-4d118e1bf22e>", line 1, in <module>
- runfile('C:/Users/xxxx/Desktop/proj/proj.py', wdir='C:/Users/xxxx/Desktop/KYV')
- File "C:UsersxxxxAppDataLocalContinuumanaconda3libsite-packagesspyderutilssitesitecustomize.py", line 710, in runfile
- execfile(filename, namespace)
- File "C:UsersxxxxAppDataLocalContinuumanaconda3libsite-packagesspyderutilssitesitecustomize.py", line 101, in execfile
- exec(compile(f.read(), filename, 'exec'), namespace)
- File "C:/Users/xxxx/Desktop/KYV/KYV.py", line 135, in <module>
- runLogit()
- File "C:/Users/xxxx/Desktop/KYV/KYV.py", line 125, in runLogit
- result = logit_model.fit()
- File "C:UsersxxxxAppDataLocalContinuumanaconda3libsite-packagesstatsmodelsdiscretediscrete_model.py", line 1377, in fit
- disp=disp, callback=callback, **kwargs)
- File "C:UsersxxxxAppDataLocalContinuumanaconda3libsite-packagesstatsmodelsdiscretediscrete_model.py", line 204, in fit
- disp=disp, callback=callback, **kwargs)
- File "C:UsersxxxxAppDataLocalContinuumanaconda3libsite-packagesstatsmodelsbasemodel.py", line 458, in fit
- Hinv = np.linalg.inv(-retvals['Hessian']) / nobs
- File "C:UsersxxxxAppDataLocalContinuumanaconda3libsite-packagesnumpylinalglinalg.py", line 513, in inv
- ainv = _umath_linalg.inv(a, signature=signature, extobj=extobj)
- File "C:UsersxxxxAppDataLocalContinuumanaconda3libsite-packagesnumpylinalglinalg.py", line 90, in _raise_linalgerror_singular
- raise LinAlgError("Singular matrix")
- LinAlgError: Singular matrix End of Stack Trace.
- y = ['y']
- X = [i for i in data_final if i not in y]
- #Feature selection
- logistic = LogisticRegression()
- rfe = RFE(logistic, *number of features*)
- rfe = rfe.fit(***df[X], data_final[y]***)
- print(rfe.support_)
- print(rfe.ranking_)
Add Comment
Please, Sign In to add comment