Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import DDC_python as ddp
- # helper function
def __index(x, vec, default):
    """Return the 1-based position of the first element of *vec* equal to *x*.

    Elements are compared with ``==`` in order and the search stops at the
    first match, so duplicates resolve to the earliest position.

    Args:
        x: Value to look for.
        vec: Candidates to search, in order.
        default: Value returned unchanged when nothing in *vec* equals *x*
            (this includes an empty *vec*).

    Returns:
        The 1-based position of the first match, or *default*.
    """
    # start=1 yields the 1-based position directly, with no "+ 1" fix-up.
    for position, candidate in enumerate(vec, start=1):
        if x == candidate:
            return position
    return default
- def DDC(X, DDCpars = {}):
- if DDCpars == None:
- DDCpars = []
- if not isinstance(DDCpars, dict):
- raise TypeError('DDCpars must be a dictionary')
- if not 'numDiscrete' in DDCpars.keys():
- DDCpars['numDiscrete'] = 3
- if not 'precScale' in DDCpars.keys():
- DDCpars['precScale'] = 1e-12
- if not 'cleanNAfirst' in DDCpars.keys():
- DDCpars['cleanNAfirst'] = 'automatic'
- if not 'tolProb' in DDCpars.keys():
- DDCpars['tolProb'] = 0.99
- if not 'corrlim' in DDCpars.keys():
- DDCpars['corrlim'] = 0.5
- if not 'combinRule' in DDCpars.keys():
- DDCpars['combinRule'] = 'wmean'
- if not 'returnBigXimp' in DDCpars.keys():
- DDCpars['returnBigXimp'] = False
- if not 'silent' in DDCpars.keys():
- DDCpars['silent'] = False
- if not 'nLocScale' in DDCpars.keys():
- DDCpars['nLocScale'] = 25000
- if not 'fastDDC' in DDCpars.keys():
- if X.shape[2] < 750:
- fastDDC = 0
- DDCpars['fastDDC'] = False
- else:
- fastDDC = 1
- DDCpars['fastDDC'] = True
- else:
- fastDDC = DDCpars['fastDDC'] + 1
- if DDCpars['fastDDC']:
- if X.shape[2] < 500:
- fastDDC = 0
- else:
- if X.shape[2] < 2000:
- print("Consider using the option 'fastDDC == TRUE' which runs much faster on datasets with many variables.")
- if not 'standType' in DDCpars.keys():
- DDCpars['standType'] = '1stepM'
- if not 'corrType' in DDCpars.keys():
- DDCpars['corrType'] = 'gkwls'
- if not 'nbngbrs' in DDCpars.keys():
- DDCpars['nbngbrs'] = 100
- # other parameters
- if not 'coreOnly' in DDCpars.keys():
- DDCpars['coreOnly'] = False
- if not 'tolProbCell' in DDCpars.keys():
- DDCpars['tolProbCell'] = DDCpars['tolProb']
- if not 'tolProbRow' in DDCpars.keys():
- DDCpars['tolProbRow'] = DDCpars['tolProb']
- if not 'tolProbReg' in DDCpars.keys():
- DDCpars['tolProbReg'] = DDCpars['tolProb']
- if not 'tolProbCorr' in DDCpars.keys():
- DDCpars['tolProbCorr'] = DDCpars['tolProb']
- if not 'includeSelf' in DDCpars.keys():
- DDCpars['includeSelf'] = 1
- if not 'numiter' in DDCpars.keys():
- DDCpars['numiter'] = 1
- if not 'qdim' in DDCpars.keys():
- DDCpars['qdim'] = 30
- if not 'nCorr' in DDCpars.keys():
- DDCpars['nCorr'] = 100
- transFun = __index(DDCpars['transFun'], ['Huber', 'wrap', 'rank'], 2)
- standType = __index(DDCpars['standType'], ['1stepM','hubhub','wrap','mcd', 'rawmcd', 'wrapmedmad'], 1) - 1
- corrType = __index(DDCpars['corrType'], ['wrap', 'rank', 'gkwls'], 3)
- combiRule = __index(DDCpars['combiRule'], ['wmean','wmedian','mean','median'], 1)
- fracNA = DDCpars['fracNA']
- numDiscrete = DDCpars['numDiscrete']
- preScale = DDCpars['preScale']
- returnBigXimp = DDCpars['returnBigXimp']
- silent = DDCpars['silent']
- cleanNAfirst = DDCpars['cleanNAfirst']
- if DDCpars['coreOnly'] == False:
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement