Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Model tuning: cross-validate the classifier over a small hyper-parameter grid.
from pyspark.ml import Pipeline
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder

# Single-stage pipeline wrapping the classifier under tuning.
pipeLine2 = Pipeline()
pipeLine2.setStages([classifier])

# Show the description of the parameter being tuned.
print(classifier.explainParam('stepSize'))

# The grid has to reference a Param owned by a stage of the pipeline being
# cross-validated; a Param of some other estimator would not vary the fitted
# models at all.
# NOTE(review): if `classifier` is a MultilayerPerceptronClassifier, stepSize
# only takes effect with solver='gd' -- confirm the solver setting.
paramGrid2 = (
    ParamGridBuilder()
    .addGrid(classifier.stepSize, [0.1, 0.01])
    .build()
)

cv = CrossValidator(
    estimator=pipeLine2,
    estimatorParamMaps=paramGrid2,
    evaluator=evaluator,
    numFolds=2,
)

# Fit on the training split, then score the held-out test split with the
# model selected by cross-validation.
cvm = cv.fit(train)
predictions = cvm.transform(test)
print('evaluation of cross-validated model = %g' % evaluator.evaluate(predictions))
# Compare models: inspect the best model selected by cross-validation.
# Stage 0 of the fitted pipeline is the fitted classifier.
bestModel = cvm.bestModel.stages[0]

# Print explicitly so the values are visible when this runs as a script and
# not only as the last expression of an interactive cell.
print('best model weights:', bestModel.weights.array)
print('best model layers:', bestModel.layers)

# Cross-validation metrics, one entry per parameter combination in the grid.
print('average cross-validation metrics:', cvm.avgMetrics)
# Try other classifiers: fit three alternative estimators on the training
# split and score each one on the test split with the shared evaluator.
from pyspark.ml.classification import (
    LogisticRegression,
    DecisionTreeClassifier,
    RandomForestClassifier,
)

lr = LogisticRegression()
dt = DecisionTreeClassifier()
rf = RandomForestClassifier(maxBins=10)

# Fit all estimators first, then transform, then evaluate -- same phase order
# as a straight-line script, just expressed per phase.
lrm, dtm, rfm = [estimator.fit(train) for estimator in (lr, dt, rf)]

lrm_results, dtm_results, rfm_results = [
    fitted.transform(test) for fitted in (lrm, dtm, rfm)
]

evaluation_lrm, evaluation_dtm, evaluation_rfm = [
    evaluator.evaluate(scored)
    for scored in (lrm_results, dtm_results, rfm_results)
]

# Report one line per model, pairing each message template with its metric.
for template, metric in (
    ('evaluation of logistic regression model = %g', evaluation_lrm),
    ('evaluation of decision tree model = %g', evaluation_dtm),
    ('evaluation of random forest model = %g', evaluation_rfm),
):
    print(template % metric)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement