Advertisement
Not a member of Pastebin yet?
Sign Up —
it unlocks many cool features!
"""Fit a simple linear regression (Total ~ Attack) on the `pokemonconverted` table.

Loads the table through an existing SparkSession, assembles the single
'Attack' column into a feature vector, does a 70/30 train/test split, fits an
elastic-net linear regression, and prints training-set metrics.

Assumes a SparkSession named `spark` with the `pokemonconverted` table
registered — TODO confirm against the surrounding notebook/job.
"""
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import LinearRegression

# Load the full table; only 'Attack' (feature) and 'Total' (label) are used below.
df = spark.sql("SELECT * FROM pokemonconverted")

# Spark ML estimators expect all features packed into one vector column.
vectorAssembler = VectorAssembler(inputCols=['Attack'], outputCol='features')
v_df = vectorAssembler.transform(df)
vhouse_df = v_df.select(['features', 'Total'])
vhouse_df.show(3)

# 70/30 train/test split. NOTE(review): no seed is passed, so the split is
# non-deterministic across runs — pass seed=... for reproducible results.
train_df, test_df = vhouse_df.randomSplit([0.7, 0.3])
# NOTE(review): test_df is never evaluated in this chunk — presumably used
# further down the notebook; verify it isn't dead code.

# Elastic-net-regularized linear regression (regParam=0.3, L1 ratio 0.8)
# fitted on the training split only.
lr = LinearRegression(featuresCol='features', labelCol='Total',
                      maxIter=10, regParam=0.3, elasticNetParam=0.8)
lr_model = lr.fit(train_df)
print("Coefficients: " + str(lr_model.coefficients))
print("Intercept: " + str(lr_model.intercept))

# Summarize the model over the training set and print out some metrics
trainingSummary = lr_model.summary
print("numIterations: %d" % trainingSummary.totalIterations)
print("objectiveHistory: %s" % str(trainingSummary.objectiveHistory))
trainingSummary.residuals.show()
print("RMSE: %f" % trainingSummary.rootMeanSquaredError)
print("r2: %f" % trainingSummary.r2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement