Untitled
a guest
Jul 16th, 2019
Python | 1.02 KB
# Load the converted Pokemon table registered in the Spark catalog
df = spark.sql("SELECT * FROM pokemonconverted")

# Assemble the predictor column(s) into a single 'features' vector column
from pyspark.ml.feature import VectorAssembler
vectorAssembler = VectorAssembler(inputCols=['Attack'], outputCol='features')

v_df = vectorAssembler.transform(df)
vhouse_df = v_df.select(['features', 'Total'])
vhouse_df.show(3)

# Split the data into training (70%) and test (30%) sets
splits = vhouse_df.randomSplit([0.7, 0.3])
train_df = splits[0]
test_df = splits[1]

# Fit an elastic-net-regularized linear regression predicting 'Total' from 'features'
from pyspark.ml.regression import LinearRegression
lr = LinearRegression(featuresCol='features', labelCol='Total', maxIter=10, regParam=0.3, elasticNetParam=0.8)
lr_model = lr.fit(train_df)
print("Coefficients: " + str(lr_model.coefficients))
print("Intercept: " + str(lr_model.intercept))

# Summarize the model over the training set and print out some metrics
trainingSummary = lr_model.summary
print("numIterations: %d" % trainingSummary.totalIterations)
print("objectiveHistory: %s" % str(trainingSummary.objectiveHistory))
trainingSummary.residuals.show()
print("RMSE: %f" % trainingSummary.rootMeanSquaredError)
print("r2: %f" % trainingSummary.r2)