Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- restultSet=testingData.map(lambda p: (p.label, linearModel.predict(p.features))).collect()  # pair each point's label with the model's prediction for its features, then collect the pairs
- [(2.0, array([ 2.09078012])), (2.0, array([ 2.09078012])), (2.0, array([ 2.09078012])), (1.0, array([ 2.09078012])), (2.0, array([ 2.09078012])), (1.0, array([ 2.09078012]))]  # presumably the printed value of restultSet: (label, prediction array) tuples
- restultSet.toDF()  # NOTE(review): restultSet is the collected result shown above, not the RDD; presumably this call is the one that fails -- confirm
- import numpy as np
- from pyspark.ml.linalg import Vectors
- # Your data as an RDD (i.e. before calling 'collect'), rebuilt here from literal values
- dd = sc.parallelize([(2.0, np.array([ 2.09078012])), (2.0, np.array([ 2.09078012])), (2.0, np.array([ 2.09078012])), (1.0, np.array([ 2.09078012])), (2.0, np.array([ 2.09078012])), (1.0, np.array([ 2.09078012]))])
- dd.take(1)
- # Output: [(2.0, array([ 2.09078012]))]
- df = dd.map(lambda x: (x[0], Vectors.dense(x[1]))).toDF()  # replace each numpy array with a dense Vector, then call toDF() on the RDD itself
- df.show()  # output below; no column names were given, so they appear as _1 and _2
- # +---+------------+
- # | _1| _2|
- # +---+------------+
- # |2.0|[2.09078012]|
- # |2.0|[2.09078012]|
- # |2.0|[2.09078012]|
- # |1.0|[2.09078012]|
- # |2.0|[2.09078012]|
- # |1.0|[2.09078012]|
- # +---+------------+
Add Comment
Please, Sign In to add comment