Guest User

Untitled

a guest
Jun 22nd, 2018
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.85 KB | None | 0 0
  1. #Importing the required libraries
  2. import pandas as pd
  3. from pyspark.sql.types import *
  4. from pyspark.ml.regression import RandomForestRegressor
  5. from pyspark.mllib.util import MLUtils
  6. from pyspark.ml import Pipeline
  7. from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
  8. from pyspark.ml.evaluation import RegressionEvaluator
  9. from pyspark.ml.linalg import Vectors
  10. from pyspark.ml import Pipeline
  11. from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
  12. from pyspark.mllib.fpm import *
  13. from pyspark.sql import SparkSession
  14.  
  15. spark = SparkSession .builder .appName("Python Spark") .config("spark.some.config.option", "some-value")
  16.  
  17. # read the data
  18. df = pd.read_json("events.json")
  19.  
  20. df = (df.rdd.map(lambda x: (x[1],[x[0]])).reduceByKey(lambda x,y: x+y).sortBy(lambda k_v: (k_v[0], sorted(k_v[1], key=lambda x: x[1], reverse=True))).collect())
Add Comment
Please, Sign In to add comment