Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Load line-delimited JSON records into a DataFrame, expose it to Spark SQL as
# "mytable", and derive a per-row `status` column from `user_id` and
# `created_at` using the externally defined `get_status` helper.
from pyspark.sql.functions import udf

# Each line of the text file is parsed as one JSON document.
ratings = spark.createDataFrame(
    sc.textFile("myfile.json").map(json.loads),
)

# createOrReplaceTempView supersedes the deprecated registerTempTable.
ratings.createOrReplaceTempView("mytable")
final_df = spark.sql("select * from mytable")

# Wrap the row-level helper as a UDF so Spark evaluates it per row, instead of
# collecting both columns to the driver and looping over them in Python.
# NOTE(review): udf() defaults to a string return type -- this assumes
# get_status returns a str; if it returns something else, pass the matching
# `returnType` here. TODO confirm against get_status.
status_udf = udf(get_status)

# DataFrames are immutable: withColumn returns a new DataFrame, so the result
# must be rebound, and its second argument must be a Column expression.
final_df = final_df.withColumn(
    "status",
    status_udf(final_df["user_id"], final_df["created_at"]),
)
Add Comment
Please, Sign In to add comment