Advertisement
Guest User

Untitled

a guest
Jun 20th, 2019
56
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.53 KB | None | 0 0
  1. from pyspark import SparkContext
  2. from pyspark.sql import SparkSession
  3. from pyspark.streaming import StreamingContext
  4.  
  # Reuse the active SparkContext if one exists, otherwise create it.
  sc = SparkContext.getOrCreate()
  # NOTE(review): `spark` is not referenced again in this listing; presumably
  # the session is created so that RDD.toDF() is available -- confirm.
  spark = SparkSession(sc)
  # Streaming context built on the same SparkContext with a batch duration of 1.
  ssc = StreamingContext(sc, batchDuration=1)

  # Stream text files from the directory and split every line on commas.
  raw_lines = ssc.textFileStream("file:///tmp/stream")
  stream_data = raw_lines.map(lambda line: line.split(","))
  11.  
  def savetheresult(rdd):
      """Append the contents of ``rdd`` to the ``points_json`` JSON output.

      Nothing is written when ``rdd`` is empty. Otherwise the RDD is converted
      to a DataFrame with the columns ``name`` and ``score`` and saved in
      append mode.
      """
      # Skip empty RDDs (presumably because toDF() cannot build a DataFrame
      # from an empty RDD -- confirm).
      if rdd.isEmpty():
          return
      batch_df = rdd.toDF(["name", "score"])
      batch_df.write.save("points_json", format="json", mode="append")
  16.  
  # Apply savetheresult to every RDD the stream produces.
  stream_data.foreachRDD(savetheresult)

  # Start the streaming computation, then block until it terminates.
  ssc.start()
  ssc.awaitTermination()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement