Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from pyspark import SparkContext
from pyspark.sql import SparkSession
from pyspark.streaming import StreamingContext

# Reuse an already-running SparkContext if one exists (safe in notebooks/REPLs).
sc = SparkContext.getOrCreate()
# A SparkSession is required so that RDD.toDF() works inside the
# foreachRDD callback further down the script.
spark = SparkSession(sc)
# DStream micro-batch interval: 1 second.
ssc = StreamingContext(sc, 1)

# Watch file:///tmp/stream for newly created text files; each line is
# treated as a comma-separated record and split into a list of fields,
# e.g. "alice,10" -> ["alice", "10"].
stream_data = ssc.textFileStream("file:///tmp/stream") \
    .map(lambda line: line.split(","))
def savetheresult(rdd):
    """Append one micro-batch to the 'points_json' dataset as JSON.

    Parameters
    ----------
    rdd : pyspark.RDD
        A batch of records; each record is expected to be a two-field
        list (produced by the upstream ``split(",")``) mapped to the
        columns ("name", "score").

    Empty batches are skipped: calling ``toDF`` on an empty RDD would
    fail schema inference, and there is nothing to write anyway.
    """
    if not rdd.isEmpty():
        rdd.toDF(["name", "score"]) \
            .write.save("points_json", format="json", mode="append")
# Persist every micro-batch via the callback above, then start the
# streaming computation and block until it is externally stopped.
stream_data.foreachRDD(savetheresult)
ssc.start()
ssc.awaitTermination()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement