Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Running word count over text lines received from a local TCP socket,
# built with Spark Structured Streaming.
from pyspark.sql import SparkSession
from pyspark.sql.functions import explode, split

# Obtain the Spark session for this application.
spark = SparkSession.builder.appName("StructuredNetworkWordCount").getOrCreate()

# Streaming DataFrame fed by the socket source at localhost:9999.
socket_reader = (
    spark.readStream
    .format("socket")
    .option("host", "localhost")
    .option("port", 9999)
)
lines = socket_reader.load()

# Break each incoming line on single spaces, then emit one row per token
# in a column named "word".
tokens = split(lines.value, " ")
words = lines.select(explode(tokens).alias("word"))

# Running count for each distinct word.
wordCounts = words.groupBy("word").count()

# Start the query that prints the running counts to the console,
# using the "complete" output mode.
console_writer = wordCounts.writeStream.outputMode("complete").format("console")
query = console_writer.start()

# Block here until the streaming query terminates.
query.awaitTermination()
Add Comment
Please, Sign In to add comment