Guest User

Untitled

a guest
Oct 22nd, 2018
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.65 KB | None | 0 0
  1. from pyspark.sql import SparkSession
  2. from pyspark.sql.functions import explode
  3. from pyspark.sql.functions import split
  4.  
  5. spark = SparkSession.builder.appName("StructuredNetworkWordCount").getOrCreate()
  6.  
  7. lines = spark.readStream.format("socket").option("host", "localhost") \
  8. .option("port", 9999).load()
  9.  
  10. # Split the lines into words
  11. words = lines.select( explode( split(lines.value, " ")).alias("word"))
  12.  
  13. # Generate running word count
  14. wordCounts = words.groupBy("word").count()
  15.  
  16. # Start running the query that prints the running counts to the console
  17. query = wordCounts.writeStream.outputMode("complete").format("console") \
  18. .start()
  19.  
  20. query.awaitTermination()
Add Comment
Please, Sign In to add comment