Guest User

Untitled

a guest
Apr 19th, 2018
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.71 KB | None | 0 0
  # Network word count with Spark Streaming.
  #
  # Reads lines of text from a TCP socket on localhost:9999 and prints the
  # per-batch count of each word. To feed it input, run in another terminal:
  #
  #     nc -lk 9999
  #     hello world
  #

from operator import add

from pyspark import SparkContext
from pyspark.streaming import StreamingContext

# Create a local StreamingContext with two worker threads and a batch
# interval of 1 second.
sc = SparkContext("local[2]", "NetworkWordCount")
ssc = StreamingContext(sc, 1)

# DStream of the text lines received from the socket.
lines = ssc.socketTextStream("localhost", 9999)

# split() with no argument splits on any run of whitespace and drops empty
# tokens, so blank lines and repeated spaces do not show up as a "" word.
words = lines.flatMap(lambda line: line.split())

# Count each word in each batch.
pairs = words.map(lambda word: (word, 1))
wordCounts = pairs.reduceByKey(add)

# Print the first ten elements of each RDD generated in this DStream to the
# console.
wordCounts.pprint()

try:
    ssc.start()             # Start the computation
    ssc.awaitTermination()  # Wait for the computation to terminate
finally:
    # Release the streaming context and the underlying SparkContext even when
    # the wait is interrupted (e.g. Ctrl-C) or the job fails.
    ssc.stop(stopSparkContext=True, stopGraceFully=False)
Add Comment
Please sign in to add a comment.