Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#
# Streaming word count over a TCP text socket.
#
# Usage: in another terminal, start a text server on port 9999 with
#     nc -lk 9999
# then type lines into it, for example
#     hello world
#
from pyspark import SparkContext
from pyspark.streaming import StreamingContext

# Create a local StreamingContext with two worker threads and a batch
# interval of 1 second.
sc = SparkContext("local[2]", "NetworkWordCount")
ssc = StreamingContext(sc, 1)

# Receive lines of text from the socket at localhost:9999.
lines = ssc.socketTextStream("localhost", 9999)

# Split each line into words. split() with no separator splits on runs of
# whitespace and discards leading/trailing whitespace, so consecutive spaces
# do not produce empty-string "words" (split(" ") would count '' as a word).
words = lines.flatMap(lambda line: line.split())

# Count each word in each batch.
pairs = words.map(lambda word: (word, 1))
wordCounts = pairs.reduceByKey(lambda x, y: x + y)

# Print the first ten elements of each RDD generated in this DStream to the
# console.
wordCounts.pprint()

ssc.start()  # Start the computation
ssc.awaitTermination()  # Wait for the computation to terminate
Add Comment
Please, Sign In to add comment