import org.apache.spark._
import org.apache.spark.SparkConf

/** Create an RDD of lines from a text file, and keep count of
  * how often each word appears.
  */
object wordcount1 {

  def main(args: Array[String]): Unit = {
    // Set up a SparkContext named WordCount that runs locally using
    // all available cores.
    println("creating SparkContext")
    val conf = new SparkConf().setAppName("WordCount")
    conf.setMaster("local[*]")
    val sc = new SparkContext(conf)

    // Create an RDD of lines of text in our book
    println("loading book.txt")
    val input = sc.textFile("book.txt")

    // Use flatMap to convert this into an RDD of each word in each line
    val words = input.flatMap(line => line.split(' '))

    // Convert these words to lowercase
    val lowerCaseWords = words.map(word => word.toLowerCase())

    // Count up the occurrence of each unique word; countByValue()
    // returns the totals to the driver as a local Map
    println("counting words")
    val wordCounts = lowerCaseWords.countByValue()

    // Print the first 20 results
    val sample = wordCounts.take(20)
    for ((word, count) <- sample) {
      println(word + " " + count)
    }

    sc.stop()
  }
}
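Note that countByValue() collects every per-word total back to the driver as a local Map, which is fine for a single book but can exhaust driver memory on a large corpus. Below is a minimal sketch of the same count done with reduceByKey so the aggregation stays distributed; the object name WordCountReduceByKey and the sort-by-frequency step are illustrative assumptions, not part of the original paste.

import org.apache.spark.{SparkConf, SparkContext}

object WordCountReduceByKey {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("WordCountReduceByKey").setMaster("local[*]")
    val sc = new SparkContext(conf)

    // Same pipeline as above, but count with reduceByKey so the totals
    // remain in an RDD instead of being collected to the driver.
    val counts = sc.textFile("book.txt")
      .flatMap(_.split(' '))
      .map(_.toLowerCase)
      .map(word => (word, 1))
      .reduceByKey(_ + _)

    // Print the 20 most frequent words; sortBy triggers a shuffle.
    counts.sortBy(_._2, ascending = false).take(20).foreach {
      case (word, count) => println(word + " " + count)
    }

    sc.stop()
  }
}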