import org.apache.spark._

/** Create an RDD of lines from a text file, and keep count of
  * how often each word appears.
  */
object wordcount1 {

  def main(args: Array[String]): Unit = {
    // Set up a SparkContext named WordCount that runs locally using
    // all available cores.
    println("before conf")
    val conf = new SparkConf().setAppName("WordCount")
    conf.setMaster("local[*]")
    val sc = new SparkContext(conf)
    println("after creating the SparkContext")

    // Create an RDD of lines of text in our book
    val input = sc.textFile("book.txt")
    println("after the textFile")

    // Use flatMap to convert this into an RDD of each word in each line
    val words = input.flatMap(line => line.split(' '))

    // Convert these words to lowercase
    val lowerCaseWords = words.map(word => word.toLowerCase())

    // Count up the occurrence of each unique word; countByValue()
    // returns the counts to the driver as a local Map
    println("before countByValue")
    val wordCounts = lowerCaseWords.countByValue()

    // Print the first 20 results (in arbitrary map order)
    val sample = wordCounts.take(20)
    for ((word, count) <- sample) {
      println(word + " " + count)
    }

    sc.stop()
  }
}
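
For a single book, countByValue() is fine, but it pulls every (word, count) pair back to the driver as a local Map. A minimal distributed sketch, assuming the same `sc` and book.txt as above, keeps the counting on the cluster with reduceByKey and only collects the top 20 results:

// Sketch of a distributed variant (assumes the same `sc` and book.txt as in main above).
val wordCountsRdd = sc.textFile("book.txt")
  .flatMap(line => line.split(' '))
  .map(word => (word.toLowerCase(), 1))
  .reduceByKey(_ + _) // sum the 1s per word across partitions

// Sort by count, descending, and bring only the top 20 pairs to the driver.
val top20 = wordCountsRdd.sortBy(_._2, ascending = false).take(20)
top20.foreach { case (word, count) => println(word + " " + count) }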