Guest User

Untitled

a guest
Jan 23rd, 2018
62
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.51 KB | None | 0 0
  1. scala> val rdd = sc.parallelize(1 to 9 map (_.toString))
  2. rdd: org.apache.spark.rdd.RDD[String] = ParallelCollectionRDD[24] at parallelize at <console>:24
  3.  
  4. scala> val result = rdd.someAction{ _+_ }
  5.  
  6. scala> rdd.fold(""){ _+_ }
  7. res10: String = 312456879
  8.  
  9. scala> val rdd = sc.parallelize(1 to 9 map (_.toString))
  10. rdd: org.apache.spark.rdd.RDD[String] = ParallelCollectionRDD[48] at parallelize at <console>:24
  11.  
  12. scala> rdd.zipWithIndex.map(x => (x._2, x._1)).sortByKey().map(_._2).fold(""){ _+_ }
  13. res22: String = 341276895
  14.  
  15. scala> rdd.zipWithIndex.map(x => (x._2, x._1)).sortByKey().map(_._2).fold(""){ _+_ }
  16. res23: String = 914856273
  17.  
  18. scala> rdd.zipWithIndex.map(x => (x._2, x._1)).sortByKey().map(_._2).fold(""){ _+_ }
  19. res24: String = 742539618
  20.  
  21. scala> rdd.zipWithIndex.map(x => (x._2, x._1)).sortByKey().map(_._2).fold(""){ _+_ }
  22. res25: String = 271468359
  23.  
  24. import scala.reflect.ClassTag
  25.  
  /**
   * Folds the elements of `rdd` with `f`, combining the per-partition results on the driver.
   *
   * Every partition is folded left-to-right starting from `zero`, so `zero` is applied once
   * per partition and should be a neutral element for `f` (e.g. `""` for string
   * concatenation). The per-partition results are then brought to the driver with `collect()`
   * and reduced left-to-right in the order of the returned array, rather than being merged
   * by `RDD.fold`.
   *
   * @tparam T    element type of the RDD; a `ClassTag` is required by `mapPartitions`
   * @param rdd   the RDD whose elements are folded
   * @param zero  start value for each partition's fold; also the result when the RDD has
   *              no partitions at all
   * @param f     binary operator used both inside each partition and across partition results
   * @return      the combined value
   */
  def orderedFold[T : ClassTag](rdd: RDD[T])(zero: T)(f: (T, T) => T): T = {
    // One partial result per partition, computed on the executors.
    val partials = rdd.mapPartitions(iter => Iterator(iter.foldLeft(zero)(f))).collect()
    // An RDD without partitions yields an empty array, on which a plain `reduce` would throw;
    // fall back to `zero` in that case. Non-empty results are reduced exactly as before.
    partials.reduceOption(f).getOrElse(zero)
  }
  29.  
  30. scala> val rdd = sc.parallelize(1 to 9 map (_.toString)).coalesce(1)
  31. rdd: org.apache.spark.rdd.RDD[String] = CoalescedRDD[27] at coalesce at <console>:27
  32.  
  33. scala> rdd.zipWithIndex.map(x => (x._2, x._1)).sortByKey().map(_._2).fold(""){ _+_ }
  34. res4: String = 123456789
  35.  
  36. scala> rdd.zipWithIndex.map(x => (x._2, x._1)).sortByKey().map(_._2).fold(""){ _+_ }
  37. res5: String = 123456789
  38.  
  39. scala> rdd.zipWithIndex.map(x => (x._2, x._1)).sortByKey().map(_._2).fold(""){ _+_ }
  40. res6: String = 123456789
Add Comment
Please, Sign In to add comment