Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- scala> val rdd = sc.parallelize(1 to 9 map (_.toString))
- rdd: org.apache.spark.rdd.RDD[String] = ParallelCollectionRDD[24] at parallelize at <console>:24
- scala> val result = rdd.someAction{ _+_ }
- scala> rdd.fold(""){ _+_ }
- res10: String = 312456879
- scala> val rdd = sc.parallelize(1 to 9 map (_.toString))
- rdd: org.apache.spark.rdd.RDD[String] = ParallelCollectionRDD[48] at parallelize at <console>:24
- scala> rdd.zipWithIndex.map(x => (x._2, x._1)).sortByKey().map(_._2).fold(""){ _+_ }
- res22: String = 341276895
- scala> rdd.zipWithIndex.map(x => (x._2, x._1)).sortByKey().map(_._2).fold(""){ _+_ }
- res23: String = 914856273
- scala> rdd.zipWithIndex.map(x => (x._2, x._1)).sortByKey().map(_._2).fold(""){ _+_ }
- res24: String = 742539618
- scala> rdd.zipWithIndex.map(x => (x._2, x._1)).sortByKey().map(_._2).fold(""){ _+_ }
- res25: String = 271468359
- import scala.reflect.ClassTag
/**
 * Folds the elements of `rdd` in partition order.
 *
 * `RDD.fold` does not combine per-partition results in a fixed order (the
 * transcript in this paste shows a different concatenation on every run, even
 * after `sortByKey`). This helper instead folds each partition locally, brings
 * the per-partition results to the driver, and combines them left to right.
 *
 * The result equals a sequential `foldLeft` over all elements when `f` is
 * associative and `zero` is a neutral element for `f`; `f` does not need to be
 * commutative.
 *
 * NOTE(review): `RDD` must be in scope at the definition site, presumably via
 * `import org.apache.spark.rdd.RDD` — confirm for the target shell/project.
 *
 * @tparam T element type; the `ClassTag` is required by `mapPartitions`/`collect`
 * @param rdd  the RDD to fold
 * @param zero start value for every partition-local fold, and the result when
 *             the RDD has no partitions at all
 * @param f    binary operator used both within and across partitions
 * @return the combined value
 */
def orderedFold[T : ClassTag](rdd: RDD[T])(zero: T)(f: (T, T) => T): T = {
  // One partial result per partition, each computed on the executor side.
  val partials = rdd.mapPartitions(iter => Iterator(iter.foldLeft(zero)(f))).collect()
  // The partials are combined strictly left to right on the driver.
  // A plain `reduce` throws on an RDD with zero partitions (no partials at all),
  // so fall back to `zero` in that case.
  partials.reduceLeftOption(f).getOrElse(zero)
}
- scala> val rdd = sc.parallelize(1 to 9 map (_.toString)).coalesce(1)
- rdd: org.apache.spark.rdd.RDD[String] = CoalescedRDD[27] at coalesce at <console>:27
- scala> rdd.zipWithIndex.map(x => (x._2, x._1)).sortByKey().map(_._2).fold(""){ _+_ }
- res4: String = 123456789
- scala> rdd.zipWithIndex.map(x => (x._2, x._1)).sortByKey().map(_._2).fold(""){ _+_ }
- res5: String = 123456789
- scala> rdd.zipWithIndex.map(x => (x._2, x._1)).sortByKey().map(_._2).fold(""){ _+_ }
- res6: String = 123456789
Add Comment
Please, Sign In to add comment