Advertisement
Guest User

Untitled

a guest
Sep 14th, 2017
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.82 KB | None | 0 0
  1. val rddkv = sc.parallelize(List(("k1",1),("k2",2),("k1",2),("k3",5),("k3",1)))
  2. //rddkv.collect
  3. //Array[(String, Int)] = Array((k1,1), (k2,2), (k1,2), (k3,5), (k3,1))
  4.  
  5. //creating a hash partition
  6. rddkv.partitionBy(new org.apache.spark.HashPartitioner(3)).mapPartitionsWithIndex( (i,iter_p) => iter_p.map(x=>" index="+i+" value="+x)).collect
  7. //Array[String] = Array(" index=0 value=(k1,1)", " index=0 value=(k1,2)", " index=1 value=(k2,2)", " index=2 value=(k3,5)", " index=2 value=(k3,1)")
  8.  
  9.  
  10. //creating a range partition
  11. rddkv.partitionBy(new org.apache.spark.RangePartitioner(3,rddkv)).mapPartitionsWithIndex( (i,iter_p) => iter_p.map(x=>" index="+i+" value="+x)).collect
  12. //Array[String] = Array(" index=0 value=(k1,1)", " index=0 value=(k1,2)", " index=1 value=(k2,2)", " index=1 value=(k3,5)", " index=1 value=(k3,1)")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement