Advertisement
Not a member of Pastebin yet?
Sign Up —
it unlocks many cool features!
- // SD1 and SD2
- val text = sc.textFile("/1.txt")
- val hBV = text.map(a=>a.toDouble)
- val hbv1 p pairHBV.map(a=>(a._2, a.2)
- val pairHBV = hBV.zipWithIndex
- import org.apache.spark.rdd.PairRDDFunctions
- val tmp1 = pairHBV.map(a => (a._2, a._1))
- val tmp2 = pairHBV.map(a => (a._2 + 1, a._1))
- val hbvBase = tmp1.join(tmp2)
- val hbvBase1 = hbvBase.map(a => a._2)
- val h = hbvBase1.map(a=>(1, a._2-a._1, (a._2- a._1) * (a._2-a._1))).reduce((a, b) => (a._1+b._1,a._2+b._2,a._3+b._3))
- (h._2-h._3)*(h._2-h._3) / h._1
- // mapPartition example 0
- val parallel = sc.parallelize(1 to 9, 3)
- parallel.mapPartitions( x => List(x.next).iterator).collect
- // res383: Array[Int] = Array(1, 4, 7)
- // compare to the same, but with default parallelize
- val parallel = sc.parallelize(1 to 9)
- parallel.mapPartitions( x => List(x.next).iterator).collect
- // res384: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7, 8)
- // mapPartition example 1
- val a = sc.parallelize(1 to 9, 3)
- def myfunc[T](iter: Iterator[T]) : Iterator[(T, T)] = {
- var res = List[(T, T)]()
- var pre = iter.next
- while (iter.hasNext)
- {
- val cur = iter.next;
- res .::= (pre, cur)
- pre = cur;
- }
- res.iterator
- }
- a.mapPartitions(myfunc).collect
- //res0: Array[(Int, Int)] = Array((2,3), (1,2), (5,6), (4,5), (8,9), (7,8))
- // mapPartition example 2
- val z = sc.parallelize(List("a","b","c","d","e","f"),2)
- //lets first print out the contents of the RDD with partition labels
- def myfunc(index: Int, iter: Iterator[(String)]) : Iterator[String] = {
- iter.toList.map(x => "[partID:" + index + ", val: " + x + "]").iterator
- }
- z.mapPartitionsWithIndex(myfunc).collect
Advertisement
Add Comment
Please sign in to add a comment
Advertisement