// collect the set of document ids for each token
val tempDict = sqlContext.sql(
  """select words.pName_token, collect_set(words.pID) as docids
    |from words
    |group by words.pName_token""".stripMargin)

// keep only the row for the token being looked up
val wordDocs = tempDict.filter(tempDict("pName_token") === word)

// pull the docids column back to the driver
val listDocs = wordDocs.map(t => t(1)).collect()

// REPL output:
// listDocs: Array[Any] = Array(WrappedArray(123, 234, 205876618, 456))

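Since collect() comes back as Array[Any], each element still wraps a
WrappedArray that has to be cast before the ids are usable. A minimal sketch of
unwrapping it, assuming the docids really are Ints (the cast and the docIds
name are illustrative, not from the original paste):

// WrappedArray is a Seq, so a cast to Seq[Int] suffices here
// (assumption: pID is an integer column; use Long for a bigint)
val docIds = listDocs.flatMap(_.asInstanceOf[Seq[Int]])
// docIds: Array[Int] = Array(123, 234, 205876618, 456)
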
import org.apache.spark.sql.Row
import org.apache.spark.sql.functions._
import scala.collection.mutable.WrappedArray

// build a small example DataFrame whose columns are all arrays
val data = Seq((Seq(1, 2, 3), Seq(4, 5, 6), Seq(7, 8, 9)))
val df = sqlContext.createDataFrame(data)
val first = df.first

// use getAs with an explicit type to recover the array column
val mapped = first.getAs[WrappedArray[Int]](0)

// now it can be used like a normal collection
mapped.mkString("\n")

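Row also has a typed getSeq accessor that avoids naming WrappedArray directly;
a sketch of the equivalent call on the same column:

// getSeq returns the column as a plain Seq[Int]
val mappedSeq = first.getSeq[Int](0)
mappedSeq.mkString("\n")
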
// pattern match on each Row to extract the array columns
val rows = df.collect.map {
  case Row(a: Seq[Any], b: Seq[Any], c: Seq[Any]) =>
    (a, b, c)
}
rows.mkString("\n")
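
The tuples produced by the match are ordinary Scala collections, so the usual
operations apply. A small usage sketch (the totals name is illustrative):

// count the combined elements of the three array columns per row
val totals = rows.map { case (a, b, c) => (a ++ b ++ c).size }
// totals: Array[Int] = Array(9)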