daily pastebin goal
13%
SHARE
TWEET

Untitled

a guest Mar 13th, 2018 57 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  /** Tells whether a raw input line should be treated as the header row.
    *
    * A line counts as a header when its first character is the lowercase
    * letter `m`; an empty line is never a header.
    *
    * @param s one line of input text
    * @return `true` if `s` begins with "m", `false` otherwise
    */
  def isHeader(s: String): Boolean =
    s.startsWith("m")
  4.  
  /** Computes the cosine similarity of two string lists viewed as sets.
    *
    * Both lists are de-duplicated first; the score is
    * `|A intersect B| / sqrt(|A| * |B|)`.
    *
    * @param pair `((key1, key2), (items1, items2))`: two identifying keys
    *             together with the item list associated with each
    * @return `(key1, key2, similarity)`. The similarity is NaN when either
    *         list is empty, because the computation is then `0 / 0.0`.
    */
  def cs(pair: ((String, String), (List[String], List[String]))): (String, String, Double) = {
    val ((firstKey, secondKey), (firstItems, secondItems)) = pair
    val firstSet = firstItems.toSet
    val secondSet = secondItems.toSet
    val shared = firstSet.intersect(secondSet).size
    // Multiply as Double: an Int product silently wraps around once it
    // exceeds Int.MaxValue (e.g. 50000 * 50000), which would feed a wrong or
    // negative value into sqrt and yield NaN.
    val norm = math.sqrt(firstSet.size.toDouble * secondSet.size)
    (firstKey, secondKey, shared / norm)
  }
  /** Renders a `(key1, key2, score)` triple as one comma-separated line.
    *
    * No quoting or escaping is applied to the keys.
    *
    * @param t the two keys and their numeric score
    * @return the text `key1,key2,score`
    */
  def to_csv_cos(t: (String, String, Double)): String = {
    val (left, right, score) = t
    s"$left,$right,$score"
  }
  15.  
  16.  
  // Read the raw lines and discard every line that isHeader flags.
  val messages = sc.textFile("....csv")
  val msgData1 = messages.filter(line => !isHeader(line))
  // Split each remaining line into its comma-separated fields.
  val data = msgData1.map(_.split(','))

  // Collect the column-1 values seen for each column-0 value, then emit every
  // 2-element combination of values that share the same column-0 key.
  val pairs = data
    .map(fields => (fields(0), List(fields(1))))
    .reduceByKey(_ ++ _)
    .flatMap { case (_, grouped) => grouped.combinations(2).toList }
    .map(combo => (combo(0), combo(1)))
  // Inverse grouping: the column-0 values seen for each column-1 value.
  val msgs = data
    .map(fields => (fields(1), List(fields(0))))
    .reduceByKey(_ ++ _)
  // Two joins against msgs attach the grouped list for each side of a pair,
  // re-keying on the second element in between.
  val pairs_mapped = pairs
    .join(msgs)
    .map { case (first, (second, firstList)) => (second, (first, firstList)) }
    .join(msgs)
    .map { case (second, ((first, firstList), secondList)) =>
      ((second, first), (secondList, firstList))
    }
  // Score every pair, format it as a CSV line and write the result out.
  val res = pairs_mapped
    .map(cs)
    .map(to_csv_cos)
    .saveAsTextFile("F:\\Scala\\night_result6")
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top