Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import org.apache.spark.graphx._
- import org.apache.spark.rdd.RDD
- import org.apache.spark.sql._
- import org.apache.spark.sql.types._
- import org.graphframes._
- //import collection.mutable.HashMap
- // Vertex DataFrame: user_id is aliased to "id", the vertex key column GraphFrame is given below.
- val utenti = sqlContext.sql("SELECT user_id as id,name FROM users")
- utenti.show()
- // Raw friendship data: one row per user, with that user's friend list in the "friends" column.
- val friends = sqlContext.sql("SELECT user_id,friends FROM users")
- // Turn each row into (user_id, friend ids).
- // The friend list is read through toString and stripped of whitespace, single/double quotes and
- // square brackets before being split on commas (presumably it looks like "['a', 'b']" - confirm
- // against the actual schema printed below).
- // A null friend list is treated as empty, and blank ids are dropped so that users without
- // friends do not produce an edge pointing at an empty vertex id.
- val friends_arr = friends.map { row =>
-   val rawFriends = Option(row.get(1)).map(_.toString).getOrElse("")
-   val friendIds = rawFriends
-     .replaceAll("[\\s'\"\\[\\]]", "")
-     .split(",")
-     .filter(_.nonEmpty)
-   (row.get(0).toString, friendIds)
- }
- friends.printSchema()
- // Explode every (user, friend ids) pair into one (user, friend) pair per friendship.
- val relationships = friends_arr.flatMap { pair =>
-   pair._2.map(friendId => (pair._1, friendId))
- }
- // Edge DataFrame with the "src"/"dst" column names used by GraphFrame.
- val relazioni = relationships.toDF("src", "dst")
- // Create a GraphFrame from the vertex DataFrame (utenti) and the edge DataFrame (relazioni).
- val gf = GraphFrame(utenti, relazioni)
- // Convert to a GraphX graph so the GraphX connected-components algorithm can be used.
- // NOTE(review): the vertex ids here come from user_id, which is presumably not numeric; check
- // how toGraphX maps such ids to GraphX Long vertex ids before interpreting the output below.
- val gx: Graph[Row, Row] = gf.toGraphX
- gx.cache()
- // One (vertexId, componentId) pair per vertex.
- val cc = gx.connectedComponents().vertices
- // cc is a local RDD, not a table registered with sqlContext, so "SELECT * FROM cc" cannot be
- // resolved. Convert the result to a DataFrame directly instead of querying a missing table.
- val prova = cc.toDF("id", "component")
- prova.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement