Guest User

Untitled

a guest
Jan 21st, 2019
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.86 KB | None | 0 0
  1. from graphframes import *
  2. def vertices(line):
  3. vert = [int(x) for x in line.split(" ")]
  4. return vert
  5.  
  6. vertices = adjacency_list.flatMap(lambda x: vertices(x)).distinct().collect()
  7. vertices = sqlContext.createDataFrame([[x] for x in vertices], ["id"])
  8.  
  9. def create_edges(line):
  10. a = [int(x) for x in line.split(" ")]
  11. edges_list=[]
  12. if len(a)==1:
  13. edges_list.append((a[0],a[0]))
  14. for i in range(0, len(a)-1):
  15. for j in range(i+1 ,len(a)):
  16. edges_list.append((a[i],a[j]))
  17. edges_list.append((a[j],a[i]))
  18. return edges_list
  19.  
  20. edges = adjacency_list.flatMap(lambda x: create_edges(x)).distinct().collect()
  21. edges = sqlContext.createDataFrame(edges, ["src", "dst"])
  22.  
  23. g = GraphFrame(vertices, edges)
  24. sc.setCheckpointDir(".")
  25.  
  26. # graphframes uses the same paper we referenced apparently
  27. cc = g.connectedComponents()
  28. print cc.show()
Add Comment
Please, Sign In to add comment