Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from graphframes import *
def vertices(line):
    """Parse one adjacency-list line into its vertex ids.

    Args:
        line: Text holding integer vertex ids separated by whitespace.

    Returns:
        A list of ints in the order they appear on the line. A blank line
        yields an empty list.

    Raises:
        ValueError: If any token is not a valid integer literal.
    """
    # split() with no argument collapses runs of whitespace and ignores
    # leading/trailing blanks; split(" ") would emit empty tokens there and
    # int('') would raise.
    return [int(token) for token in line.split()]
# Build the vertex DataFrame: one row per distinct vertex id that appears
# anywhere in the adjacency list, in a single column named "id".
# The ids are kept distributed and handed to createDataFrame as an RDD of
# 1-tuples instead of being collect()-ed to the driver and shipped back out,
# so the vertex set is not limited by driver memory.
# The parser function is passed to flatMap directly (no lambda doing a global
# lookup), so rebinding the name `vertices` to the DataFrame below cannot
# affect the already-built RDD.
vertex_ids = adjacency_list.flatMap(vertices).distinct()
vertices = sqlContext.createDataFrame(vertex_ids.map(lambda v: (v,)), ["id"])
def create_edges(line):
    """Turn one adjacency-list line into directed edge pairs.

    Every unordered pair of ids on the line produces two edges, one in each
    direction. A line holding a single id produces one self-loop so that the
    isolated vertex still shows up in the edge set.

    Args:
        line: Text holding integer vertex ids separated by whitespace.

    Returns:
        A list of (src, dst) int tuples. For ids at positions i < j the pair
        (ids[i], ids[j]) is immediately followed by (ids[j], ids[i]). A blank
        line yields an empty list.

    Raises:
        ValueError: If any token is not a valid integer literal.
    """
    # split() with no argument tolerates repeated/leading/trailing whitespace;
    # split(" ") would emit empty tokens there and int('') would raise.
    ids = [int(token) for token in line.split()]

    if len(ids) == 1:
        only = ids[0]
        return [(only, only)]

    edges_list = []
    for pos, src in enumerate(ids):
        # Pair each id only with the ids after it, then add both directions.
        for dst in ids[pos + 1:]:
            edges_list.append((src, dst))
            edges_list.append((dst, src))
    return edges_list
# Build the edge DataFrame with columns "src" and "dst" from the distinct
# (src, dst) pairs produced for every adjacency-list line.
# The pairs stay distributed: the RDD goes straight into createDataFrame
# instead of being collect()-ed to the driver first.
edge_pairs = adjacency_list.flatMap(create_edges).distinct()
edges = sqlContext.createDataFrame(edge_pairs, ["src", "dst"])

g = GraphFrame(vertices, edges)

# A checkpoint directory is set before running connectedComponents();
# "." places the checkpoint files in the current working directory.
sc.setCheckpointDir(".")

# graphframes apparently uses the same paper we referenced
cc = g.connectedComponents()

# show() prints the table itself and returns None, so it is called on its own
# rather than wrapped in print (which would emit a trailing "None").
cc.show()
Add Comment
Please, Sign In to add comment