Advertisement
Guest User

GraphLab Label Propagation

a guest
Nov 23rd, 2014
222
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.60 KB | None | 0 0
  1. def degree_count_fn (src, edge, dst):
  2.         """
  3.        http://graphlab.com/products/create/docs/generated/graphlab.SGraph.triple_apply.html
  4.        """
  5.         src['degree'] += 1
  6.         dst['degree'] += 1
  7.         return (src, edge, dst)
  8.  
  9.  
  10. # In[15]:
  11.  
  12. #from collections import Counter
  13. #from random import choice
  14.        
  15. def top_labels(labels):
  16.     from collections import Counter    
  17.     c = Counter(labels)
  18.     best = max(c.values())
  19.     top = [label for label, count in c.iteritems()
  20.            if count == best]
  21.     return top
  22.        
  23. def propagate_labels(src, edge, dst):
  24.     from random import choice, random
  25.    
  26.     def handle(src, dst):
  27.        
  28.         # aliases
  29.         src_degree = src["degree"]
  30.         src_neighbor_labels = src["neighbor_labels"]
  31.         dst_label = dst["label"]        
  32.         src_neighbor_labels.append(dst_label)
  33.        
  34.         top_labels_in_src_neighbors = top_labels(src_neighbor_labels)
  35.                
  36.         if len(src_neighbor_labels) == src_degree:
  37.             if src["stage"] == 1:
  38.                 src["label"] = choice(top_labels_in_src_neighbors)
  39.             elif src["stage"] == 2:
  40.                 src["done"] = int(src["label"] in top_labels_in_src_neighbors)# and len(top_labels_in_src_neighbors) == 1)
  41.    
  42.     if random() < 0.5:
  43.         handle(src, dst)
  44.         handle(dst, src)
  45.     else:
  46.         handle(dst, src)
  47.         handle(src, dst)
  48.    
  49.     return src, edge, dst
  50.  
  51. import graphlab.aggregate as agg
  52.  
# Values label_graph writes to the per-vertex 'stage' column; propagate_labels
# compares that column against the literals 1 and 2.
# Pass in which a vertex adopts one of its most common neighbour labels.
STAGE_PROPAGATE = 1
# Pass in which a vertex records in 'done' whether its label is among the
# most common neighbour labels.
STAGE_IS_DONE = 2
  55.  
  56. def label_graph(g, max_iterations=10000):
  57.     g.vertices['label'] = g.vertices['__id']
  58.     g.vertices['done'] = 0
  59.    
  60.     #for iteration in xrange(iterations):
  61.     #    print "#", iteration
  62.     for i in xrange(max_iterations):
  63.         g.vertices['neighbor_labels'] = g.vertices.apply(lambda _:[])
  64.         g.vertices['stage'] = STAGE_PROPAGATE        
  65.         g =  g.triple_apply(propagate_labels, ['label', 'neighbor_labels'])
  66.        
  67.         g.vertices['neighbor_labels'] = g.vertices.apply(lambda _:[])
  68.         g.vertices['stage'] = STAGE_IS_DONE
  69.         g =  g.triple_apply(propagate_labels, ['neighbor_labels', 'done'])
  70.        
  71.         done_count = g.vertices.groupby(key_columns='done', operations={'count': agg.COUNT()})
  72.         if 0 not in done_count["done"]:
  73.             break
  74.        
  75.     del g.vertices['neighbor_labels']
  76.     del g.vertices['stage']
  77.     del g.vertices['done']
  78.     return g
  79.  
  80.  
  81. # In[15]:
  82.  
# Driver: zero the 'degree' column, let degree_count_fn add 1 per incident
# edge, then run label propagation on the degree-annotated graph.
# NOTE(review): `sgraph` is not defined in this snippet -- presumably created
# in an earlier notebook cell; confirm.
sgraph.vertices["degree"] = 0
sgraph_with_degrees = sgraph.triple_apply(degree_count_fn, mutated_fields=['degree'])
labelled_g = label_graph(sgraph_with_degrees)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement