Guest User

Untitled

a guest
Jan 22nd, 2018
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.08 KB | None | 0 0
  1. new_dict = modify_object_dict(object_dict, object)
  2.  
  3. object_dict = {} # dictionary is initially empty
  4. RDD = (sc.parallelize(Objects)
  5. .map(lambda object: modify_object_dict(object_dict, object))
  6.  
  7. class Foobar(object):
  8. def __init__(self, name, x=None, y=None, z=None):
  9. self.name = name
  10. self.x = x
  11. self.y = y
  12. self.z = z
  13.  
  14. objects = sc.parallelize([
  15. {"name": "foo", "x": 1}, {"name": "foo", "y": 3},
  16. {"name": "bar", "z": 4}
  17. ]).map(lambda x: Foobar(**x))
  18.  
  19. pairs = objects.map(lambda obj: (obj.name, obj))
  20.  
  21. rdd = pairs.groupByKey().mapValues(lambda iter: ...)
  22.  
  23. def seq_op(obj_dict, obj):
  24. # equivalent to modify_object_dict
  25. # Lets assume it is as simple as this
  26. obj_dict.update((k, getattr(obj, k)) for k in ("x", "y", "z"))
  27. return obj_dict
  28.  
  29. def comb_op(obj_dict_1, obj_dict_2):
  30. # lets it is a simple union
  31. obj_dict_1.update(obj_dict_2)
  32. return obj_dict_1
  33.  
  34. dicts = pairs.aggregateByKey({}, seq_op, comb_op)
  35.  
  36. dicts.collectAsMap()
  37. ## {'bar': {'x': None, 'y': None, 'z': 4},
  38. ## 'foo': {'x': None, 'y': 3, 'z': None}}
Add Comment
Please, Sign In to add comment