Advertisement
jlandells

FlatSplode code for Script Snap

Mar 18th, 2021
309
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.23 KB | None | 0 0
  1. # Import the interface required by the Script snap.
  2. from com.snaplogic.scripting.language import ScriptHook
  3. import itertools
  4. import sys
  5. import java.util
  6.  
  7.  
  8. class TransformScript(ScriptHook):
  9.     def __init__(self, input, output, error, log):
  10.         self.input = input
  11.         self.output = output
  12.         self.error = error
  13.         self.log = log
  14.         self.list_types = (java.util.ArrayList, list, tuple)
  15.    
  16.     def explode(self, item):
  17.         """ Explode JSON object with list values.
  18.  
  19.            :param dict item: Object to explode
  20.  
  21.            :Example:
  22.  
  23.            >>> explode({'fizz': ['buzz', 'jazz', 'fuzz']})
  24.        """
  25.         # Collect item values that are lists/tuples
  26.         lists = (
  27.             [(k, x) for x in v]
  28.             for k, v in item.items()
  29.             if isinstance(v, (java.util.ArrayList, list, tuple))
  30.         )
  31.         # Calculate combinations of values in each list
  32.         combos = map(dict, itertools.product(*lists))
  33.         # Yield each combination
  34.         for combo in combos:
  35.             xitem = item.copy()
  36.             xitem.update(combo)
  37.             yield xitem
  38.  
  39.  
  40.     def flatsplode(self, item, join='.'):
  41.         """ Explode & flatten JSON object with list values.
  42.  
  43.            :param dict item: Object to explode
  44.  
  45.            :Example:
  46.  
  47.            >>> flatsplode({'fizz': [{'key': buzz'}, {'key': 'jazz'}]})
  48.        """
  49.         flatsploded = []
  50.         for expl in self.explode(item):
  51.             flat = self.flatten(expl, join)
  52.             items = filter(lambda x: isinstance(x, self.list_types), flat.values())
  53.             if any(items):
  54.                 for y in self.flatsplode(flat, join):
  55.                     yield y
  56.             else:
  57.                 yield flat
  58.  
  59.  
  60.     def flatten(self, item, join='.'):
  61.         """ Flattens nested JSON object.
  62.  
  63.            :param dict item: Object to flatten
  64.  
  65.            :Example:
  66.  
  67.            >>> flatten({'fizz': {'buzz': {'jazz': 'fuzz'}}})
  68.        """
  69.         return dict(self.iterkv(item, (), join))
  70.  
  71.  
  72.     def iterkv(self, item, parents=(), join='.'):
  73.         """ Iterate over key/values of item recursively.
  74.  
  75.            :param dict item: Item to flatten
  76.            :param tuple parents: Running tuple of parent keys
  77.        """
  78.         for key, val in item.items():
  79.             path = parents + (key,)     # Assemble path parts
  80.             key = str.join(join, path)  # join path parts
  81.  
  82.             # Recurse into nested dict
  83.             if isinstance(val, dict) and any(val):
  84.                 for x in self.iterkv(val, path, join):
  85.                     yield x
  86.  
  87.             # Or `None` if empty dict
  88.             elif isinstance(val, dict):
  89.                 yield(key, None)
  90.  
  91.             # Otherwise, yield base case
  92.             else:
  93.                 yield(key, val)
  94.  
  95.  
  96.     # The "execute()" method is called once when the pipeline is started
  97.     # and allowed to process its inputs or just send data to its outputs.
  98.     def execute(self):
  99.         jythonVersion = sys.version_info
  100.         self.log.info("Executing Transform script")
  101.         self.log.info("Jython Version: %d.%d.%d" % (jythonVersion.major, jythonVersion.minor, jythonVersion.micro))
  102.         while self.input.hasNext():
  103.             try:
  104.                 # Read the next input document, store it in a new dictionary, and write this as an output document.
  105.                 inDoc = self.input.next()
  106.                 # outDoc = {
  107.                 #     'original' : inDoc
  108.                 # }
  109.                 # self.output.write(inDoc, outDoc)
  110.                 items = self.flatsplode(inDoc)
  111.                 for item in items:
  112.                     self.output.write(item)
  113.             except Exception as e:
  114.                 errDoc = {
  115.                     'error' : str(e)
  116.                 }
  117.                 self.log.error("Error in python script")
  118.                 self.error.write(errDoc)
  119.  
  120.         self.log.info("Script executed")
  121.  
  122.     # The "cleanup()" method is called after the snap has exited the execute() method
  123.     def cleanup(self):
  124.         self.log.info("Cleaning up")
  125.  
  126. # The Script Snap will look for a ScriptHook object in the "hook"
  127. # variable.  The snap will then call the hook's "execute" method.
  128. hook = TransformScript(input, output, error, log)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement