jlandells

FlatSplode code for Script Snap

Mar 18th, 2021
12
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # Import the interface required by the Script snap.
  2. from com.snaplogic.scripting.language import ScriptHook
  3. import itertools
  4. import sys
  5. import java.util
  6.  
  7.  
  8. class TransformScript(ScriptHook):
  9.     def __init__(self, input, output, error, log):
  10.         self.input = input
  11.         self.output = output
  12.         self.error = error
  13.         self.log = log
  14.         self.list_types = (java.util.ArrayList, list, tuple)
  15.    
  16.     def explode(self, item):
  17.         """ Explode JSON object with list values.
  18.  
  19.            :param dict item: Object to explode
  20.  
  21.            :Example:
  22.  
  23.            >>> explode({'fizz': ['buzz', 'jazz', 'fuzz']})
  24.        """
  25.         # Collect item values that are lists/tuples
  26.         lists = (
  27.             [(k, x) for x in v]
  28.             for k, v in item.items()
  29.             if isinstance(v, (java.util.ArrayList, list, tuple))
  30.         )
  31.         # Calculate combinations of values in each list
  32.         combos = map(dict, itertools.product(*lists))
  33.         # Yield each combination
  34.         for combo in combos:
  35.             xitem = item.copy()
  36.             xitem.update(combo)
  37.             yield xitem
  38.  
  39.  
  40.     def flatsplode(self, item, join='.'):
  41.         """ Explode & flatten JSON object with list values.
  42.  
  43.            :param dict item: Object to explode
  44.  
  45.            :Example:
  46.  
  47.            >>> flatsplode({'fizz': [{'key': buzz'}, {'key': 'jazz'}]})
  48.        """
  49.         flatsploded = []
  50.         for expl in self.explode(item):
  51.             flat = self.flatten(expl, join)
  52.             items = filter(lambda x: isinstance(x, self.list_types), flat.values())
  53.             if any(items):
  54.                 for y in self.flatsplode(flat, join):
  55.                     yield y
  56.             else:
  57.                 yield flat
  58.  
  59.  
  60.     def flatten(self, item, join='.'):
  61.         """ Flattens nested JSON object.
  62.  
  63.            :param dict item: Object to flatten
  64.  
  65.            :Example:
  66.  
  67.            >>> flatten({'fizz': {'buzz': {'jazz': 'fuzz'}}})
  68.        """
  69.         return dict(self.iterkv(item, (), join))
  70.  
  71.  
  72.     def iterkv(self, item, parents=(), join='.'):
  73.         """ Iterate over key/values of item recursively.
  74.  
  75.            :param dict item: Item to flatten
  76.            :param tuple parents: Running tuple of parent keys
  77.        """
  78.         for key, val in item.items():
  79.             path = parents + (key,)     # Assemble path parts
  80.             key = str.join(join, path)  # join path parts
  81.  
  82.             # Recurse into nested dict
  83.             if isinstance(val, dict) and any(val):
  84.                 for x in self.iterkv(val, path, join):
  85.                     yield x
  86.  
  87.             # Or `None` if empty dict
  88.             elif isinstance(val, dict):
  89.                 yield(key, None)
  90.  
  91.             # Otherwise, yield base case
  92.             else:
  93.                 yield(key, val)
  94.  
  95.  
  96.     # The "execute()" method is called once when the pipeline is started
  97.     # and allowed to process its inputs or just send data to its outputs.
  98.     def execute(self):
  99.         jythonVersion = sys.version_info
  100.         self.log.info("Executing Transform script")
  101.         self.log.info("Jython Version: %d.%d.%d" % (jythonVersion.major, jythonVersion.minor, jythonVersion.micro))
  102.         while self.input.hasNext():
  103.             try:
  104.                 # Read the next input document, store it in a new dictionary, and write this as an output document.
  105.                 inDoc = self.input.next()
  106.                 # outDoc = {
  107.                 #     'original' : inDoc
  108.                 # }
  109.                 # self.output.write(inDoc, outDoc)
  110.                 items = self.flatsplode(inDoc)
  111.                 for item in items:
  112.                     self.output.write(item)
  113.             except Exception as e:
  114.                 errDoc = {
  115.                     'error' : str(e)
  116.                 }
  117.                 self.log.error("Error in python script")
  118.                 self.error.write(errDoc)
  119.  
  120.         self.log.info("Script executed")
  121.  
  122.     # The "cleanup()" method is called after the snap has exited the execute() method
  123.     def cleanup(self):
  124.         self.log.info("Cleaning up")
  125.  
  126. # The Script Snap will look for a ScriptHook object in the "hook"
  127. # variable.  The snap will then call the hook's "execute" method.
  128. hook = TransformScript(input, output, error, log)
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound or popup ads; we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×