Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Import the interface required by the Script snap.
- from com.snaplogic.scripting.language import ScriptHook
- import itertools
- import sys
- import java.util
class TransformScript(ScriptHook):
    """Script Snap hook that flattens and explodes every input document.

    Each input document is turned into one or more flat documents:
    nested dicts are collapsed into joined keys (``a.b.c``) and every
    list value is expanded so that each output document carries exactly
    one element per list (the cartesian product across all lists).

    NOTE(review): nested objects are only recursed into when they are
    Python ``dict`` instances; whether the Snap delivers nested objects
    as ``dict`` or as Java maps is not visible here -- confirm.
    """

    def __init__(self, input, output, error, log):
        """Keep references to the objects handed over by the Script Snap.

        :param input: Source of documents (``hasNext()`` / ``next()``)
        :param output: Sink for result documents (``write()``)
        :param error: Sink for error documents (``write()``)
        :param log: Logger (``info()`` / ``error()``)
        """
        self.input = input
        self.output = output
        self.error = error
        self.log = log
        # Every type that is treated as a "list" when exploding.
        self.list_types = (java.util.ArrayList, list, tuple)

    def explode(self, item):
        """ Explode JSON object with list values.

        Yields one shallow copy of ``item`` per combination of list
        elements.  An empty list contributes the single value ``None``
        (mirroring how empty dicts are flattened) instead of producing
        zero combinations, which would silently drop the whole object.

        :param dict item: Object to explode

        :Example:

        explode({'fizz': ['buzz', 'jazz', 'fuzz']}) yields
        {'fizz': 'buzz'}, {'fizz': 'jazz'}, {'fizz': 'fuzz'}

        explode({'fizz': [], 'id': 1}) yields {'fizz': None, 'id': 1}
        """
        # One list of (key, element) pairs per list-valued key.
        lists = (
            [(key, elem) for elem in val] if len(val) > 0 else [(key, None)]
            for key, val in item.items()
            if isinstance(val, self.list_types)
        )
        # Each combination picks exactly one element for every list key.
        # With no list-valued keys, product() yields a single empty
        # combination, so the item is passed through as a copy.
        for combo in itertools.product(*lists):
            xitem = item.copy()
            xitem.update(combo)
            yield xitem

    def flatsplode(self, item, join='.'):
        """ Explode & flatten JSON object with list values.

        Alternates ``explode`` and ``flatten`` until no list values are
        left, so lists nested inside dicts (and dicts nested inside
        lists) are fully expanded.

        :param dict item: Object to explode
        :param str join: Separator placed between nested key parts

        :Example:

        flatsplode({'fizz': [{'key': 'buzz'}, {'key': 'jazz'}]}) yields
        {'fizz.key': 'buzz'}, {'fizz.key': 'jazz'}
        """
        for expl in self.explode(item):
            flat = self.flatten(expl, join)
            # Flattening may surface lists that were nested inside dicts;
            # keep going until none remain.  The check is on type rather
            # than truthiness so that empty lists are normalised by
            # explode() as well.
            has_list = any(
                isinstance(val, self.list_types) for val in flat.values()
            )
            if has_list:
                for row in self.flatsplode(flat, join):
                    yield row
            else:
                yield flat

    def flatten(self, item, join='.'):
        """ Flattens nested JSON object.

        :param dict item: Object to flatten
        :param str join: Separator placed between nested key parts
        :return: New dict with one entry per leaf value

        :Example:

        flatten({'fizz': {'buzz': {'jazz': 'fuzz'}}})
        returns {'fizz.buzz.jazz': 'fuzz'}
        """
        return dict(self.iterkv(item, (), join))

    def iterkv(self, item, parents=(), join='.'):
        """ Iterate over key/values of item recursively.

        Yields ``(joined_key, value)`` pairs for every leaf of ``item``.
        An empty nested dict is reported as ``None``.

        :param dict item: Item to flatten
        :param tuple parents: Running tuple of parent keys
        :param str join: Separator placed between nested key parts
        """
        for key, val in item.items():
            path = parents + (key,)  # Assemble path parts
            if isinstance(val, dict):
                # Test the dict's size, not its keys: any(val) would
                # treat {'': 1} or {0: 'x'} as empty and lose the data.
                if val:
                    # Recurse into nested dict
                    for pair in self.iterkv(val, path, join):
                        yield pair
                else:
                    # `None` stands in for an empty dict
                    yield join.join(path), None
            else:
                # Base case: plain value
                yield join.join(path), val

    # The "execute()" method is called once when the pipeline is started
    # and allowed to process its inputs or just send data to its outputs.
    def execute(self):
        """Read every input document and write its flatsploded rows.

        A failure while processing one document is reported on the error
        output as ``{'error': <message>}`` and does not stop the loop.
        Rows already written for that document before the failure stay
        written, because rows are produced lazily.
        """
        self.log.info("Executing Transform script")
        # Slice instead of named attributes: works on any version_info.
        self.log.info("Jython Version: %d.%d.%d" % tuple(sys.version_info[:3]))
        while self.input.hasNext():
            try:
                # Read the next input document and write one output
                # document per flattened/exploded row.
                inDoc = self.input.next()
                for item in self.flatsplode(inDoc):
                    self.output.write(item)
            except Exception as e:
                errDoc = {
                    'error': str(e)
                }
                self.log.error("Error in python script")
                self.error.write(errDoc)
        self.log.info("Script executed")

    # The "cleanup()" method is called after the snap has exited the execute() method
    def cleanup(self):
        """Log that the script is cleaning up; no resources are held."""
        self.log.info("Cleaning up")
# The Script Snap will look for a ScriptHook object in the "hook"
# variable. The snap will then call the hook's "execute" method.
# NOTE(review): `input`, `output`, `error` and `log` are not defined in
# this file; presumably the Script Snap injects them -- confirm.
hook = TransformScript(input, output, error, log)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement