SHARE
TWEET

extract_words

DeaD_EyE Mar 22nd, 2019 62 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. def extract_words(mixture):
  2.     """
  3.    Flat deep nested iterables and split strings if they occour.
  4.    """
  5.     stack = deque([mixture])
  6.     # using a stack to allow iterating
  7.     # deep nested list
  8.     # it could be done easier with recursion
  9.     # but all stack based languages have a recursion limit
  10.     to_split = (str, bytes)
  11.     # we want to split str and bytes
  12.     while stack:
  13.         # loop runs until stack is empty
  14.         current = stack.popleft()
  15.         # with the first iteration
  16.         # stack is currently empty
  17.         # and current has the first element from stack
  18.         if isinstance(current, to_split):
  19.             # split if the current object is a str or bytes
  20.             yield from current.split()
  21.         else:
  22.             # this branch is executed, if the current object
  23.             # is not a str or bytes
  24.             try:
  25.                 current = iter(current)
  26.                 # iter of iter returns the same iterator
  27.                 subelement = next(current)
  28.                 # the next does what the for loop does
  29.             except StopIteration:
  30.                 # but we have to check for errors manually
  31.                 pass
  32.             except TypeError:
  33.                 # and if an element is not iterable, it raieses
  34.                 # TypeError. Intgers are for example are not
  35.                 # iterable
  36.                 yield subelement
  37.             else:
  38.                 # if no error happens, put the current iterator back
  39.                 # to the left side of the stack
  40.                 stack.appendleft(current)
  41.                 # put the subelement of the beginning of the deque
  42.                 stack.appendleft(subelement)
  43.  
  44.  
  45. data = ['Test eins zwei drei', ['hallo', '123'], [[[[['foo bar']]], 'bat']], 12]
  46. extractor = extract_words(data)
  47.  
  48. result = list(extractor)
  49. print(result)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top