Guest User

Untitled

a guest
Jul 16th, 2018
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.87 KB | None | 0 0
  1. pipeline
  2. | "Load" >> ReadFromText("query.txt") #gs://beaconinside-dataflow-test/*.txt
  3. | "Count Words" >> CountWordsTransform()
  4.  
  5. class CountWordsTransform(beam.PTransform):
  6. def expand(self, p_collection):
  7. anotherPipleline = beam.Pipeline(runner="DataflowRunner", argv=[
  8. "--staging_location", ("%s/staging" % gcs_path),
  9. "--temp_location", ("%s/temp" % gcs_path),
  10. "--output", ("%s/output" % gcs_path),
  11. "--setup_file", "./setup.py"
  12. ])
  13. value2 = anotherPipleline | 'create2' >> Create([("a", 1), ("b", 2), ("c", 3)])
  14. return (p_collection
  15. | "Split" >> (beam.ParDo(FindWords(), beam.pvalue.AsDict(value2))))
  16.  
  17. class FindWords(beam.DoFn):
  18. def process(self, element, values):
  19. import re as regex
  20. return regex.findall(r"[A-Za-z']+", element)
  21.  
  22. Error raised when running the pipeline: 'NoneType' object has no attribute 'parts'
Add Comment
Please, Sign In to add comment