Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Build the main pipeline: read the input text line by line, then hand the
# lines to the composite transform that splits them into words.
# The chain is parenthesized so the multi-line `|` expression is one statement.
(
    pipeline
    | "Load" >> ReadFromText("query.txt")  # gs://beaconinside-dataflow-test/*.txt
    | "Count Words" >> CountWordsTransform()
)
class CountWordsTransform(beam.PTransform):
    """Composite transform that splits each input element into words.

    A small in-memory key/value PCollection is created and passed to
    ``FindWords`` as a dict side input.
    """

    def expand(self, p_collection):
        """Apply the word-splitting ``ParDo`` to ``p_collection``.

        Args:
            p_collection: The input PCollection of text elements.

        Returns:
            The PCollection produced by ``FindWords``.
        """
        # The side input must belong to the SAME pipeline as the main input.
        # Building it on a separately constructed ``beam.Pipeline`` (as the
        # earlier version did) yields a PCollection the main pipeline does
        # not know about; that is the likely cause of the reported
        # "'NoneType' object has no attribute 'parts'" failure.
        lookup = (
            p_collection.pipeline
            | "create2" >> Create([("a", 1), ("b", 2), ("c", 3)])
        )
        return p_collection | "Split" >> beam.ParDo(
            FindWords(), beam.pvalue.AsDict(lookup)
        )
class FindWords(beam.DoFn):
    """DoFn that extracts the words contained in a text element."""

    def process(self, element, values):
        """Return every run of ASCII letters/apostrophes found in ``element``.

        Args:
            element: The text to scan.
            values: Extra argument supplied by the caller (a side input);
                it is not used by this method.

        Returns:
            A list of the matched word strings, in order of appearance.
        """
        # Imported inside the method, as in the original code.
        import re

        word_pattern = re.compile(r"[A-Za-z']+")
        return word_pattern.findall(element)
- 'NoneType' object has no attribute 'parts'
Add Comment
Please, Sign In to add comment