Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- -- Register the Jython UDF module; its functions are called below as utils.<function>
- REGISTER 's3://bucket_name/udf/utils.py' using jython as utils;
- -- Cogroup the email events and segments.
- -- Each output record holds the group key plus one bag of email_event tuples
- -- and one bag of segments tuples that share that key.
- grp_email_segments = cogroup email_event by subscriber_key, segments by customer_id;
- -- Keep only the groups that contain at least one email event
- with_email = filter grp_email_segments by not IsEmpty(email_event);
- -- Groups that have no segments; you may want these later, if just for error checking
- without_segment = filter grp_email_segments by IsEmpty(segments);
- -- Flatten everything: one output row per (email_event, segments) pair within a group.
- -- Flattening an empty bag produces no rows, so groups without segments drop out here.
- email_segment_dates = foreach with_email generate
- flatten(email_event),
- flatten(segments);
- -- Run the range UDF.
- -- After flatten, fields are prefixed with their source alias using '::'.
- -- Run "describe email_segment_dates" to confirm the exact field names.
- email_segments = foreach email_segment_dates generate
- email_event::subscriber_key,
- email_event::send_id,
- -- ... add any other fields you need here, each followed by a comma ...
- utils.is_contained_within(email_event::date, segments::start_date, segments::end_date) as is_contained;
- -- Keep only the rows where the email date falls inside the segment's date range.
- -- NOTE(review): assumes the UDF returns 1 for "contained" -- confirm against its output schema.
- emails_with_segments = filter email_segments by (is_contained == 1);
Add Comment
Please, Sign In to add comment