Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- * Dedupes and aggregates the raw data to get the final output table.
- */
- WITH
- /* 1. Join the raw data with the POI table. */
- join_poi AS (
- SELECT poi.poi_type,
- poi.poi_sub_type,
- poi.poi_name,
- poi.address,
- poi.city,
- LPAD(poi.zip, 5, '0') zip,
- RAW.*
- FROM `marketplace-0.bizsim.bizsim_output_21_30_21_30_0_true_1fc31293_1c04_4899_b919_98d113652c25` RAW
- INNER JOIN `marketplace-0.bizsim.poi_output` poi
- ON RAW.poi_id=poi.poi_id
- AND RAW.uuid=poi.job_uuid
- ),
- /* 2. Dedup chain stores. */
- deduped AS (
- SELECT *
- FROM join_poi
- WHERE poi_sub_type = 'MomAndPop'
- UNION ALL
- SELECT a.*
- FROM join_poi a
- INNER JOIN
- (
- SELECT poi_name,
- destination_lane_id,
- MIN(haversine_distance_meters) haversine_distance_meters
- FROM join_poi
- WHERE poi_sub_type != 'MomAndPop'
- GROUP BY poi_name,
- destination_lane_id
- ) b
- ON a.poi_name = b.poi_name
- AND a.destination_lane_id = b.destination_lane_id
- AND a.haversine_distance_meters = b.haversine_distance_meters
- ),
- /* 3. Join with population table. */
- join_pop AS (
- SELECT deduped.*,
- pop.pop_avg,
- pop.size_avg
- FROM deduped
- INNER JOIN `marketplace-0.bizsim._gen_lanes_usa_with_pop_endpoint` pop
- ON pop.pop_avg > 0 AND pop.size_avg > 0
- AND deduped.destination_lane_id = pop.lane_id
- )
- /* 4. Aggregate. */
- SELECT
- /* POI info */
- poi_id,
- ANY_VALUE(poi_type) poi_type,
- ANY_VALUE(poi_sub_type) poi_sub_type,
- ANY_VALUE(poi_name) poi_name,
- ANY_VALUE(address) poi_address,
- ANY_VALUE(city) poi_city,
- ANY_VALUE(state) poi_state,
- ANY_VALUE(zip) poi_zip,
- zip_code destination_zip,
- /* Reachability statistics */
- SUM(size_avg) unconstrained_sq_meters,
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN size_avg ELSE 0 END) constrained_sq_meters,
- SUM(pop_avg) unconstrained_pop,
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END) / SUM(pop_avg) physical_reachability,
- /* The following four columns have physical_reachability as the denominator. */
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 10 THEN pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) physical_reachability_10_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 13 THEN pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) physical_reachability_13_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 15 THEN pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) physical_reachability_15_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 20 THEN pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) physical_reachability_20_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 30 THEN pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) physical_reachability_30_min,
- /* The following four columns have the corresponding physical_reachability_*_min column as the denominator. detour factor using time */
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 10 AND
- constrained_time_minutes / unconstrained_time_minutes < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 10 THEN pop_avg ELSE 0 END)) lsv_by_time_10_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 13 AND
- constrained_time_minutes / unconstrained_time_minutes < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 13 THEN pop_avg ELSE 0 END)) lsv_by_time_13_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 15 AND
- constrained_time_minutes / unconstrained_time_minutes < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 15 THEN pop_avg ELSE 0 END)) lsv_by_time_15_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 20 AND
- constrained_time_minutes / unconstrained_time_minutes < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 20 THEN pop_avg ELSE 0 END)) lsv_by_time_20_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 30 AND
- constrained_time_minutes / unconstrained_time_minutes < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 30 THEN pop_avg ELSE 0 END)) lsv_by_time_30_min,
- /* The following four columns have the corresponding physical_reachability_*_min column as the denominator. detour factor using distance */
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 10 AND
- constrained_dist_total_miles / unconstrained_dist_total_miles < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 10 THEN pop_avg ELSE 0 END)) lsv_by_distance_10_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 13 AND
- constrained_dist_total_miles / unconstrained_dist_total_miles < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 13 THEN pop_avg ELSE 0 END)) lsv_by_distance_13_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 15 AND
- constrained_dist_total_miles / unconstrained_dist_total_miles < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 15 THEN pop_avg ELSE 0 END)) lsv_by_distance_15_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 20 AND
- constrained_dist_total_miles / unconstrained_dist_total_miles < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 20 THEN pop_avg ELSE 0 END)) lsv_by_distance_20_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 30 AND
- constrained_dist_total_miles / unconstrained_dist_total_miles < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 30 THEN pop_avg ELSE 0 END)) lsv_by_distance_30_min,
- /* Constrained route breakdown */
- SAFE_DIVIDE(SUM(constrained_dist_total_miles * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_dist_total_miles,
- SAFE_DIVIDE(SUM(constrained_time_minutes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_time_minutes,
- SAFE_DIVIDE(SUM(constrained_dist_over_55_mph_miles * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_dist_over_55_mph_miles,
- SAFE_DIVIDE(SUM(constrained_dist_41_to_54_mph_miles * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_dist_41_to_54_mph_miles,
- SAFE_DIVIDE(SUM(constrained_dist_31_to_40_mph_miles * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_dist_31_to_40_mph_miles,
- SAFE_DIVIDE(SUM(constrained_dist_0_to_30_mph_miles * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_dist_0_to_30_mph_miles,
- SAFE_DIVIDE(SUM(constrained_num_upc_over_55_mph_1_lane * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_over_55_mph_1_lane,
- SAFE_DIVIDE(SUM(constrained_num_upc_41_to_54_mph_1_lane * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_41_to_54_mph_1_lane,
- SAFE_DIVIDE(SUM(constrained_num_upc_31_to_40_mph_1_lane * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_31_to_40_mph_1_lane,
- SAFE_DIVIDE(SUM(constrained_num_upc_0_to_30_mph_1_lane * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_0_to_30_mph_1_lane,
- SAFE_DIVIDE(SUM(constrained_num_upc_over_55_mph_2_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_over_55_mph_2_lanes,
- SAFE_DIVIDE(SUM(constrained_num_upc_41_to_54_mph_2_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_41_to_54_mph_2_lanes,
- SAFE_DIVIDE(SUM(constrained_num_upc_31_to_40_mph_2_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_31_to_40_mph_2_lanes,
- SAFE_DIVIDE(SUM(constrained_num_upc_0_to_30_mph_2_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_0_to_30_mph_2_lanes,
- SAFE_DIVIDE(SUM(constrained_num_upc_over_55_mph_over_3_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_over_55_mph_over_3_lanes,
- SAFE_DIVIDE(SUM(constrained_num_upc_41_to_54_mph_over_3_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_41_to_54_mph_over_3_lanes,
- SAFE_DIVIDE(SUM(constrained_num_upc_31_to_40_mph_over_3_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_31_to_40_mph_over_3_lanes,
- SAFE_DIVIDE(SUM(constrained_num_upc_0_to_30_mph_over_3_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_0_to_30_mph_over_3_lanes,
- /* Unconstrained route breakdown */
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN unconstrained_dist_total_miles * pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_unconstrained_dist_total_miles,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN unconstrained_time_minutes * pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_unconstrained_time_minutes,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN unconstrained_dist_over_55_mph_miles * pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_unconstrained_dist_over_55_mph_miles,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN unconstrained_dist_41_to_54_mph_miles * pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_unconstrained_dist_41_to_54_mph_miles,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN unconstrained_dist_31_to_40_mph_miles * pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_unconstrained_dist_31_to_40_mph_miles,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN unconstrained_dist_0_to_30_mph_miles * pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_unconstrained_dist_0_to_30_mph_miles
- FROM join_pop
- GROUP BY poi_id, zip_code;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement