Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from __future__ import absolute_import
- import csv
- import datetime
- import os
- from google.cloud import bigquery
- query_template = """
- /*
- * Dedupes and aggregates the raw data to get the final output table.
- */
- WITH
- /* 1. Join the raw data with the POI table. */
- join_poi AS (
- SELECT poi.poi_type,
- poi.poi_sub_type,
- poi.poi_name,
- poi.address,
- poi.city,
- poi.zip,
- raw.*
- FROM `marketplace-0.bizsim.{}` raw
- INNER JOIN `marketplace-0.bizsim.poi_output` poi
- ON raw.poi_id=poi.poi_id
- AND raw.uuid=poi.job_uuid
- ),
- /* 2. Dedup chain stores. */
- deduped AS (
- SELECT *
- FROM join_poi
- WHERE poi_sub_type = 'MomAndPop'
- UNION ALL
- SELECT a.*
- FROM join_poi a
- INNER JOIN
- (
- SELECT poi_name,
- destination_lane_id,
- min(haversine_distance_meters) haversine_distance_meters
- FROM join_poi
- WHERE poi_sub_type != 'MomAndPop'
- GROUP BY poi_name,
- destination_lane_id
- ) b
- ON a.poi_name = b.poi_name
- AND a.destination_lane_id = b.destination_lane_id
- AND a.haversine_distance_meters = b.haversine_distance_meters
- ),
- /* 3. Join with population table. */
- join_pop AS (
- SELECT deduped.*,
- pop.pop_avg,
- pop.size_avg
- FROM deduped
- INNER JOIN `marketplace-0.bizsim._gen_lanes_usa_with_pop_endpoint` pop
- ON pop.pop_avg > 0 AND pop.size_avg > 0
- AND deduped.destination_lane_id = pop.lane_id
- )
- /* 4. Aggregate. */
- SELECT
- /* POI info */
- poi_id,
- ANY_VALUE(poi_type) poi_type,
- ANY_VALUE(poi_sub_type) poi_sub_type,
- ANY_VALUE(poi_name) poi_name,
- ANY_VALUE(address) poi_address,
- ANY_VALUE(city) poi_city,
- ANY_VALUE(state) poi_state,
- ANY_VALUE(zip) poi_zip,
- zip_code destination_zip,
- /* Reachability statistics */
- sum(size_avg) unconstrained_sq_meters,
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN size_avg ELSE 0 END) constrained_sq_meters,
- SUM(pop_avg) unconstrained_pop,
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END) / SUM(pop_avg) physical_reachability,
- /* The following four columns have physical_reachability as the denominator. */
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 10 THEN pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) physical_reachability_10_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 15 THEN pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) physical_reachability_15_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 30 THEN pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) physical_reachability_30_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 45 THEN pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) physical_reachability_45_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 60 THEN pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) physical_reachability_60_min,
- /* The following four columns have the corresponding physical_reachability_*_min column as the denominator. detour factor using time */
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 10 AND
- constrained_time_minutes / unconstrained_time_minutes < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 10 THEN pop_avg ELSE 0 END)) lsv_by_time_10_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 15 AND
- constrained_time_minutes / unconstrained_time_minutes < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 15 THEN pop_avg ELSE 0 END)) lsv_by_time_15_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 30 AND
- constrained_time_minutes / unconstrained_time_minutes < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 30 THEN pop_avg ELSE 0 END)) lsv_by_time_30_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 45 AND
- constrained_time_minutes / unconstrained_time_minutes < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 45 THEN pop_avg ELSE 0 END)) lsv_by_time_45_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 60 AND
- constrained_time_minutes / unconstrained_time_minutes < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 60 THEN pop_avg ELSE 0 END)) lsv_by_time_60_min,
- /* The following four columns have the corresponding physical_reachability_*_min column as the denominator. detour factor using distance */
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 10 AND
- constrained_dist_total_miles / unconstrained_dist_total_miles < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 10 THEN pop_avg ELSE 0 END)) lsv_by_distance_10_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 15 AND
- constrained_dist_total_miles / unconstrained_dist_total_miles < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 15 THEN pop_avg ELSE 0 END)) lsv_by_distance_15_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 30 AND
- constrained_dist_total_miles / unconstrained_dist_total_miles < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 30 THEN pop_avg ELSE 0 END)) lsv_by_distance_30_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 45 AND
- constrained_dist_total_miles / unconstrained_dist_total_miles < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 45 THEN pop_avg ELSE 0 END)) lsv_by_distance_45_min,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 60 AND
- constrained_dist_total_miles / unconstrained_dist_total_miles < (CASE
- WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
- WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
- WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
- WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
- ELSE 1 END) THEN pop_avg ELSE 0 END),
- SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 60 THEN pop_avg ELSE 0 END)) lsv_by_distance_60_min,
- /* Constrained route breakdown */
- SAFE_DIVIDE(SUM(constrained_dist_total_miles * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_dist_total_miles,
- SAFE_DIVIDE(SUM(constrained_time_minutes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_time_minutes,
- SAFE_DIVIDE(SUM(constrained_dist_over_55_mph_miles * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_dist_over_55_mph_miles,
- SAFE_DIVIDE(SUM(constrained_dist_41_to_54_mph_miles * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_dist_41_to_54_mph_miles,
- SAFE_DIVIDE(SUM(constrained_dist_31_to_40_mph_miles * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_dist_31_to_40_mph_miles,
- SAFE_DIVIDE(SUM(constrained_dist_0_to_30_mph_miles * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_dist_0_to_30_mph_miles,
- SAFE_DIVIDE(SUM(constrained_num_upc_over_55_mph_1_lane * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_over_55_mph_1_lane,
- SAFE_DIVIDE(SUM(constrained_num_upc_41_to_54_mph_1_lane * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_41_to_54_mph_1_lane,
- SAFE_DIVIDE(SUM(constrained_num_upc_31_to_40_mph_1_lane * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_31_to_40_mph_1_lane,
- SAFE_DIVIDE(SUM(constrained_num_upc_0_to_30_mph_1_lane * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_0_to_30_mph_1_lane,
- SAFE_DIVIDE(SUM(constrained_num_upc_over_55_mph_2_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_over_55_mph_2_lanes,
- SAFE_DIVIDE(SUM(constrained_num_upc_41_to_54_mph_2_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_41_to_54_mph_2_lanes,
- SAFE_DIVIDE(SUM(constrained_num_upc_31_to_40_mph_2_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_31_to_40_mph_2_lanes,
- SAFE_DIVIDE(SUM(constrained_num_upc_0_to_30_mph_2_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_0_to_30_mph_2_lanes,
- SAFE_DIVIDE(SUM(constrained_num_upc_over_55_mph_over_3_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_over_55_mph_over_3_lanes,
- SAFE_DIVIDE(SUM(constrained_num_upc_41_to_54_mph_over_3_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_41_to_54_mph_over_3_lanes,
- SAFE_DIVIDE(SUM(constrained_num_upc_31_to_40_mph_over_3_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_31_to_40_mph_over_3_lanes,
- SAFE_DIVIDE(SUM(constrained_num_upc_0_to_30_mph_over_3_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_0_to_30_mph_over_3_lanes,
- /* Unconstrained route breakdown */
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN unconstrained_dist_total_miles * pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_unconstrained_dist_total_miles,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN unconstrained_time_minutes * pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_unconstrained_time_minutes,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN unconstrained_dist_over_55_mph_miles * pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_unconstrained_dist_over_55_mph_miles,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN unconstrained_dist_41_to_54_mph_miles * pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_unconstrained_dist_41_to_54_mph_miles,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN unconstrained_dist_31_to_40_mph_miles * pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_unconstrained_dist_31_to_40_mph_miles,
- SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN unconstrained_dist_0_to_30_mph_miles * pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_unconstrained_dist_0_to_30_mph_miles
- FROM join_pop
- WHERE {}
- GROUP BY poi_id, zip_code;
- """
- header = ["poi_id", "poi_type", "poi_sub_type", "poi_name", "poi_address", "poi_city", "poi_state", "poi_zip", "destination_zip",
- "unconstrained_sqr_meters", "constrained_sq_meters", "unconstrained_pop",
- "physical_reachability", "physical_reachability_10_min", "physical_reachability_15_min", "physical_reachability_30_min",
- "physical_reachability_45_min",
- "physical_reachability_60_min", "lsv_by_time_10_min", "lsv_by_time_15_min", "lsv_by_time_30_min", "lsv_by_time_45_min",
- "lsv_by_time_60_min",
- "lsv_by_distance_10_min", "lsv_by_distance_15_min", "lsv_by_distance_30_min", "lsv_by_distance_45_min", "lsv_by_distance_60_min",
- "avg_constrained_dist_total_miles",
- "avg_constrained_time_minutes", "avg_constrained_dist_over_55_mph_miles", "avg_constrained_dist_41_to_54_mph_miles",
- "avg_constrained_dist_31_to_40_mph_miles", "avg_constrained_dist_0_to_30_mph_miles", "avg_constrained_num_upc_over_55_mph_1_lane",
- "avg_constrained_num_upc_41_to_54_mph_1_lane", "avg_constrained_num_upc_31_to_40_mph_1_lane", "avg_constrained_num_upc_0_to_30_mph_1_lane",
- "avg_constrained_num_upc_over_55_mph_2_lanes", "avg_constrained_num_upc_41_to_54_mph_2_lanes",
- "avg_constrained_num_upc_31_to_40_mph_2_lanes", "avg_constrained_num_upc_0_to_30_mph_2_lanes",
- "avg_constrained_num_upc_over_55_mph_over_3_lanes", "avg_constrained_num_upc_41_to_54_mph_over_3_lanes",
- "avg_constrained_num_upc_31_to_40_mph_over_3_lanes", "avg_constrained_num_upc_0_to_30_mph_over_3_lanes",
- "avg_unconstrained_dist_total_miles", "avg_unconstrained_time_minutes", "avg_unconstrained_dist_over_55_mph_miles",
- "avg_unconstrained_dist_41_to_54_mph_miles", "avg_unconstrained_dist_31_to_40_mph_miles", "avg_unconstrained_dist_0_to_30_mph_miles"]
- class BigQuery():
- def __init__(self, output_dir):
- self.client = bigquery.Client()
- self.output_dir = output_dir
- def query(self, output_table_name, is_fa):
- if not os.path.exists(self.output_dir):
- os.mkdir(self.output_dir)
- if is_fa:
- where_stmt = "poi_type='FoodAggregator'"
- fa_str = "fa"
- else:
- where_stmt = "poi_type!='FoodAggregator'"
- fa_str = "nonfa"
- cur_date = datetime.datetime.today()
- file_name = output_table_name.replace("bizsim_output_", "bizsim_summary_{}{}_".format(cur_date.month, cur_date.day))
- file_name = file_name.replace("_false_", "_protected_" + fa_str)
- file_name = file_name.replace("_true_", "_unprotected_" + fa_str)
- file_name = file_name + '.csv'
- full_path = os.path.join(output_dir, file_name)
- if os.path.exists(full_path):
- print "file exists: {}".format(full_path)
- exit(1)
- fa_query_job = self.client.query(query_template.format(output_table_name, where_stmt))
- results = fa_query_job.result() # Waits for job to complete.
- print "start processing ", full_path
- rows_to_write = []
- for row in results:
- rows_to_write.append(row)
- print "start writing ", full_path
- with open(full_path, 'w+') as f:
- csv_writer = csv.writer(f)
- csv_writer.writerow(header)
- csv_writer.writerows(rows_to_write)
- if __name__ == '__main__':
- output_tables = [
- "bizsim_output_21_30_21_30_0_true_44eebece_abef_49e7_a424_c3af0a52694a",
- "bizsim_output_21_30_21_30_2_true_8454e8d6_d798_42c3_b2fe_e08c86e4b4cc",
- "bizsim_output_21_30_31_40_2_true_91e83f91_860d_4bdd_aae5_3a671571fc43",
- "bizsim_output_31_40_31_40_2_true_27df91f7_c733_41a5_bed4_d1bb9e0001c3",
- "bizsim_output_31_40_31_40_4_true_deb45752_4696_44da_a9cd_1e5b23155264",
- "bizsim_output_41_54_41_54_999_true_b66e8e4d_387a_463c_bb83_a9d73f105839",
- #
- # "bizsim_output_21_30_21_30_2_false_5ec30a97_5410_4e48_8cd2_ac60db9738c0",
- # "bizsim_output_21_30_31_40_2_false_a645b9cb_fd1b_4e99_8665_57071834a549",
- # "bizsim_output_31_40_31_40_2_false_0cc4b2aa_906b_4a7b_9c51_ff82a505159c",
- # "bizsim_output_31_40_31_40_4_false_771ea28f_3876_442e_b49f_7d44b39b38a5",
- # "bizsim_output_41_54_41_54_999_false_b31bb5f7_6bfe_4a5d_930c_1465f9f60334"
- ]
- output_dir = "./outputs"
- bq = BigQuery(output_dir)
- for output_table in output_tables:
- for fa in [True, False]:
- bq.query(output_table, fa)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement