Advertisement
Guest User

Untitled

a guest
Jul 15th, 2019
117
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 22.55 KB | None | 0 0
  1. from __future__ import absolute_import
  2.  
  3. import csv
  4. import datetime
  5. import os
  6.  
  7. from google.cloud import bigquery
  8.  
  9. query_template = """
  10. /*
  11. * Dedupes and aggregates the raw data to get the final output table.
  12. */
  13.  
  14. WITH
  15. /* 1. Join the raw data with the POI table. */
  16. join_poi AS (
  17. SELECT poi.poi_type,
  18. poi.poi_sub_type,
  19. poi.poi_name,
  20. poi.address,
  21. poi.city,
  22. poi.zip,
  23. raw.*
  24. FROM `marketplace-0.bizsim.{}` raw
  25. INNER JOIN `marketplace-0.bizsim.poi_output` poi
  26. ON raw.poi_id=poi.poi_id
  27. AND raw.uuid=poi.job_uuid
  28. ),
  29. /* 2. Dedup chain stores. */
  30. deduped AS (
  31. SELECT *
  32. FROM join_poi
  33. WHERE poi_sub_type = 'MomAndPop'
  34. UNION ALL
  35. SELECT a.*
  36. FROM join_poi a
  37. INNER JOIN
  38. (
  39. SELECT poi_name,
  40. destination_lane_id,
  41. min(haversine_distance_meters) haversine_distance_meters
  42. FROM join_poi
  43. WHERE poi_sub_type != 'MomAndPop'
  44. GROUP BY poi_name,
  45. destination_lane_id
  46. ) b
  47. ON a.poi_name = b.poi_name
  48. AND a.destination_lane_id = b.destination_lane_id
  49. AND a.haversine_distance_meters = b.haversine_distance_meters
  50. ),
  51. /* 3. Join with population table. */
  52. join_pop AS (
  53. SELECT deduped.*,
  54. pop.pop_avg,
  55. pop.size_avg
  56. FROM deduped
  57. INNER JOIN `marketplace-0.bizsim._gen_lanes_usa_with_pop_endpoint` pop
  58. ON pop.pop_avg > 0 AND pop.size_avg > 0
  59. AND deduped.destination_lane_id = pop.lane_id
  60. )
  61. /* 4. Aggregate. */
  62. SELECT
  63. /* POI info */
  64. poi_id,
  65. ANY_VALUE(poi_type) poi_type,
  66. ANY_VALUE(poi_sub_type) poi_sub_type,
  67. ANY_VALUE(poi_name) poi_name,
  68. ANY_VALUE(address) poi_address,
  69. ANY_VALUE(city) poi_city,
  70. ANY_VALUE(state) poi_state,
  71. ANY_VALUE(zip) poi_zip,
  72. zip_code destination_zip,
  73.  
  74. /* Reachability statistics */
  75. sum(size_avg) unconstrained_sq_meters,
  76. SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN size_avg ELSE 0 END) constrained_sq_meters,
  77. SUM(pop_avg) unconstrained_pop,
  78. SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END) / SUM(pop_avg) physical_reachability,
  79.  
  80. /* The following four columns have physical_reachability as the denominator. */
  81. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 10 THEN pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) physical_reachability_10_min,
  82. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 15 THEN pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) physical_reachability_15_min,
  83. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 30 THEN pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) physical_reachability_30_min,
  84. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 45 THEN pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) physical_reachability_45_min,
  85. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 60 THEN pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) physical_reachability_60_min,
  86. /* The following four columns have the corresponding physical_reachability_*_min column as the denominator. detour factor using time */
  87. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 10 AND
  88. constrained_time_minutes / unconstrained_time_minutes < (CASE
  89. WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  90. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  91. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
  92. WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
  93. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
  94. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
  95. ELSE 1 END) THEN pop_avg ELSE 0 END),
  96. SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 10 THEN pop_avg ELSE 0 END)) lsv_by_time_10_min,
  97. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 15 AND
  98. constrained_time_minutes / unconstrained_time_minutes < (CASE
  99. WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  100. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  101. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
  102. WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
  103. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
  104. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
  105. ELSE 1 END) THEN pop_avg ELSE 0 END),
  106. SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 15 THEN pop_avg ELSE 0 END)) lsv_by_time_15_min,
  107. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 30 AND
  108. constrained_time_minutes / unconstrained_time_minutes < (CASE
  109. WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  110. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  111. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
  112. WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
  113. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
  114. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
  115. ELSE 1 END) THEN pop_avg ELSE 0 END),
  116. SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 30 THEN pop_avg ELSE 0 END)) lsv_by_time_30_min,
  117. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 45 AND
  118. constrained_time_minutes / unconstrained_time_minutes < (CASE
  119. WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  120. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  121. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
  122. WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
  123. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
  124. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
  125. ELSE 1 END) THEN pop_avg ELSE 0 END),
  126. SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 45 THEN pop_avg ELSE 0 END)) lsv_by_time_45_min,
  127. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 60 AND
  128. constrained_time_minutes / unconstrained_time_minutes < (CASE
  129. WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  130. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  131. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
  132. WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
  133. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
  134. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
  135. ELSE 1 END) THEN pop_avg ELSE 0 END),
  136. SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 60 THEN pop_avg ELSE 0 END)) lsv_by_time_60_min,
  137. /* The following four columns have the corresponding physical_reachability_*_min column as the denominator. detour factor using distance */
  138. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 10 AND
  139. constrained_dist_total_miles / unconstrained_dist_total_miles < (CASE
  140. WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  141. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  142. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
  143. WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
  144. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
  145. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
  146. ELSE 1 END) THEN pop_avg ELSE 0 END),
  147. SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 10 THEN pop_avg ELSE 0 END)) lsv_by_distance_10_min,
  148. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 15 AND
  149. constrained_dist_total_miles / unconstrained_dist_total_miles < (CASE
  150. WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  151. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  152. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
  153. WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
  154. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
  155. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
  156. ELSE 1 END) THEN pop_avg ELSE 0 END),
  157. SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 15 THEN pop_avg ELSE 0 END)) lsv_by_distance_15_min,
  158. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 30 AND
  159. constrained_dist_total_miles / unconstrained_dist_total_miles < (CASE
  160. WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  161. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  162. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
  163. WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
  164. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
  165. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
  166. ELSE 1 END) THEN pop_avg ELSE 0 END),
  167. SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 30 THEN pop_avg ELSE 0 END)) lsv_by_distance_30_min,
  168. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 45 AND
  169. constrained_dist_total_miles / unconstrained_dist_total_miles < (CASE
  170. WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  171. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  172. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
  173. WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
  174. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
  175. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
  176. ELSE 1 END) THEN pop_avg ELSE 0 END),
  177. SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 45 THEN pop_avg ELSE 0 END)) lsv_by_distance_45_min,
  178. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 60 AND
  179. constrained_dist_total_miles / unconstrained_dist_total_miles < (CASE
  180. WHEN poi_type = 'QSR' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  181. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'RestaurantChain' THEN 1.25
  182. WHEN poi_type = 'FoodAggregator' AND poi_sub_type = 'MomAndPop' THEN 1.25
  183. WHEN poi_type = 'Grocery' AND poi_sub_type = 'GroceryChain' THEN 1.5
  184. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'ConvenienceStores' THEN 1.5
  185. WHEN poi_type = 'ConvenienceStores' AND poi_sub_type = 'Pharmacy' THEN 1.5
  186. ELSE 1 END) THEN pop_avg ELSE 0 END),
  187. SUM(CASE WHEN constrained_time_minutes IS NOT NULL AND constrained_time_minutes <= 60 THEN pop_avg ELSE 0 END)) lsv_by_distance_60_min,
  188.  
  189. /* Constrained route breakdown */
  190. SAFE_DIVIDE(SUM(constrained_dist_total_miles * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_dist_total_miles,
  191. SAFE_DIVIDE(SUM(constrained_time_minutes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_time_minutes,
  192.  
  193. SAFE_DIVIDE(SUM(constrained_dist_over_55_mph_miles * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_dist_over_55_mph_miles,
  194. SAFE_DIVIDE(SUM(constrained_dist_41_to_54_mph_miles * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_dist_41_to_54_mph_miles,
  195. SAFE_DIVIDE(SUM(constrained_dist_31_to_40_mph_miles * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_dist_31_to_40_mph_miles,
  196. SAFE_DIVIDE(SUM(constrained_dist_0_to_30_mph_miles * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_dist_0_to_30_mph_miles,
  197.  
  198. SAFE_DIVIDE(SUM(constrained_num_upc_over_55_mph_1_lane * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_over_55_mph_1_lane,
  199. SAFE_DIVIDE(SUM(constrained_num_upc_41_to_54_mph_1_lane * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_41_to_54_mph_1_lane,
  200. SAFE_DIVIDE(SUM(constrained_num_upc_31_to_40_mph_1_lane * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_31_to_40_mph_1_lane,
  201. SAFE_DIVIDE(SUM(constrained_num_upc_0_to_30_mph_1_lane * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_0_to_30_mph_1_lane,
  202.  
  203. SAFE_DIVIDE(SUM(constrained_num_upc_over_55_mph_2_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_over_55_mph_2_lanes,
  204. SAFE_DIVIDE(SUM(constrained_num_upc_41_to_54_mph_2_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_41_to_54_mph_2_lanes,
  205. SAFE_DIVIDE(SUM(constrained_num_upc_31_to_40_mph_2_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_31_to_40_mph_2_lanes,
  206. SAFE_DIVIDE(SUM(constrained_num_upc_0_to_30_mph_2_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_0_to_30_mph_2_lanes,
  207.  
  208. SAFE_DIVIDE(SUM(constrained_num_upc_over_55_mph_over_3_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_over_55_mph_over_3_lanes,
  209. SAFE_DIVIDE(SUM(constrained_num_upc_41_to_54_mph_over_3_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_41_to_54_mph_over_3_lanes,
  210. SAFE_DIVIDE(SUM(constrained_num_upc_31_to_40_mph_over_3_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_31_to_40_mph_over_3_lanes,
  211. SAFE_DIVIDE(SUM(constrained_num_upc_0_to_30_mph_over_3_lanes * pop_avg), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_constrained_num_upc_0_to_30_mph_over_3_lanes,
  212.  
  213. /* Unconstrained route breakdown */
  214. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN unconstrained_dist_total_miles * pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_unconstrained_dist_total_miles,
  215. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN unconstrained_time_minutes * pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_unconstrained_time_minutes,
  216. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN unconstrained_dist_over_55_mph_miles * pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_unconstrained_dist_over_55_mph_miles,
  217. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN unconstrained_dist_41_to_54_mph_miles * pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_unconstrained_dist_41_to_54_mph_miles,
  218. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN unconstrained_dist_31_to_40_mph_miles * pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_unconstrained_dist_31_to_40_mph_miles,
  219. SAFE_DIVIDE(SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN unconstrained_dist_0_to_30_mph_miles * pop_avg ELSE 0 END), SUM(CASE WHEN constrained_time_minutes IS NOT NULL THEN pop_avg ELSE 0 END)) avg_unconstrained_dist_0_to_30_mph_miles
  220. FROM join_pop
  221. WHERE {}
  222. GROUP BY poi_id, zip_code;
  223.  
  224. """
  225. header = ["poi_id", "poi_type", "poi_sub_type", "poi_name", "poi_address", "poi_city", "poi_state", "poi_zip", "destination_zip",
  226. "unconstrained_sqr_meters", "constrained_sq_meters", "unconstrained_pop",
  227. "physical_reachability", "physical_reachability_10_min", "physical_reachability_15_min", "physical_reachability_30_min",
  228. "physical_reachability_45_min",
  229. "physical_reachability_60_min", "lsv_by_time_10_min", "lsv_by_time_15_min", "lsv_by_time_30_min", "lsv_by_time_45_min",
  230. "lsv_by_time_60_min",
  231. "lsv_by_distance_10_min", "lsv_by_distance_15_min", "lsv_by_distance_30_min", "lsv_by_distance_45_min", "lsv_by_distance_60_min",
  232. "avg_constrained_dist_total_miles",
  233. "avg_constrained_time_minutes", "avg_constrained_dist_over_55_mph_miles", "avg_constrained_dist_41_to_54_mph_miles",
  234. "avg_constrained_dist_31_to_40_mph_miles", "avg_constrained_dist_0_to_30_mph_miles", "avg_constrained_num_upc_over_55_mph_1_lane",
  235. "avg_constrained_num_upc_41_to_54_mph_1_lane", "avg_constrained_num_upc_31_to_40_mph_1_lane", "avg_constrained_num_upc_0_to_30_mph_1_lane",
  236. "avg_constrained_num_upc_over_55_mph_2_lanes", "avg_constrained_num_upc_41_to_54_mph_2_lanes",
  237. "avg_constrained_num_upc_31_to_40_mph_2_lanes", "avg_constrained_num_upc_0_to_30_mph_2_lanes",
  238. "avg_constrained_num_upc_over_55_mph_over_3_lanes", "avg_constrained_num_upc_41_to_54_mph_over_3_lanes",
  239. "avg_constrained_num_upc_31_to_40_mph_over_3_lanes", "avg_constrained_num_upc_0_to_30_mph_over_3_lanes",
  240. "avg_unconstrained_dist_total_miles", "avg_unconstrained_time_minutes", "avg_unconstrained_dist_over_55_mph_miles",
  241. "avg_unconstrained_dist_41_to_54_mph_miles", "avg_unconstrained_dist_31_to_40_mph_miles", "avg_unconstrained_dist_0_to_30_mph_miles"]
  242.  
  243.  
  244. class BigQuery():
  245. def __init__(self, output_dir):
  246. self.client = bigquery.Client()
  247. self.output_dir = output_dir
  248.  
  249. def query(self, output_table_name, is_fa):
  250. if not os.path.exists(self.output_dir):
  251. os.mkdir(self.output_dir)
  252. if is_fa:
  253. where_stmt = "poi_type='FoodAggregator'"
  254. fa_str = "fa"
  255. else:
  256. where_stmt = "poi_type!='FoodAggregator'"
  257. fa_str = "nonfa"
  258.  
  259. cur_date = datetime.datetime.today()
  260. file_name = output_table_name.replace("bizsim_output_", "bizsim_summary_{}{}_".format(cur_date.month, cur_date.day))
  261. file_name = file_name.replace("_false_", "_protected_" + fa_str)
  262. file_name = file_name.replace("_true_", "_unprotected_" + fa_str)
  263. file_name = file_name + '.csv'
  264.  
  265. full_path = os.path.join(output_dir, file_name)
  266. if os.path.exists(full_path):
  267. print "file exists: {}".format(full_path)
  268. exit(1)
  269.  
  270. fa_query_job = self.client.query(query_template.format(output_table_name, where_stmt))
  271. results = fa_query_job.result() # Waits for job to complete.
  272.  
  273. print "start processing ", full_path
  274. rows_to_write = []
  275. for row in results:
  276. rows_to_write.append(row)
  277.  
  278. print "start writing ", full_path
  279. with open(full_path, 'w+') as f:
  280. csv_writer = csv.writer(f)
  281. csv_writer.writerow(header)
  282. csv_writer.writerows(rows_to_write)
  283.  
  284.  
  285. if __name__ == '__main__':
  286. output_tables = [
  287. "bizsim_output_21_30_21_30_0_true_44eebece_abef_49e7_a424_c3af0a52694a",
  288. "bizsim_output_21_30_21_30_2_true_8454e8d6_d798_42c3_b2fe_e08c86e4b4cc",
  289. "bizsim_output_21_30_31_40_2_true_91e83f91_860d_4bdd_aae5_3a671571fc43",
  290. "bizsim_output_31_40_31_40_2_true_27df91f7_c733_41a5_bed4_d1bb9e0001c3",
  291. "bizsim_output_31_40_31_40_4_true_deb45752_4696_44da_a9cd_1e5b23155264",
  292. "bizsim_output_41_54_41_54_999_true_b66e8e4d_387a_463c_bb83_a9d73f105839",
  293. #
  294. # "bizsim_output_21_30_21_30_2_false_5ec30a97_5410_4e48_8cd2_ac60db9738c0",
  295. # "bizsim_output_21_30_31_40_2_false_a645b9cb_fd1b_4e99_8665_57071834a549",
  296. # "bizsim_output_31_40_31_40_2_false_0cc4b2aa_906b_4a7b_9c51_ff82a505159c",
  297. # "bizsim_output_31_40_31_40_4_false_771ea28f_3876_442e_b49f_7d44b39b38a5",
  298. # "bizsim_output_41_54_41_54_999_false_b31bb5f7_6bfe_4a5d_930c_1465f9f60334"
  299. ]
  300.  
  301. output_dir = "./outputs"
  302. bq = BigQuery(output_dir)
  303. for output_table in output_tables:
  304. for fa in [True, False]:
  305. bq.query(output_table, fa)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement