Advertisement
Guest User

Untitled

a guest
Oct 23rd, 2019
107
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.29 KB | None | 0 0
  1. # Copyright 2019 Google LLC.
  2. # SPDX-License-Identifier: Apache-2.0
  3.  
  4. WITH
  5.  
  6. # Generate a sample time series from the taxi trips dataset.
  7. sample_time_series AS (
  8. SELECT
  9. SUBSTR(taxi_id, 16, 24) AS taxi_id, # Truncate for readability.
  10. trip_start_timestamp,
  11. trip_seconds
  12. FROM
  13. `bigquery-public-data.chicago_taxi_trips.taxi_trips`
  14. WHERE
  15. SUBSTR(taxi_id, 0, 2) = 'cc' # Downsample data based on hash
  16. ORDER BY
  17. taxi_id,
  18. trip_start_timestamp
  19. )
  20.  
  21. # Predict the trip seconds for the next ride.
  22. # Note that time intervals here are not regular (ex. hourly).
  23. # Some cleanup still required, such as removing next_trip_seconds nulls
  24. # and sequences that are too short.
  25. # Can create a second window for a different level of aggregation for
  26. # time series history.
  27. SELECT
  28. taxi_id,
  29. trip_start_timestamp,
  30. trip_seconds,
  31. LEAD(trip_seconds, 1) OVER (
  32. PARTITION BY
  33. taxi_id
  34. ORDER BY
  35. trip_start_timestamp
  36. ) AS next_trip_seconds,
  37. # ARRAY_AGG cannot aggreggate nulls.
  38. ARRAY_AGG(IFNULL(trip_seconds, 0)) OVER (
  39. PARTITION BY
  40. taxi_id
  41. ORDER BY
  42. # AutoML Tables will treat as an ordered array
  43. # OFFSET(0) in array should be the oldest element
  44. trip_start_timestamp
  45. ROWS BETWEEN 10 PRECEDING AND CURRENT ROW
  46. ) AS trip_history
  47. FROM
  48. sample_time_series
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement