Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Copyright 2019 Google LLC.
- # SPDX-License-Identifier: Apache-2.0
- WITH
- # Generate a sample time series from the taxi trips dataset.
- sample_time_series AS (
- SELECT
- SUBSTR(taxi_id, 16, 24) AS taxi_id, # Truncate for readability.
- trip_start_timestamp,
- trip_seconds
- FROM
- `bigquery-public-data.chicago_taxi_trips.taxi_trips`
- WHERE
- SUBSTR(taxi_id, 0, 2) = 'cc' # Downsample data based on hash
- ORDER BY
- taxi_id,
- trip_start_timestamp
- )
- # Predict the trip seconds for the next ride.
- # Note that time intervals here are not regular (ex. hourly).
- # Some cleanup still required, such as removing next_trip_seconds nulls
- # and sequences that are too short.
- # Can create a second window for a different level of aggregation for
- # time series history.
- SELECT
- taxi_id,
- trip_start_timestamp,
- trip_seconds,
- LEAD(trip_seconds, 1) OVER (
- PARTITION BY
- taxi_id
- ORDER BY
- trip_start_timestamp
- ) AS next_trip_seconds,
- # ARRAY_AGG cannot aggreggate nulls.
- ARRAY_AGG(IFNULL(trip_seconds, 0)) OVER (
- PARTITION BY
- taxi_id
- ORDER BY
- # AutoML Tables will treat as an ordered array
- # OFFSET(0) in array should be the oldest element
- trip_start_timestamp
- ROWS BETWEEN 10 PRECEDING AND CURRENT ROW
- ) AS trip_history
- FROM
- sample_time_series
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement