Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import csv
- from datetime import datetime, timedelta
- from google.cloud import bigquery
- def query_phone_contacts(start_date, end_date):
- """ Generate phone contacts query using BigQuery Standard SQL """
- return """
- SELECT
- PARSE_DATE('%Y%m%d', date) AS page_event_date,
- visitStartTime as timestamp,
- (SELECT value FROM UNNEST(h.customDimensions) WHERE index = 60) AS specialtyID,
- (SELECT value FROM UNNEST(h.customDimensions) WHERE index = 11) AS locationID,
- (SELECT value FROM UNNEST(h.customDimensions) WHERE index = 5) AS professionalID,
- (SELECT value FROM UNNEST(h.customDimensions) WHERE index = 42) AS session_id,
- (SELECT value FROM UNNEST(h.customDimensions) WHERE index = 61) AS persistent_session_id,
- (SELECT value FROM UNNEST(h.customDimensions) WHERE index = 9) AS sales_region_id,
- COUNT(DISTINCT(SELECT value FROM UNNEST(h.customDimensions) WHERE index = 134)) AS phoneAction,
- COUNT(*) AS altphoneAction
- FROM
- `seventh-circle-461.75615261.ga_sessions_*`,
- UNNEST(hits) AS h
- WHERE
- _TABLE_SUFFIX BETWEEN '{start_date}' AND '{end_date}'
- AND (
- (SELECT value FROM UNNEST(h.customDimensions) WHERE index = 97) like "%ad_click_type=phone%"
- OR
- (SELECT value FROM UNNEST(h.customDimensions) WHERE index = 97) like "%tel:%"
- )
- GROUP BY
- page_event_date,
- timestamp,
- specialtyID,
- locationID,
- professionalID,
- session_id,
- persistent_session_id,
- sales_region_id
- """.format(start_date=start_date, end_date=end_date)
- def get_phone_contacts(bq_client, query):
- """ Fetch phone contacts from GA sessions in BigQuery """
- query_job = bq_client.query(query, location='US')
- return query_job.result()
- def write_phone_contacts(data, filename):
- """ Write BigQuery results to CSV file """
- with open(filename, 'w') as fh:
- csv_writer = csv.writer(fh)
- for row in data:
- csv_writer.writerow(row.values())
- print('Total rows written to {}: {}'.format(filename, data.total_rows))
- def main():
- # set variables
- bq_client = bigquery.Client.from_service_account_json('service_account.json')
- yesterday = datetime.today() - timedelta(days=1)
- start_date = yesterday.strftime('%Y%m%d')
- # start_date = '20190315'
- # end_date = '20190315'
- end_date = yesterday.strftime('%Y%m%d')
- # function calls
- phone_contacts_query = query_phone_contacts(start_date, end_date)
- phone_contacts_data = get_phone_contacts(bq_client, phone_contacts_query)
- write_phone_contacts(phone_contacts_data, filename='ga_sessions_phone_contacts.csv')
- if __name__ == '__main__':
- main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement