Advertisement
makispaiktis

Python-based SQL queries - Group by, order by -Exercise 4

Jun 8th, 2023 (edited)
868
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.31 KB | None | 0 0
  1. # *********************************************************************
  2. # *********************************************************************
  3. # Step 1 - Setup
  4. # *********************************************************************
  5. # *********************************************************************
  6.  
  7. from google.cloud import bigquery
  8.  
  9. # Create a "Client" object
  10. client = bigquery.Client()
  11.  
  12. # Construct a reference to the "world_bank_intl_education" dataset
  13. dataset_ref = client.dataset("world_bank_intl_education", project="bigquery-public-data")
  14.  
  15. # API request - fetch the dataset
  16. dataset = client.get_dataset(dataset_ref)
  17.  
  18. # Construct a reference to the "international_education" table
  19. table_ref = dataset_ref.table("international_education")
  20.  
  21. # API request - fetch the table
  22. table = client.get_table(table_ref)
  23.  
  24. # Preview the first five lines of the "international_education" table
  25. client.list_rows(table, max_results=5).to_dataframe()
  26.  
  27.  
  28.  
  29. # *********************************************************************
  30. # *********************************************************************
  31. # Step 2 - Group by, order by
  32. # *********************************************************************
  33. # *********************************************************************
  34.  
  35.  
  36. country_spend_pct_query = """
  37.                          SELECT country_name, AVG(value) AS avg_ed_spending_pct
  38.                          FROM `bigquery-public-data.world_bank_intl_education.international_education`
  39.                          WHERE indicator_code = 'SE.XPD.TOTL.GD.ZS' AND year>=2010 and year<=2017
  40.                          GROUP BY country_name
  41.                          ORDER BY avg_ed_spending_pct DESC
  42.                          """
  43.  
  44. # Set up the query (cancel the query if it would use too much of
  45. # your quota, with the limit set to 1 GB)
  46. safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
  47. country_spend_pct_query_job = client.query(country_spend_pct_query, job_config=safe_config)
  48.  
  49. # API request - run the query, and return a pandas DataFrame
  50. country_spending_results = country_spend_pct_query_job.to_dataframe()
  51. # View top few rows of results
  52. print(country_spending_results.head())
  53.  
  54.  
  55. # *********************************************************************
  56. # *********************************************************************
  57. # Step 3 - Group by a combination of columns (indicator_code, indicator_name)
  58. # *********************************************************************
  59. # *********************************************************************
  60.  
  61. code_count_query = """
  62.                   SELECT indicator_code, indicator_name, COUNT(1) AS num_rows
  63.                   FROM `bigquery-public-data.world_bank_intl_education.international_education`
  64.                   WHERE year = 2016
  65.                   GROUP BY indicator_name, indicator_code
  66.                   HAVING COUNT(1) >= 175
  67.                   ORDER BY COUNT(1) DESC
  68.                   """
  69.  
  70. # Set up the query
  71. safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
  72. code_count_query_job = client.query(code_count_query, job_config=safe_config)
  73.  
  74. # API request - run the query, and return a pandas DataFrame
  75. code_count_results = code_count_query_job.to_dataframe()
  76. # View top few rows of results
  77. print(code_count_results.head())
  78.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement