Advertisement
Guest User

Untitled

a guest
Jun 7th, 2024
96
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.24 KB | None | 0 0
  1. from urllib.parse import unquote
  2. from pytrends.request import TrendReq
  3. import pandas as pd
  4. from openpyxl import load_workbook
  5. from openpyxl.utils.dataframe import dataframe_to_rows
  6. import time
  7.  
  8. def get_trend_data(url):
  9. # Parse URL parameters
  10. params = {}
  11. url_parts = url.split('?')[1].split('&')
  12. for part in url_parts:
  13. key, value = part.split('=')
  14. params[key] = unquote(value) # Decode URL encoding, including %20 to spaces
  15.  
  16. # Split multiple values for date, geo, and q
  17. date_ranges = params['date'].split(',')
  18. geos = params.get('geo', '').split(',') # Handle missing 'geo' parameter
  19. queries = params['q'].split(',')
  20.  
  21. # Ensure the geos list has the same length as date_ranges and queries
  22. if len(geos) < len(date_ranges):
  23. geos.extend([''] * (len(date_ranges) - len(geos)))
  24.  
  25. # Initialize pytrends with a timeout
  26. pytrends = TrendReq(hl=params.get('hl', 'en'), tz=360, timeout=(10, 25))
  27.  
  28. # Store trend data for each query
  29. trend_data_list = []
  30.  
  31. for date_range, geo, query in zip(date_ranges, geos, queries):
  32. # Build payload for each query
  33. pytrends.build_payload(kw_list=[query], geo=geo, timeframe=date_range)
  34.  
  35. # Get trend data with retry mechanism
  36. trend_data = None
  37. retries = 5
  38. for i in range(retries):
  39. try:
  40. trend_data = pytrends.interest_over_time()
  41. if not trend_data.empty:
  42. break
  43. except Exception as e:
  44. if "Google returned a response with code 429" in str(e):
  45. wait_time = (2 ** i) + (0.1 * i) # Exponential backoff
  46. print(f"Rate limit exceeded. Waiting for {wait_time} seconds before retrying...")
  47. time.sleep(wait_time)
  48. else:
  49. raise e
  50.  
  51. if trend_data is None or trend_data.empty:
  52. raise Exception("Failed to retrieve data after several retries.")
  53.  
  54. trend_data_list.append(trend_data)
  55.  
  56. return trend_data_list
  57.  
  58. def update_excel_with_trend_data(filename):
  59. wb = load_workbook(filename)
  60. for sheet in wb.sheetnames:
  61. ws = wb[sheet]
  62.  
  63. url = ws['B1'].value
  64.  
  65. if url:
  66. print(f"Processing sheet: {sheet}, URL: {url}")
  67. trend_data_list = get_trend_data(url)
  68.  
  69. start_col = 1
  70.  
  71. for i, trend_data in enumerate(trend_data_list):
  72. trend_data.reset_index(inplace=True) # Reset index to include the date in the DataFrame
  73.  
  74. # Write date and interest data to the worksheet
  75. for r_idx, row in enumerate(dataframe_to_rows(trend_data, index=False, header=True), start=4):
  76. ws.cell(row=r_idx, column=start_col, value=row[0]) # Date column
  77. ws.cell(row=r_idx, column=start_col + 1, value=row[1]) # Interest column
  78.  
  79. # Adjust start_col for next set of data
  80. start_col += 2
  81.  
  82. wb.save(filename)
  83.  
  84. if __name__ == "__main__":
  85. excel_file = "C:/Users/coryn/Desktop/py4e/Search Demand Index Report/SDIR_TAB_TEST.xlsx"
  86. update_excel_with_trend_data(excel_file)
  87. print(f"Google Trends data updated in {excel_file}")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement