Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import time
from urllib.parse import parse_qs, unquote, urlparse

import pandas as pd
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from pytrends.request import TrendReq
def get_trend_data(url):
    """Fetch Google Trends interest-over-time data for each query in *url*.

    *url* is a Google Trends explore link whose query string carries
    comma-separated ``date``, ``geo`` and ``q`` values (one entry per
    query).  One pytrends payload is built per (date_range, geo, query)
    triple and fetched with exponential backoff on HTTP 429 rate limits.

    Returns:
        list of pandas DataFrames, one per query, in URL order.

    Raises:
        ValueError: if *url* has no query string.
        KeyError: if the ``date`` or ``q`` parameter is missing.
        Exception: if a query still yields no data after all retries,
            or pytrends fails with a non-429 error.
    """
    # Robust URL parsing: urlparse tolerates URLs without '?', and
    # parse_qs handles '=' inside values and does the %-decoding
    # (e.g. %20 -> space) that the old manual split did by hand.
    parsed = urlparse(url)
    if not parsed.query:
        raise ValueError(f"URL has no query string: {url!r}")
    params = {key: values[0] for key, values in parse_qs(parsed.query).items()}

    # Split the multi-value parameters; 'geo' may be absent entirely.
    date_ranges = params['date'].split(',')
    geos = params.get('geo', '').split(',')
    queries = params['q'].split(',')

    # Pad geos so zip() below does not silently drop trailing queries.
    if len(geos) < len(date_ranges):
        geos.extend([''] * (len(date_ranges) - len(geos)))

    # Initialize pytrends with connect/read timeouts.
    pytrends = TrendReq(hl=params.get('hl', 'en'), tz=360, timeout=(10, 25))

    trend_data_list = []
    for date_range, geo, query in zip(date_ranges, geos, queries):
        pytrends.build_payload(kw_list=[query], geo=geo, timeframe=date_range)

        # Retry with exponential backoff on rate limiting (HTTP 429).
        trend_data = None
        retries = 5
        for attempt in range(retries):
            try:
                trend_data = pytrends.interest_over_time()
                if not trend_data.empty:
                    break
            except Exception as e:
                if "Google returned a response with code 429" in str(e):
                    wait_time = (2 ** attempt) + (0.1 * attempt)
                    print(f"Rate limit exceeded. Waiting for {wait_time} seconds before retrying...")
                    time.sleep(wait_time)
                else:
                    raise  # bare raise preserves the original traceback

        if trend_data is None or trend_data.empty:
            raise Exception("Failed to retrieve data after several retries.")
        trend_data_list.append(trend_data)

    return trend_data_list
def update_excel_with_trend_data(filename):
    """Populate every sheet of the workbook *filename* with Trends data.

    Each worksheet is expected to hold a Google Trends explore URL in
    cell B1; sheets with an empty B1 are skipped.  For every query in
    that URL a (date, interest) column pair is written starting at
    row 4, with successive queries laid out left to right.  The
    workbook is saved back to the same path.
    """
    wb = load_workbook(filename)
    for sheet in wb.sheetnames:
        ws = wb[sheet]
        url = ws['B1'].value
        if not url:
            continue  # no URL on this sheet — nothing to fetch

        print(f"Processing sheet: {sheet}, URL: {url}")
        col = 1
        for frame in get_trend_data(url):
            # Promote the date index to a regular column so it is emitted.
            frame.reset_index(inplace=True)
            row_idx = 4
            for row in dataframe_to_rows(frame, index=False, header=True):
                ws.cell(row=row_idx, column=col, value=row[0])      # date
                ws.cell(row=row_idx, column=col + 1, value=row[1])  # interest
                row_idx += 1
            col += 2  # next query occupies the next column pair
    wb.save(filename)
def main():
    """Script entry point: refresh the hard-coded report workbook."""
    excel_file = "C:/Users/coryn/Desktop/py4e/Search Demand Index Report/SDIR_TAB_TEST.xlsx"
    update_excel_with_trend_data(excel_file)
    print(f"Google Trends data updated in {excel_file}")


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement