Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import time
from urllib.parse import parse_qs, unquote, urlparse

import pandas as pd
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from pytrends.request import TrendReq
def get_trend_data(url):
    """Fetch Google Trends interest-over-time data for each query in *url*.

    *url* is a Google Trends explore link whose query string carries
    comma-separated ``date``, ``geo`` and ``q`` values (one entry per
    query).  One pytrends payload is built per (date_range, geo, query)
    triple and fetched with exponential backoff on HTTP 429 rate limits.

    Returns:
        list of pandas DataFrames, one per query, in URL order.

    Raises:
        ValueError: if *url* has no query string.
        KeyError: if the ``date`` or ``q`` parameter is missing.
        Exception: if a query still yields no data after all retries,
            or pytrends fails with a non-429 error.
    """
    # Robust URL parsing: urlparse tolerates URLs without '?', and
    # parse_qs handles '=' inside values and does the %-decoding
    # (e.g. %20 -> space) that the old manual split did by hand.
    parsed = urlparse(url)
    if not parsed.query:
        raise ValueError(f"URL has no query string: {url!r}")
    params = {key: values[0] for key, values in parse_qs(parsed.query).items()}

    # Split the multi-value parameters; 'geo' may be absent entirely.
    date_ranges = params['date'].split(',')
    geos = params.get('geo', '').split(',')
    queries = params['q'].split(',')

    # Pad geos so zip() below does not silently drop trailing queries.
    if len(geos) < len(date_ranges):
        geos.extend([''] * (len(date_ranges) - len(geos)))

    # Initialize pytrends with connect/read timeouts.
    pytrends = TrendReq(hl=params.get('hl', 'en'), tz=360, timeout=(10, 25))

    trend_data_list = []
    for date_range, geo, query in zip(date_ranges, geos, queries):
        pytrends.build_payload(kw_list=[query], geo=geo, timeframe=date_range)

        # Retry with exponential backoff on rate limiting (HTTP 429).
        trend_data = None
        retries = 5
        for attempt in range(retries):
            try:
                trend_data = pytrends.interest_over_time()
                if not trend_data.empty:
                    break
            except Exception as e:
                if "Google returned a response with code 429" in str(e):
                    wait_time = (2 ** attempt) + (0.1 * attempt)
                    print(f"Rate limit exceeded. Waiting for {wait_time} seconds before retrying...")
                    time.sleep(wait_time)
                else:
                    raise  # bare raise preserves the original traceback

        if trend_data is None or trend_data.empty:
            raise Exception("Failed to retrieve data after several retries.")
        trend_data_list.append(trend_data)

    return trend_data_list
def update_excel_with_trend_data(filename):
    """Populate every sheet of the workbook *filename* with Trends data.

    Each worksheet is expected to hold a Google Trends explore URL in
    cell B1; sheets with an empty B1 are skipped.  For every query in
    that URL a (date, interest) column pair is written starting at
    row 4, with successive queries laid out left to right.  The
    workbook is saved back to the same path.
    """
    wb = load_workbook(filename)
    for sheet in wb.sheetnames:
        ws = wb[sheet]
        url = ws['B1'].value
        if not url:
            continue  # no URL on this sheet — nothing to fetch

        print(f"Processing sheet: {sheet}, URL: {url}")
        col = 1
        for frame in get_trend_data(url):
            # Promote the date index to a regular column so it is emitted.
            frame.reset_index(inplace=True)
            row_idx = 4
            for row in dataframe_to_rows(frame, index=False, header=True):
                ws.cell(row=row_idx, column=col, value=row[0])      # date
                ws.cell(row=row_idx, column=col + 1, value=row[1])  # interest
                row_idx += 1
            col += 2  # next query occupies the next column pair
    wb.save(filename)
def main():
    """Script entry point: refresh the hard-coded report workbook."""
    excel_file = "C:/Users/coryn/Desktop/py4e/Search Demand Index Report/SDIR_TAB_TEST.xlsx"
    update_excel_with_trend_data(excel_file)
    print(f"Google Trends data updated in {excel_file}")


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement