Untitled

#!/usr/bin/env python3
import argparse
import glob
import json
import os
import re
import sys

import pandas as pd


# --- Colour utilities -------------------------------------------------------

CSS_HEX = {
    'black':'#000000','white':'#000000','red':'#ff0000','green':'#008000',
    'blue':'#0000ff','yellow':'#ffff00','gray':'#808080','grey':'#808080',
    'orange':'#ffa500','pink':'#ffc0cb','purple':'#800080','beige':'#f5f5dc',
    'mint':'#98ff98','chocolate':'#d2691e','cherry':'#de3163','lemon':'#fff700',
    'cyan':'#00ffff','gold':'#ffd700','silver':'#c0c0c0','sky':'#87ceeb',
    'burgundy':'#800020','cream':'#fffdd0','olive':'#808000','teal':'#008080',
    'maroon':'#800000','navy':'#000080'
}

def hex_to_rgb(h: str) -> tuple[int,int,int]:
    h = h.lstrip('#')
    return tuple(int(h[i:i+2], 16) for i in (0,2,4))

def contrast_color(bg_hex: str, fg_hex: str) -> str:
    bg = hex_to_rgb(bg_hex); fg = hex_to_rgb(fg_hex)
    if sum((bg[i]-fg[i])**2 for i in range(3)) < 10000:
        return '#000000'
    return fg_hex

def get_css_color(name: str) -> str:
    return CSS_HEX.get(name.lower(), '#000000')


# --- HTML parsing -----------------------------------------------------------

def extract_store_name_from_url(url: str) -> str:
    # This is a simplified extraction. A more robust solution might involve
    # parsing the URL and extracting the domain or a specific path segment.
    # For now, let's try to get a recognizable part of the URL.
    match = re.search(r'ebay\.com\.au/str/([^/?]+)', url)
    if match:
        return match.group(1).replace('-', ' ').title()
    return url # Fallback to full URL if store name cannot be extracted

def extract_page_url(html_path: str) -> str:
    txt = open(html_path, encoding='utf-8', errors='ignore').read()
    m = re.search(r'<link[^>]+rel=["\']canonical["\'][^>]+href=["\']([^"\']+)', txt, re.IGNORECASE)
    if m: return m.group(1)
    m = re.search(r'<meta[^>]+property=["\']og:url["\'][^>]+content=["\']([^"\']+)', txt, re.IGNORECASE)
    if m: return m.group(1)
    raise RuntimeError("Could not find page URL in HTML")

def _extract_from_html(html: str) -> dict | None:
    marker = 'VariationViewModel'
    pos = html.find(marker)
    if pos < 0: return None
    start = html.rfind('{', 0, pos)
    if start < 0: return None
    depth = 0
    for i, ch in enumerate(html[start:], start):
        if ch == '{': depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                try:
                    return json.loads(html[start:i+1])
                except json.JSONDecodeError:
                    return None
    return None

def fetch_variation_model(path: str) -> dict:
    if not os.path.isfile(path):
        raise RuntimeError(f"Local file not found: {path}")
    raw = open(path, encoding='utf-8', errors='ignore').read()
    vm = _extract_from_html(raw)
    if vm is None:
        raise RuntimeError("VariationViewModel not found in HTML")
    return vm


# --- DataFrame construction ------------------------------------------------

def build_dataframe(vm: dict) -> pd.DataFrame:
    menus = vm.get('selectMenus', [])
    labels = ['Filament Type', 'Colour']
    if len(menus) >= 2:
        def detect(lbl: str) -> str|None:
            ll = lbl.lower()
            if any(k in ll for k in ('filament','product','material','type')):
                return 'Filament Type'
            if any(k in ll for k in ('color','colour')):
                return 'Colour'
            return None
        l0 = detect(menus[0].get('displayLabel',''))
        l1 = detect(menus[1].get('displayLabel',''))
        if l0 and l1 and l0 != l1:
            labels = [l0, l1]

    rows = []
    menu_map = vm['menuItemMap']
    for combo, vid in vm['variationCombinations'].items():
        if '_' not in combo: continue
        id0, id1 = combo.split('_',1)
        val0 = menu_map.get(id0,{}).get('valueName','')
        val1 = menu_map.get(id1,{}).get('valueName','')
        var = vm['variationsMap'].get(str(vid),{})
        price = var.get('binModel',{}).get('price',{}).get('value',{}).get('value', 0)
        oos   = var.get('quantity',{}).get('outOfStock', False)
        rows.append({
            labels[0]: val0,
            labels[1]: val1,
            'Price (AUD)': price,
            'Out of Stock': 'Yes' if oos else 'No',
            'pricing per spool if you buy 10': price * 0.6,
            'with CBTJ20 20% discount': price * 0.6 * (1 - 0.20)
        })

    df = pd.DataFrame(rows)
    if 'Filament Type' in df.columns and 'Colour' in df.columns:
        rest = [c for c in df.columns if c not in ('Filament Type','Colour')]
        df = df[['Filament Type','Colour'] + rest]
    return df.sort_values(['Filament Type','Colour']).reset_index(drop=True)


# --- Fill colours ----------------------------------------------------------

def get_fill_color(name: str) -> str:
    n = name.lower()
    for base,col in [
        ('clear yellow','#ffffcc'),('clear red','#ffcccc'),
        ('clear green','#ccffcc'),('clear blue','#ccccff'),
        ('clear orange','#ffe5cc')
    ]:
        if base in n: return col
    if re.search(r'\bclear\b', n) or 'transparent' in n: return '#ffffff'
    if 'wood' in n: return '#f9f5eb'
    m = re.match(r'Dual-Color\s+(.+)', name, re.IGNORECASE)
    if m: return get_fill_color(re.split(r'\s*&\s*', m.group(1))[0])
    m = re.match(r'Tri-Color\s+(.+)', name, re.IGNORECASE)
    if m: return get_fill_color(re.split(r'\s*&\s*', m.group(1))[0])
    if 'rainbow' in n: return '#f5e0ff'
    for kw,col in [
        ('beige','#fff8e5'),('chocolate','#ffe4d1'),('cherry','#ffe5e5'),
        ('lemon','#ffffe5'),('cyan','#e5ffff'),('mint','#e5ffe5'),
        ('olive','#f0f0e0'),('burgundy','#f5e0e5'),('cream','#fffcf5'),
        ('teal','#e0f5f5'),('maroon','#f5e0e0'),('navy','#e0e5f5'),
        ('gold','#fff5cc'),('pink','#ffccdd'),('purple','#e5ccff'),
        ('orange','#ffe5cc'),('red','#ffcccc'),('green','#ccffcc'),
        ('blue','#ccccff'),('grey','#dddddd'),('gray','#dddddd')
    ]:
        if kw in n: return col
    if 'white' in n: return '#ffffff'
    if 'black' in n: return '#eeeeee'
    return '#f0f0f0'


# --- Excel output ----------------------------------------------------------

def process_sheet(writer, df: pd.DataFrame, html_file: str, sheet_name: str):
    import xlsxwriter
    page_url = extract_page_url(html_file)
    df.to_excel(writer, sheet_name=sheet_name, startrow=1, index=False)
    wb, ws = writer.book, writer.sheets[sheet_name]
    nr, nc = df.shape

    # hyperlink at A1
    link_fmt = wb.add_format({'font_color':'blue','underline':True})
    ws.write_url(0, 0, page_url, link_fmt, string=page_url)

    # autofilter
    ws.autofilter(1, 0, nr+1, nc-1)

    # currency formatting
    money_fmt = wb.add_format({'num_format':'$0.00'})
    for col in ['Price (AUD)', 'pricing per spool if you buy 10', 'with CBTJ20 20% discount']:
        idx = df.columns.get_loc(col)
        letter = xlsxwriter.utility.xl_col_to_name(idx)
        ws.set_column(f'{letter}:{letter}', None, money_fmt)

    # Colour column formatting
    cidx = df.columns.get_loc('Colour')
    for r, val in enumerate(df['Colour'], start=2):
        fill = get_fill_color(val)
        base_fmt = wb.add_format({'bg_color': fill})
        nl = val.lower()

        # Dual-Color
        m = re.match(r'(Dual-Color)\s+(.+)', val, re.IGNORECASE)
        if m:
            parts = re.split(r'\s*&\s*', m.group(2))
            fmt1 = wb.add_format({'font_color':contrast_color(fill,get_css_color(parts[0])),'bg_color':fill})
            fmt2 = wb.add_format({'font_color':contrast_color(fill,get_css_color(parts[1])),'bg_color':fill})
            ws.write_rich_string(r, cidx,
                                 fmt1, f"{m.group(1)} {parts[0]} & ",
                                 fmt2, parts[1],
                                 base_fmt)
            continue

        # Tri-Color
        m = re.match(r'(Tri-Color)\s+(.+)', val, re.IGNORECASE)
        if m:
            parts = re.split(r'\s*&\s*', m.group(2))
            fmts = [wb.add_format({'font_color':contrast_color(fill,get_css_color(p)),'bg_color':fill})
                    for p in parts]
            segments = [fmts[0], f"{m.group(1)} {parts[0]} & ",
                        fmts[1], f"{parts[1]} & ",
                        fmts[2], parts[2]]
            ws.write_rich_string(r, cidx, *segments, base_fmt)
            continue

        # Rainbow
        if 'rainbow' in nl:
            m = re.match(r'(.*?)(Rainbow\d*|Rainbow)(.*)', val, re.IGNORECASE)
            if m:
                prefix, core, suffix = m.groups()
                parts = []
                if prefix: parts.append(prefix)
                rainbow_cols = ['#FF0000','#FF7F00','#FFFF00','#00FF00','#0000FF','#4B0082','#8F00FF']
                for i, ch in enumerate(core):
                    fmt = wb.add_format({'font_color': contrast_color(fill,rainbow_cols[i % len(rainbow_cols)]),
                                         'bg_color': fill})
                    parts.extend([fmt, ch])
                if suffix: parts.append(suffix)
                ws.write_rich_string(r, cidx, *parts, base_fmt)
                continue

        # Default
        ws.write(r, cidx, val, base_fmt)

    # Out of Stock highlighting
    oidx = df.columns.get_loc('Out of Stock')
    red_fmt = writer.book.add_format({'bg_color':'#ffcccc','font_color':'#000000'})
    for r, v in enumerate(df['Out of Stock'], start=2):
        if str(v).strip().lower() == 'yes':
            ws.write(r, oidx, v, red_fmt)


def process_sheet_combined(writer, df: pd.DataFrame, sheet_name: str):
    import xlsxwriter
    df.to_excel(writer, sheet_name=sheet_name, startrow=1, index=False)
    wb, ws = writer.book, writer.sheets[sheet_name]
    nr, nc = df.shape

    # hyperlink for Store Link column
    link_fmt = wb.add_format({'font_color':'blue','underline':True})
    sidx = df.columns.get_loc('Store Link')
    sname_idx = df.columns.get_loc('Store Name') # Get index for the new 'Store Name' column
    for r, url_val in enumerate(df['Store Link'], start=2):
        store_name_display = df.iloc[r-2, sname_idx] # Get the store name from the DataFrame using iloc
        ws.write_url(r, sidx, url_val, link_fmt, string=store_name_display)

    # autofilter
    ws.autofilter(1, 0, nr+1, nc-1)

    # currency formatting
    money_fmt = wb.add_format({'num_format':'$0.00'})
    for col in ['Price (AUD)', 'pricing per spool if you buy 10', 'with CBTJ20 20% discount']:
        idx = df.columns.get_loc(col)
        letter = xlsxwriter.utility.xl_col_to_name(idx)
        ws.set_column(f'{letter}:{letter}', None, money_fmt)

    # Colour column formatting
    if 'Colour' in df.columns:
        cidx = df.columns.get_loc('Colour')
        for r, val in enumerate(df['Colour'], start=2):
            fill = get_fill_color(val)
            base_fmt = wb.add_format({'bg_color': fill})
            nl = val.lower()

            # Dual-Color
            m = re.match(r'(Dual-Color)\s+(.+)', val, re.IGNORECASE)
            if m:
                parts = re.split(r'\s*&\s*', m.group(2))
                fmt1 = wb.add_format({'font_color':contrast_color(fill,get_css_color(parts[0])),'bg_color':fill})
                fmt2 = wb.add_format({'font_color':contrast_color(fill,get_css_color(parts[1])),'bg_color':fill})
                ws.write_rich_string(r, cidx,
                                     fmt1, f"{m.group(1)} {parts[0]} & ",
                                     fmt2, parts[1],
                                     base_fmt)
                continue

            # Tri-Color
            m = re.match(r'(Tri-Color)\s+(.+)', val, re.IGNORECASE)
            if m:
                parts = re.split(r'\s*&\s*', m.group(2))
                fmts = [wb.add_format({'font_color':contrast_color(fill,get_css_color(p)),'bg_color':fill})
                        for p in parts]
                segments = [fmts[0], f"{m.group(1)} {parts[0]} & ",
                            fmts[1], f"{parts[1]} & ",
                            fmts[2], parts[2]]
                ws.write_rich_string(r, cidx, *segments, base_fmt)
                continue

            # Rainbow
            if 'rainbow' in nl:
                m = re.match(r'(.*?)(Rainbow\d*|Rainbow)(.*)', val, re.IGNORECASE)
                if m:
                    prefix, core, suffix = m.groups()
                    parts = []
                    if prefix: parts.append(prefix)
                    rainbow_cols = ['#FF0000','#FF7F00','#FFFF00','#00FF00','#0000FF','#4B0082','#8F00FF']
                    for i, ch in enumerate(core):
                        fmt = wb.add_format({'font_color': contrast_color(fill,rainbow_cols[i % len(rainbow_cols)]),
                                             'bg_color': fill})
                        parts.extend([fmt, ch])
                    if suffix: parts.append(suffix)
                    ws.write_rich_string(r, cidx, *parts, base_fmt)
                    continue

            # Default
            ws.write(r, cidx, val, base_fmt)

    # Out of Stock highlighting
    oidx = df.columns.get_loc('Out of Stock')
    red_fmt = writer.book.add_format({'bg_color':'#ffcccc','font_color':'#000000'})
    for r, v in enumerate(df['Out of Stock'], start=2):
        if str(v).strip().lower() == 'yes':
            ws.write(r, oidx, v, red_fmt)


def main():
    p = argparse.ArgumentParser(description="Extract eBay variation data → pastel-coloured Excel")
    p.add_argument('sources', nargs='*', help="HTML file(s) to process")
    p.add_argument('-all', action='store_true', help="Process all .html files in folder")
    p.add_argument('-o','--output', help="Output .xlsx filename (single-file mode)")
    args = p.parse_args()

    if args.all:
        html_files = glob.glob("*.html")
        if not html_files:
            print("No .html files found.", file=sys.stderr)
            sys.exit(1)
        combined_writer = pd.ExcelWriter("combined.xlsx", engine='xlsxwriter')
        all_dataframes = []
        individual_sheet_details = [] # To store (df, html_file, sheet_name) for later writing
    else:
        html_files = args.sources
        if not html_files:
            p.error("Specify HTML file(s) or use -all")

    for html in html_files:
        if not os.path.isfile(html):
            print(f"Skipping missing file: {html}", file=sys.stderr)
            continue

        base = os.path.splitext(os.path.basename(html))[0]
        out_file = f"{base}.xlsx" if args.all else (args.output or f"{base}.xlsx")

        try:
            vm = fetch_variation_model(html)
            df = build_dataframe(vm)
            page_url = extract_page_url(html) # Extract URL here

            # write individual workbook if not in -all mode
            if not args.all:
                writer = pd.ExcelWriter(out_file, engine='xlsxwriter')
                process_sheet(writer, df, html, sheet_name="Variations")
                writer.close()
                print(f"Wrote {len(df)} rows to {out_file}")

            # Collect data for combined workbook
            if args.all:
                df_with_link = df.copy()
                df_with_link.insert(0, 'Store Link', page_url)
                df_with_link.insert(1, 'Store Name', base.replace('_', ' ').title()) # Use base filename as store name
                all_dataframes.append(df_with_link)
                individual_sheet_details.append((df, html, base[:31])) # Store for later

        except Exception as e:
            print(f"Error processing {html}: {e}", file=sys.stderr)

    if args.all:
        if all_dataframes:
            combined_df = pd.concat(all_dataframes, ignore_index=True)
            # Write the combined dataframe to the first sheet
            process_sheet_combined(combined_writer, combined_df, sheet_name="All Filaments")
            print(f"Wrote {len(combined_df)} rows to 'All Filaments' sheet in combined.xlsx")

            # Now write individual sheets after the combined sheet
            for df_ind, html_ind, sheet_name_ind in individual_sheet_details:
                try:
                    process_sheet(combined_writer, df_ind, html_ind, sheet_name=sheet_name_ind)
                except Exception as e:
                    print(f"Error processing {html_ind} for combined workbook (individual sheet): {e}", file=sys.stderr)

        combined_writer.close()
        print("Wrote combined workbook to combined.xlsx")


if __name__ == '__main__':
    main()