# Sitemap product image scraper

import xml.etree.ElementTree as ET
import random

def _child_text(parent, path, namespaces):
    """Return the text of the first child matching *path*, or None if absent."""
    node = parent.find(path, namespaces)
    return None if node is None else node.text


def extract_sitemap_data(xml_file_path, sample_size=30):
    """Return a random sample of product image entries from a sitemap.

    Each ``<url>`` element that carries an ``<image:image>`` child yields one
    ``(page_url, image_url, image_title)`` tuple, built from the first image
    element only. Entries without a page URL or an image URL are skipped.
    A missing ``<image:title>`` is reported as ``None`` rather than aborting
    the whole run.

    Args:
        xml_file_path: Source handed to ``ET.parse`` (a file name or a file
            object containing the sitemap XML).
        sample_size: Maximum number of entries to return. Defaults to 30.

    Returns:
        A list of at most ``sample_size`` tuples in random order; all
        entries are returned when fewer are available.

    Raises:
        xml.etree.ElementTree.ParseError: If the XML is malformed.
        OSError: If the file cannot be opened.
        ValueError: If ``sample_size`` is negative.
    """
    root = ET.parse(xml_file_path).getroot()

    # Prefix -> namespace URI map used by every find()/findall() below.
    namespaces = {
        'ns': 'http://www.sitemaps.org/schemas/sitemap/0.9',
        'image': 'http://www.google.com/schemas/sitemap-image/1.1'
    }

    products = []
    for url in root.findall('ns:url', namespaces):
        image = url.find('image:image', namespaces)
        if image is None:
            # Page without an image entry: nothing to report.
            continue

        loc = _child_text(url, 'ns:loc', namespaces)
        image_loc = _child_text(image, 'image:loc', namespaces)
        if loc is None or image_loc is None:
            # Without both URLs the entry is unusable; skip instead of
            # crashing on a missing element.
            continue

        # The title may be absent; keep the entry with a None title.
        image_title = _child_text(image, 'image:title', namespaces)
        products.append((loc, image_loc, image_title))

    # random.sample rejects a sample larger than the population, so clamp.
    return random.sample(products, min(sample_size, len(products)))

def main():
    """Print a random selection of product entries from the sitemap file."""
    # Replace with your XML file path
    xml_file_path = 'sitemap_products_1.xml'

    # One record per sampled product: page URL, image URL, image title.
    for page_url, image_url, title in extract_sitemap_data(xml_file_path):
        print(f"URL: {page_url}\nImage URL: {image_url}\nTitle: {title}\n")

# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()