Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import xml.etree.ElementTree as ET
- import random
def extract_sitemap_data(xml_file_path, sample_size=30):
    """Return a random sample of product entries from a sitemap XML file.

    Each ``<url>`` element that carries an ``<image:image>`` child yields one
    ``(page_url, image_url, image_title)`` tuple; only the first image of a
    URL is considered. URLs without an image are ignored.

    Args:
        xml_file_path: Source handed to ``ET.parse`` (a file name or an open
            file object containing the sitemap XML).
        sample_size: Maximum number of entries to return. Defaults to 30.

    Returns:
        A list of at most ``sample_size`` tuples in random order. The title
        is ``None`` when the image has no ``<image:title>`` element.

    Raises:
        xml.etree.ElementTree.ParseError: If the XML is malformed.
        OSError: If the file cannot be opened.
        ValueError: If ``sample_size`` is negative.
    """
    root = ET.parse(xml_file_path).getroot()

    # Sitemap elements are namespace-qualified, so every lookup needs this
    # prefix-to-URI mapping.
    namespaces = {
        'ns': 'http://www.sitemaps.org/schemas/sitemap/0.9',
        'image': 'http://www.google.com/schemas/sitemap-image/1.1',
    }

    products = []
    for url in root.findall('ns:url', namespaces):
        image = url.find('image:image', namespaces)
        if image is None:
            continue

        loc_element = url.find('ns:loc', namespaces)
        image_loc_element = image.find('image:loc', namespaces)
        # Skip entries missing a required element instead of crashing on
        # ``None.text`` and losing the whole run.
        if loc_element is None or image_loc_element is None:
            continue

        # The title is optional, so its absence must not raise.
        title_element = image.find('image:title', namespaces)
        image_title = title_element.text if title_element is not None else None

        products.append((loc_element.text, image_loc_element.text, image_title))

    # Never ask for more entries than exist; random.sample would raise.
    return random.sample(products, min(sample_size, len(products)))
def main(xml_file_path='sitemap_products_1.xml'):
    """Print a random selection of product entries from a sitemap file.

    Args:
        xml_file_path: Sitemap XML file to read. Defaults to
            ``'sitemap_products_1.xml'``; pass another path to read a
            different file.
    """
    # Each entry is a (page URL, image URL, image title) tuple.
    for url, image_url, title in extract_sitemap_data(xml_file_path):
        print(f"URL: {url}\nImage URL: {image_url}\nTitle: {title}\n")


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement