Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Copy selected page data (the $item_id comment marker, <title>, canonical
# link, meta description and the article body between the ARTICOL START/FINAL
# comment markers) from new-file.html into old-file.html.
#
# Both `old_file_content` (str) and `old_soup` (parsed tree) hold the complete,
# fully updated document when this section finishes, so either one can be
# written back to disk by the save step that follows.
import re

from bs4 import BeautifulSoup

# Open and read the contents of the HTML files.
with open('c:/Folder7/new-file.html', 'r', encoding='utf-8') as file:
    new_file_content = file.read()
with open('c:/Folder7/old-file.html', 'r', encoding='utf-8') as file:
    old_file_content = file.read()

# Create the BeautifulSoup objects.
old_soup = BeautifulSoup(old_file_content, 'html.parser')
new_soup = BeautifulSoup(new_file_content, 'html.parser')

# Extract the required data from new_file.html.
# Raw strings keep `\$` and `\d` as regex escapes rather than (invalid)
# string-literal escapes.
item_id_match = re.search(r'<!-- \$item_id = (\d+) ;', new_file_content)
item_id = item_id_match.group(1) if item_id_match else None

# Take the title as a plain str so we never move a node out of new_soup.
title = None
if new_soup.title is not None and new_soup.title.string is not None:
    title = str(new_soup.title.string)

# .get() returns None for a missing attribute instead of raising KeyError.
canonical_link_element = new_soup.find("link", {"rel": "canonical"})
canonical_link = canonical_link_element.get('href') if canonical_link_element else None
meta_description_element = new_soup.find("meta", {"name": "description"})
meta_description = meta_description_element.get('content') if meta_description_element else None

article_match = re.search(r'<!-- ARTICOL START -->(.*?)<!-- ARTICOL FINAL -->', new_file_content, re.DOTALL)
article = article_match.group(1) if article_match else None

# Replace the data in old_file.html.
# Step 1: tag-level edits on the parsed tree.
soup_changed = False
if title and old_soup.title is not None:
    # Assigning .string also works when the old <title> is empty or has
    # child tags, where `.string` would be None.
    old_soup.title.string = title
    soup_changed = True
if canonical_link:
    canonical_link_element_old = old_soup.find("link", {"rel": "canonical"})
    if canonical_link_element_old:
        canonical_link_element_old['href'] = canonical_link
        soup_changed = True
if meta_description:
    meta_description_element_old = old_soup.find("meta", {"name": "description"})
    if meta_description_element_old:
        meta_description_element_old['content'] = meta_description
        soup_changed = True

# Step 2: carry the tree edits over into the text form, so the regex edits
# below are applied on top of them instead of on the stale original text.
# Skipped when nothing changed, to leave the original markup untouched.
if soup_changed:
    old_file_content = str(old_soup)

# Step 3: comment-marker edits on the text. A function is used as the
# replacement so backslashes in the inserted text are taken literally and
# not interpreted as re.sub template escapes (e.g. `\1`, `\g<...>`).
if item_id:
    old_file_content = re.sub(
        r'<!-- \$item_id = \d+ ;',
        lambda _match: f'<!-- $item_id = {item_id} ;',
        old_file_content,
    )
if article:
    old_file_content = re.sub(
        r'<!-- ARTICOL START -->(.*?)<!-- ARTICOL FINAL -->',
        lambda _match: f'<!-- ARTICOL START -->{article}<!-- ARTICOL FINAL -->',
        old_file_content,
        flags=re.DOTALL,
    )

# Step 4: re-parse so old_soup also reflects the text-level edits.
old_soup = BeautifulSoup(old_file_content, 'html.parser')

# Save the changes to old_file.html.
- with open('c:/Folder7/old-file.html', 'w', encoding='utf-8') as
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement