Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from bs4 import BeautifulSoup as bs # pip install beautifulsoup4
- import pandas as pd
- import base64
# Scrape one jsprav.ru category page and collect, for every organisation card
# (<div class="org">), its name, address, phone, opening hours, web site and
# e-mail into parallel lists: index i of every list describes the same card.

_BLANK = 'blank'  # placeholder stored when a card lacks a given field


def _decode_b64_repr(raw):
    """Decode an attribute value stored as the repr of base64 bytes.

    The page stores such attributes as the text ``b'<base64>'``; for example
    ``b'aHR0cDovL3d3dy5oeXBlcmF1dG8ucnU='`` decodes to
    ``http://www.hyperauto.ru``. The ``b'...'`` wrapper is stripped when
    present, then the payload is base64-decoded and read as UTF-8.

    Raises ValueError (binascii.Error / UnicodeDecodeError) on a malformed
    payload.
    """
    if raw.startswith(("b'", 'b"')) and raw[-1] == raw[1]:
        raw = raw[2:-1]
    return base64.b64decode(raw).decode("UTF-8")


def _span_text(container, css_class):
    """Return the text of the first <span class=css_class>, or 'blank'."""
    span = container.find('span', class_=css_class)
    return span.get_text() if span is not None else _BLANK


r_page_exapmle = requests.get('https://bolshoj-kamen.jsprav.ru/magazinyi-avtozapchastej-i-avtotovarov/')
# Name the parser explicitly so the result does not depend on which optional
# parser libraries happen to be installed.
soup_page = bs(r_page_exapmle.content, 'html.parser')

list_of_names = []
list_of_address = []
list_of_phone = []
list_of_value_time = []
list_of_web = []
list_of_emails = []
phone_number = ''
web_name = ''

# Query the cards once and iterate over them directly instead of re-running
# find_all() several times per iteration.
for org in soup_page.body.find_all('div', class_='org'):
    address_block = org.find_all('ul', class_='address')[0]

    list_of_names.append(org.find('h3').get_text())
    list_of_address.append(address_block.find('span', class_='value address').get_text())

    # Phone: hidden behind a "show" link, stored in data-phone.
    # NOTE(review): the sample payload decodes to text that still contains a
    # literal "&nbsp;" between numbers; it is kept verbatim here.
    phone_span = address_block.find('span', class_='phone-show')
    phone_raw = phone_span.get('data-phone') if phone_span is not None else None
    list_of_phone.append(_decode_b64_repr(phone_raw) if phone_raw else _BLANK)

    # Opening hours and e-mail are looked up independently, so a card that
    # has only one of them still yields exactly one entry per list.
    list_of_value_time.append(_span_text(address_block, 'value time'))
    list_of_emails.append(_span_text(address_block, 'value email'))

    # Web site: the only <p> carrying a data-lnk attribute; attrs={...: True}
    # matches a tag that has the attribute regardless of its value.
    link_p = address_block.find('p', attrs={'data-lnk': True})
    list_of_web.append(_decode_b64_repr(link_p['data-lnk']) if link_p is not None else _BLANK)
- # Here I access all the <p> tags inside the address list of the first organisation card:
- soup_page.body.find_all('div', class_='org')[0].find_all('ul', class_="address")[0].find_all('p')
- [<p><span class="nm"><i class="glyphicon glyphicon-home"></i> адрес:</span> <span class="value address">Приморский край, Большой Камень г., ул. Гагарина, 16</span></p>,
- <p>
- <span class="nm"><i class="glyphicon glyphicon-phone-alt"></i> телефон:</span> <span class="value phone"><span class="small-phone">8 (800) 555-23-…</span> <span class="phone-show" data-phone="b'OCAoODAwKSA1NTUtMjMtODgsJm5ic3A7KzcgKDQyMzM1KSA1LTQ2LTU0'">- показать</span></span>
- <span class="clearfix"></span>
- </p>,
- <p><span class="nm"><i class="glyphicon glyphicon-time"></i> график (часы) работы:</span> <span class="value time">ежедневно, 9:00–19:00</span></p>,
- <p data-lnk="b'aHR0cDovL3d3dy5oeXBlcmF1dG8ucnU='"><span class="nm"><i class="glyphicon glyphicon-globe"></i> официальный сайт:</span> <span class="value url"></span></p>,
- <p><span class="nm"><i class="glyphicon glyphicon-envelope"></i> электронная почта:</span> <span class="value email">administrator.bk@hyperauto.ru</span></p>]
- # How can I address only the <p data-lnk=...> tag, to get b'aHR0cDovL3d3dy5oeXBlcmF1dG8ucnU=' (the site name in encoded form)?
Add Comment
Please, Sign In to add comment