Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from bs4 import BeautifulSoup
- import requests
- import re
- import copy
# Compiled once at import time instead of on every validator() call.
# Capture groups, in order: country code, area code written in parentheses,
# area code written without parentheses, exchange, subscriber number,
# extension.
_PHONE_RE = re.compile(r'(?:(?:\+?([1-9]|[0-9][0-9]|[0-9][0-9][0-9])\s*(?:[.-]\s*)?)?(?:\(\s*([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s*\)|([0-9][1-9]|[0-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s*(?:[.-]\s*)?)?([2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s*(?:[.-]\s*)?([0-9]{4})(?:\s*(?:#|x\.?|ext\.?|extension)\s*(\d+))?')


def validator(string: object, lesson: dict) -> dict:
    """Store the first phone-like number found in *string* in ``lesson['Number']``.

    Args:
        string: Text to scan. It is converted with ``str()`` first, so any
            object is accepted.
        lesson: Lesson record to update. It is modified in place.

    Returns:
        The same ``lesson`` object. ``'Number'`` is overwritten only when a
        match is found; otherwise the record is returned untouched.
    """
    match = _PHONE_RE.search(str(string))
    if match:
        # Concatenate the captured pieces only: separators, parentheses and
        # the extension keyword are not captured, so they drop out. Groups
        # that did not take part in the match contribute an empty string.
        lesson['Number'] = ''.join(match.groups(''))
    return lesson
# Page to scrape (served from localhost).
link = 'http://localhost:63342/help_to_anybody/test.html?_ijt=sjk5nts2u87dpa6l6h5urrsh57'
# A timeout keeps the script from hanging forever if the server never answers;
# raise_for_status() stops us from parsing an HTTP error page as lesson data.
page = requests.get(link, timeout=10)
page.raise_for_status()
soup = BeautifulSoup(page.content, "html.parser")

# Completed lesson records, in document order.
lessons = []

# Empty template; each new record starts as a deep copy so the 'Teachers'
# list is never shared between records.
lesson = {
    'Number': None,
    'Name': None,
    'Teachers': [],
    'Type': None,
    'Time': None,
    'Room': None,
}

current_lesson = copy.deepcopy(lesson)
for i in soup.find_all('p'):
    # get_text must be *called*: passing the bound method makes validator()
    # match against the method's repr rather than the paragraph text.
    current_lesson = validator(i.get_text(), current_lesson)
    print(current_lesson)
    # Check completeness after validating, so a record completed by the last
    # <p> is still collected instead of being dropped when the loop ends.
    # NOTE(review): a record is complete only when every field is truthy,
    # including 'Teachers'; the validator visible here fills only 'Number'.
    if all(current_lesson.values()):
        lessons.append(current_lesson)
        current_lesson = copy.deepcopy(lesson)
print(lessons)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement