Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from bs4 import BeautifulSoup
- import unittest
def parse(path_to_file):
    """Collect four statistics from the ``bodyContent`` element of an HTML page.

    Args:
        path_to_file: Path to the HTML file to analyse.

    Returns:
        A list ``[imgs, headers, linkslen, lists]`` where:

        * ``imgs`` - number of ``<img>`` tags whose ``width`` attribute is an
          integer of at least 200 (images without a usable width are skipped);
        * ``headers`` - number of ``<h1>``..``<h6>`` tags whose text starts
          with ``E``, ``C`` or ``T``;
        * ``linkslen`` - the largest number of following sibling tags found
          for any ``<a>`` tag, or 0 when there are no links;
        * ``lists`` - number of ``<ul>``/``<ol>`` tags that are not nested
          inside another ``<ul>``/``<ol>``.
    """
    # IMPORTANT: keep encoding='utf-8' when opening the file; without it the
    # solution crashes on the grader with UnicodeDecodeError.
    with open(path_to_file, 'r', encoding='utf-8') as fp:
        html_doc = fp.read()

    body = BeautifulSoup(html_doc, 'html.parser')
    soup = body.find(id="bodyContent")

    # Parse each width once and skip images whose width attribute is missing
    # or not a plain integer instead of failing on them.
    imgs = 0
    for img in soup.find_all("img"):
        width = img.get("width", "").strip()
        if width.isdecimal() and int(width) >= 200:
            imgs += 1

    headers = sum(
        1
        for heading in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"])
        if heading.text.startswith(('E', 'C', 'T'))
    )

    # NOTE(review): this counts every following sibling tag of a link, not
    # only consecutive <a> tags - confirm this is the intended "linkslen".
    # default=0 keeps pages without any links from raising ValueError.
    linkslen = max(
        (len(link.find_next_siblings()) for link in soup.select('a')),
        default=0,
    )

    # Only top-level lists count: a list with a <ul>/<ol> ancestor is nested.
    lists = sum(
        1
        for tag in soup.find_all(['ul', 'ol'])
        if not tag.find_parents(['ul', 'ol'])
    )

    return [imgs, headers, linkslen, lists]
class TestParse(unittest.TestCase):
    """Compares parse() results with reference values for saved wiki pages."""

    def test_parse(self):
        # Maps each page path to its expected [imgs, headers, linkslen, lists];
        # dicts keep insertion order, so pages are checked in the listed order.
        expected_by_path = {
            'wiki/Stone_Age': [13, 10, 12, 40],
            'wiki/Brain': [19, 5, 25, 11],
            'wiki/Artificial_intelligence': [8, 19, 13, 198],
            'wiki/Python_(programming_language)': [2, 5, 17, 41],
            'wiki/Spectrogram': [1, 2, 4, 7],
        }
        for path, expected in expected_by_path.items():
            # One sub-test per page so a single failure does not hide the rest.
            with self.subTest(path=path, expected=expected):
                actual = parse(path)
                self.assertEqual(actual, expected)
# Run the test suite only when this file is executed as a script, so that
# importing the module does not trigger the tests.
if __name__ == '__main__':
    unittest.main()
Advertisement
Add Comment
Please, Sign In to add comment