Guest User

Untitled

a guest
Jun 27th, 2020
44
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.76 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import unittest
  3.  
  4.  
  5. def parse(path_to_file):
  6. with open(r'D:\jetbrains\Coursera\3_part\week_2\wiki\Stone_Age', 'r', encoding='utf-8') as fp:
  7. html_doc = fp.read()
  8.  
  9.  
  10. # Поместите ваш код здесь.
  11. # ВАЖНО!!!
  12. # При открытии файла, добавьте в функцию open необязательный параметр
  13. # encoding='utf-8', его отсутствие в коде будет вызвать падение вашего
  14. # решения на грейдере с ошибкой UnicodeDecodeError
  15.  
  16. body = BeautifulSoup(html_doc, 'html.parser')
  17. soup = body.find(id="bodyContent")
  18.  
  19. imgs = len([int(width['width']) for width in soup.find_all("img") if int(width['width']) >= 200])
  20. headers = len([heading.text for heading in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]) if
  21. heading.text.startswith(('E', 'C', 'T'))])
  22. linkslen = max([len(links.find_next_siblings()) for links in soup.select('a')])
  23. lists = len([tag for tag in soup.find_all(['ul', 'ol']) if not tag.find_parents(['ul', 'ol'])])
  24.  
  25. return [imgs, headers, linkslen, lists]
  26.  
  27.  
  28. class TestParse(unittest.TestCase):
  29. def test_parse(self):
  30. test_cases = (
  31. ('wiki/Stone_Age', [13, 10, 12, 40]),
  32. ('wiki/Brain', [19, 5, 25, 11]),
  33. ('wiki/Artificial_intelligence', [8, 19, 13, 198]),
  34. ('wiki/Python_(programming_language)', [2, 5, 17, 41]),
  35. ('wiki/Spectrogram', [1, 2, 4, 7]),)
  36.  
  37. for path, expected in test_cases:
  38. with self.subTest(path=path, expected=expected):
  39. self.assertEqual(parse(path), expected)
  40.  
  41.  
  42. if __name__ == '__main__':
  43. unittest.main()
Advertisement
Add Comment
Please, Sign In to add comment