Advertisement
Victorman5

ParsingTutorial

Mar 20th, 2021
864
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.21 KB | None | 0 0
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup
  3.  
  4.  
  5. def request_html(url: str) -> str:
  6.     return requests.get(url).text
  7.  
  8.  
  9. def build_search_url(song_autor: str, song_name: str) -> str:
  10.     LYRICS_STORAGE_URL = 'https://search.azlyrics.com/'
  11.     return f'{LYRICS_STORAGE_URL}?q=' \
  12.            f'{song_autor} ' \
  13.            f'{song_name}'
  14.  
  15.  
  16. def parse_best_match_url(html: str) -> str:
  17.     parser = BeautifulSoup(html, 'lxml')
  18.     table = parser.find_all('table', class_='table table-condensed')[0]
  19.     url = table.find_all('a', href=True)[0]['href']
  20.     return url
  21.  
  22.  
  23. def parse_lyrics(html: str) -> str:
  24.     parser = BeautifulSoup(html, 'lxml')
  25.     main_div = parser.find_all('div', class_='col-xs-12 col-lg-8 text-center')[0]
  26.     lyrics_div = main_div.find_all('div')[5]
  27.     return lyrics_div.text
  28.  
  29.  
  30. if __name__ == '__main__':
  31.     songs = [
  32.         ('Linkin park', 'Invisible'),
  33.         ('Bring me the horizon', 'Avalanche'),
  34.         ('MO', 'Nights with you')
  35.     ]
  36.     for autor, name in songs:
  37.         search_url = build_search_url(autor, name)
  38.         lyrics_url = parse_best_match_url(request_html(search_url))
  39.         lyrics = parse_lyrics(request_html(lyrics_url))
  40.         print(lyrics)
  41.        
  42.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement