Advertisement
Guest User

Untitled

a guest
Oct 21st, 2019
94
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.08 KB | None | 0 0
  from glob import iglob

  from bs4 import BeautifulSoup
  def create_soup(path):
      """Parse the HTML file at *path* and return a BeautifulSoup tree.

      The file is opened in binary mode and its raw bytes are handed to
      BeautifulSoup together with the 'lxml' parser name.
      """
      with open(path, 'rb') as s:
          # File objects have no .data() method (AttributeError); read()
          # is the call that returns the file's contents.
          return BeautifulSoup(s.read(), 'lxml')
  def format_eamusement(item):
      """Return ``'<name>@<address>'`` for one shop entry.

      *item* must support ``item['data-name']`` and ``item['data-address']``
      lookups, and both values must be strings (they are joined with '@').
      A missing key raises whatever the lookup on *item* raises.
      """
      return '@'.join((item['data-name'], item['data-address']))
  def scrape_eamusment(pattern):
      """Print a ``name@address`` line for every shop in the matching files.

      Each path yielded by ``iglob(pattern)`` is parsed with ``create_soup``;
      every ``div`` whose class attribute is exactly ``cl_shop_bloc`` is
      formatted with ``format_eamusement`` and printed. Returns ``None``;
      a pattern that matches no files prints nothing.
      """
      for infile in iglob(pattern):
          soup = create_soup(infile)
          for shop in soup.select('div[class="cl_shop_bloc"]'):
              print(format_eamusement(shop))
  # Dump the shop lists from the saved pages, one filename pattern per game.
  scrape_eamusment('bm*.html')  # beatmania
  scrape_eamusment('n*.html')  # NOSTALGIA
  scrape_eamusment('bg*.html')  # Bomber Girl
  soup = create_soup('mj.html')  # MJAC
  def format_allnet(shop):
      """Return ``'<name>@<address>'`` for one store list entry.

      The name is the text of the first ``span`` with class ``store_name``
      found under *shop*, the address that of the first ``span`` with class
      ``store_address``. If either span is absent, ``find`` yields ``None``
      and the ``.text`` access raises AttributeError.
      """
      name = shop.find('span', class_='store_name').text
      address = shop.find('span', class_='store_address').text
      return '@'.join((name, address))
  # Print every <li> that is a direct child of a <ul> in the current soup.
  items = soup.select('ul>li')
  print('\n'.join(format_allnet(shop) for shop in items))
  soup = create_soup('chrono.html')
  items = soup.select('ul>li')
  # The original called format_sega here, which is never defined in this
  # session (NameError). format_allnet is used on the same 'ul>li' selection
  # above, so it is presumably the intended formatter.
  # TODO confirm chrono.html uses the store_name/store_address spans.
  print('\n'.join(format_allnet(shop) for shop in items))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement