SHARE
TWEET

Untitled

a guest Oct 21st, 2019 67 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
from glob import iglob

from bs4 import BeautifulSoup
  2. >>> def create_soup(path):
  3. ...     with open(path, 'rb') as s:
  4. ...         return BeautifulSoup(s.data(), 'lxml')
  5. ...
  6. >>> def format_eamusement(item):
  7. ...     return '@'.join((item['data-name'], item['data-address']))
  8. ...
  9. >>> def scrape_eamusment(pattern):
  10. ...     for infile in iglob(pattern):
  11. ...         soup = create_soup(infile)
  12. ...         items = soup.select('div[class="cl_shop_bloc"]')
  13. ...         for shop in items:
  14. ...             print(format_eamusement(shop))
  15. ...
  16. >>> scrape_eamusment('bm*.html')  # beatmania
  17. >>> scrape_eamusment('n*.html') # NOSTALGIA
  18. >>> scrape_eamusment('bg*.html') # Bomber Girl
  19. >>> soup = create_soup('mj.html') # MJAC
  20. >>> def format_allnet(shop):
  21. ...     name = shop.find('span', class_='store_name').text
  22. ...     address = shop.find('span', class_='store_address').text
  23. ...     return '@'.join((name, address))
  24. ...
  25. >>> items = soup.select('ul>li')
  26. >>> print('\n'.join(format_allnet(shop) for shop in items))
  27. >>> soup = create_soup('chrono.html')
  28. >>> items = soup.select('ul>li')
  29. >>> print('\n'.join(format_sega(shop) for shop in items))
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top