Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- >>> from bs4 import BeautifulSoup
def create_soup(path):
    """Parse the HTML file at *path* into a BeautifulSoup tree.

    The file is opened in binary mode and its raw bytes are handed to
    BeautifulSoup together with the ``lxml`` parser name.

    Args:
        path: Filesystem path of the HTML file to parse.

    Returns:
        The ``BeautifulSoup`` object built from the file's contents.
    """
    with open(path, 'rb') as s:
        # File objects have no ``data()`` method; ``read()`` returns the
        # whole file as bytes.
        return BeautifulSoup(s.read(), 'lxml')
def format_eamusement(item):
    """Return ``"<name>@<address>"`` for one shop entry.

    Args:
        item: Object supporting ``item['data-name']`` and
            ``item['data-address']`` lookups; both values must be strings
            because they are combined with ``str.join``.

    Returns:
        The two values joined by a single ``@``.
    """
    return '@'.join((item['data-name'], item['data-address']))
def scrape_eamusment(pattern):
    """Print one ``name@address`` line per shop found in matching files.

    Every file whose path matches the glob *pattern* is parsed with
    ``create_soup``; each ``div`` whose ``class`` attribute is exactly
    ``cl_shop_bloc`` is formatted with ``format_eamusement`` and printed.

    Args:
        pattern: Glob pattern selecting the HTML files to process.

    Returns:
        None. Output goes to standard output; a pattern that matches no
        files prints nothing.
    """
    # ``iglob`` was never imported in the session; importing it here keeps
    # the function self-contained.
    from glob import iglob

    for infile in iglob(pattern):
        soup = create_soup(infile)
        items = soup.select('div[class="cl_shop_bloc"]')
        for shop in items:
            print(format_eamusement(shop))
# Print name@address lines for the local HTML files of each game.
scrape_eamusment('bm*.html')  # beatmania
scrape_eamusment('n*.html')  # NOSTALGIA
scrape_eamusment('bg*.html')  # Bomber Girl

# Parse the MJAC page; the resulting ``soup`` is queried further down.
soup = create_soup('mj.html')  # MJAC
def format_allnet(shop):
    """Return ``"<name>@<address>"`` for one shop element.

    The name and address are the ``.text`` of the first ``span`` with
    class ``store_name`` and the first ``span`` with class
    ``store_address`` found via ``shop.find``.

    Args:
        shop: Element exposing ``find(tag, class_=...)``; both spans must
            be present, otherwise the ``.text`` access fails on the
            missing result.

    Returns:
        The name and address joined by a single ``@``.
    """
    name = shop.find('span', class_='store_name').text
    address = shop.find('span', class_='store_address').text
    return '@'.join((name, address))
# Format every <li> that is a direct child of a <ul> in the MJAC page.
items = soup.select('ul>li')
print('\n'.join(format_allnet(shop) for shop in items))

# Same listing for the chrono page.
soup = create_soup('chrono.html')
items = soup.select('ul>li')
# NOTE(review): this line originally called ``format_sega``, which is not
# defined anywhere in this session. ``format_allnet`` is used on the
# assumption that this page shares the store_name/store_address markup;
# confirm against the page.
print('\n'.join(format_allnet(shop) for shop in items))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement