Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from bs4 import BeautifulSoup
- import requests
- import time
- import argparse
- import sys
- import time
class ComicScraper:
    """Scrape the new-comics listing from League of Comic Geeks and print it.

    The page is downloaded and parsed when the instance is created; ``main``
    then prints the view selected by the three constructor options.
    """

    # Listing page that is scraped.
    URL = 'https://leagueofcomicgeeks.com/comics/new-comics/2020/'
    # Seconds to wait for the server, so a stalled connection cannot hang
    # the script forever.
    REQUEST_TIMEOUT = 10
    # Placeholder stored when a details line carries no recognisable price.
    NO_PRICE = 'No price'

    def __init__(self, comic_titles, comic_prices, all_comics):
        """Download the listing page and extract titles, details and prices.

        Args:
            comic_titles: Truthy to make ``main`` print comic titles.
            comic_prices: Truthy to make ``main`` print comic prices.
            all_comics: Truthy to make ``main`` print every title together
                with its full details line.

        Raises:
            requests.RequestException: If the page cannot be fetched in time
                or the server answers with an HTTP error status.
        """
        self.comic_titles = comic_titles
        self.comic_prices = comic_prices
        self.all_comics = all_comics

        self.url = self.URL
        self.webpage = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly on 4xx/5xx instead of silently parsing an error page
        # and printing an empty listing.
        self.webpage.raise_for_status()
        self.soup = BeautifulSoup(self.webpage.content, 'html.parser')

        self.titles = [
            tag.get_text()
            for tag in self.soup.find_all('div', class_='comic-title')
        ]
        # Non-breaking spaces are dropped so the details line ends cleanly
        # on the price (when one is present).
        self.comicinfo = [
            tag.get_text().replace('\xa0', '').strip()
            for tag in self.soup.find_all(
                'div', class_='comic-details comic-release')
        ]
        self.prices = [self._extract_price(info) for info in self.comicinfo]

    @staticmethod
    def _extract_price(info):
        """Return the trailing ``$``-amount of *info*, or ``'No price'``.

        Everything after the last ``$`` must be a plain number (digits with
        at most one decimal point), so amounts of any length such as
        ``$3.99`` or ``$12.99`` are recognised.
        """
        _, dollar, amount = info.rpartition('$')
        amount = amount.strip()
        if dollar and amount.replace('.', '', 1).isdigit():
            return '$' + amount
        return ComicScraper.NO_PRICE

    def main(self) -> None:
        """Print the view selected by the constructor options.

        Exactly one view is printed, chosen in this order:

        * ``all_comics`` set, or no option set at all: every title with its
          full details line;
        * ``comic_titles`` and ``comic_prices`` both set: every title with
          its price;
        * only ``comic_titles`` set: the titles, one per line;
        * only ``comic_prices`` set: the prices, one per line.
        """
        show_titles = bool(self.comic_titles)
        show_prices = bool(self.comic_prices)

        # The default view is decided from the options themselves rather
        # than from sys.argv, so the class behaves the same when it is
        # driven from other code instead of the command line.
        if self.all_comics or not (show_titles or show_prices):
            print("###### New Comics ######")
            for title, info in zip(self.titles, self.comicinfo):
                print(title, '--->', info)
        elif show_titles and show_prices:
            print("###### New Comics ######")
            for title, price in zip(self.titles, self.prices):
                print(title, '--->', price)
        elif show_titles:
            for title in self.titles:
                print(title)
        else:
            for price in self.prices:
                print(price)
if __name__ == '__main__':
    # Command-line entry point: parse the options, scrape the listing page
    # and print the requested view.
    parser = argparse.ArgumentParser()
    # -t and -m are switches ("... ONLY"). nargs='?' with const=True lets
    # them be given bare, while the earlier form that passed a value after
    # the option is still accepted; when absent they stay None.
    parser.add_argument('-t', '--titles', help='Print comic titles ONLY',
                        dest='titles', nargs='?', const=True)
    parser.add_argument('-m', '--prices', help='Get comic prices ONLY',
                        dest='prices', nargs='?', const=True)
    parser.add_argument('-a', '--all',
                        help='Get titles, prices, publisher, and descriptions',
                        dest='all_comics', action='store_true')
    args = parser.parse_args()
    scraper = ComicScraper(args.titles, args.prices, args.all_comics)
    scraper.main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement