Advertisement
Guest User

Untitled

a guest
Apr 10th, 2020
257
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.54 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import requests
  3. import time
  4. import argparse
  5. import sys
  6. import time
  7.  
  8.  
  9.  
  10. class ComicScraper():  # Class ComicScraper for scraping comic books
  11.     def __init__(self, comic_titles, comic_prices, all_comics):
  12.         self.comic_titles = comic_titles
  13.         self.comic_prices = comic_prices
  14.         self.all_comics = all_comics
  15.         # url of comicbook site
  16.         self.url = 'https://leagueofcomicgeeks.com/comics/new-comics/2020/'
  17.         self.webpage = requests.get(self.url)  # HTTP request for url
  18.         # BeautifulSoup object of webpage
  19.         self.soup = BeautifulSoup(self.webpage.content, 'html.parser')
  20.         self.titles = list(
  21.             map(BeautifulSoup.get_text, self.soup.find_all('div', class_='comic-title')))
  22.         self.comicinfo = [x.replace(u'\xa0', u'').strip()
  23.                           for x in list(map(BeautifulSoup.get_text, self.soup.find_all('div', class_='comic-details comic-release')))
  24.                           ]
  25.         self.prices = [
  26.             prices[-5:] if prices[-5:].startswith('$') else 'No price' for prices in self.comicinfo]
  27.  
  28.     def main(self):
  29.         if len(sys.argv) == 1:
  30.             print("###### New Comics ######")
  31.             for title, info in zip(self.titles, self.comicinfo):
  32.                 print(title, '--->', info)
  33.         if self.all_comics:
  34.             print("###### New Comics ######")
  35.             for titles, info in zip(self, titles, self.comicinfo):
  36.                 print(title, '--->', info)
  37.         if self.comic_titles and self.comic_prices:
  38.             print("###### New Comics ######")
  39.             for title, prices in zip(self.titles, self.prices):
  40.                 print(title, '--->', info)
  41.         if self.comic_titles:
  42.             for comic_title in self.comic_titles:
  43.                 print(comic_title)
  44.         if self.comic_prices:
  45.             for dol_amount in comic_prices:
  46.                 print(dol_amount)
  47.  
  48.  
  49. if __name__ == '__main__':
  50.     parser = argparse.ArgumentParser()
  51.     # Titles of comicbooks i.e "Detective Comics #1"
  52.     parser.add_argument('-t', '--titles', help='Print comic titles ONLY', dest='titles')
  53.     # Scrape prices of comic books in order
  54.     parser.add_argument('-m', '--prices', help='Get comic prices ONLY', dest='prices')
  55.  
  56.     parser.add_argument('-a', '--all', help='Get titles, prices, publisher, and descriptions',
  57.                         dest='all_comics', action='store_true')
  58.     args = parser.parse_args()
  59.     scraper = ComicScraper(args.titles, args.prices, args.all_comics)
  60.     scraper.main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement