Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- import random
- import re
- import sys
- import time
- import bs4
- #import numpy as np
- #import pandas as pd
- import requests
- from bs4 import BeautifulSoup
- import csv
def delay(min_seconds: float = 15, max_seconds: float = 30) -> None:
    """Sleep for a random duration drawn uniformly from the given bounds.

    Args:
        min_seconds: Lower bound of the pause, in seconds. Defaults to 15.
        max_seconds: Upper bound of the pause, in seconds. Defaults to 30.

    Raises:
        ValueError: If ``min_seconds`` is negative or ``max_seconds`` is
            smaller than ``min_seconds``.
    """
    # Validate up front: time.sleep() rejects negative durations, and swapped
    # bounds almost certainly indicate a caller mistake.
    if min_seconds < 0 or max_seconds < min_seconds:
        raise ValueError(
            "delay bounds must satisfy 0 <= min_seconds <= max_seconds"
        )
    time.sleep(random.uniform(min_seconds, max_seconds))
def _padded_texts(
    soup: "bs4.BeautifulSoup", tag: str, css_class: str, length: int
) -> "list[str]":
    """Return the stripped text of every matching element, sized to ``length``.

    Args:
        soup: Parsed document to search.
        tag: Tag name passed to ``find_all``.
        css_class: Value matched against the element's ``class`` attribute.
        length: Number of entries the returned list must contain.

    Returns:
        A list of exactly ``length`` strings in document order. Missing
        trailing values are filled with ``""``; surplus values are dropped.
    """
    texts = [
        element.get_text().strip()
        for element in soup.find_all(tag, {"class": css_class})
    ]
    if len(texts) != length:
        # A count mismatch means positional pairing with the product list
        # cannot be trusted, so make it visible instead of failing silently.
        print(
            f"warning: found {len(texts)} '{css_class}' values for "
            f"{length} products; columns may be misaligned",
            file=sys.stderr,
        )
    return (texts + [""] * length)[:length]


def main() -> int:
    """Scrape the Bryant air-conditioner listing page into a CSV file.

    Downloads the listing page, collects the product name, model, SEER
    rating, sound rating and compressor type columns, and writes one row per
    product to ``scraped_bryantProducts.csv`` in the working directory.

    Returns:
        0 on success, suitable for passing to ``sys.exit``.

    Raises:
        RuntimeError: If the listing page does not answer with HTTP 200.
    """
    base: str = "https://www.bryant.com/bryant/en/us/products/air-conditioners/"

    delay()
    # A timeout keeps the script from hanging forever on a stalled connection.
    r: requests.Response = requests.get(base, timeout=30)
    if r.status_code != 200:
        raise RuntimeError("Request to main page returned non-200 HTTP code.")
    soup: bs4.BeautifulSoup = BeautifulSoup(r.content, "html.parser")

    prodnames = [
        title.get_text().strip()
        for title in soup.find_all("div", {"class": "card-title"})
    ]
    count = len(prodnames)

    # Values are paired with products by their order in the document: the
    # i-th match of each selector is taken to belong to the i-th product.
    # NOTE(review): the selectors are carried over unchanged; class values
    # containing spaces ("SEER Rating", ...) only match an element whose
    # class attribute is exactly that string -- verify against the live page.
    content: dict = {
        "prodname": prodnames,
        "model": _padded_texts(soup, "div", "card-subtitle", count),
        "seer": _padded_texts(soup, "span", "SEER Rating", count),
        "sound": _padded_texts(soup, "span", "Sound Rating (Decibels)", count),
        "compressor": _padded_texts(soup, "span", "Compressor Type", count),
    }

    # Write scraped data to disk. newline='' is what the csv module expects
    # so that it controls line endings itself (no blank rows on Windows).
    with open(
        'scraped_bryantProducts.csv', 'w', newline='', encoding='utf-8'
    ) as fout:
        print('writing to', fout.name)
        writer = csv.writer(fout)
        writer.writerow(('Product', 'Model', 'Seer', 'Sound', 'Compressor'))
        # One row per product, taken from the collected columns.
        writer.writerows(
            zip(
                content["prodname"],
                content["model"],
                content["seer"],
                content["sound"],
                content["compressor"],
            )
        )
    return 0
# Script entry point: run the scraper and hand its result to the interpreter
# as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement