Advertisement
Guest User

Untitled

a guest
Oct 18th, 2019
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.21 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3.  
  4.  
  5. import random
  6. import re
  7. import sys
  8. import time
  9.  
  10. import bs4
  11. #import numpy as np
  12. #import pandas as pd
  13. import requests
  14. from bs4 import BeautifulSoup
  15. import csv
  16.  
  17. def delay() -> None:
  18. time.sleep(random.uniform(15, 30))
  19. return None
  20.  
  21. def main() -> int:
  22. base: str = "https://www.bryant.com/bryant/en/us/products/air-conditioners/"
  23. content: dict = {
  24. "prodname": [],
  25. "model": [],
  26. "seer": [],
  27. "sound": [],
  28. "compressor": []
  29. }
  30. #d_list=[]
  31. delay()
  32. r: requests.Response = requests.get(base)
  33. if r.status_code == 200:
  34. soup: bs4.BeautifulSoup = BeautifulSoup(r.content, "html.parser")
  35. else:
  36. raise RuntimeError("Request to main page returned non-200 HTTP code.")
  37.  
  38. #Parse product url
  39. for product in soup.find_all(
  40. #"span", {"class": "product-name"} comes up none
  41. #"p", {"class": "product-name"}
  42. #"div", {"class": "list-view-content col-xs-12 col-sm-9 col-lg-9 padleft0"}
  43. "div", {"class": "card-title"}
  44. ):
  45.  
  46. # Parse product name.
  47. prodname = product.text
  48. #print('prodname:', prodname)
  49.  
  50. #parse model
  51. for model in soup.find_all(
  52. "div", {"class": "card-subtitle"}
  53. ):
  54. model = model.text
  55.  
  56. # Parse seer.
  57. for seer in soup.find_all(
  58. "span", {"class": "SEER Rating"}
  59. ):
  60. seer=seer.text
  61. # Parse sound.
  62. for sound in soup.find_all(
  63. "span", {"class": "Sound Rating (Decibels)"}
  64. ):
  65. sound=sound.text
  66. # Parse compressor
  67. for compressor in soup.find_all(
  68. "span", {"class": "Compressor Type"}
  69. ):
  70. compressor=compressor.text
  71.  
  72.  
  73. # Append all data belonging to this company
  74. # to the content dictionary.
  75. content["prodname"].append(prodname.strip())
  76. content["model"].append(model.strip())
  77. content["seer"].append(seer)
  78. content["sound"].append(sound)
  79. content["compressor"].append(compressor)
  80. delay()
  81.  
  82. #print('Product', product,'Name',prodname)
  83. # Write scraped data to disk.
  84. with open('scraped_bryantProducts.csv', 'w') as fout:
  85. print(int, 'writing to', fout.name)
  86. #print 'writing to', fout.name
  87. writer = csv.writer(fout)
  88.  
  89. row = (
  90. 'Product',
  91. 'Model',
  92. 'Seer',
  93. 'Sound',
  94. 'Compressor',
  95.  
  96. )
  97. writer.writerow(row)
  98. #for key, values in content.items():
  99. # for value in values:
  100. # temp_list=[key, value]
  101. # d_list.append(temp_list)
  102. # writer.writerow(d_list)
  103. for x in prodname:
  104. #for key, val in content.items(): # with row () and for key, val only 5 lines of all last product;
  105. row=(prodname.strip(), model.strip(), seer, sound, compressor) #46 lines correct row format only last product 46 times
  106. #row=(content) # 37 lines of row names
  107. #row = ( #46 lines all last product info with for x; 5 lines of same last product info with key
  108. # prodname.strip(),
  109. # model.strip(),
  110. # seer,
  111. # sound,
  112. # compressor,
  113. #)
  114. writer.writerow(row)
  115. #for key, val in content.items():
  116. #writer.writerow([content.items()]) #5 lines with each product name but same else with key; 46 lines same product and everything with x
  117.  
  118. #pass
  119.  
  120. #pass
  121.  
  122. #return
  123.  
  124. #return 0
  125.  
  126.  
# Script entry point: run the scraper and propagate main()'s return
# value to the shell as the process exit code.
if __name__ == "__main__":
    sys.exit(main())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement