Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os

import pymysql.cursors
import scrapy
# Connect to the database.
#
# Each setting can be overridden through an environment variable so that
# credentials do not have to be edited in (or committed with) the source.
# The fallbacks reproduce the previously hard-coded values, so existing
# setups keep working unchanged.
# NOTE(review): the fallback password is a real credential that has been
# published in source -- rotate it and drop the fallback once
# BANGGOOD_DB_PASSWORD is set wherever this script runs.
connection = pymysql.connect(
    host=os.environ.get('BANGGOOD_DB_HOST', 'localhost'),
    user=os.environ.get('BANGGOOD_DB_USER', 'root'),
    password=os.environ.get('BANGGOOD_DB_PASSWORD', 'Kradz579032!!'),
    db=os.environ.get('BANGGOOD_DB_NAME', 'db'),
    charset='utf8mb4',
    cursorclass=pymysql.cursors.DictCursor,
)
class QuotesSpider(scrapy.Spider):
    """Crawl Banggood RC category listings and store each product in MySQL.

    Every ``.good_box_min`` element on a listing page becomes one item with
    the keys ``producttitle``, ``productlink`` and ``productprice``. Each
    item is inserted into the ``banggood`` table through the module-level
    ``connection`` and is also yielded, so Scrapy feed exports keep working.
    """

    name = "quotes"

    # Parameterized statement: values are bound by the driver, never
    # formatted into the SQL text.
    insert_sql = (
        "INSERT INTO `banggood` (`producttitle`, `productlink`, `productprice`) "
        "VALUES (%s, %s, %s)"
    )

    def start_requests(self):
        """Yield the initial requests for the category listing pages."""
        urls = [
            'https://www.banggood.com/Wholesale-RC-Helicopter-c-264.html',
            'https://www.banggood.com/Wholesale-RC-Quadcopters-c-1848.html',
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Extract one item per product box, store it, then follow pagination.

        Yields:
            dict: one item per ``.good_box_min`` element; a field is ``None``
            when its selector matches nothing inside that element.
            scrapy.Request: a request for the next listing page, if the page
            has an ``a#listNextPage`` link.
        """
        for product in response.css('.good_box_min'):
            # Select relative to the product element. Selecting on
            # ``response`` would return the values of every product on the
            # page for each item.
            item = {
                'producttitle': product.css(
                    'a.middle_product_text_170717::text').extract_first(),
                'productlink': product.css(
                    'a.middle_product_text_170717::attr(href)').extract_first(),
                'productprice': product.css(
                    'span.price.wh_cn::text').extract_first(),
            }
            self._store(item)
            yield item

        next_page = response.css('a#listNextPage::attr(href)').extract_first()
        if next_page is not None:
            yield scrapy.Request(response.urljoin(next_page), callback=self.parse)

    def closed(self, reason):
        """Close the shared database connection when the spider finishes.

        Scrapy calls this hook when the spider closes; ``reason`` is the
        close reason it passes in and is not used here.
        """
        # Closing an already-closed connection raises, so check first.
        if connection.open:
            connection.close()

    def _store(self, item):
        """Insert a single scraped item into the ``banggood`` table.

        A database error is rolled back and logged instead of re-raised, so
        one bad row does not abort the rest of the page or its pagination.
        """
        row = (item['producttitle'], item['productlink'], item['productprice'])
        try:
            with connection.cursor() as cursor:
                cursor.execute(self.insert_sql, row)
            # The connection is not in autocommit mode by default, so the
            # insert has to be committed explicitly.
            connection.commit()
        except pymysql.MySQLError:
            connection.rollback()
            self.logger.exception("Could not store item %r", item)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement