from scrapy.spider import Spider
from scrapy.shell import inspect_response
from scrapy.http import Request, FormRequest
from scrapy.exceptions import CloseSpider
from boroughScrper.items import idoxpaSpiderItem
from scrapy import log
import urllib, time, MySQLdb, sys

today = time.strftime("%x %X")


class Insert(object):
    """Item pipeline that stores scraped items in MySQL.

    Scrapy item pipelines are plain classes; no special base class is needed.
    Defined before the spider so the spider's `pipeline` attribute can refer
    to it directly.
    """

    def __init__(self):
        # Database credentials are redacted placeholders.
        self.conn = MySQLdb.connect(user=<>, passwd=<>, db=<>, host=<>,
                                    charset="utf8", use_unicode=True)
        self.cursor = self.conn.cursor()

    # check_spider_pipeline is a project-level decorator (not part of Scrapy)
    # that runs this pipeline only for spiders that opt in via `pipeline`.
    @check_spider_pipeline
    def process_item(self, item, spider):
        return item


class idoxpaSpider(Spider):
    # Only the Insert pipeline should handle items from this spider.
    pipeline = set([Insert])
    name = 'idoxpaSpider'
    domain = 'https://www.example.com'
    start_urls = ["http://www.example.com/online-applications/search.do?action=monthlyList"]

    def parse(self, response):
        # Submit the monthly-list search form once per parish/month pair.
        for parish in response.xpath("//*[@id='parish']/option/@value").extract():
            for month in response.xpath("//*[@id='month']/option/text()").extract():
                yield FormRequest.from_response(
                    response,
                    formname='searchCriteriaForm',
                    formdata={
                        'searchCriteria.parish': parish,
                        'month': month,
                        'dateType': 'DC_Validated',
                        'searchType': 'Application',
                    },
                    callback=self.parse_results)

    def parse_results(self, response):
        # Drop into the Scrapy shell to inspect the search results page.
        inspect_response(response, self)
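

# --- Sketch: check_spider_pipeline (an assumption, not from the original code) ---
# Scrapy itself does not provide check_spider_pipeline; it is a common helper
# used together with a per-spider `pipeline` set, as above. If the project does
# not already define it, a minimal version could look like the sketch below.
# It would need to be defined (or imported) before the Insert class above, and
# the pipeline must still be enabled via ITEM_PIPELINES in settings.py for
# Scrapy to invoke it at all.
import functools


def check_spider_pipeline(process_item_method):
    """Run the wrapped process_item only when this pipeline class is listed
    in the spider's `pipeline` attribute; otherwise pass the item through
    unchanged."""
    @functools.wraps(process_item_method)
    def wrapper(self, item, spider):
        if self.__class__ in getattr(spider, 'pipeline', set()):
            return process_item_method(self, item, spider)
        return item
    return wrapper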