Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import sys
import time
import urllib

import MySQLdb
from scrapy import log
from scrapy.exceptions import CloseSpider
from scrapy.http import Request, FormRequest
from scrapy.shell import inspect_response
from scrapy.spider import Spider

from boroughScrper.items import idoxpaSpiderItem
# Timestamp captured once at import time, in the *locale's* date/time
# representation ("%x %X" — e.g. "01/31/24 13:45:02" under en_US).
# NOTE(review): locale-dependent and ambiguous to parse; an ISO format
# ("%Y-%m-%d %H:%M:%S") would be safer if downstream code allows — TODO confirm.
today = time.strftime("%x %X")
class idoxpaSpider(Spider):
    """Spider for an Idox 'online-applications' planning portal.

    ``parse`` reads the parish and month drop-downs from the monthly-list
    search page and submits the search form once per (parish, month)
    combination; every results page is handed to ``parse_results``.
    """

    # NOTE(review): `pipeline` is not defined when this class body is
    # evaluated, so this line raises NameError on import.  It presumably
    # should reference the Insert pipeline class (e.g. ``{Insert}``
    # assigned after both classes exist) — TODO confirm against the
    # check_spider_pipeline decorator's contract.
    pipeline = set([pipeline.Insert, ])
    name = 'idoxpaSpider'
    # NOTE(review): scheme (https) differs from start_urls (http) — confirm intent.
    domain = 'https://www.example.com'
    start_urls = ["http://www.example.com/online-applications/search.do?action=monthlyList"]

    def parse(self, response):
        """Yield one form submission per (parish, month) pair.

        The form field names ('searchCriteria.parish', 'month',
        'dateType', 'searchType') mirror the portal's search form.
        """
        for parish in response.xpath("//*[@id='parish']/option/@value").extract():
            for month in response.xpath("//*[@id='month']/option/text()").extract():
                yield FormRequest.from_response(
                    response,
                    formname='searchCriteriaForm',
                    formdata={
                        'searchCriteria.parish': parish,
                        'month': month,
                        'dateType': 'DC_Validated',
                        'searchType': 'Application',
                    },
                    callback=self.parse_results,
                )

    def parse_results(self, response):
        """Open an interactive shell on a results page (debug hook).

        Fix: ``inspect_response`` requires the spider as its second
        argument; calling it with only the response raises TypeError on
        current Scrapy versions.
        """
        inspect_response(response, self)
class Insert(Pipeline):
    """Item pipeline intended to insert scraped items into MySQL.

    Opens a UTF-8 MySQL connection on construction; ``process_item``
    currently passes items through unchanged.
    """

    def __init__(self):
        # Fix: the original hard-coded literal `<>` placeholder tokens
        # for the credentials, which is a SyntaxError.  Read the
        # connection settings from the environment instead so the module
        # is importable; set SCRAPER_DB_* before running.
        self.conn = MySQLdb.connect(
            user=os.environ.get('SCRAPER_DB_USER', ''),
            passwd=os.environ.get('SCRAPER_DB_PASSWD', ''),
            db=os.environ.get('SCRAPER_DB_NAME', ''),
            host=os.environ.get('SCRAPER_DB_HOST', 'localhost'),
            charset="utf8",
            use_unicode=True,
        )
        self.cursor = self.conn.cursor()

    @check_spider_pipeline
    def process_item(self, item, spider):
        """Return the item unchanged.

        NOTE(review): nothing is written to the database yet — an INSERT
        via ``self.cursor`` presumably belongs here; TODO confirm intent.
        The cursor/connection are also never closed (no ``close_spider``
        hook) — consider adding one.
        """
        return item
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement