Guest User

Untitled

a guest
Apr 6th, 2015
250
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.50 KB | None | 0 0
import os
import sys
import time
import urllib

import MySQLdb
from scrapy import log
from scrapy.exceptions import CloseSpider
from scrapy.http import Request, FormRequest
from scrapy.shell import inspect_response
from scrapy.spider import Spider

from boroughScrper.items import idoxpaSpiderItem
  8.  
  9. today = time.strftime("%x %X")
  10.  
  11. class idoxpaSpider(Spider):
  12.   pipeline = set([pipeline.Insert])
  13.  
  14.   name = 'idoxpaSpider'
  15.   domain = 'https://www.westminster.gov.uk'
  16.  
  17.   base_url = ["http://idoxpa.westminster.gov.uk/online-applications/pagedSearchResults.do?  action=page&searchCriteria.page"]
  18.  
  19.   start_urls = ["http://idoxpa.westminster.gov.uk/online-applications/search.do?action=monthlyList"]
  20.   ###
  21.   def parse(self, response):
  22.     for parish in response.xpath("//*[@id='parish']/option/@value").extract():
  23.       for month in response.xpath("//*[@id='month']/option/text()").extract():
  24.         yield FormRequest.from_response(response,
  25.                           formname = 'searchCriteriaForm',
  26.                           formdata = { 'searchCriteria.parish':parish, 'month':month, 'dateType':'DC_Validated', 'searchType':'Application' },
  27.                           callback = self.parse_results)
  28.  
  29.   def parse_results(self, response):
  30.     inspect_response(response)
  31.  
  32.  
  33. class Insert(Pipeline):
  34.   def __init__(self):
  35.     self.conn = MySQLdb.connect(user=<>, passwd=<>, db=<>, host=<>, charset="utf8", use_unicode=True)
  36.     self.cursor = self.conn.cursor()
  37.  
  38.   @check_spider_pipeline
  39.   def process_item(self, item, spider):
  40.     return item
Advertisement
Add Comment
Please, Sign In to add comment