Advertisement
Guest User

Untitled

a guest
Apr 6th, 2015
476
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. from scrapy.spider import Spider
  2. from scrapy.shell import inspect_response
  3. from scrapy.http import Request,FormRequest
  4. from scrapy.exceptions import CloseSpider
  5. from boroughScrper.items import idoxpaSpiderItem
  6. from scrapy import log
  7. import urllib, time, MySQLdb, sys
  8.  
  9. today = time.strftime("%x %X")
  10.  
  11. class idoxpaSpider(Spider):
  12.   pipeline = set([pipeline.Insert,])
  13.  
  14.   name = 'idoxpaSpider'
  15.   domain = 'https://www.example.com'
  16.  
  17.   start_urls = ["http://www.example.com/online-applications/search.do?action=monthlyList"]
  18.   ###
  19.   def parse(self, response):
  20.     for parish in response.xpath("//*[@id='parish']/option/@value").extract():
  21.       for month in response.xpath("//*[@id='month']/option/text()").extract():
  22.         yield FormRequest.from_response(response,
  23.                           formname = 'searchCriteriaForm',
  24.                           formdata = { 'searchCriteria.parish':parish, 'month':month, 'dateType':'DC_Validated', 'searchType':'Application' },
  25.                           callback = self.parse_results)
  26.  
  27.   def parse_results(self, response):
  28.     inspect_response(response)
  29.  
  30.  
  31. class Insert(Pipeline):
  32.   def __init__(self):
  33.     self.conn = MySQLdb.connect(user=<>, passwd=<>, db=<>, host=<>, charset="utf8", use_unicode=True)
  34.     self.cursor = self.conn.cursor()
  35.  
  36.   @check_spider_pipeline
  37.   def process_item(self, item, spider):
  38.     return item
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement