View difference between Paste ID: eK5FytEt and GSBhRsvM
SHOW: | | - or go back to the newest paste.
1
from scrapy.spider import Spider
2
from scrapy.shell import inspect_response
3
from scrapy.http import Request,FormRequest
4
from scrapy.exceptions import CloseSpider
5
from boroughScrper.items import idoxpaSpiderItem
6
from scrapy import log
7
import urllib, time, MySQLdb, sys
8
9
# Timestamp string built once, when the module is loaded, using the
# locale's date ("%x") and time ("%X") representations.
today = time.strftime("%x %X")
10
11
# Spider that submits the 'searchCriteriaForm' search form once for every
# parish/month combination found on the response and routes each result page
# to parse_results.
# NOTE(review): this text is a paste diff view; rows tagged "N-" and "N+" are
# the old and new versions of the same line, and both appear below.
class idoxpaSpider(Spider):
12-
  # Set of pipeline classes associated with this spider.
  # NOTE(review): the name `pipeline` is not imported in the visible part of
  # the paste — confirm where it comes from.
  pipeline = set([pipeline.Insert])
12+
  pipeline = set([pipeline.Insert,])
13
14
  name = 'idoxpaSpider'
15-
  domain = 'https://www.westminster.gov.uk'
15+
  domain = 'https://www.example.com'
16
17-
  # NOTE(review): the query string below has a literal tab between "?" and
  # "action" — looks unintended; confirm before using this URL.
  base_url = ["http://idoxpa.westminster.gov.uk/online-applications/pagedSearchResults.do?	action=page&searchCriteria.page"]
17+
  start_urls = ["http://www.example.com/online-applications/search.do?action=monthlyList"]
18
  ###
19-
  start_urls = ["http://idoxpa.westminster.gov.uk/online-applications/search.do?action=monthlyList"]
19+
20
    # NOTE(review): the `def` line that should enclose these loops (presumably
    # `parse(self, response)`) is missing from this view; the loops use
    # `response`.
    # Outer loop: every option value of the element with id 'parish'.
    for parish in response.xpath("//*[@id='parish']/option/@value").extract():
21
      # Inner loop: the visible text of every option of the element with id 'month'.
      for month in response.xpath("//*[@id='month']/option/text()").extract():
22
        # Build a form submission from the form named 'searchCriteriaForm',
        # overriding the listed fields; the reply goes to parse_results.
        yield FormRequest.from_response(response,
23
                          formname = 'searchCriteriaForm',
24
                          formdata = { 'searchCriteria.parish':parish, 'month':month, 'dateType':'DC_Validated', 'searchType':'Application' },
25
                          callback = self.parse_results)
26
27
  # Callback for each submitted search; currently only hands the response to
  # Scrapy's inspect_response debugging helper and produces no items.
  # NOTE(review): current Scrapy documents the signature as
  # inspect_response(response, spider) — confirm against the installed version.
  def parse_results(self, response):
28
    inspect_response(response)
29
30
31
# Pipeline that opens a MySQL connection when constructed and currently passes
# items through unchanged.
# NOTE(review): `Pipeline` and `check_spider_pipeline` are not defined or
# imported in the visible part of the paste — confirm where they come from.
class Insert(Pipeline):
32
  # Connect to MySQL (utf8, unicode results) and keep the connection and a
  # cursor on the instance.
  # NOTE(review): the `<>` arguments look like redacted placeholders and are
  # not valid Python as written; real credentials must be supplied.
  # NOTE(review): nothing visible here closes the connection — verify it is
  # released elsewhere.
  def __init__(self):
33
    self.conn = MySQLdb.connect(user=<>, passwd=<>, db=<>, host=<>, charset="utf8", use_unicode=True)
34
    self.cursor = self.conn.cursor()
35
36
  @check_spider_pipeline
37
  # Returns the item unchanged; no database write is performed in this body.
  def process_item(self, item, spider):
38
    return item