Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from scrapy.item import Item, Field
class Eb5Item(Item):
    """Scrapy item holding a single scraped field, ``description``."""

    # Only field declared on this version of the item.
    description = Field()
- from scrapy.contrib.spiders import CrawlSpider, Rule
- from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
- from scrapy.selector import HtmlXPathSelector
- from parser_module.items import Eb5Item
class Eb5Spider(CrawlSpider):
    """Crawl eb5info.com and emit one ``Eb5Item`` per ``<ul><li><a href>`` link.

    Each item records the URL of the page it was found on and any
    phone-number-shaped strings found in the link's ``href`` value.
    """

    name = 'eb5'
    allowed_domains = ["eb5info.com"]
    start_urls = ["http://eb5info.com/regional-centers"]

    # A CrawlSpider only dispatches responses to callbacks through its rules;
    # with no rules declared, parse_item was never invoked.
    # NOTE(review): this follows every in-domain link -- narrow the extractor
    # with allow=... once the target URL pattern is known.
    rules = (
        Rule(SgmlLinkExtractor(), callback='parse_item', follow=True),
    )

    # NOTE(review): the original XPath "()" was not a valid expression, so the
    # intended phone source is unknown. As a working stand-in, phone-like
    # strings (e.g. from "tel:" links) are pulled out of each href with this
    # regular expression -- confirm against the real page markup.
    PHONE_PATTERN = r'\+?\d[\d\s().-]{6,}\d'

    def parse_start_url(self, response):
        """Run the start page through ``parse_item`` as well.

        Rules only apply to links discovered on a page, so without this
        override the start URL itself would never be scraped.
        """
        return self.parse_item(response)

    def parse_item(self, response):
        """Build the list of items for one downloaded page.

        :param response: the Scrapy response for the page being scraped.
        :returns: a list with one ``Eb5Item`` per ``//ul/li/a/@href`` match;
            ``phone`` is a (possibly empty) list of matched strings.
        """
        hxs = HtmlXPathSelector(response)
        items = []
        for site in hxs.select('//ul/li/a/@href'):
            item = Eb5Item()
            # NOTE(review): requires ``url`` to be a declared field on Eb5Item.
            item['url'] = response.url
            item['phone'] = site.re(self.PHONE_PATTERN)
            items.append(item)
        return items
- from scrapy.item import Item, Field
class Eb5Item(Item):
    """Scrapy item describing one scraped entry.

    Fields:
        url: address of the page the entry was scraped from.
        name, email, description, phone: declared here; their exact content
            depends on the spider that fills them in.
    """

    # ``url`` is assigned by the spider (item['url'] = response.url); Scrapy
    # items raise KeyError when an undeclared field is set, so it must exist.
    url = Field()
    # ``name`` was previously declared twice; a single declaration suffices.
    name = Field()
    email = Field()
    description = Field()
    phone = Field()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement