Advertisement
Guest User

Untitled

a guest
Oct 30th, 2014
136
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.98 KB | None | 0 0
  1. from scrapy.item import Item, Field
  2.  
  3. class Eb5Item(Item):
  4. description = Field()
  5.  
  6. from scrapy.contrib.spiders import CrawlSpider, Rule
  7. from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
  8. from scrapy.selector import HtmlXPathSelector
  9. from parser_module.items import Eb5Item
  10.  
  11. class Eb5Spider(CrawlSpider):
  12. name = 'eb5'
  13. allowed_domains = ["eb5info.com"]
  14. start_urls = ["http://eb5info.com/regional-centers"]
  15. def parse_item(self, response):
  16. hxs = HtmlXPathSelector(response)
  17. sites = hxs.select('//ul/li/a/@href')
  18. items = []
  19. for site in sites:
  20. item = Eb5Item()
  21. item['url'] = response.url
  22. item['phone'] = site.select("()").extract()
  23. items.append(item)
  24. return (items)
  25.  
  26. from scrapy.item import Item, Field
  27.  
  28. class Eb5Item(Item):
  29. # define the fields for your item here like:
  30. name = Field()
  31. email = Field()
  32. name = Field()
  33. description = Field()
  34. phone = Field()
  35. pass
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement