Advertisement
Guest User

Untitled

a guest
Jul 24th, 2014
170
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.70 KB | None | 0 0
  1. from scrapy.log import *
  2. from crawler_bhinneka.settings import *
  3. from crawler_bhinneka.items import *
  4. import pprint
  5. from MySQLdb import escape_string
  6. import urlparse
  7.  
  def complete_url(string):
      """Return an absolute URL for *string*, resolved against the site root.

      Root-relative paths ("/page.php") are appended to the host as before.
      Paths without a leading slash ("page.php") get the missing slash instead
      of being glued onto the host name, and hrefs that are already absolute
      are returned unchanged rather than having the host prepended again.
      An empty *string* yields the bare site root.
      """
      # Imported locally so the function works on both Python 3 and the
      # Python 2 ``urlparse`` module this file was written against.
      try:
          from urllib.parse import urljoin
      except ImportError:
          from urlparse import urljoin

      return urljoin("http://www.example.com", string)
  11.  
  12.  
  class BhinnekaSpider(CrawlSpider):
      """Spider that prints the absolute URL of each brand link on the listing page.

      NOTE(review): Scrapy's documentation cautions against overriding
      ``parse`` on a CrawlSpider that relies on ``rules``; no rules are
      defined here, so this appears harmless -- confirm the base class choice.
      """

      name = 'bhinneka_spider'
      start_urls = ['http://www.example.com/listing.php?']

      def parse(self, response):
          """Print the completed URL of every matching anchor in *response*."""
          selector = HtmlXPathSelector(response)

          # Select the href of each anchor inside a <td class="lcbrand"> cell
          # (per the original comment, these lead to detail pages).
          for href_node in selector.select('//td[@class="lcbrand"]/a/@href'):
              print("my Url Link : ", complete_url(href_node.extract()))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement