Advertisement
Not a member of Pastebin yet? Sign up; it unlocks many cool features!
- from scrapy.log import *
- from crawler_bhinneka.settings import *
- from crawler_bhinneka.items import *
- import pprint
- from MySQLdb import escape_string
- import urlparse
def complete_url(string):
    """Return the absolute URL for a site-relative path.

    The site root ``http://www.example.com`` is prepended to *string* by
    plain concatenation. Nothing is normalised, so *string* should start
    with ``/`` to produce a well-formed URL.
    """
    return "http://www.example.com" + string
class BhinnekaSpider(CrawlSpider):
    """Spider that prints the links found in the listing page's
    ``td.lcbrand`` cells as absolute URLs."""

    name = 'bhinneka_spider'
    start_urls = [
        'http://www.example.com/listing.php?'
    ]

    # NOTE(review): Scrapy's documentation warns that CrawlSpider uses
    # ``parse`` internally to apply its ``rules``; overriding it here means
    # any rules added to this class later would not run -- confirm intent.
    def parse(self, response):
        """Print the completed URL of each ``td.lcbrand`` anchor href.

        Nothing is returned or yielded; the links are only printed.
        """
        hxs = HtmlXPathSelector(response)
        # HXS to find url that goes to detail page
        items = hxs.select('//td[@class="lcbrand"]/a/@href')
        for item in items:
            link = item.extract()
            # Build one string before printing: with the Python 2 print
            # statement, print("a", b) would emit a tuple repr instead.
            print("my Url Link : %s" % complete_url(link))
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement