Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def unique_keeper(f):
    """Make a ``method(self, link, ...)`` run at most once per link per instance.

    The links already handled are remembered in a set stored on the instance
    under ``_unique_keeper_seen``.  A repeated link is reported and skipped;
    a new link is recorded and passed through to ``f``.

    Args:
        f: The method to wrap.  Its first argument must be the instance and
            its second (positional, or keyword ``link``) the link.

    Returns:
        The wrapping function.  It returns whatever ``f`` returns for a new
        link, and ``None`` without calling ``f`` for a repeated one.
    """
    @wraps(f)
    def wrapper(*args, **kwargs):
        self_param = args[0]
        link = args[1] if len(args) > 1 else kwargs.get('link')

        # Create the per-instance registry lazily so the decorated class does
        # not need to know about it in its own __init__.
        seen = getattr(self_param, '_unique_keeper_seen', None)
        if seen is None:
            seen = set()
            self_param._unique_keeper_seen = seen

        if link in seen:
            # Single pre-formatted argument: prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print('%s decorated' % (link,))
            return None

        print('no effect')
        # Record the link before calling f so that recursive calls made from
        # inside f already see it as handled.
        seen.add(link)
        return f(*args, **kwargs)

    return wrapper
class CarPagesCrawler(BaseCrawler):
    """Crawler that collects links in which one of ``KEYWORDS`` is found.

    Matching links are accumulated, without duplicates, in ``self.result``.

    NOTE(review): ``open_url`` and ``find_whole_word`` are not defined in
    this class; presumably they come from ``BaseCrawler`` -- confirm.
    """

    def __init__(self):
        # NOTE(review): BaseCrawler.__init__ is not called here; confirm the
        # base class needs no initialisation of its own.
        self.result = []

    @unique_keeper
    def crawl(self, link, maxlevel):
        """Follow links found at ``link``, recording those that match a keyword.

        NOTE(review): the source this was recovered from had lost its
        indentation.  The nesting below (the "not found" message per keyword,
        the recursion once per found link) is a reconstruction -- confirm
        against the intended behaviour.

        Args:
            link: Address passed to ``self.open_url``.
            maxlevel: Remaining recursion depth; nothing is done when it is 0.

        Returns:
            None.  Matches are appended to ``self.result``.
        """
        if maxlevel == 0:
            return

        link_content = self.open_url(link)

        # Keep only http:// links other than the page itself, cap the list at
        # LINKS_QUANTITY entries, then de-duplicate.
        candidates = [
            url for url in LINK_RE.findall(link_content)
            if 'http://' in url and url != link
        ]
        all_links = set(candidates[:LINKS_QUANTITY])

        for found_link in all_links:
            for keyword in KEYWORDS:
                if self.find_whole_word(keyword, found_link):
                    print('KEYWORD FOUND IN LINK')
                    # Double parentheses: print the tuple itself, which is
                    # what the Python 2 statement form printed as well.
                    print((found_link, keyword))
                    if found_link not in self.result:
                        self.result.append(found_link)
                else:
                    print('KEYWORD NOT FOUND IN LINK')
            print('TRYING TO OPEN LINK')
            self.crawl(found_link, maxlevel - 1)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement