Advertisement
Guest User

Untitled

a guest
Mar 1st, 2015
194
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.13 KB | None | 0 0
  1. def unique_keeper(f):
  2. @wraps(f)
  3. def wrapper(*args):
  4. _result = f(*args)
  5. self_param = args[0]
  6. if link in self_param:
  7. print link, 'decorated'
  8. continue
  9. else:
  10. print 'no effect'
  11. return wrapper
  12.  
  13. class CarPagesCrawler(BaseCrawler):
  14. def __init__(self):
  15. self.result = []
  16.  
  17. @unique_keeper
  18. def crawl(self, link, maxlevel):
  19. _result = self.result
  20. if maxlevel == 0:
  21. return
  22.  
  23. link_content = self.open_url(link)
  24. # all_links = set([_ for _ in LINK_RE.findall(link_content) if 'http://' in _ if link != _][:LINKS_QUANTITY])
  25.  
  26. if len(all_links):
  27. for found_link in all_links:
  28. for keyword in KEYWORDS:
  29. if self.find_whole_word(keyword, found_link):
  30. print 'KEYWORD FOUND IN LINK'
  31. print (found_link, keyword)
  32. if found_link not in _result:
  33. _result.append(found_link)
  34. else:
  35. print 'KEYWORD NOT FOUND IN LINK'
  36. print 'TRYING TO OPEN LINK'
  37. self.crawl(found_link, maxlevel-1)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement