Pastebin launched a little side project called VERYVIRAL.com, check it out ;-) Want more features on Pastebin? Sign Up, it's FREE!
Guest

Untitled

By: a guest on Jul 23rd, 2012  |  syntax: Python  |  size: 1.11 KB  |  views: 147  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. #New !_____!
  2. def isredditlink(link):
  3.         if re.findall(r'https?://([a-z0-9-]+\.)*reddit\.com(/.*)?',link) != []:
  4.                 return True
  5.         else:
  6.                 return False
  7.        
  8.  
  9. def extractlinks_self(htmltext):
  10.         linklist = []
  11.         links = re.findall(r'(?<=href=")([^"]+)"&gt;(.*?)\b&lt;',htmltext)
  12.         if links !=[]:
  13.                 for link in links:
  14.                         link = list(link)
  15.                         if link[0][:1] == '/':
  16.                                 link[0] = 'http://www.reddit.com'+link[0]
  17.                         if isredditlink(link[0]) == True:
  18.                                 linklist.append(link)                  
  19.         return linklist
  20.  
# Gather the [url, text] pairs of every reddit link in the HTML text.
# NOTE(review): htmltext is not defined in this snippet -- it must be
# supplied by the surrounding code.
links = extractlinks_self(htmltext)
#do stuff
  23.  
  24.  
  25. #Old !_____!
  26. def check_link(link):
  27.         """Checks link to make sure its from reddit.com, return true if it is"""
  28.         found = re.search(r'http://www\.reddit\.com',link)
  29.         if found:
  30.                 return True
  31.         else:
  32.                 return False
  33.  
  34. links = re.findall(r'\[.*\]\(.*\)',selftext)
  35.                        
  36. if links != []:
  37.         reddittext = """"""
  38.         for link in links:
  39.                 linktext = re.search(r'\[([^\]]*)\]',link)
  40.                 linktext = linktext.group()
  41.                 linktext = linktext[1:-1]                              
  42.                 linkurl = re.search(r'\(([^)]*)\)',link)
  43.                 linkurl = linkurl.group()
  44.                 linkurl = linkurl[1:-1]
  45.                 if check_link(linkurl) == True:
  46.                         #Do stuff
clone this paste RAW Paste Data