Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #New !_____!
def isredditlink(link):
    """Return True if *link* is an http(s) URL hosted on reddit.com.

    The host may be ``reddit.com`` itself or any subdomain of it
    (``www.reddit.com``, ``old.reddit.com``, ...).

    The pattern is anchored at the start of *link* and the host must be
    followed by ``/``, ``?``, ``#``, ``:`` or the end of the string, so a
    URL that merely embeds a reddit address (for example in a query
    string) or that uses a look-alike host such as
    ``reddit.com.example.org`` is rejected.  Scheme and host are compared
    case-insensitively.
    """
    pattern = r'https?://([a-z0-9-]+\.)*reddit\.com(?=[/?#:]|$)'
    return re.match(pattern, link, re.IGNORECASE) is not None
def extractlinks_self(htmltext):
    """Return the reddit links found in *htmltext* as ``[url, text]`` pairs.

    An anchor is only recognised when its ``href="..."`` value is
    immediately followed by ``">``; the link text is captured lazily up to
    the first ``<`` that directly follows a word boundary.

    Site-relative URLs (starting with a single ``/``) are resolved against
    ``http://www.reddit.com``.  Protocol-relative URLs (starting with
    ``//``) already name their own host, so they only get a scheme
    prepended instead of being glued onto the reddit host.  Links that
    ``isredditlink`` rejects are dropped.

    Returns an empty list when nothing matches.
    """
    linklist = []
    for url, text in re.findall(r'(?<=href=")([^"]+)">(.*?)\b<', htmltext):
        if url.startswith('//'):
            # "//host/path" carries its own host: add a scheme only.
            url = 'http:' + url
        elif url.startswith('/'):
            url = 'http://www.reddit.com' + url
        if isredditlink(url):
            linklist.append([url, text])
    return linklist
- links = extractlinks_self(htmltext)  # [url, link text] pairs for the reddit.com links in htmltext (htmltext is not defined in this snippet)
- #do stuff
- #Old !_____!
def check_link(link):
    """Return True if *link* is a URL on www.reddit.com, False otherwise.

    The link must start with ``http://www.reddit.com`` or
    ``https://www.reddit.com``, and the host must be followed by ``/``,
    ``?``, ``#``, ``:`` or the end of the string.  A URL that only
    contains the reddit address somewhere inside it, or whose host merely
    begins with ``www.reddit.com``, is rejected.
    """
    return re.match(r'https?://www\.reddit\.com(?=[/?#:]|$)', link) is not None
# Pull every markdown link "[text](url)" out of selftext (defined outside
# this snippet).  The text part may not contain "]" and the url part may not
# contain ")", so several links on one line are matched one by one instead
# of being merged into a single greedy match.
_md_links = list(re.finditer(r'\[([^\]]*)\]\(([^)]*)\)', selftext))
links = [found.group(0) for found in _md_links]
if links:
    reddittext = """"""
    for link, found in zip(links, _md_links):
        # link is the whole "[text](url)" string; split it into its parts.
        linktext, linkurl = found.groups()
        if check_link(linkurl):
            pass  # Do stuff
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement