#New !_____!
def isredditlink(link):
    """Return True if ``link`` is an http(s) URL whose host is reddit.com.

    The host may be ``reddit.com`` itself or any subdomain of it (for
    example ``www.reddit.com``), optionally followed by a numeric port.

    The URL has to begin at the start of ``link`` and the host has to end
    right after ``reddit.com`` (end of string, or one of ``/``, ``?``,
    ``#``).  Look-alike hosts such as ``reddit.com.evil.com`` and URLs that
    merely embed a reddit address in their path or query are rejected.

    Matching is case-sensitive: only lower-case schemes and hosts match.
    """
    pattern = r'https?://(?:[a-z0-9-]+\.)*reddit\.com(?::[0-9]+)?(?:[/?#]|$)'
    # re.match anchors at the start of the string; the trailing group makes
    # sure "reddit.com" is the end of the host, not a prefix of a longer one.
    return re.match(pattern, link) is not None
def extractlinks_self(htmltext):
    """Extract the reddit links from a fragment of HTML.

    Anchors are found with a regular expression, not an HTML parser.  It
    only matches when the ``href="..."`` value is immediately followed by
    ``>`` (i.e. ``href`` is the last attribute of the tag), and the link
    text is captured up to the first ``<`` that directly follows a word
    character.

    Hrefs are normalised before being checked with ``isredditlink``:

    * ``//host/path`` (protocol-relative) gets ``http:`` prepended, so the
      host it names is the one that is checked;
    * ``/path`` (site-relative) gets ``http://www.reddit.com`` prepended.

    Returns a list of ``[url, text]`` lists, in document order, containing
    only the links that ``isredditlink`` accepts.  The list is empty when
    nothing matches.
    """
    anchors = re.findall(r'(?<=href=")([^"]+)">(.*?)\b<', htmltext)
    linklist = []
    for href, text in anchors:
        if href.startswith('//'):
            # Protocol-relative URLs carry their own host; treating them as
            # a reddit path would let any host through the check below.
            href = 'http:' + href
        elif href.startswith('/'):
            href = 'http://www.reddit.com' + href
        if isredditlink(href):
            linklist.append([href, text])
    return linklist
# Collect the [url, text] pairs for the reddit links found in htmltext.
# htmltext is not defined in this snippet - presumably the rendered HTML of
# the post body; confirm against the caller.
links = extractlinks_self(htmltext)
# Do stuff with links (elided in this snippet)
#Old !_____!
def check_link(link):
    """Check that ``link`` is from reddit.com; return True if it is.

    Accepts ``http://`` and ``https://`` URLs whose host is ``reddit.com``
    or any subdomain of it (``www.reddit.com`` included), optionally with a
    numeric port.  The URL must begin at the start of ``link`` and the host
    must end right after ``reddit.com``, so look-alike hosts such as
    ``www.reddit.com.evil.com`` are rejected.

    Matching is case-sensitive: only lower-case schemes and hosts match.
    """
    pattern = r'https?://(?:[a-z0-9-]+\.)*reddit\.com(?::[0-9]+)?(?:[/?#]|$)'
    # re.match anchors at the start; the final group pins the end of the host.
    return re.match(pattern, link) is not None
# Old approach: scan selftext for markdown inline links of the form
# [text](url) and split each match into its text and its URL.
# selftext is not defined in this snippet - presumably the post's markdown
# source; confirm against the caller.
# NOTE(review): indentation was lost in this paste, so the nesting of the
# statements below has to be reconstructed before this can run.
# NOTE(review): both `.*` are greedy, so two links on the same line come back
# as ONE match running from the first '[' to the last ')'.
links = re.findall(r'\[.*\]\(.*\)',selftext)
if links != []:
# Empty string; not referenced again in the visible lines.
reddittext = """"""
for link in links:
# Link text: first [...] run in the match; .group() includes the brackets,
# which the [1:-1] slice then removes.
linktext = re.search(r'\[([^\]]*)\]',link)
linktext = linktext.group()
linktext = linktext[1:-1]
# Link URL: first (...) run in the match, parentheses sliced off likewise.
linkurl = re.search(r'\(([^)]*)\)',link)
linkurl = linkurl.group()
linkurl = linkurl[1:-1]
if check_link(linkurl) == True:
# Do stuff (body elided in this snippet)