Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Split a blob of run-together host names and app links into fragments.
#
# Pipeline:
#   1. `exp` finds host-like tokens (labels joined by dots) together with an
#      optional path/query tail.
#   2. Every captured group is cut after each ".com" (`split_on_com`).
#   3. Fragments beginning with "/" are glued onto the fragment that precedes
#      them (`merge_path_fragments`); blank entries are dropped.
#
# Results are left in the module-level names `all_urls`, `output` and
# `to_delete`.
import re

# Raw string so that `\w` and `\.` reach the regex engine untouched instead of
# being treated as (invalid) string escapes.
exp = re.compile(
    r"([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?"
)
url = "makeuseof.comwhatnerd.comAPP:itunes.apple.com/us/app/makeuseof-tech-chats-news/id1016438893?mt=8APP:play.google.com/store/apps/details?id=com.makeuseof.makeuseof&hl=en, makeuseof.comwhatnerd.comblocksdecoded.comAPP:itunes.apple.com/us/app/makeuseof-tech-chats-news/id1016438893?mt=8APP:play.google.com/store/apps/details?id=com.makeuseof.makeuseof&hl=en, makeuseof.com, makeuseof.com, APP:itunes.apple.com/us/app/makeuseof-tech-chats-news/id1016438893?mt=8, APP:play.google.com/store/apps/details?id=com.makeuseof.makeuseof&hl=en"


def split_on_com(text):
    """Cut *text* after every ".com" and return the pieces as a list.

    Each piece that was followed by ".com" keeps that suffix; the trailing
    remainder is returned unchanged and may be the empty string, e.g.
    ``"a.comb.comAPP"`` -> ``["a.com", "b.com", "APP"]`` and
    ``"a.com"`` -> ``["a.com", ""]``.
    """
    pieces = text.split(".com")
    return [piece + ".com" for piece in pieces[:-1]] + [pieces[-1]]


def merge_path_fragments(fragments):
    """Glue fragments starting with "/" onto the fragment before them.

    Returns a ``(merged, absorbed)`` pair of lists:

    * ``merged`` holds one entry per input fragment -- either the fragment
      itself or ``previous + fragment`` when it starts with "/" -- with
      blank/whitespace-only entries removed.
    * ``absorbed`` lists every non-blank fragment that was used as a prefix.

    The first fragment has no predecessor, so a leading "/" fragment is kept
    on its own rather than being joined to the last element of the list.
    """
    merged = []
    absorbed = []
    previous = ""
    for fragment in fragments:
        if fragment.startswith("/"):
            merged.append(previous + fragment)
            if previous.strip():
                absorbed.append(previous)
        else:
            merged.append(fragment)
        previous = fragment
    return [entry for entry in merged if entry.strip()], absorbed


# Each findall() result is a (host, tail) tuple; the tail is "" when the
# optional second group did not take part in the match.
embedded = exp.findall(url)

all_urls = []
for groups in embedded:
    for group in groups:
        all_urls.extend(split_on_com(group))

# NOTE(review): `to_delete` is collected but never applied to `output`, so a
# prefix fragment appears both on its own and inside the merged entry.
# NOTE(review): a host ending in ".com" leaves an empty remainder that sits
# between it and its path, so such a host is not joined to the path here.
# Confirm the intended result before changing either behaviour.
output, to_delete = merge_path_fragments(all_urls)

if __name__ == "__main__":
    print(output)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement