Advertisement
sreejith2904

Untitled

May 30th, 2019
176
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.32 KB | None | 0 0
  1. import re
  2.  
  3. exp = re.compile("([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?")
  4.  
  5.  
  6. url = "makeuseof.comwhatnerd.comAPP:itunes.apple.com/us/app/makeuseof-tech-chats-news/id1016438893?mt=8APP:play.google.com/store/apps/details?id=com.makeuseof.makeuseof&hl=en, makeuseof.comwhatnerd.comblocksdecoded.comAPP:itunes.apple.com/us/app/makeuseof-tech-chats-news/id1016438893?mt=8APP:play.google.com/store/apps/details?id=com.makeuseof.makeuseof&hl=en, makeuseof.com, makeuseof.com, APP:itunes.apple.com/us/app/makeuseof-tech-chats-news/id1016438893?mt=8, APP:play.google.com/store/apps/details?id=com.makeuseof.makeuseof&hl=en"
  7. embedded = re.findall(exp, url)
  8.  
  9. all_urls = []
  10.  
  11. for _url in embedded:
  12.     tmp = []
  13.     for element in _url:
  14.         _foo = element.split(".com")
  15.         foo = list(map(lambda x: x + ".com", _foo))
  16.        
  17.         foo[-1] = foo[-1].replace(".com", "")
  18.         tmp.extend(foo)
  19.     all_urls.extend(tmp)
  20.  
  21. all_urls
  22.  
  23. output = []
  24. to_delete = []
  25.  
  26. for index, url in enumerate(all_urls):
  27.     if url.startswith("/"):
  28.         tmp = all_urls[index - 1] + url
  29.         output.append(tmp)
  30.         if all_urls[index - 1].strip() != "":
  31.             to_delete.append(all_urls[index - 1])
  32.     else:
  33.         output.append(url)
  34.        
  35. output = list(filter(lambda x: x.strip() != "", output))
  36.  
  37. output
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement