daily pastebin goal
68%
SHARE
TWEET

taguqoxora.py

a guest Feb 10th, 2015 204 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/python
  2.  
  3. """
  4. Turn a common_crawl identifier into a normal-looking URL
  5. """
  6.  
  7. import sys
  8. import re
  9.  
  10. def is_port(s):
  11.         if re.findall(r"\A\d+\Z", s):
  12.                 if int(s) <= 65535:
  13.                         return True
  14.         return False
  15.  
  16. for line in sys.stdin:
  17.         url = line.rstrip()
  18.         rest, schema =  url.rsplit(":", 1)
  19.  
  20.         try:
  21.                 domain, path = rest.split('/', 1)
  22.         except ValueError:
  23.                 domain = rest
  24.                 path = ''
  25.  
  26.         try:
  27.                 maybe_path, maybe_port = path.rsplit(":", 1)
  28.         except ValueError:
  29.                 port = None
  30.         else:
  31.                 if is_port(maybe_port):
  32.                         path = maybe_path
  33.                         port = maybe_port
  34.                 else:
  35.                         port = None
  36.  
  37.         sys.stdout.write(schema + '://' + '.'.join(domain.split('.')[::-1]) + (':' + port if port is not None else '') + '/' + path + "\n")
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top