CryptoJones

urldecoder.py

May 11th, 2023
283
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.04 KB | None | 0 0
  1. #!/usr/bin/env python
  2. __author__ = 'Eric Van Cleve'
  3. __copyright__ = 'Copyright 2019, Proofpoint Inc'
  4. __license__ = 'GPL v.3'
  5. __version__ = '3.0.1'
  6. __email__ = '[email protected]'
  7. __status__ = 'Production'
  8.  
  9.  
  10. import sys
  11. import re
  12. import string
  13. from argparse import ArgumentParser
  14. from base64 import urlsafe_b64decode
  15. if sys.version_info[0] < 3:
  16.     from urllib import unquote
  17.     import HTMLParser
  18.     htmlparser = HTMLParser.HTMLParser()
  19.     unescape = htmlparser.unescape
  20.     from string import maketrans
  21. else:
  22.     from urllib.parse import unquote
  23.     from html import unescape
  24.     maketrans = str.maketrans
  25.  
  26.  
  27. class URLDefenseDecoder(object):
  28.  
  29.     @staticmethod
  30.     def __init__():
  31.         URLDefenseDecoder.ud_pattern = re.compile(r'https://urldefense(?:\.proofpoint)?\.com/(v[0-9])/')
  32.         URLDefenseDecoder.v1_pattern = re.compile(r'u=(?P<url>.+?)&k=')
  33.         URLDefenseDecoder.v2_pattern = re.compile(r'u=(?P<url>.+?)&[dc]=')
  34.         URLDefenseDecoder.v3_pattern = re.compile(r'v3/__(?P<url>.+?)__;(?P<enc_bytes>.*?)!')
  35.         URLDefenseDecoder.v3_token_pattern = re.compile(r"\*(\*.)?")
  36.         URLDefenseDecoder.v3_single_slash = re.compile(r"^([a-z0-9+.-]+:/)([^/].+)", re.IGNORECASE)
  37.         URLDefenseDecoder.v3_run_mapping = {}
  38.         run_values = string.ascii_uppercase + string.ascii_lowercase + string.digits + '-' + '_'
  39.         run_length = 2
  40.         for value in run_values:
  41.             URLDefenseDecoder.v3_run_mapping[value] = run_length
  42.             run_length += 1
  43.  
  44.     def decode(self, rewritten_url):
  45.         match = self.ud_pattern.search(rewritten_url)
  46.         if match:
  47.             if match.group(1) == 'v1':
  48.                 return self.decode_v1(rewritten_url)
  49.             elif match.group(1) == 'v2':
  50.                 return self.decode_v2(rewritten_url)
  51.             elif match.group(1) == 'v3':
  52.                 return self.decode_v3(rewritten_url)
  53.             else:
  54.                 raise ValueError('Unrecognized version in: ', rewritten_url)
  55.         else:
  56.             raise ValueError('Does not appear to be a URL Defense URL')
  57.  
  58.     def decode_v1(self, rewritten_url):
  59.         match = self.v1_pattern.search(rewritten_url)
  60.         if match:
  61.             url_encoded_url = match.group('url')
  62.             html_encoded_url = unquote(url_encoded_url)
  63.             url = unescape(html_encoded_url)
  64.             return url
  65.         else:
  66.             raise ValueError('Error parsing URL')
  67.  
  68.     def decode_v2(self, rewritten_url):
  69.         match = self.v2_pattern.search(rewritten_url)
  70.         if match:
  71.             special_encoded_url = match.group('url')
  72.             trans = maketrans('-_', '%/')
  73.             url_encoded_url = special_encoded_url.translate(trans)
  74.             html_encoded_url = unquote(url_encoded_url)
  75.             url = unescape(html_encoded_url)
  76.             return url
  77.         else:
  78.             raise ValueError('Error parsing URL')
  79.  
  80.     def decode_v3(self, rewritten_url):
  81.         def replace_token(token):
  82.             if token == '*':
  83.                 character = self.dec_bytes[self.current_marker]
  84.                 self.current_marker += 1
  85.                 return character
  86.             if token.startswith('**'):
  87.                 run_length = self.v3_run_mapping[token[-1]]
  88.                 run = self.dec_bytes[self.current_marker:self.current_marker + run_length]
  89.                 self.current_marker += run_length
  90.                 return run
  91.  
  92.         def substitute_tokens(text, start_pos=0):
  93.             match = self.v3_token_pattern.search(text, start_pos)
  94.             if match:
  95.                 start = text[start_pos:match.start()]
  96.                 built_string = start
  97.                 token = text[match.start():match.end()]
  98.                 built_string += replace_token(token)
  99.                 built_string += substitute_tokens(text, match.end())
  100.                 return built_string
  101.             else:
  102.                 return text[start_pos:len(text)]
  103.  
  104.         match = self.v3_pattern.search(rewritten_url)
  105.         if match:
  106.             url = match.group('url')
  107.             singleSlash = self.v3_single_slash.findall(url)
  108.             if singleSlash and len(singleSlash[0]) == 2:
  109.                 url = singleSlash[0][0] + "/" + singleSlash[0][1]
  110.             encoded_url = unquote(url)
  111.             enc_bytes = match.group('enc_bytes')
  112.             enc_bytes += '=='
  113.             self.dec_bytes = (urlsafe_b64decode(enc_bytes)).decode('utf-8')
  114.             self.current_marker = 0
  115.             return substitute_tokens(encoded_url)
  116.  
  117.         else:
  118.             raise ValueError('Error parsing URL')
  119.  
  120.  
  121. def main():
  122.     parser = ArgumentParser(prog='URLDefenseDecode', description='Decode URLs rewritten by URL Defense. Supports v1, v2, and v3 URLs.')
  123.     parser.add_argument('rewritten_urls', nargs='+')
  124.     args = parser.parse_args()
  125.     urldefense_decoder = URLDefenseDecoder()
  126.     for rewritten_url in args.rewritten_urls:
  127.         try:
  128.             print(urldefense_decoder.decode(rewritten_url))
  129.         except ValueError as e:
  130.             print(e)
  131.  
  132.  
  133. if __name__ == '__main__':
  134.     main()
  135.  
Add Comment
Please, Sign In to add comment