Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python
"""Decode URLs rewritten by URL Defense (v1, v2 and v3 formats)."""

__author__ = 'Eric Van Cleve'
__copyright__ = 'Copyright 2019, Proofpoint Inc'
__license__ = 'GPL v.3'
__version__ = '3.0.1'
__email__ = '[email protected]'
__status__ = 'Production'

import sys
import re
import string
from argparse import ArgumentParser
from base64 import urlsafe_b64decode

# unquote, unescape and maketrans are exposed under the same names on both
# major Python versions so the rest of the module does not have to care which
# interpreter is running.
if sys.version_info[0] < 3:
    # Python 2: HTML entity decoding is a method on an HTMLParser instance.
    from urllib import unquote
    import HTMLParser
    htmlparser = HTMLParser.HTMLParser()
    unescape = htmlparser.unescape
    from string import maketrans
else:
    # Python 3: the helpers moved to urllib.parse, html and the str type.
    from urllib.parse import unquote
    from html import unescape
    maketrans = str.maketrans
class URLDefenseDecoder(object):
    """Recover the original URL from a URL Defense rewritten link.

    Three rewrite formats are handled, selected by the version segment that
    follows the ``urldefense.com`` / ``urldefense.proofpoint.com`` host:

    * v1 -- the target is the percent-encoded ``u=`` parameter before ``&k=``.
    * v2 -- like v1 (terminated by ``&d=`` or ``&c=``) but with ``-`` standing
      in for ``%`` and ``_`` standing in for ``/``.
    * v3 -- the target sits between ``__`` and ``__;``; characters that were
      replaced by ``*`` tokens are carried in a URL-safe base64 blob that
      follows the ``;``.

    The compiled patterns and the v3 run-length table are class attributes,
    so they are built once when the class is defined and are available
    whether or not an instance has been created.  Decoding keeps no state on
    the instance.
    """

    # Captures the version segment ("v1", "v2", ...) of a rewritten URL.
    ud_pattern = re.compile(r'https://urldefense(?:\.proofpoint)?\.com/(v[0-9])/')
    v1_pattern = re.compile(r'u=(?P<url>.+?)&k=')
    v2_pattern = re.compile(r'u=(?P<url>.+?)&[dc]=')
    v3_pattern = re.compile(r'v3/__(?P<url>.+?)__;(?P<enc_bytes>.*?)!')
    # A lone "*" is one replaced character; "**" plus one more character is a
    # run whose length is looked up from that trailing character.
    v3_token_pattern = re.compile(r"\*(\*.)?")
    # Matches "scheme:/rest" where only one slash follows the colon.
    v3_single_slash = re.compile(r"^([a-z0-9+.-]+:/)([^/].+)", re.IGNORECASE)
    # Run-length table: 'A' -> 2, 'B' -> 3, ... continuing through lowercase
    # letters, digits, '-' and '_'.
    v3_run_mapping = {
        value: run_length
        for run_length, value in enumerate(
            string.ascii_uppercase + string.ascii_lowercase + string.digits + '-_',
            2,
        )
    }

    def decode(self, rewritten_url):
        """Decode *rewritten_url*, dispatching on its version segment.

        Returns the decoded URL string.

        Raises ValueError if the input does not contain a URL Defense prefix,
        if the version is not v1/v2/v3, or if the version-specific payload
        cannot be parsed.
        """
        match = self.ud_pattern.search(rewritten_url)
        if not match:
            raise ValueError('Does not appear to be a URL Defense URL')

        version = match.group(1)
        if version == 'v1':
            return self.decode_v1(rewritten_url)
        if version == 'v2':
            return self.decode_v2(rewritten_url)
        if version == 'v3':
            return self.decode_v3(rewritten_url)
        # Build one message string; passing the URL as a second exception
        # argument would make str(error) render as a tuple.
        raise ValueError('Unrecognized version in: ' + rewritten_url)

    def decode_v1(self, rewritten_url):
        """Decode a v1 link: percent-decode, then HTML-unescape, the ``u=`` value.

        Raises ValueError if no ``u=...&k=`` section is present.
        """
        match = self.v1_pattern.search(rewritten_url)
        if not match:
            raise ValueError('Error parsing URL')
        return unescape(unquote(match.group('url')))

    def decode_v2(self, rewritten_url):
        """Decode a v2 link.

        The ``u=`` value has ``-`` mapped back to ``%`` and ``_`` mapped back
        to ``/`` before being percent-decoded and HTML-unescaped.

        Raises ValueError if no ``u=...&d=`` / ``u=...&c=`` section is present.
        """
        match = self.v2_pattern.search(rewritten_url)
        if not match:
            raise ValueError('Error parsing URL')
        # The two substitutions are independent single-character swaps, so
        # chained replace() is equivalent to a translation table and behaves
        # the same on Python 2 and Python 3.
        url_encoded_url = match.group('url').replace('-', '%').replace('_', '/')
        return unescape(unquote(url_encoded_url))

    def decode_v3(self, rewritten_url):
        """Decode a v3 link.

        The embedded URL is percent-decoded and each ``*`` token in it is
        replaced, left to right, by characters taken from the base64-decoded
        blob: a lone ``*`` consumes one character, ``**X`` consumes the number
        of characters that ``v3_run_mapping`` assigns to ``X``.  A run that
        extends past the end of the decoded characters yields whatever
        characters remain.

        Raises ValueError if the v3 section is missing, if the blob is not
        valid base64 / UTF-8, if a lone ``*`` has no character left to
        consume, or if a run token carries an unknown length character.
        """
        match = self.v3_pattern.search(rewritten_url)
        if not match:
            raise ValueError('Error parsing URL')

        url = match.group('url')
        # Put back the second slash after the scheme when only one is present.
        single_slash = self.v3_single_slash.match(url)
        if single_slash:
            url = single_slash.group(1) + '/' + single_slash.group(2)
        encoded_url = unquote(url)

        # The blob arrives without base64 padding; appending "==" supplies
        # enough for any valid length and the decoder ignores the surplus.
        dec_chars = urlsafe_b64decode(match.group('enc_bytes') + '==').decode('utf-8')

        pieces = []
        marker = 0          # next unread position in dec_chars
        literal_start = 0   # start of the literal text preceding the next token
        for token in self.v3_token_pattern.finditer(encoded_url):
            pieces.append(encoded_url[literal_start:token.start()])
            literal_start = token.end()

            if token.group(1) is None:
                # Lone "*": exactly one character must be available.
                if marker >= len(dec_chars):
                    raise ValueError('Error parsing URL')
                run_length = 1
            else:
                run_length = self.v3_run_mapping.get(token.group(0)[-1])
                if run_length is None:
                    raise ValueError('Error parsing URL')

            pieces.append(dec_chars[marker:marker + run_length])
            marker += run_length

        pieces.append(encoded_url[literal_start:])
        return ''.join(pieces)
def main():
    """Command-line entry point.

    Takes one or more rewritten URLs as positional arguments and prints the
    decoded form of each on its own line.  When a URL cannot be decoded, the
    ValueError raised for it is printed instead and processing moves on to
    the next argument.
    """
    cli = ArgumentParser(
        prog='URLDefenseDecode',
        description='Decode URLs rewritten by URL Defense. Supports v1, v2, and v3 URLs.',
    )
    cli.add_argument('rewritten_urls', nargs='+')
    options = cli.parse_args()

    decoder = URLDefenseDecoder()
    for candidate in options.rewritten_urls:
        try:
            print(decoder.decode(candidate))
        except ValueError as error:
            print(error)


if __name__ == '__main__':
    main()
Add Comment
Please, Sign In to add comment