SHARE
TWEET

Proxy

a guest Mar 8th, 2013 10 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env python
  2. # Licence
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. #     http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # Tutorial by Shirobi
  15.  
  16. import os
  17. import re
  18. import urlparse
  19.  
  20. ################################################################################
  21.  
  22. # URLs that have absolute addresses
  23. ABSOLUTE_URL_REGEX = r"(http(s?):)?//(?P<url>[^\"'> \t\)]+)"
  24.  
  25. # URLs that are relative to the base of the current hostname.
  26. BASE_RELATIVE_URL_REGEX = r"/(?!(/)|(http(s?)://)|(url\())(?P<url>[^\"'> \t\)]*)"
  27.  
  28. # URLs that have '../' or './' to start off their paths.
  29. TRAVERSAL_URL_REGEX = r"(?P<relative>\.(\.)?)/(?!(/)|(http(s?)://)|(url\())(?P<url>[^\"'> \t\)]*)"
  30.  
  31. # URLs that are in the same directory as the requested URL.
  32. SAME_DIR_URL_REGEX = r"(?!(/)|(http(s?)://)|(url\())(?P<url>[^\"'> \t\)]+)"
  33.  
  34. # URL matches the root directory.
  35. ROOT_DIR_URL_REGEX = r"(?!//(?!>))/(?P<url>)(?=[ \t\n]*[\"'\)>/])"
  36.  
  37. # Start of a tag using 'src' or 'href'
  38. TAG_START = r"(?i)\b(?P<tag>src|href|action|url|background)(?P<equals>[\t ]*=[\t ]*)(?P<quote>[\"']?)"
  39.  
  40. # Start of a CSS import
  41. CSS_IMPORT_START = r"(?i)@import(?P<spacing>[\t ]+)(?P<quote>[\"']?)"
  42.  
  43. # CSS url() call
  44. CSS_URL_START = r"(?i)\burl\((?P<quote>[\"']?)"
  45.  
  46.  
  47. REPLACEMENT_REGEXES = [
  48.   (TAG_START + SAME_DIR_URL_REGEX,
  49.      "\g<tag>\g<equals>\g<quote>%(accessed_dir)s\g<url>"),
  50.  
  51.   (TAG_START + TRAVERSAL_URL_REGEX,
  52.      "\g<tag>\g<equals>\g<quote>%(accessed_dir)s/\g<relative>/\g<url>"),
  53.  
  54.   (TAG_START + BASE_RELATIVE_URL_REGEX,
  55.      "\g<tag>\g<equals>\g<quote>/%(base)s/\g<url>"),
  56.  
  57.   (TAG_START + ROOT_DIR_URL_REGEX,
  58.      "\g<tag>\g<equals>\g<quote>/%(base)s/"),
  59.  
  60.   # Need this because HTML tags could end with '/>', which confuses the
  61.   # tag-matching regex above, since that's the end-of-match signal.
  62.   (TAG_START + ABSOLUTE_URL_REGEX,
  63.      "\g<tag>\g<equals>\g<quote>/\g<url>"),
  64.  
  65.   (CSS_IMPORT_START + SAME_DIR_URL_REGEX,
  66.      "@import\g<spacing>\g<quote>%(accessed_dir)s\g<url>"),
  67.  
  68.   (CSS_IMPORT_START + TRAVERSAL_URL_REGEX,
  69.      "@import\g<spacing>\g<quote>%(accessed_dir)s/\g<relative>/\g<url>"),
  70.  
  71.   (CSS_IMPORT_START + BASE_RELATIVE_URL_REGEX,
  72.      "@import\g<spacing>\g<quote>/%(base)s/\g<url>"),
  73.  
  74.   (CSS_IMPORT_START + ABSOLUTE_URL_REGEX,
  75.      "@import\g<spacing>\g<quote>/\g<url>"),
  76.  
  77.   (CSS_URL_START + SAME_DIR_URL_REGEX,
  78.      "url(\g<quote>%(accessed_dir)s\g<url>"),
  79.  
  80.   (CSS_URL_START + TRAVERSAL_URL_REGEX,
  81.       "url(\g<quote>%(accessed_dir)s/\g<relative>/\g<url>"),
  82.  
  83.   (CSS_URL_START + BASE_RELATIVE_URL_REGEX,
  84.       "url(\g<quote>/%(base)s/\g<url>"),
  85.  
  86.   (CSS_URL_START + ABSOLUTE_URL_REGEX,
  87.       "url(\g<quote>/\g<url>"),
  88. ]
  89.  
  90. ################################################################################
  91.  
  92. def TransformContent(base_url, accessed_url, content):
  93.   url_obj = urlparse.urlparse(accessed_url)
  94.   accessed_dir = os.path.dirname(url_obj.path)
  95.   if not accessed_dir.endswith("/"):
  96.     accessed_dir += "/"
  97.  
  98.   for pattern, replacement in REPLACEMENT_REGEXES:
  99.     fixed_replacement = replacement % {
  100.       "base": base_url,
  101.       "accessed_dir": accessed_dir,
  102.     }
  103.     content = re.sub(pattern, fixed_replacement, content)
  104.   return content
RAW Paste Data
Top