Advertisement
Guest User

Proxy

a guest
Mar 8th, 2013
206
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.59 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # Licence
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # Tutorial by Shirobi
  15.  
import os
import re

try:
    import urlparse  # Python 2
except ImportError:  # Python 3 moved this module to urllib.parse
    import urllib.parse as urlparse
  19.  
  20. ################################################################################
  21.  
  22. # URLs that have absolute addresses
  23. ABSOLUTE_URL_REGEX = r"(http(s?):)?//(?P<url>[^\"'> \t\)]+)"
  24.  
  25. # URLs that are relative to the base of the current hostname.
  26. BASE_RELATIVE_URL_REGEX = r"/(?!(/)|(http(s?)://)|(url\())(?P<url>[^\"'> \t\)]*)"
  27.  
  28. # URLs that have '../' or './' to start off their paths.
  29. TRAVERSAL_URL_REGEX = r"(?P<relative>\.(\.)?)/(?!(/)|(http(s?)://)|(url\())(?P<url>[^\"'> \t\)]*)"
  30.  
  31. # URLs that are in the same directory as the requested URL.
  32. SAME_DIR_URL_REGEX = r"(?!(/)|(http(s?)://)|(url\())(?P<url>[^\"'> \t\)]+)"
  33.  
  34. # URL matches the root directory.
  35. ROOT_DIR_URL_REGEX = r"(?!//(?!>))/(?P<url>)(?=[ \t\n]*[\"'\)>/])"
  36.  
  37. # Start of a tag using 'src' or 'href'
  38. TAG_START = r"(?i)\b(?P<tag>src|href|action|url|background)(?P<equals>[\t ]*=[\t ]*)(?P<quote>[\"']?)"
  39.  
  40. # Start of a CSS import
  41. CSS_IMPORT_START = r"(?i)@import(?P<spacing>[\t ]+)(?P<quote>[\"']?)"
  42.  
  43. # CSS url() call
  44. CSS_URL_START = r"(?i)\burl\((?P<quote>[\"']?)"
  45.  
  46.  
  47. REPLACEMENT_REGEXES = [
  48. (TAG_START + SAME_DIR_URL_REGEX,
  49. "\g<tag>\g<equals>\g<quote>%(accessed_dir)s\g<url>"),
  50.  
  51. (TAG_START + TRAVERSAL_URL_REGEX,
  52. "\g<tag>\g<equals>\g<quote>%(accessed_dir)s/\g<relative>/\g<url>"),
  53.  
  54. (TAG_START + BASE_RELATIVE_URL_REGEX,
  55. "\g<tag>\g<equals>\g<quote>/%(base)s/\g<url>"),
  56.  
  57. (TAG_START + ROOT_DIR_URL_REGEX,
  58. "\g<tag>\g<equals>\g<quote>/%(base)s/"),
  59.  
  60. # Need this because HTML tags could end with '/>', which confuses the
  61. # tag-matching regex above, since that's the end-of-match signal.
  62. (TAG_START + ABSOLUTE_URL_REGEX,
  63. "\g<tag>\g<equals>\g<quote>/\g<url>"),
  64.  
  65. (CSS_IMPORT_START + SAME_DIR_URL_REGEX,
  66. "@import\g<spacing>\g<quote>%(accessed_dir)s\g<url>"),
  67.  
  68. (CSS_IMPORT_START + TRAVERSAL_URL_REGEX,
  69. "@import\g<spacing>\g<quote>%(accessed_dir)s/\g<relative>/\g<url>"),
  70.  
  71. (CSS_IMPORT_START + BASE_RELATIVE_URL_REGEX,
  72. "@import\g<spacing>\g<quote>/%(base)s/\g<url>"),
  73.  
  74. (CSS_IMPORT_START + ABSOLUTE_URL_REGEX,
  75. "@import\g<spacing>\g<quote>/\g<url>"),
  76.  
  77. (CSS_URL_START + SAME_DIR_URL_REGEX,
  78. "url(\g<quote>%(accessed_dir)s\g<url>"),
  79.  
  80. (CSS_URL_START + TRAVERSAL_URL_REGEX,
  81. "url(\g<quote>%(accessed_dir)s/\g<relative>/\g<url>"),
  82.  
  83. (CSS_URL_START + BASE_RELATIVE_URL_REGEX,
  84. "url(\g<quote>/%(base)s/\g<url>"),
  85.  
  86. (CSS_URL_START + ABSOLUTE_URL_REGEX,
  87. "url(\g<quote>/\g<url>"),
  88. ]
  89.  
  90. ################################################################################
  91.  
  92. def TransformContent(base_url, accessed_url, content):
  93. url_obj = urlparse.urlparse(accessed_url)
  94. accessed_dir = os.path.dirname(url_obj.path)
  95. if not accessed_dir.endswith("/"):
  96. accessed_dir += "/"
  97.  
  98. for pattern, replacement in REPLACEMENT_REGEXES:
  99. fixed_replacement = replacement % {
  100. "base": base_url,
  101. "accessed_dir": accessed_dir,
  102. }
  103. content = re.sub(pattern, fixed_replacement, content)
  104. return content
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement