Advertisement
Flynnn

Untitled

Sep 5th, 2012
114
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 8.13 KB | None | 0 0
  1. <?php
  2.  
  // Serve the result as plain text so the filtered markup is shown literally
  // instead of being interpreted by the browser.
  header('Content-type: text/plain');

  // Debug settings for this test script: show errors on screen, and report all of them.
  ini_set("display_errors", 1);
  error_reporting(E_ALL);
  6.  
  7.  
  8.  
  /**
   * Strips disallowed <script>/<embed> markup from a string.
   *
   * The work is deliberately split into two passes:
   *   1. dbStr_GetRanges() decides WHAT to delete, producing array(start, length)
   *      ranges in the same form substr_replace() would accept individually.
   *   2. dbStr_FilterStringWithRanges() decides HOW to delete it, taking care of
   *      ranges that overlap one another.
   * Keeping the passes separate avoids one tangled algorithm that has to track
   * both concerns at once.
   *
   * @param string $string Markup to filter.
   * @return string The input with every scheduled range removed.
   */
  function dbStr($string)
  {
      return dbStr_FilterStringWithRanges($string, dbStr_GetRanges($string));
  }
  23.  
  24.  
  25.  
  /**
   * Removes a list of character ranges from a string, coping with ranges that overlap.
   *
   * Every range is array(start, length) expressed in coordinates of the ORIGINAL
   * string, i.e. the arguments substr_replace() would take if that range were the
   * only one removed. For each range, in ascending start order:
   *   1. If it lies entirely inside text that was already removed, it is skipped.
   *   2. If it begins inside text that was already removed, its start is moved up
   *      to the end of the removed region and its length shrunk to match.
   *   3. What is left is cut out, shifting the position by the number of characters
   *      removed so far so that earlier cuts do not throw later ones off.
   *
   * The ranges are sorted by start first, so callers need not pre-sort them, and
   * ranges with a zero or negative length are ignored (a negative length would
   * otherwise be read by substr_replace() as "stop N characters before the end").
   *
   * @param string $string Text to cut from.
   * @param array  $ranges List of array(int start, int length).
   * @return string The text with the union of all ranges removed.
   */
  function dbStr_FilterStringWithRanges($string, $ranges)
  {
      // The sweep below is only correct when ranges are visited in start order.
      usort($ranges, function ($a, $b) {
          return $a[0] - $b[0];
      });

      $removed = 0; // Characters cut so far: original position - $removed = current position.
      $cursor = 0;  // End (exclusive, original coordinates) of the text already removed.

      foreach ($ranges as $range)
      {
          $start = $range[0];
          $end = $start + $range[1];

          // The front of this range may already be gone; begin right after it.
          if ($start < $cursor)
          {
              $start = $cursor;
          }

          // Nothing left to remove: fully covered already, or an empty/negative span.
          $length = $end - $start;
          if ($length <= 0) continue;

          $string = substr_replace($string, '', $start - $removed, $length);

          // Update the trackers. $end > $cursor is guaranteed by the check above.
          $cursor = $end;
          $removed += $length;
      }

      return $string;
  }
  64.  
  65.  
  /**
   * Scans a string for <script>/<embed> tags and schedules the unwanted ones for deletion.
   *
   * Every tag found is classified by get_dbStrMatchType() and handled as follows:
   *   - Type 3, a self-closed ("lone wolf") tag: kept only if it carries exactly one
   *     src attribute and approve_dbStrSrc() accepts it; otherwise the tag is removed.
   *   - Type 2, a close tag: always removed, unless it was protected (see below).
   *   - Type 1, an open tag: the next close tag after it is located.
   *       * If anything other than whitespace sits between the two, the whole span
   *         from the open tag through the close tag is removed.
   *       * If there is nothing between them, the open tag is treated as a lone wolf
   *         tag (type 3). When it is approved, its close tag is protected from the
   *         "always remove close tags" rule so the pair survives intact.
   *       * If no close tag exists, everything up to the end of the string counts as
   *         the tag's content.
   *
   * @param string $string Markup to scan.
   * @return array List of array(int start, int length) ranges, in the order the tags
   *               appear. Ranges may overlap; dbStr_FilterStringWithRanges() resolves that.
   */
  function dbStr_GetRanges($string)
  {
      // Get the list of tags, each with its byte offset in $string.
      preg_match_all
      (
          "#<(/){0,1}?\s*?(?:script|embed)"."[^'\"/]*?(?:[^'\"/]*?[\"'](?:(?:\\\\\"|\\\\'|[^\"'])*?)['\"][^'\"/]*?)*?[^'\"/]*?"."(/){0,1}?>#imsSX",
          $string,
          $matches,
          PREG_SET_ORDER|PREG_OFFSET_CAPTURE
      );

      // Groups of numbers specifying the ranges to delete.
      $ranges = array();

      foreach ($matches as $key => $value)
      {
          // foreach walks a snapshot of $matches, so entries unset further down
          // (protected close tags) still show up here and must be skipped by hand.
          if (!isset($matches[$key])) continue;

          $type = get_dbStrMatchType($value);
          $tagStart = $value[0][1];
          $tagLength = strlen($value[0][0]);

          // Key of the close tag to protect if this turns out to be an approved empty pair.
          $possiblesave = null;

          if ($type === 1) // Start tag
          {
              // Defaults for "no close tag found": the content runs to the end of the string.
              $idx = strlen($string);
              $len = 0;
              // Reset for every start tag so a close tag paired with an earlier
              // start tag can never be protected by mistake.
              $protectkey = null;

              // Find the first close tag that comes after this start tag.
              foreach ($matches as $key2 => $value2)
              {
                  if ($key2 <= $key) continue;
                  if (get_dbStrMatchType($value2) === 2)
                  {
                      $idx = $value2[0][1];
                      $len = strlen($value2[0][0]);
                      $protectkey = $key2;
                      break;
                  }
              }

              // Get the text between this tag and its close tag.
              $substrstart = $tagStart + $tagLength;
              $content = substr($string, $substrstart, $idx - $substrstart);

              if (preg_match("#[^\s]#imsSX", $content))
              {
                  // Not empty: delete open tag, content and close tag as a whole.
                  $ranges[] = array($tagStart, ($idx + $len) - $tagStart);
              }
              else
              {
                  // Empty pair: judge the open tag as if it were a lone wolf tag, and
                  // remember which close tag to spare should it be approved.
                  $possiblesave = $protectkey;
                  $type = 3;
              }
          }

          if ($type === 2) // End tag
          {
              // We always remove these.
              $ranges[] = array($tagStart, $tagLength);
          }
          else if ($type === 3) // Lone wolf tag, or an empty open/close pair
          {
              // Find and count the src attributes. Only ONE is allowed: with none,
              // something funny is going on; with several, someone is likely trying
              // to trick the filter.
              preg_match_all
              (
                  "#src=[\"']((\\\\\"|\\\\'|[^\"'])*?)['\"]#imsSX",
                  $value[0][0],
                  $submatches,
                  PREG_SET_ORDER|PREG_OFFSET_CAPTURE
              );

              if (count($submatches) != 1 || !approve_dbStrSrc($submatches[0][1][0]))
              {
                  // Wrong number of srcs, or an untrusted src: schedule for deletion.
                  $ranges[] = array($tagStart, $tagLength);
              }
              else if ($possiblesave !== null)
              {
                  // Approved empty pair: drop the close tag from the match list so the
                  // end-tag rule above never sees it.
                  unset($matches[$possiblesave]);
              }
          }
      }

      return $ranges;
  }
  199.  
  200.  
  /**
   * Reads one of the tag regex matches and determines the type of tag.
   *
   * The match is one entry of a preg_match_all() result: element 0 is the whole
   * tag, element 1 the leading-slash group and element 2 the trailing-slash group,
   * each stored as array(text, offset).
   *
   * @param array $val One match set.
   * @return int 3 when the match has three elements and the trailing-slash group is "/"
   *             (self-closed tag), 2 when it has two elements and the leading-slash
   *             group is "/" (close tag), 1 otherwise (open tag).
   */
  function get_dbStrMatchType($val)
  {
      $groups = count($val);

      // In both special cases the LAST captured group must be a slash, and the
      // resulting type number happens to equal the number of elements in the match.
      if ($groups === 2 || $groups === 3)
      {
          if (strcmp($val[$groups - 1][0], "/") === 0)
          {
              return $groups;
          }
      }

      return 1;
  }
  217.  
  /**
   * Decides whether a src URL points at one of the trusted hosts.
   *
   * A src is approved when it starts with one of the whitelisted prefixes. For
   * prefixes that do not end in "/", the prefix must additionally be followed by
   * the end of the string or by "/", "?" or "#". Without that boundary check a
   * bare prefix such as "http://www.youtube.com" would also approve
   * "http://www.youtube.com.evil.example/" and "http://www.youtube.com@evil.example/".
   * The comparison is case-sensitive.
   *
   * @param string $src Value of a tag's src attribute.
   * @return bool True when the src is trusted, false otherwise.
   */
  function approve_dbStrSrc($src)
  {
      $dbStrTrusted = array
      (
          "http://www.youtube.com",
          "http://youtube.com",
          "http://widgets.twimg.com/",
          "http://www.twiigs.com/",
          "http://twiigs.com/",
          "http://twitter.com/",
          "http://www.twitter.com/",
          "http://picasaweb.google.com",
          "http://www.flickr.com",
          "http://flickr.com",
          "http://static.pbsrc.com/",
      );

      foreach ($dbStrTrusted as $trusted)
      {
          // Must start with the trusted prefix.
          if (strpos($src, $trusted) !== 0) continue;

          $len = strlen($trusted);

          // A prefix ending in "/" already pins down the complete host name.
          if ($trusted[$len - 1] === '/')
          {
              return true;
          }

          // Otherwise the host name must stop exactly where the prefix stops.
          if (!isset($src[$len]) || strpos('/?#', $src[$len]) !== false)
          {
              return true;
          }
      }

      return false;
  }
  245.  
  246.  
  247.  
  248.  
  // Manual smoke test: prints "test" followed by the result of running dbStr() over
  // sample markup. The input is a single-quoted string holding an <embed> whose src
  // starts with http://picasaweb.google.com, concatenated with a double-quoted string
  // holding several <script> variants (self-closed with and without a src, open/close
  // pairs with content, tags with extra whitespace, and an empty pair), separated by
  // the marker words One..Four.
  249. echo "test" . dbStr
  250. (
  251. '
  252.  
  253. <embed type="application/x-shockwave-flash" src="http://picasaweb.google.com/s/c/bin/slideshow.swf" width="288" height="192" flashvars="host=picasaweb.google.com&amp;hl=en_US&amp;feat=flashalbum&amp;RGB=0x000000&amp;feed=http%3A%2F%2Fpicasaweb.google.com%2Fdata%2Ffeed%2Fapi%2Fuser%2F109941697484668010012%2Falbumid%2F5561383933745906193%3Falt%3Drss%26kind%3Dphoto%26authkey%3DGv1sRgCN2H88H41qeT6AE%26hl%3Den_US" pluginspage="http://www.macromedia.com/go/getflashplayer"></embed>
  254.  
  255. '.
  256.  
  257. "
  258.  
  259.  
  260. <script type='textjavascript'/>
  261. One
  262. <script type='textjavascript' src='asdf'/>
  263. Two
  264. <script  fubar=\"d\\\\\'erp\"  derplol=\"dlerp\">
  265.    //<script type='text/javascript' src='asdf'/>
  266.    asdfasfasdf
  267. </script>
  268. Three
  269. < script asfkjhsakfhjsadfjhsadfjhasfkjhasfklhasfkljahsdflkjashfklasjhf>
  270.    uyoiyoiuyoiuy
  271. </      script>
  272. <script>
  273. </script>
  274. Four
  275. ");
  276. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement