Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
//Debug harness setup: the script prints its result as plain text and
//surfaces every PHP diagnostic so problems in the filter are visible.
header('Content-type: text/plain');
ini_set("display_errors", 1);
error_reporting(E_ALL);
/*
Strips untrusted <script>/<embed> markup from a string.

The work is split into two independent phases on purpose:
  1. dbStr_GetRanges() decides WHAT to delete and reports it as a list of
     array(start, length) pairs, each one usable on its own as
     substr_replace() arguments.
  2. dbStr_FilterStringWithRanges() decides HOW to delete it, coping with
     ranges that overlap or contain each other.
Doing both in a single pass would tangle the detection rules with the
offset bookkeeping.

Parameters:
  $string - the text to clean.
Returns:
  The text with every scheduled range removed.

NOTE(review): the original author planned a further escaping/strip-tags step
after this one; nothing of the kind is performed here.
*/
function dbStr($string)
{
    return dbStr_FilterStringWithRanges($string, dbStr_GetRanges($string));
}
/*
Removes a list of byte ranges from a string. Ranges may overlap, contain one
another, or arrive in any order.

The algorithm is as follows:
  1. Sort the ranges by start offset so the string is processed left to right.
  2. If a range is completely contained in what has already been removed,
     skip it.
  3. If a range intersects what has already been removed, shrink it so it
     starts right after the removed part.
  4. Keep the text between the previous removal and this range, then jump
     past the range.

Parameters:
  $string - the text to cut pieces out of.
  $ranges - list of array(start, length) pairs. Offsets are byte offsets
            into the ORIGINAL string (the same values substr_replace() would
            take if each range were applied on its own).
Returns:
  The string with the union of all ranges removed.
*/
function dbStr_FilterStringWithRanges($string, $ranges)
{
    //The skip/shrink logic below is only valid when ranges are visited in
    //ascending start order, so enforce that instead of trusting the caller.
    usort($ranges, function ($a, $b)
    {
        if ($a[0] == $b[0]) return 0;
        return ($a[0] < $b[0]) ? -1 : 1;
    });

    $total = strlen($string);
    $kept = '';
    $cursor = 0;//First offset of the original string that is neither kept nor removed yet.
    foreach ($ranges as $range)
    {
        $start = $range[0];
        $end = $range[0] + $range[1];
        //Ignore empty/negative ranges and ranges fully inside an earlier removal.
        if ($range[1] <= 0 || $end <= $cursor) continue;
        //The front of this range was already removed: start right after it.
        if ($start < $cursor) $start = $cursor;
        //Nothing left to cut once we are past the end of the input.
        if ($start >= $total) break;
        //Keep the untouched text in front of the range, then skip the range itself.
        $kept .= substr($string, $cursor, $start - $cursor);
        $cursor = $end;
    }
    //Whatever follows the last removal survives unchanged.
    if ($cursor < $total) $kept .= substr($string, $cursor);
    return $kept;
}
/*
Here is the real gist of the code: it decides which parts of the input must
be deleted.

Basically what we do is this:
  1. Generate a list of every <script>/<embed> open, close and self-closing
     tag in the input, with byte offsets.
  2. Classify each tag (see get_dbStrMatchType) and act on its type:
     A: Lone wolf tag (self-closing): keep it only if it carries exactly one
        src attribute and that src is trusted (see approve_dbStrSrc).
        Otherwise schedule the tag for removal.
     B: Close tag: always schedule it for removal.
     C: Open tag:
        1. Find the next close tag. If there is none, the element is treated
           as running to the end of the input.
        2. If anything other than whitespace sits between the open tag and
           the close tag, schedule everything from the open tag through the
           close tag for removal.
        3. Otherwise treat the open tag as a lone wolf and go to A. If it is
           approved, its close tag is protected from rule B.

Parameters:
  $string - the text to scan.
Returns:
  A list of array(start, length) pairs, in byte offsets of $string. Ranges
  are appended in tag order and may overlap or contain each other.

NOTE(review): the tag pattern only allows a '/' inside quotes or directly
before the closing '>', so a tag with an unquoted slash in an attribute
(e.g. src=http://host/x.js) appears not to be recognised at all. Confirm
and tighten before relying on this as a security boundary.
*/
function dbStr_GetRanges($string)
{
    //Get the list of tags.
    $found = preg_match_all
    (
        "#<(/){0,1}?\s*?(?:script|embed)"."[^'\"/]*?(?:[^'\"/]*?[\"'](?:(?:\\\\\"|\\\\'|[^\"'])*?)['\"][^'\"/]*?)*?[^'\"/]*?"."(/){0,1}?>#imsSX",
        $string,
        $matches,
        PREG_SET_ORDER|PREG_OFFSET_CAPTURE
    );
    if ($found === false)
    {
        //The regex engine gave up (e.g. backtrack limit). Fail closed: returning
        //no ranges here would let the whole input through unfiltered.
        return array(array(0, strlen($string)));
    }

    //Prepare to start storing deletion ranges
    $ranges = array();//Store groups of numbers specifying ranges to delete.
    foreach ($matches as $key=>$value)
    {
        //foreach walks a snapshot of $matches, so close tags that were unset
        //below (protected) still show up here; skip them explicitly.
        if (!isset($matches[$key])) continue;

        $tagStart = $value[0][1];
        $tagLength = strlen($value[0][0]);
        //Calculate the type of tag we found
        $type = get_dbStrMatchType($value);
        $possiblesave = null;

        //Respond accordingly
        if ($type == 1)//Start tag
        {
            //Default for an unclosed element: it runs to the end of the input.
            $idx = strlen($string);
            $len = 0;
            //Reset per open tag so a close tag found for an EARLIER open tag
            //can never be protected by mistake.
            $protectkey = null;
            //Find the close tag
            foreach ($matches as $key2=>$value2)
            {
                if ($key2 <= $key) continue;
                if (get_dbStrMatchType($value2) == 2)
                {
                    $idx = $value2[0][1];
                    $len = strlen($value2[0][0]);
                    $protectkey = $key2;
                    break;
                }
            }
            //Get the text between here and the close tag
            $substrstart = $tagStart + $tagLength;
            $content = substr($string, $substrstart, $idx - $substrstart);
            //If it is not empty, apply a range to delete everything as a whole.
            if (preg_match("#[^\s]#imsSX", $content))
            {
                $ranges[] = array($tagStart, ($idx + $len) - $tagStart);
            }
            //Otherwise, pretend that this is a lone wolf tag.
            else
            {
                //Pass on our possible save key (null when no close tag exists) to the next step...
                $possiblesave = $protectkey;
                $type = 3;
            }
        }

        if ($type == 2)//End tag
        {
            //We always remove these
            $ranges[] = array($tagStart, $tagLength);
        }
        else if ($type == 3)//Lone wolf tag
        {
            //We are receiving all scripts or embeds that are either:
            //  double tagged with nothing between them
            //OR
            //  single tag lone wolf.
            //Use a regex to find and count the srcs.
            //Only allow ONE src. If there are none, something funny is going on.
            //If there is more than one, a hacker is likely trying to trick the system.
            preg_match_all
            (
                "#src=[\"']((\\\\\"|\\\\'|[^\"'])*?)['\"]#imsSX",
                $value[0][0],
                $submatches,
                PREG_SET_ORDER|PREG_OFFSET_CAPTURE
            );
            //If any number of srcs other than one is found, OR if the src that is found is not approved, schedule for deletion.
            if (count($submatches) != 1 || !approve_dbStrSrc($submatches[0][1][0]))
            {
                $ranges[] = array($tagStart, $tagLength);
            }
            else if ($possiblesave !== null)
            {
                //This was an approved, empty open/close pair: drop the close tag
                //from the list so the "always remove close tags" rule skips it.
                unset($matches[$possiblesave]);
            }
        }
    }
    return $ranges;
}
/*
Reads one of the tag regex matches and determines the type of tag.

Parameters:
  $val - one match set: index 0 is the whole tag, index 1 the optional
         leading "/" capture, index 2 the optional trailing "/" capture.
         Each entry is an array whose element 0 is the captured text.
Returns:
  3 for a self-closing ("lone wolf") tag, 2 for a close tag, 1 for an open tag.
*/
function get_dbStrMatchType($val)
{
    $groups = count($val);

    //Three entries means the trailing "/" group is present: <tag ... />
    if ($groups == 3 && strcmp($val[2][0], "/") == 0) return 3;

    //Two entries means only the leading "/" group is present: </tag>
    if ($groups == 2 && strcmp($val[1][0], "/") == 0) return 2;

    //Anything else is a plain open tag.
    return 1;
}
/*
Decides whether the src of a script/embed tag points at a trusted origin.

A src is approved when it starts with one of the whitelisted prefixes AND the
prefix covers the complete host part. Whitelist entries that end in "/"
already pin the host; for entries without a trailing slash the character
following the prefix must be "/", "?", "#" or the end of the string.
Without that boundary check "http://www.youtube.com.evil.example/" or
"http://www.youtube.com@evil.example/" would pass as youtube.

Parameters:
  $src - the raw src attribute value. The comparison is case-sensitive.
Returns:
  true when the src is trusted, false otherwise (including non-string input).
*/
function approve_dbStrSrc($src)
{
    if (!is_string($src))
    {
        return false;
    }
    $dbStrTrusted = array
    (
        "http://www.youtube.com",
        "http://youtube.com",
        "http://widgets.twimg.com/",
        "http://www.twiigs.com/",
        "http://twiigs.com/",
        "http://twitter.com/",
        "http://www.twitter.com/",
        "http://picasaweb.google.com",
        "http://www.flickr.com",
        "http://flickr.com",
        "http://static.pbsrc.com/",
    );
    foreach ($dbStrTrusted as $trusted)
    {
        if (strpos($src, $trusted) !== 0)
        {
            continue;
        }
        //A prefix ending in "/" already terminates the host name.
        if (substr($trusted, -1) === '/')
        {
            return true;
        }
        //Otherwise make sure the host really ends where the prefix ends.
        //substr() yields "" (false on older PHP) when the src IS the prefix.
        $next = substr($src, strlen($trusted), 1);
        if ($next === '' || $next === false || $next === '/' || $next === '?' || $next === '#')
        {
            return true;
        }
    }
    return false;
}
//Manual smoke test: run a sample document through dbStr() and print the
//result after the literal marker "test". The sample mixes a trusted <embed>,
//self-closing scripts with and without a src, a script with a body, tags
//with odd spacing, and an empty <script></script> pair.
$dbStrSample =
'
<embed type="application/x-shockwave-flash" src="http://picasaweb.google.com/s/c/bin/slideshow.swf" width="288" height="192" flashvars="host=picasaweb.google.com&hl=en_US&feat=flashalbum&RGB=0x000000&feed=http%3A%2F%2Fpicasaweb.google.com%2Fdata%2Ffeed%2Fapi%2Fuser%2F109941697484668010012%2Falbumid%2F5561383933745906193%3Falt%3Drss%26kind%3Dphoto%26authkey%3DGv1sRgCN2H88H41qeT6AE%26hl%3Den_US" pluginspage="http://www.macromedia.com/go/getflashplayer"></embed>
'.
"
<script type='textjavascript'/>
One
<script type='textjavascript' src='asdf'/>
Two
<script fubar=\"d\\\\\'erp\" derplol=\"dlerp\">
//<script type='text/javascript' src='asdf'/>
asdfasfasdf
</script>
Three
< script asfkjhsakfhjsadfjhsadfjhasfkjhasfklhasfkljahsdflkjashfklasjhf>
uyoiyoiuyoiuy
</ script>
<script>
</script>
Four
";
echo "test" . dbStr($dbStrSample);
- ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement