Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/**
 * Run the bad word filter over a piece of text.
 *
 * Each entry of the 'badwords' cache is applied in turn. Entries flagged
 * 'm_exact' only match when surrounded by non-word characters (or the
 * start/end of the text); all other entries are replaced wherever they
 * occur, case-insensitively. Members whose group has 'g_bypass_badwords'
 * set get their text back untouched.
 *
 * @param	string	$text		Text to filter
 * @param	bool	$stripUrls	When TRUE, URLs are swapped for placeholders before
 *								filtering and restored afterwards so they are never altered
 * @return	string				Filtered text (the original text if filtering left nothing)
 */
public function parseBadWords( $text='', $stripUrls = TRUE )
{
    /* @link http://community.invisionpower.com/resources/bugs.html/_/ip-board/report-center-bypass-word-filter-r40719 */
    /* Member data has a group id but no group row merged in yet: merge it from the group cache */
    if( self::$Perms['memberData']['member_group_id'] AND !self::$Perms['memberData']['g_id'] )
    {
        self::$Perms['memberData'] = array_merge( self::$Perms['memberData'], $this->caches['group_cache'][ self::$Perms['memberData']['member_group_id'] ] );

        if( self::$Perms['memberData']['mgroup_others'] )
        {
            self::$Perms['memberData'] = ips_MemberRegistry::setUpSecondaryGroups( self::$Perms['memberData'] );
        }
    }

    /* Empty text or bypass? */
    if ( $text == '' || self::$Perms['memberData']['g_bypass_badwords'] )
    {
        return $text;
    }

    $badwords  = $this->cache->getCache('badwords');
    $temp_text = $text;
    $urls      = array();

    /* Got any naughty words? */
    if ( ! is_array( $badwords ) OR ! count( $badwords ) )
    {
        return $text;
    }

    /* Strip out URLs so replacements aren't made inside them */
    if ( $stripUrls )
    {
        preg_match_all( '#((http|https|news|ftp)://(?:[^<>\)\[\"\s]+|[a-zA-Z0-9/\._\-!&\#;,%\+\?:=]+))#is', $text, $matches );

        foreach( $matches[0] as $m )
        {
            $c          = count( $urls );
            $urls[ $c ] = $m;
            $text       = str_replace( $m, '<!--url{' . $c . '}-->', $text );
        }
    }

    //-----------------------------------------
    // Convert numeric entities for A-Z and a-z back to
    // plain letters so entity-encoded words still match
    //-----------------------------------------

    foreach( array_merge( range( 65, 90 ), range( 97, 122 ) ) as $i )
    {
        $text = str_replace( "&#" . $i . ";", chr($i), $text );
    }

    /* IPSText::isUTF8() is horribly inefficient on large content with a lot of badwords - do it once here
       rather than per word. We can safely assume any adjustments during replacement are UTF8. */
    $isUTF8 = IPSText::isUTF8( $text );

    /* Loop-invariant: choose the PCRE modifiers once. Both exact-match passes share this,
       so they can no longer disagree about which charset constant to test. */
    $modifiers = ( IPS_DOC_CHAR_SET == 'UTF-8' && $isUTF8 ) ? 'iu' : 'i';

    //-----------------------------------------
    // Go all loopy
    //-----------------------------------------

    foreach( $badwords as $r )
    {
        /* NOTE(review): the pasted source replaced '&' with '&' (a no-op). The paste appears to be
           HTML-decoded, so this was most likely '&amp;' => '&'; confirm against the upstream file. */
        $r['type'] = str_replace( '&amp;', '&', IPSText::UNhtmlspecialchars( $r['type'] ) );

        /* Nothing to look for. In exact mode an empty word would match every pair of adjacent non-word characters */
        if ( $r['type'] === '' )
        {
            continue;
        }

        /* Replacement markup is only kept when parsing topics */
        if ( $this->parseType != 'topics' )
        {
            $r['swop'] = strip_tags( $r['swop'] );
        }

        $replace = $r['swop'] ? $r['swop'] : '######';

        if ( $r['m_exact'] )
        {
            $quoted = preg_quote( $r['type'], '/' );

            /* Make the replacement literal: escape '\' and '$' so they are not read as backreferences, and
               use ${n} for the captured boundaries so a replacement starting with a digit cannot merge
               with the group number (e.g. "\1" . "4ss" would otherwise be read as group 14). */
            $replacement = '${1}' . str_replace( array( '\\', '$' ), array( '\\\\', '\\$' ), $replace ) . '${2}';

            // \b does not work well because it matches a word boundary, which is technically a \w to \W shift
            // @see http://stackoverflow.com/questions/6531724/how-exactly-do-regular-expression-word-boundaries-work-in-php
            // A filter word made of symbols, submitted inside markup such as <p>...</p>, has no such shift,
            // so instead require a non-word character (or the start/end of the text) on either side.
            $patterns = array( '/(^|\W)' . $quoted . '(\W|$)/' . $modifiers );

            /* I'd retest that for a dollar! Second pass with the dollar sign un-escaped and the word re-quoted.
               NOTE(review): as pasted this swaps '$' for '$'; upstream may have substituted an HTML entity
               here (the paste appears entity-decoded) - confirm against the upstream file. */
            if ( strstr( $quoted, '$' ) )
            {
                $test       = preg_replace( '#(\\\\)?\$#', '$', $quoted );
                $patterns[] = '/(^|\W)' . preg_quote( $test, '/' ) . '(\W|$)/' . $modifiers;
            }

            foreach( $patterns as $pattern )
            {
                $result = preg_replace( $pattern, $replacement, $text );

                /* preg_replace() returns NULL on failure: keep what we have rather than wiping the text
                   (which would make the final return hand back the completely unfiltered original) */
                if ( $result !== NULL )
                {
                    $text = $result;
                }
            }
        }
        else
        {
            //----------------------------
            // 'ass' in 'class' kills css
            //----------------------------

            if( strtolower( $r['type'] ) == 'ass' )
            {
                $result = preg_replace( "/(?<!cl)" . $r['type'] . "/i", $replace, $text );

                if ( $result !== NULL )
                {
                    $text = $result;
                }
            }
            else
            {
                $text = str_ireplace( $r['type'], $replace, $text );
            }
        }
    }

    /* Put the URLs back. Only placeholders we created are restored; a placeholder typed by the
       user with an index we never issued is left alone instead of hitting an undefined index. */
    foreach( $urls as $c => $url )
    {
        $text = str_replace( '<!--url{' . $c . '}-->', $url, $text );
    }

    return $text ? $text : $temp_text;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement