Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<?php
/*
 * Grab the name and version of major bots from their User-Agent string.
 *
 * This is beta code: it is not 100% accurate, but it works for the list of
 * user agents in $bots below. Corrections and improvements are welcome by
 * e-mail at toslalompt@gmail.com.
 */

/**
 * Builds the regex fragment (no delimiters, no anchors) that splits a
 * normalised user agent into its parts.
 *
 * Capture groups, in order:
 *   1. bot name
 *   2. keyword (bot|robot|spider|crawler|curl); filled only when the first
 *      alternative of group 1 cannot match at the start of the string (for
 *      example when the cleaned string begins with a space), otherwise empty
 *   3. version (optional)
 *   4. the rest of the user-agent string
 *
 * @return string PCRE fragment to be wrapped in delimiters by the caller
 */
function majorBotsPattern(): string
{
    // Start with "=": appending with ".=" to a variable that does not exist
    // yet raises an "Undefined variable" diagnostic.
    $pattern = '(';
    // Hyphens inside the classes are escaped so they can never be read as a range.
    $pattern .= '[A-Za-z]+[A-Za-z\-.: ]+|[A-Za-z\-_.: ]+';
    $pattern .= '(bot|robot|spider|crawler|curl)';
    $pattern .= ')';
    // Optional separator between the name and the version.
    $pattern .= '[\/ ]?';
    $pattern .= '([v]?[0-9]*\.?[0-9a-z\.]+)?';
    // The rest of the user-agent string.
    $pattern .= '(.*)';

    return $pattern;
}

/**
 * Returns the patterns that are removed from a lower-cased user agent before
 * it is parsed. They are applied in the listed order.
 *
 * @return string[] complete PCRE patterns, delimiters included
 */
function userAgentCleanupPatterns(): array
{
    $numbDots = '[a-zA-Z0-9.]+';

    return [
        // Round and square brackets. They must go first: the patterns below
        // expect "mozilla/x.y compatible;" without the opening parenthesis.
        '/[()\[\]]/',
        '/mozilla\/' . $numbDots . ' compatible;/',
        '/windows nt ' . $numbDots . '/',
        '/like gecko /',
        '/linux khtml\/' . $numbDots . '/',
    ];
}

/**
 * Lower-cases a raw user agent and strips the parts that get in the way of
 * parsing: brackets, browser boilerplate, the spaces around a " / " separator
 * and everything from the first http(s) URL to the end of the string.
 *
 * @param string $userAgent raw User-Agent header value
 *
 * @return string the cleaned user agent
 *
 * @throws RuntimeException when the regex engine reports an error
 */
function normalizeUserAgent(string $userAgent): string
{
    $cleaned = preg_replace(userAgentCleanupPatterns(), '', strtolower($userAgent));
    if ($cleaned === null) {
        throw new RuntimeException('User agent cleanup failed, PCRE error code ' . preg_last_error());
    }

    // Some user agents put spaces around the slash: "ebot / v.1.0a".
    $cleaned = str_replace(' / ', '/', $cleaned);

    // Drop the URL (and a " - " that introduces it) up to the end of the
    // string. Both http:// and https:// are recognised.
    $cleaned = preg_replace('@https?://(.*)| - https?://(.*)$@', '', $cleaned);
    if ($cleaned === null) {
        throw new RuntimeException('User agent URL removal failed, PCRE error code ' . preg_last_error());
    }

    return $cleaned;
}

/**
 * Parses a normalised user agent.
 *
 * @param string $userAgent value returned by normalizeUserAgent()
 *
 * @return array|null the preg_match() groups described in majorBotsPattern(),
 *                    or null when the string does not look like a bot or
 *                    does not match the pattern
 */
function parseBot(string $userAgent): ?array
{
    // 1. Cheap check: does it look like a bot at all?
    // NOTE(review): "slurp" passes this filter but is not one of the keywords
    // in majorBotsPattern(), and "curl" is a keyword there but is not accepted
    // here. Confirm which list is the intended one.
    if (preg_match('/(bot|crawl|slurp|spider)/', $userAgent) !== 1) {
        return null;
    }

    // 2. It is a bot, so split it into name / keyword / version / rest.
    if (preg_match('#^' . majorBotsPattern() . '$#', $userAgent, $match) !== 1) {
        return null;
    }

    return $match;
}

/**
 * Escapes text for output inside HTML.
 *
 * @param string $text arbitrary text
 *
 * @return string HTML-safe text
 */
function escapeHtml(string $text): string
{
    return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

$majorbots = majorBotsPattern();
$clean = userAgentCleanupPatterns();

/* List of bots that were tested */
$bots = [
    'Mozilla/5.0 (compatible; DuckDuckGo-Favicons-Bot/1.0; +http://www.google.com/bot.html)',
    'Mozilla/5.0 (compatible; Sogou Orionbot 3.0',
    'Adidxbot/v4.1.0',
    'crawler for netopian (http://www.netopian.co.uk/)',
    'boitho.com-robot/1.1',
    'SEMrushBot',
    'eBot / v.1.0a (http://alfa.elchron.cz)',
    'envolk[ITS]spider/1.6 ( http://www.envolk.com/envolkspider.html)',
    'SpeedySpider - http://www.entireweb.com',
    'Bot mailto:craftbot@yahoo.com',
    'Sogou News Spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)',
    'Mozilla/5.0 (compatible; DuckDuckGo Favicons-Bot/1.0 +http://www.google.com/bot.html)',
    'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Exabot-Thumbnails)',
    'Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)',
    'Mozilla/5.0 (compatible; Mail.RU_Bot/2.0; +http://go.mail.ru/help/robots)',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0) AddSugarSpiderBot www.idealobserver.com',
    'Mozilla/4.0 (compatible; BlitzBot)',
    'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
    'FAST Enteprise Crawler/6 (www dot fastsearch dot com)',
    'TinEye-bot/0.51 (see http://www.tineye.com/crawler.html)',
    'Whoismindbot/1.0 (+http://www.whoismind.com/bot.html)',
    'yacybot (/global; amd64 Linux 4.4.0-31-generic; java 1.8.0_91; Europe/en) http://yacy.net/bot.html',
    'StatoolsBot (+http://www.statools.com/bot.html)',
    'adidxbot/2.0 (+http://search.msn.com/msnbot.htm)',
    'BlitzBOT@tricus.net (Mozilla compatible)',
    'BlitzBOT@tricus.com (Mozilla compatible)',
    'ConveraCrawler/0.9d (+http://www.authoritativeweb.com/crawl)',
    'EmeraldShield.com WebBot (http://www.emeraldshield.com/webbot.aspx)',
];
// More samples: http://www.useragentstring.com/pages/useragentstring.php?typ=Crawler

echo ' testing ' . count($bots) . ' bots <br/> ';

// Position of the current user agent in $bots (1-based). It advances for every
// entry, matched or not, so the printed number identifies the list entry.
$count = 1;
foreach ($bots as $bot) {
    $match = parseBot(normalizeUserAgent($bot));

    if ($match !== null) {
        // Show which bot is being worked on. User-agent text is escaped
        // because it ends up inside HTML.
        echo ' this bot is ' . escapeHtml($bot) . ' <br/> ';
        // Print the parsed fields:
        //   [0] => full cleaned string
        //   [1] => bot name
        //   [2] => keyword (may be empty, see majorBotsPattern())
        //   [3] => bot version
        //   [4] => rest of the bot string
        echo ' print ' . $count . ' <br/> <pre> ' . escapeHtml(print_r($match, true)) . '</pre>';
    }

    $count++;
}
?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement