Advertisement
tunnckoCore

Spiders, Crawlers, Robots List 2013

Feb 9th, 2013
133
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 5.29 KB | None | 0 0
  <?php

  /*
   * $ROBOT_USER_AGENTS: list of user-agent fragments that identify spiders,
   * crawlers and other automated clients.
   *
   * Entries are meant to be matched case-insensitively as substrings of the
   * User-Agent header (see the 'webmon ' entry, whose trailing space exists
   * only to avoid a substring hit on "webmoney").
   *
   * Every element, including the last one, ends with a comma so that new
   * entries can be appended without breaking the array syntax.
   */
  $ROBOT_USER_AGENTS = array(
      // note that this is meant to be used in a case-insensitive setup

      /**** THE BIG THREE ********/
      'Googlebot',           /* Google, see http://www.google.com/bot.html */
      'Googlebot-Mobile',
      'Bingbot',             /* Microsoft Bing, see http://www.bing.com/bingbot.htm */
      'Slurp',               /* Yahoo, see http://help.yahoo.com/help/us/ysearch/slurp */

      /**** Home grown ********/
      'java',
      'wget',
      'curl',
      'Validator',
      'Commons-HttpClient',
      'Python-urllib',
      'libwww',
      'httpunit',
      'nutch',
      'phpcrawl',            /* added 2012/09/17, see http://phpcrawl.cuab.de/ */

      /**** The others ********/
      'msnbot',              /* see http://search.msn.com/msnbot.htm */
      'Adidxbot',            /* see http://onlinehelp.microsoft.com/en-us/bing/hh204496.aspx */
      'blekkobot',           /* see http://blekko.com/about/blekkobot */
      'teoma',
      'ia_archiver',
      'GingerCrawler',
      'webmon ',             /* the space is required so as not to match webmoney */
      'httrack',
      'webcrawler',
      'FAST-WebCrawler',
      'FAST Enterprise Crawler',
      'convera',
      'biglotron',
      'grub.org',
      'UsineNouvelleCrawler',
      'antibot',
      'netresearchserver',
      'speedy',
      'fluffy',
      'jyxobot',
      'bibnum.bnf',
      'findlink',
      'exabot',
      'gigabot',
      'msrbot',
      'seekbot',
      'ngbot',
      'panscient',
      'yacybot',
      'AISearchBot',
      'IOI',
      'ips-agent',
      'tagoobot',
      'MJ12bot',
      'dotbot',              /* listed once; a second copy (added 2011/04/27) was removed as redundant */
      'woriobot',
      'yanga',
      'buzzbot',
      'mlbot',
      'yandex',
      'purebot',             /* added 2010/01/19 */
      'Linguee Bot',         /* added 2010/01/26, see http://www.linguee.com/bot */
      'Voyager',             /* added 2010/02/01, see http://www.kosmix.com/crawler.html */
      'CyberPatrol',         /* added 2010/02/11, see http://www.cyberpatrol.com/cyberpatrolcrawler.asp */
      'voilabot',            /* added 2010/05/18 */
      'baiduspider',         /* added 2010/07/15, see http://www.baidu.jp/spider/ */
      'citeseerxbot',        /* added 2010/07/17 */
      'spbot',               /* added 2010/07/31, see http://www.seoprofiler.com/bot */
      'twengabot',           /* added 2010/08/03, see http://www.twenga.com/bot.html */
      'postrank',            /* added 2010/08/03, see http://www.postrank.com */
      'turnitinbot',         /* added 2010/09/26, see http://www.turnitin.com */
      'scribdbot',           /* added 2010/09/28, see http://www.scribd.com */
      'page2rss',            /* added 2010/10/07, see http://www.page2rss.com */
      'sitebot',             /* added 2010/12/15, see http://www.sitebot.org */
      'linkdex',             /* added 2011/01/06, see http://www.linkdex.com */
      'ezooms',              /* added 2011/04/27, see http://www.phpbb.com/community/viewtopic.php?f=64&t=935605&start=450#p12948289 */
      /*
       * NOTE(review): the next entry carries a regex-style escaped dot, while
       * other dotted entries ('grub.org', 'bibnum.bnf', 'europarchive.org',
       * 'NerdByNature.Bot') do not. Left untouched because the consumer of
       * this list is not visible here; confirm whether entries are used as
       * regex fragments or as plain substrings and make them consistent.
       */
      'mail\\.ru',           /* added 2011/04/27 */
      'discobot',            /* added 2011/05/03, see http://discoveryengine.com/discobot.html */
      'heritrix',            /* added 2011/06/21, see http://crawler.archive.org/ */
      'findthatfile',        /* added 2011/06/21, see http://www.findthatfile.com/ */
      'europarchive.org',    /* added 2011/06/21, see http://www.europarchive.org/ */
      'NerdByNature.Bot',    /* added 2011/07/12, see http://www.nerdbynature.net/bot */
      'sistrix crawler',     /* added 2011/08/02 */
      'Aboundex',            /* added 2011/09/28, see http://www.aboundex.com/crawler/ */
      'domaincrawler',       /* added 2011/10/21 */
      'wbsearchbot',         /* added 2011/12/21, see http://www.warebay.com/bot.html */
      'summify',             /* added 2012/01/04, see http://summify.com */
      'ccbot',               /* added 2012/02/05, see http://www.commoncrawl.org/bot.html */
      'edisterbot',          /* added 2012/02/25 */
      'seznambot',           /* added 2012/03/14 */
      'ec2linkfinder',       /* added 2012/03/22 */
      'gslfbot',             /* added 2012/04/03 */
      'aihitbot',            /* added 2012/04/16 */
      'intelium_bot',        /* added 2012/05/07 */
      'facebookexternalhit', /* added 2012/05/07 */
      'yeti',                /* added 2012/05/07 */
      'RetrevoPageAnalyzer', /* added 2012/05/07 */
      'lb-spider',           /* added 2012/05/07 */
      'sogou',               /* added 2012/05/13, see http://www.sogou.com/docs/help/webmasters.htm#07 */
      'lssbot',              /* added 2012/05/15 */
      'careerbot',           /* added 2012/05/23, see http://www.career-x.de/bot.html */
      'wotbox',              /* added 2012/06/12, see http://www.wotbox.com */
      'wocbot',              /* added 2012/07/25, see http://www.wocodi.com/crawler */
      'ichiro',              /* added 2012/08/28, see http://help.goo.ne.jp/help/article/1142 */
      'DuckDuckBot',         /* added 2012/09/19, see http://duckduckgo.com/duckduckbot.html */
      'lssrocketcrawler',    /* added 2012/09/24 */
      'drupact',             /* added 2012/09/27, see http://www.arocom.de/drupact */
      'webcompanycrawler',   /* added 2012/10/03 */
      'acoonbot',            /* added 2012/10/07, see http://www.acoon.de/robot.asp */
      'openindexspider',     /* added 2012/10/26, see http://www.openindex.io/en/webmasters/spider.html */
      'gnam gnam spider',    /* added 2012/10/31 */
      'SurveyBot',           /* added 2013/02/09, added by tunnckoCore */
      'AhrefsBot',           /* added 2013/02/09, added by tunnckoCore */
      'eSobiSubscriber',     /* added 2013/02/09, added by tunnckoCore */
      /*
       * NOTE(review): "Trident" is presumably also the layout-engine token
       * sent by regular Internet Explorer browsers, so this entry may flag
       * human visitors as robots - confirm this is intended before relying
       * on it.
       */
      'Trident',             /* added 2013/02/09, added by tunnckoCore */
  );

  // The closing tag is kept on purpose: non-PHP text follows this script in the file.
  ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement