daily pastebin goal
11%
SHARE
TWEET

Bot Detection System

jessicakennedy1028 Sep 12th, 2018 81 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. <?php
  2. /**
  3.  * Generates a list of robot useragent definitions for use with
  4.  * $_SERVER['HTTP_USER_AGENT'] to identify robots
  5.  *
  6.  * Created by: Muddy_Funster @ forums.phpfreaks.com - 09/2013
  7.  * Updated and modified by: Jessica Brown for use on DIABLOS - 07/2017
  8.  *
  9.  * A Huge Thank You to Psycho, Kicken and Thorpe @ forums.phpfreaks.com
  10.  * for their help and advice.
  11.  *
  12.  * This links into the robotstxt.org site to access their current
  13.  * robot list.  It then produces an array of these useragents that
  14.  * can be used to check if a visitor is a robot or not.
  15.  * Call: $yourVar = new getRobots();
  16.  * Setter : $yourVar->setExclude(mixed $mixed)
  17.  * Getter : $yourVar->makeBots(); $robotArray = $yourVar->robots;
  18.  * $yourVar->setExclude(mixed $mixed); - send values to be excluded.
  19.  *         Accepts either an array of values or a single string value
  20.  * JSON output (if you want to pass to javascript): echo $yourVar;
  21.  *
  22.  * --------------------------------------------------------------
  23.  * @example 1 : PHP BOT Check
  24.  *
  25.  * $bots = new getRobots;
  26.  * $bots->setExclude(array("", "none", "no", "yes"));
  27.  * $bots->makeBots();
  28.  * $botArray = $bots->robots;
  29.  *
  30.  * if(!in_array($_SERVER['HTTP_USER_AGENT'], $botArray)){
  31.  *        import_request_variables("g", "user_"); //example of something to do
  32.  *         ...
  33.  *         ...
  34.  * }
  35.  * else{
  36.  *        echo "Bot Safe Site Visited"; //example of something to do
  37.  *         ...
  38.  *         ...
  39.  * }
  40.  * -------------------------------------------------------------
  41.  * @example 2 : output to JSON
  42.  *
  43.  * $bots = new getRobots;
  44.  * $bots->setExclude("");
  45.  * $bots->setExclude("none");
  46.  * $bots->setExclude("???");
  47.  * $bots->setExclude("no");
  48.  * $bots->setExclude("yes");
  49.  * $bots->makeBots();
  50.  *
  51.  * header("Content-type: application/json");
  52.  * echo $bots;
  53.  * exit;
  54.  * -----------------------------------------------------------
  55.  *
  56.  * @param array $robots the array list of useragents
  57.  * @param array $excludes array of exclusions from the bot list
  58.  * @param string $url static url value for linking to the robotstxt.org robot database
  59.  * @param string $lfPath path to generate subfolder to store cache files in
  60.  * @param string $masterFile path to master cache file of robotstxt.org data
  61.  * @param string $botFile path to cached bot file for quicker repeat array building
  62.  * @param string $mdCheckFile path to md5Checksum cache to establish if cached bot file can be used
  63.  * @param array $hashVals generated md5 values from current call
  64.  * @param array $hashFileVals values from md5 checksum cache file use for comparison
  65.  * @param string $output contents retrieved from robotstxt.org site
  66.  * @return array $robots (public property) holds the robot user agents once makeBots() has run
  67.  * @return string __toString() Returns JSON string of Object{"robots":array[{"numericalKey":"useragentText"}]
  68.  */
  class getRobots{
      /** @var array Robot user-agent strings (sequential list); filled by makeBots(). */
      public $robots = array();
      /** @var array User-agent values that must never appear in $robots. */
      public $excludes = array();
      /** @var string Remote location of the robotstxt.org robot database. */
      private $url;
      /** @var string Directory (next to this file) that holds the cache files. */
      private $lfPath;
      /** @var string Cached copy of the remote database. */
      private $masterFile;
      /** @var string Cached, already filtered bot list (one user agent per line). */
      private $botFile;
      /** @var string Cache of the md5 checksums written alongside $botFile. */
      private $mdCheckFile;
      /** @var array [0] => md5 of the current excludes, [1] => md5 of the filtered list. */
      private $hashVals = array();
      /** @var array Lines read back from $mdCheckFile. */
      private $hashFileVals = array();
      /** @var string Raw database text (freshly downloaded or read from $masterFile). */
      private $output = '';
      /** @var bool Whether makeBots() has run since the excludes last changed. */
      private $built = false;

      /**
       * Sets up the remote url and cache paths and makes sure the cache directory exists.
       *
       * @throws RuntimeException when the cache directory cannot be created
       */
      public function __construct(){
          $this->url = "http://www.robotstxt.org/db/all.txt";
          // __DIR__ / DIRECTORY_SEPARATOR instead of searching __FILE__ for "\\":
          // identical result on Windows, and no longer resolves to "/robots" elsewhere.
          $this->lfPath = __DIR__.DIRECTORY_SEPARATOR.'robots';
          $this->masterFile = $this->lfPath.DIRECTORY_SEPARATOR.'rbtList.txt';
          $this->botFile = $this->lfPath.DIRECTORY_SEPARATOR.'allBots.txt';
          $this->mdCheckFile = $this->lfPath.DIRECTORY_SEPARATOR.'mdHashFile.txt';
          $this->excludes[] = "Due to a deficiency in Java it's not currently possible to set the User-Agent.";
          $this->excludes[] = "Due to a deficiency in Java it's not currently possible";
          // The trailing is_dir() covers another request creating the folder in between.
          if(!is_dir($this->lfPath) && !mkdir($this->lfPath) && !is_dir($this->lfPath)){
              throw new RuntimeException("error creating directory! PHP must have write permissions for this folder -- {$this->lfPath}");
          }
      }

      /**
       * Adds one or more values to the exclusion list.
       *
       * @param mixed $mixed a single string or an array of strings to exclude
       */
      public function setExclude($mixed){
          $merged = array_merge((array)$this->excludes, (array)$mixed);
          $merged = array_unique($merged);
          sort($merged);
          $this->excludes = $merged;
          // A list built with the previous excludes is stale for getBots().
          $this->built = false;
      }

      /**
       * Refreshes the cached database if needed and (re)builds $this->robots.
       *
       * @throws RuntimeException when the database is unreachable and no cached copy exists
       */
      public function makeBots(){
          $refreshed = $this->checkFile();
          $this->checkBotList($refreshed);
          $this->built = true;
      }

      /**
       * Alias of makeBots(), the name used in the usage examples of the file header.
       *
       * @throws RuntimeException see makeBots()
       */
      public function makeBotList(){
          $this->makeBots();
      }

      /**
       * Returns the robot list, building it first when that has not happened yet.
       *
       * @return array list of robot user-agent strings
       * @throws RuntimeException see makeBots()
       */
      public function getBots(){
          if(!$this->built){
              $this->makeBots();
          }
          return $this->robots;
      }

      /**
       * Loads the robot database into $this->output, using a conditional GET
       * so an unchanged remote file is served from the local master copy.
       *
       * @return bool true when a fresh copy was downloaded and stored
       * @throws RuntimeException when neither the remote file nor a cached copy is available
       */
      private function checkFile(){
          $hasCache = is_file($this->masterFile);
          $options = array('http' => array('method' => 'GET'));
          if($hasCache){
              // HTTP dates must end in "GMT"; DATE_RFC1123 would emit "+0000".
              $stamp = gmdate('D, d M Y H:i:s', filemtime($this->masterFile)).' GMT';
              $options['http']['header'] = "If-Modified-Since: ".$stamp;
          }
          $fp = fopen($this->url, 'rb', false, stream_context_create($options));
          if($fp !== false){
              $body = stream_get_contents($fp);
              $meta = stream_get_meta_data($fp);
              fclose($fp);
              $headers = isset($meta['wrapper_data']) ? $meta['wrapper_data'] : array();
              if($this->lastHttpStatus($headers) === 200 && is_string($body) && $body !== ''){
                  $this->output = $body;
                  file_put_contents($this->masterFile, $body);
                  return true;
              }
          }
          // 304 Not Modified, network failure or unexpected status: use the cached copy.
          $cached = $hasCache ? file_get_contents($this->masterFile) : false;
          if($cached === false){
              throw new RuntimeException("unable to download the robot list from {$this->url} and no cached copy exists -- {$this->masterFile}");
          }
          $this->output = $cached;
          return false;
      }

      /**
       * Extracts the status code of the final response from the http wrapper headers.
       * After redirects the header list contains several status lines; the last one counts.
       *
       * @param mixed $headers the 'wrapper_data' entry of stream_get_meta_data()
       * @return int HTTP status code, 0 when none was found
       */
      private function lastHttpStatus($headers){
          $status = 0;
          foreach((array)$headers as $header){
              if(is_string($header) && preg_match('#^HTTP/\S+\s+(\d{3})#i', $header, $match)){
                  $status = (int)$match[1];
              }
          }
          return $status;
      }

      /**
       * Fills $this->robots, either from the cached bot file (same excludes,
       * unchanged database) or by parsing $this->output and rewriting the caches.
       *
       * @param bool $masterRefreshed true when the database was just re-downloaded,
       *                              which invalidates the cached bot file
       */
      private function checkBotList($masterRefreshed = false){
          $this->hashVals = array(md5(implode("|", $this->excludes)));
          $this->hashFileVals = array();

          if(!$masterRefreshed && is_file($this->mdCheckFile) && is_file($this->botFile)){
              $stored = file($this->mdCheckFile, FILE_IGNORE_NEW_LINES);
              if($stored !== false){
                  $this->hashFileVals = $stored;
              }
              if(isset($this->hashFileVals[0]) && trim($this->hashFileVals[0]) === $this->hashVals[0]){
                  // Strip the line endings, otherwise cached entries never equal a real user agent.
                  $cachedBots = file($this->botFile, FILE_IGNORE_NEW_LINES);
                  if($cachedBots !== false){
                      $this->robots = $cachedBots;
                      return;
                  }
              }
          }

          $tag = "robot-useragent:";
          $tagLength = strlen($tag);
          $found = array();
          foreach(preg_split('/\r\n|\r|\n/', (string)$this->output) as $text){
              $pos = strpos($text, $tag);
              if($pos !== false){
                  // Cut right after the tag wherever it sits, not at a fixed column.
                  $found[] = trim(substr($text, $pos + $tagLength));
              }
          }
          // array_values(): array_diff/array_unique leave gaps in the keys, which would
          // turn the JSON output into an object and differ from the cached (list) form.
          $filtered = array_values(array_unique(array_diff($found, $this->excludes)));
          $this->robots = $filtered;
          $this->hashVals[1] = md5(implode("|", $filtered));

          // Cache writes are best effort; the in-memory list is already complete.
          file_put_contents($this->botFile, implode("\n", $filtered));
          file_put_contents($this->mdCheckFile, implode("\n", $this->hashVals));
      }

      /**
       * JSON representation: {"robots":[...]}.
       *
       * @return string always a string, even when an entry is not valid UTF-8
       */
      public function __toString(){
          $payload = array('robots' => $this->robots);
          $json = json_encode($payload);
          if($json === false && defined('JSON_PARTIAL_OUTPUT_ON_ERROR')){
              // Entries that cannot be encoded become null instead of failing the whole call.
              $json = json_encode($payload, JSON_PARTIAL_OUTPUT_ON_ERROR);
          }
          return ($json === false) ? '{"robots":[]}' : $json;
      }
  }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top