Advertisement
Fwaky

Tibia.com scrape

Dec 25th, 2015
124
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 7.79 KB | None | 0 0
  1. <?php
  2. // We need to force the time to CEST
  3. date_default_timezone_set("Europe/Berlin");
  4. /**
  5.  * Author: Johan Hultin
  6.  * Last revision: 2013-06-29 21:13:00
  7.  */
  8.  
  9. class TibiaDotCom
  10. {
  11.     private $vocations = array("None", "Druid", "Elder Druid", "Sorcerer", "Master sorcerer", "Paladin", "Royal Paladin", "Knight", "Elite knight", "Unknown");
  12.     public function vocationId($vocation){
  13.         $vocations = $this->vocations;
  14.         return array_search($vocation, $vocations);
  15.     }
  16.     /**
  17.      * Gets the list of the character's deaths
  18.      *
  19.      * @param string $name the character name
  20.      * @return array the deaths
  21.      */
  22.     public function characterDeaths($name)
  23.     {
  24.         $html = $this->getUrl("http://www.tibia.com/community/?subtopic=characters&name=".$name);
  25.  
  26.         if (false !== stripos($html, "<b>Could not find character</b>")) {
  27.             throw new CharacterNotFoundException($name);
  28.         }
  29.  
  30.         $domd = $this->getDOMDocument($html);
  31.         $domx = new DOMXPath($domd);
  32.         $rows = $domx->query("//b[text() = 'Character Deaths']/ancestor::table[1]//tr[position() > 1]");
  33.         $deaths = array();
  34.  
  35.         foreach ($rows as $row) {
  36.             $date = $row->firstChild->nodeValue;
  37.             $text = $row->lastChild->nodeValue;
  38.  
  39.             preg_match("/Died at Level (\\d+) by (.+)\\./", $text, $matches);
  40.  
  41.             $deaths[] = array(
  42.                 "date"      =>  DateTime::createFromFormat("M d Y, H:i:s T", $date),
  43.                 "level"     =>  $matches[1],
  44.                 "reason"    =>  $matches[2],
  45.             );
  46.         }
  47.  
  48.         return $deaths;
  49.     }
  50.  
  51.     /**
  52.      * Gets information about the given character
  53.      *
  54.      * @param string $name
  55.      * @return array character data
  56.      */
  57.     public function characterInfo($name)
  58.     {
  59.         $html = $this->getUrl("http://www.tibia.com/community/?subtopic=characters&name=".$name);
  60.  
  61.         if (false !== stripos($html, "<b>Could not find character</b>")) {
  62.             $character["not_found"] = true;
  63.         } else {
  64.  
  65.             // this will be used later while we go through all the rows in the charinfo table
  66.             $map = array(
  67.                 "Name:" => "name",
  68.                 "Former Names:" => "former_names",
  69.                 "Sex:" => "sex",
  70.                 "Vocation:" => "vocation",
  71.                 "Level:" => "level",
  72.                 "World:" => "world",
  73.                 "Former world:" => "former_world",
  74.                 "Residence:" => "residence",
  75.                 "Achievement Points:" => "achievement_points",
  76.                 "Last login:" => "last_login",
  77.                 "Comment:" => "comment",
  78.                 "Account Status:" => "account_status",
  79.                 "Married to:" => "married_to",
  80.                 "House:" => "house",
  81.                 "Guild membership:" => "guild",
  82.                 "Comment:" => "comment",
  83.             );
  84.  
  85.             $domd = $this->getDOMDocument($html);
  86.             $domx = new DOMXPath($domd);
  87.             $character = array();
  88.  
  89.             $rows = $domx->query("//div[@class='BoxContent']/table[1]/tr[position() > 1]");
  90.             foreach ($rows as $row) {
  91.                 $name  = trim($row->firstChild->nodeValue);
  92.                 $value = trim($row->lastChild->nodeValue);
  93.  
  94.                 if (isset($map[$name])) {
  95.                     $character[$map[$name]] = $value;
  96.                 } else {
  97.                     $character[$name] = $value;
  98.                 }
  99.             }
  100.  
  101.             // value cleanup
  102.  
  103.             $character["last_login"] = DateTime::createFromFormat("M d Y, H:i:s T", $character["last_login"]);
  104.  
  105.             if (isset($character["guild"])) {
  106.                 $values = explode(" of the ", $character["guild"]);
  107.                 $character["guild"] = array(
  108.                     "name"  =>  $values[1],
  109.                     "rank"  =>  $values[0],
  110.                 );
  111.             }
  112.  
  113.             if (isset($character["house"])) {
  114.                 $values = explode(" is paid until ", $character["house"]);
  115.                 $character["house"] = $values[0];
  116.             }
  117.         }
  118.         return $character;
  119.     }
  120.  
  121.     /**
  122.      * Return the list of characters online at the given world
  123.      *
  124.      * @param type $world
  125.      * @return array characters with name, level and vocation
  126.      */
  127.     public function whoIsOnline($world)
  128.     {
  129.         $html = $this->getUrl("http://www.tibia.com/community/?subtopic=worlds&world=" . $world);
  130.         $domd = $this->getDOMDocument($html);
  131.  
  132.         $domx = new DOMXPath($domd);
  133.         $characters = $domx->query("//table[@class='Table2']//tr[position() > 1]");
  134.         $ret = array();
  135.  
  136.         foreach ($characters as $character) {
  137.             $name     = $domx->query("td[1]/a[@href]", $character)->item(0)->nodeValue;
  138.             $level    = $domx->query("td[2]", $character)->item(0)->nodeValue;
  139.             $vocation = $domx->query("td[3]", $character)->item(0)->nodeValue;
  140.  
  141.             $ret[] = array(
  142.                 "name"      =>  $name,
  143.                 "level"     =>  $level,
  144.                 "vocation"  =>  $vocation,
  145.             );
  146.         }
  147.  
  148.         return $ret;
  149.     }
  150.     /**
  151.      * Retrieves a list of all worlds
  152.      *
  153.      * @param null
  154.      * @return array of all worlds.
  155.      */
  156.     public function getWorlds(){
  157.         $html = $this->getUrl("http://www.tibia.com/community/?subtopic=worlds");
  158.        
  159.         $domd = $this->getDOMDocument($html);
  160.         $domx = new DOMXPath($domd);
  161.         $worlds = $domx->query("//table[@class='TableContent']//tr[position() > 1]");
  162.         $ret = array();
  163.        
  164.         foreach($worlds as $world){
  165.             $name = $domx->query("td[1]/a[@href]", $world)->item(0)->nodeValue;
  166.             $type = $domx->query("td[4]", $world)->item(0)->nodeValue;
  167.             $area = $domx->query("td[3]", $world)->item(0)->nodeValue;
  168.            
  169.             $ret[] = array(
  170.                 "name"      => $name,
  171.                 "location"  => $area,
  172.                 "worldtype" => $type,
  173.             );
  174.         }
  175.        
  176.         return $ret;
  177.     }
  178.     /**
  179.      * Retrieves a list of all worlds
  180.      *
  181.      * @param $world, $type, $page
  182.      * @return array highscores data.
  183.      */
  184.     public function getHighscores($world, $type, $page){
  185.         $type = strtolower($type);
  186.         $html = $this->getUrl("http://www.tibia.com/community/?subtopic=highscores&world=".$world."&list=".$type."&page=".$page);
  187.         $domd = $this->getDOMDocument($html);
  188.         $domx = new DOMXPath($domd);
  189.         $worlds = $domx->query("//b[text() = 'Rank']/ancestor::table[1]//tr[position() > 1]");
  190.         $ret = array();
  191.         foreach($worlds as $world){
  192.             @$rank = $domx->query("td[1]", $world)->item(0)->nodeValue;
  193.             @$name = $domx->query("td[2]/a[@href]", $world)->item(0)->nodeValue;
  194.             if($type == 'experience'){
  195.                 @$level = $domx->query("td[3]", $world)->item(0)->nodeValue;
  196.                 @$skill = $domx->query("td[4]", $world)->item(0)->nodeValue;
  197.             } else {
  198.                 @$skill = $domx->query("td[3]", $world)->item(0)->nodeValue;
  199.             }
  200.             $ret[] = array(
  201.                 "name"      => @$name,
  202.                 "rank"  => @$rank,
  203.                 "value" => @$skill,
  204.                 "level" => @$level,
  205.             );
  206.         }
  207.         unset($ret[0]); // To prevent an extra row!
  208.         return $ret;
  209.     }
  210.     /**
  211.      * Creates a DOMDocument object from a given html string
  212.      *
  213.      * @param string $html
  214.      * @return DOMDocument
  215.      */
  216.     private function getDOMDocument($html)
  217.     {
  218.         $domd = new DOMDocument("1.0", "utf-8");
  219.  
  220.         $replace = array(
  221.             "&#160;"    =>  " ", // non-breaking space in the page's code
  222.             chr(160)    =>  " ", // non-breaking space in character comments
  223.         );
  224.         $html = str_replace(array_keys($replace), array_values($replace), $html);
  225.  
  226.         $html = mb_convert_encoding($html, "utf-8", "iso-8859-1");
  227.  
  228.         libxml_use_internal_errors(true);
  229.         $domd->loadHTML($html);
  230.         libxml_use_internal_errors(false);
  231.  
  232.         return $domd;
  233.     }
  234.  
  235.     /**
  236.      * Fetches a page from tibia.com and returns its body
  237.      *
  238.      * @param string $url
  239.      * @return string
  240.      * @throws \RuntimeException if a http error occurs
  241.      */
  242.     private function getUrl($url)
  243.     {
  244.          // create curl resource
  245.         $ch = curl_init();
  246.  
  247.         // set url
  248.         curl_setopt($ch, CURLOPT_URL, $url);
  249.  
  250.         //return the transfer as a string
  251.         curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  252.  
  253.         // $output contains the output string
  254.         $output = curl_exec($ch);
  255.         return $output;
  256.     }
  257. }
  258. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement