Advertisement
Guest User

AFG CRAWL

a guest
Jun 29th, 2016
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 1.74 KB | None | 0 0
  1. <?php
  2.    
  3.     //I used the simple_html_dom lib.
  4.     //You can find it here: http://simplehtmldom.sourceforge.net/
  5.     include_once 'lib/simple_html_dom.php';
  6.  
  7.  
  8.     function getJob($link) {
  9.         $r = Array("id" => 0, "salary" => 0);
  10.         $r["id"] = intval(substr($link, strpos($link, "id=") + 3));
  11.         $job = file_get_html($link);
  12.         $r["salary"] = floatval($job->find(".price", 0)->plaintext);
  13.         return $r;
  14.     }
  15.  
  16.  
  17.     function crawl($web, &$jobs,$pag) {
  18.         $c = file_get_html($web);
  19.        
  20.         $ads = $c->find("tbody tr h3 a");
  21.         foreach($ads as $ad) {  //All the ads of this page
  22.             $s = getJob($ad->href);
  23.             array_push($jobs, $s);
  24.         }
  25.  
  26.         $pagin = $c->find(".paginate li .searchPaginationNext");
  27.         if(count($pagin) != 0) { //Is not the last page.
  28.             $pagines = $c->find(".paginate li .searchPaginationNonSelected");
  29.             $found =  false;
  30.             for($i = 0; $i < count($pagines) and !$found; $i++) { //To find the first valid link
  31.                 $actpag = intval(substr($pagines[$i]->href, strpos($pagines[$i]->href, "iPage=") + 6));
  32.                 if($actpag > $pag) {
  33.                     $found = true;
  34.                     crawl($pagines[$i]->href, $jobs, $actpag); //Call for the next valid page
  35.                 }
  36.             }
  37.         }
  38.     }
  39.  
  40.     function calcAvg(&$jobs) {
  41.         $div = 0.0;
  42.         $sum = 0.0;
  43.         foreach ($jobs as $job) {
  44.             if($job["salary"] != 0) {
  45.                 $sum = $sum + $job["salary"];
  46.                 $div = $div + 1.0;
  47.             }
  48.         }
  49.  
  50.         return $sum/$div;
  51.     }
  52.  
  53.     function selectedJobs(&$jobs) {
  54.         $avg = calcAvg($jobs);
  55.         $r = array();
  56.         foreach($jobs as $job) {
  57.             if($job["salary"] >= $avg) array_push($r, $job);
  58.         }
  59.  
  60.         return $r;
  61.     }
  62.  
  63.     $jobs = array();
  64.     crawl("http://tests.tivort.com/index.php?page=search", $jobs, 1);
  65.     $result = json_encode(selectedJobs($jobs));
  66.     $json = fopen("result.json", "w");
  67.     fwrite($json, $result);
  68.  
  69.     echo "\n";
  70. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement