Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
- //I used the simple_html_dom lib.
- //You can find it here: http://simplehtmldom.sourceforge.net/
- include_once 'lib/simple_html_dom.php';
/**
 * Fetches a single job advertisement and extracts its id and salary.
 *
 * The id is the integer that follows "id=" in the link. The salary is the
 * numeric value of the first ".price" element found on the fetched page.
 *
 * @param string $link URL of the job advertisement.
 * @return array array("id" => int, "salary" => float|int). A field keeps its
 *               default of 0 when it cannot be determined (no "id=" in the
 *               link, page not fetched, or no ".price" element).
 */
function getJob($link) {
    $r = array("id" => 0, "salary" => 0);

    // strpos() returns false when "id=" is absent; only parse on a real match.
    $idPos = strpos($link, "id=");
    if ($idPos !== false) {
        $r["id"] = intval(substr($link, $idPos + 3));
    }

    $job = file_get_html($link);
    if (!$job) {
        // Fetch or parse failed: report the job with an unknown (0) salary.
        return $r;
    }

    // find() with an index yields null when nothing matches the selector.
    $price = $job->find(".price", 0);
    if ($price) {
        $r["salary"] = floatval($price->plaintext);
    }

    // Release the parsed DOM explicitly; many job pages are loaded in one run.
    $job->clear();
    unset($job);

    return $r;
}
/**
 * Crawls one search-results page, appends every job found on it to $jobs and
 * then follows the pagination to the next page, recursively.
 *
 * @param string $web  URL of the results page to crawl.
 * @param array  $jobs Accumulator, passed by reference; one getJob() result is
 *                     appended for each ad link on the page.
 * @param int    $pag  Number of the page being crawled; only pagination links
 *                     whose "iPage=" number is greater are followed.
 * @return void
 */
function crawl($web, &$jobs, $pag) {
    $c = file_get_html($web);
    if (!$c) {
        // The page could not be fetched or parsed: nothing to collect here.
        return;
    }

    // All the ads of this page.
    foreach ($c->find("tbody tr h3 a") as $ad) {
        $jobs[] = getJob($ad->href);
    }

    // Work out the next page while the DOM is still alive, so that it can be
    // released before recursing instead of being held for the whole crawl.
    $nextUrl = null;
    $nextPag = 0;
    $hasNext = count($c->find(".paginate li .searchPaginationNext")) != 0;
    if ($hasNext) { // Is not the last page.
        foreach ($c->find(".paginate li .searchPaginationNonSelected") as $candidate) {
            $href = $candidate->href;
            if (!is_string($href)) {
                continue; // Element without an href attribute.
            }
            $pos = strpos($href, "iPage=");
            if ($pos === false) {
                continue; // Not a page link we can number.
            }
            $actpag = intval(substr($href, $pos + 6));
            if ($actpag > $pag) {
                // First link that points past the current page.
                $nextUrl = $href;
                $nextPag = $actpag;
                break;
            }
        }
    }

    $c->clear();
    unset($c);

    if ($nextUrl !== null) {
        crawl($nextUrl, $jobs, $nextPag); // Call for the next valid page.
    }
}
/**
 * Computes the average salary over the jobs that have a salary.
 *
 * Jobs whose "salary" equals 0 are treated as "salary unknown" and are left
 * out of both the sum and the count.
 *
 * @param array $jobs List of arrays with a numeric "salary" key (taken by
 *                    reference, not modified).
 * @return float The average of the non-zero salaries, or 0.0 when there is no
 *               job with a non-zero salary (avoids a division by zero).
 */
function calcAvg(&$jobs) {
    $sum = 0.0;
    $count = 0;
    foreach ($jobs as $job) {
        if ($job["salary"] != 0) {
            $sum += $job["salary"];
            $count++;
        }
    }

    if ($count === 0) {
        return 0.0;
    }
    return $sum / $count;
}
/**
 * Returns the jobs whose salary is at least the average salary.
 *
 * The average comes from calcAvg(); the original order of the jobs is kept
 * and the result is a freshly indexed list.
 *
 * @param array $jobs List of arrays with a numeric "salary" key (taken by
 *                    reference, not modified).
 * @return array The jobs with "salary" >= the average.
 */
function selectedJobs(&$jobs) {
    $threshold = calcAvg($jobs);

    $atOrAboveAverage = array_filter($jobs, function ($candidate) use ($threshold) {
        return $candidate["salary"] >= $threshold;
    });

    // array_filter() keeps the source keys; reindex to a plain 0..n-1 list.
    return array_values($atOrAboveAverage);
}
// Entry point: crawl every results page starting at page 1, keep the jobs
// paying at least the average salary and store them as JSON in result.json.
$jobs = array();
crawl("http://tests.tivort.com/index.php?page=search", $jobs, 1);

$result = json_encode(selectedJobs($jobs));
if ($result === false) {
    // json_encode() returns false on failure; do not write a broken file.
    error_log("Could not encode the selected jobs as JSON.");
    exit(1);
}

$json = fopen("result.json", "w");
if ($json === false) {
    error_log("Could not open result.json for writing.");
    exit(1);
}
fwrite($json, $result);
fclose($json); // Flush and release the handle instead of leaving it to shutdown.
echo "\n";
- ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement