Advertisement
Guest User

3333

a guest
Oct 31st, 2014
156
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 1.79 KB | None | 0 0
  1.  
  2. <?php
  3. // $html        = the html on the page
  4. // $current_url = the full url that the html came from (only needed for $repath)
  5. // $repath      = converts ../ and / and // urls to full valid urls
  6. function pageLinks($html, $current_url = "", $repath = false){
  7.     preg_match_all("/\<a.+?href=(\"|')(?!javascript:|#)(.+?)(\"|')/i", $html, $matches);
  8.     $links = array();
  9.     if(isset($matches[2])){
  10.         $links = $matches[2];
  11.     }
  12.     if($repath && count($links) > 0 && strlen($current_url) > 0){
  13.         $pathi      = pathinfo($current_url);
  14.         $dir        = $pathi["dirname"];
  15.         $base       = parse_url($current_url);
  16.         $split_path = explode("/", $dir);
  17.         $url        = "";
  18.         foreach($links as $k => $link){
  19.             if(preg_match("/^\.\./", $link)){
  20.                 $total = substr_count($link, "../");
  21.                 for($i = 0; $i < $total; $i++){
  22.                     array_pop($split_path);
  23.                 }
  24.                 $url = implode("/", $split_path) . "/" . str_replace("../", "", $link);
  25.             }elseif(preg_match("/^\/\//", $link)){
  26.                 $url = $base["scheme"] . ":" . $link;
  27.             }elseif(preg_match("/^\/|^.\//", $link)){
  28.                 $url = $base["scheme"] . "://" . $base["host"] . $link;
  29.             }elseif(preg_match("/^[a-zA-Z0-9]/", $link)){
  30.                 if(preg_match("/^http/", $link)){
  31.                     $url = $link;
  32.                 }else{
  33.                     $url       = $dir . "/" . $link;
  34.                 }
  35.             }
  36.             $links[$k] = $url;
  37.         }
  38.     }
  39.     return $links;
  40. }
  41. header("content-type: text/plain");
  42. $url = "site";
  43. $html = file_get_contents($url);
  44. $variavel = (pageLinks($html, $url, true));
  45.  
  46. print_r($variavel);
  47. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement