Juno_okyo

Get links on a page with DomDocument

Jan 27th, 2015
263
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 1.09 KB | None | 0 0
  /**
   * Collect the links (<a> elements) found in an HTML document.
   *
   * Source: http://www.binarytides.com/php-get-links-on-a-page-with-domdocument/
   *
   * The document is read with file_get_contents(), so $link may be a URL or
   * any other path that function accepts. The result maps each anchor's
   * "href" attribute to the value of that anchor's first child node. When
   * several anchors share the same href, the last one wins. An anchor with
   * no child nodes maps to an empty string.
   *
   * @param string $link Location of the HTML document to scan.
   *
   * @return array Map of href => text of the anchor's first child node;
   *               empty when the document cannot be read or is empty.
   */
  function get_links($link)
  {
      $ret = array();

      // Swallow the warning file_get_contents() raises on failure; the
      // failure itself is detected through the return value below.
      set_error_handler(function () {
          return true;
      });
      try {
          $html = file_get_contents($link);
      } finally {
          restore_error_handler();
      }

      // Nothing to parse: loadHTML() rejects an empty source, so stop here.
      if ($html === false || $html === '') {
          return $ret;
      }

      // Real-world markup is rarely valid; collect libxml parse errors
      // internally instead of emitting warnings, then restore the old mode.
      $previous = libxml_use_internal_errors(true);
      $dom = new DOMDocument();
      $dom->loadHTML($html);
      libxml_clear_errors();
      libxml_use_internal_errors($previous);

      foreach ($dom->getElementsByTagName('a') as $tag) {
          // An empty anchor (<a href="..."></a>) has no first child.
          $first = $tag->firstChild;
          $ret[$tag->getAttribute('href')] = $first === null ? '' : (string) $first->nodeValue;
      }

      return $ret;
  }
  31.  
  32.  
  // Page to open and scan for links.
  $link = "http://junookyo.blogspot.com/";

  // Map of href => link text collected from the page.
  $urls = get_links($link);

  if (count($urls) > 0) {
      foreach ($urls as $key => $value) {
          // Both the href and the link text come from a remote page, so
          // escape them before writing them into the HTML output.
          echo htmlspecialchars((string) $key, ENT_QUOTES, 'UTF-8')
              . ' - '
              . htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8')
              . '<br >';
      }
  } else {
      echo "No links found at $link";
  }
Add Comment
Please, Sign In to add comment