Guest User

simplehtmldom

a guest
Oct 26th, 2015
211
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 2.08 KB | None | 0 0
  1.  
  2. <?php
  3.  
  4. include "simple_html_dom.php";
  5. include "print_var.php";
  // Entry point of the crawl: the printable view of the forum's first page.
  $url = "http://metodist.lbz.ru/communication/forum/forum29/?print=Y";
  // Item counter; no visible code in this script updates it.
  $itemNum = 0;
  // Container for collected themes. Initialised explicitly: the bare statement
  // `$themes;` left the variable undefined, so the print_var($themes) call at
  // the end of the script received an undefined variable.
  $themes = array();
  9.  
  10.  
  /**
   * Downloads a URL with cURL and returns the response body.
   *
   * Redirects are followed (at most 10), every content encoding cURL supports
   * is accepted, and a desktop Chrome user agent string is sent. Connect and
   * total timeouts are both 120 seconds.
   *
   * @param string $url Address of the page to fetch.
   *
   * @return string|false The response body, or false when the cURL handle
   *                      could not be created or the transfer failed.
   */
  function get_web_page( $url ){
      $ch = curl_init( $url );
      if ( $ch === false ) {
          // curl_init() reports failure with false; surface it the same way a
          // failed transfer is surfaced instead of using an invalid handle.
          return false;
      }

      curl_setopt_array( $ch, array(
          CURLOPT_RETURNTRANSFER => true,     // return web page
          CURLOPT_HEADER         => false,    // don't return headers
          CURLOPT_FOLLOWLOCATION => true,     // follow redirects
          CURLOPT_ENCODING       => "",       // handle all encodings
          CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.52 Safari/537.17", // who am i
          CURLOPT_AUTOREFERER    => true,     // set referer on redirect
          CURLOPT_CONNECTTIMEOUT => 120,      // timeout on connect
          CURLOPT_TIMEOUT        => 120,      // timeout on response
          CURLOPT_MAXREDIRS      => 10,       // stop after 10 redirects
      ) );

      // With CURLOPT_RETURNTRANSFER enabled curl_exec() returns the body as a
      // string, or false on error. The error number, error message and transfer
      // info were previously collected but never used, so they are not gathered.
      $content = curl_exec( $ch );
      curl_close( $ch );

      return $content;
  }
  35.  
  36.  
  37. //var_dump( iconv('windows-1251', 'utf-8', get_web_page('http://metodist.lbz.ru/communication/forum/forum29/?print=Y') ));
  38.  
  // Collect the links to all forum pages. The list starts with the first page;
  // the remaining pages are discovered from its pagination block.
  $pageLinks = array($url);
  $html = file_get_html($url);

  // file_get_html() does not return a DOM object when the page cannot be
  // loaded; in that case skip link discovery so only the first page is used,
  // instead of failing on ->find() being called on a non-object.
  if (is_object($html)) {
      foreach ($html->find('div[class=forum-page-navigation]') as $element) {
          foreach ($element->find('a') as $link) {
              $href = $link->href;
              if (!is_string($href)) {
                  // Not a usable href (presumably the attribute is missing).
                  continue;
              }

              // hrefs are treated as site-relative, so prefix the host.
              $pageUrl = 'http://metodist.lbz.ru' . $href;
              if (!in_array($pageUrl, $pageLinks, true)) {
                  $pageLinks[] = $pageUrl;
              }
          }
      }
  } // $pageLinks now holds the links to all forum pages

  unset($html);
  51.  
  52.  
  // Download every collected page, convert it from windows-1251 to UTF-8 and
  // write the converted markup straight to the output.
  foreach ($pageLinks as $pageLink) {
      $rawPage = get_web_page($pageLink);
      echo iconv('windows-1251', 'utf-8', $rawPage);
  }
  unset($pageLink, $rawPage);

  // Dump $themes through print_var() (presumably provided by print_var.php);
  // no visible code in this script adds entries to it.
  print_var($themes);
  63.  
  64. ?>
Advertisement
Add Comment
Please, Sign In to add comment