Advertisement
joris

phpQuery

Aug 15th, 2012
96
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 2.04 KB | None | 0 0
  1. <?php
  2. include "phpQuery/phpQuery.php";
  /**
   * Fetch a URL with cURL and return the response body.
   *
   * Recognised $config keys (all optional):
   *  - 'headers'        array of raw header lines passed to CURLOPT_HTTPHEADER.
   *  - 'returntransfer' value for CURLOPT_RETURNTRANSFER (defaults to 1).
   *  - 'postdata'       when present the request is a POST carrying this payload;
   *                     otherwise the request is a GET.
   *  - 'cookiePath'     path of a cookie jar file used for reading and writing
   *                     cookies; omitted or '' disables cookie persistence.
   *  - 'getInfo'        when present, the transfer info is printed with print_r()
   *                     and the script exits instead of returning.
   *  - 'get'            accepted for backward compatibility but no longer
   *                     consulted: the old check was inverted (get == false forced
   *                     GET) and silently cancelled a configured POST.
   *
   * @param string $url     URL to request.
   * @param string $baseUrl Sent as the Referer header when non-empty.
   * @param array  $config  Options described above.
   *
   * @return mixed Response body as a string, or the raw curl_exec() result when
   *               'returntransfer' is disabled.
   *
   * @throws Exception CHttpException (status 404) when that class is available,
   *                   otherwise RuntimeException, if the transfer fails, the cURL
   *                   handle cannot be created, or the cookie file cannot be created.
   */
  function crawl($url, $baseUrl = '', $config = array())
  {
      $cookiePath = isset($config['cookiePath']) ? (string) $config['cookiePath'] : '';

      // Create the cookie jar up front so a permissions problem is reported
      // before any request is made.
      if ($cookiePath !== '' && !file_exists($cookiePath)) {
          $cookieHandle = fopen($cookiePath, 'w');
          if ($cookieHandle === false) {
              throw new RuntimeException('The cookie file could not be opened. Make sure this directory has the correct permissions');
          }
          fclose($cookieHandle);
          // Cookies may hold session secrets: owner read/write only, never executable.
          chmod($cookiePath, 0600);
      }

      $ch = curl_init();
      if ($ch === false) {
          throw new RuntimeException('Unable to initialise a cURL handle');
      }

      curl_setopt($ch, CURLOPT_TIMEOUT, 200);
      if (isset($config['headers'])) {
          curl_setopt($ch, CURLOPT_HTTPHEADER, $config['headers']);
      }
      curl_setopt($ch, CURLOPT_HEADER, 0);
      curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)');
      curl_setopt($ch, CURLOPT_URL, $url);
      if ((string) $baseUrl !== '') {
          curl_setopt($ch, CURLOPT_REFERER, $baseUrl);
      }
      curl_setopt(
          $ch,
          CURLOPT_RETURNTRANSFER,
          isset($config['returntransfer']) ? $config['returntransfer'] : 1
      );

      if ($cookiePath !== '') {
          curl_setopt($ch, CURLOPT_COOKIEJAR, $cookiePath);
          curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiePath);
      }

      // The presence of a payload decides the HTTP method.
      if (isset($config['postdata'])) {
          curl_setopt($ch, CURLOPT_POST, 1);
          curl_setopt($ch, CURLOPT_POSTFIELDS, $config['postdata']);
      } else {
          curl_setopt($ch, CURLOPT_HTTPGET, true);
      }

      $data = curl_exec($ch);

      if (isset($config['getInfo'])) {
          // Transfer info is only populated once the request has been executed.
          print_r(curl_getinfo($ch));
          exit;
      }

      // Compare strictly: an empty body or the string "0" is a valid response,
      // only boolean false signals a failed transfer.
      if ($data === false) {
          $message = 'Halaman tidak ditemukan';
          $detail = curl_error($ch);
          if ($detail !== '') {
              $message .= ': ' . $detail;
          }
          // CHttpException is not defined by anything this file includes; keep
          // throwing it where the host application provides it, and fall back to
          // a built-in exception so a failure is catchable instead of fatal.
          if (class_exists('CHttpException')) {
              throw new CHttpException('404', $message);
          }
          throw new RuntimeException($message, 404);
      }

      return $data;
  }
  62.  
  // Fetch the Antara national-news RSS feed and print the title of every <item>.
  $Link = "http://www.antaranews.com/rss/nasional";

  try {
      $data = crawl($Link);
  } catch (Exception $e) {
      // Report the failure instead of letting an uncaught exception abort the page.
      echo 'Failed to fetch feed: ' . htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8');
      exit(1);
  }

  // NOTE(review): the feed is RSS (XML) but is parsed with the HTML loader, as in
  // the original; confirm whether phpQuery's XML loader would be more appropriate.
  $doc = phpQuery::newDocumentHTML($data);

  $item = $doc->find("item");
  foreach ($item as $itm) {
      $itm = pq($itm);
      $title = $itm->find("title")->text();
      // Titles come from a remote feed: escape them before writing into HTML.
      echo "<br>" . htmlspecialchars($title, ENT_QUOTES, 'UTF-8') . "\n\n";
      // TODO: call the save-to-database function here.
  }
  74.    
  75. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement