Guest User

Sitemap generator

a guest
Oct 22nd, 2012
340
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 1.71 KB | None | 0 0
  1. <?php
  2. // Include the PHPCrawl main class
  3. include("libs/PHPCrawler.class.php");
  4.  
  /**
   * Crawler subclass that records every document handed to
   * handleDocumentInfo() as a <url> entry in an XML sitemap file.
   *
   * Intended call order: setSitemapOutputFile() before the crawl starts,
   * then the crawl itself, then closeFile() to terminate the document.
   */
  class SitemapGenerator extends PHPCrawler
  {
    /** Path of the sitemap file; unset until setSitemapOutputFile() is called. */
    protected $sitemap_output_file;

    /**
     * Selects the sitemap output file and starts it afresh with the XML
     * declaration and the opening <urlset> tag.
     *
     * @param string $file Path of the sitemap file to (re)create.
     */
    public function setSitemapOutputFile($file)
    {
      $this->sitemap_output_file = $file;

      // Without FILE_APPEND the write truncates any existing file, so the
      // former file_exists()/unlink() pair (and its race window) is not needed.
      file_put_contents(
        $this->sitemap_output_file,
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n".
        "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\r\n",
        LOCK_EX
      );
    }

    /**
     * Reports the document's URL on the output and appends it to the
     * sitemap file as a <url><loc>...</loc></url> entry.
     *
     * @param object $DocInfo Document information; only its "url" property is read.
     */
    public function handleDocumentInfo($DocInfo)
    {
      $url = $DocInfo->url;

      // Progress message: plain text on the command line, HTML otherwise.
      if (PHP_SAPI === "cli") {
        echo "Adding ".$url." to sitemap file"."\n";
      } else {
        // The URL comes from crawled pages, so escape it before it reaches a browser.
        echo "Adding ".htmlspecialchars($url, ENT_QUOTES)." to sitemap file"."<br />";
      }

      // The URL must be entity-escaped: a raw "&" (common in query strings)
      // would make the sitemap malformed XML.
      // LOCK_EX keeps entries from interleaving when several crawler
      // processes append to the same file.
      file_put_contents(
        $this->sitemap_output_file,
        " <url>\r\n".
        "  <loc>".$this->escapeXml($url)."</loc>\r\n".
        " </url>\r\n",
        FILE_APPEND | LOCK_EX
      );

      flush();
    }

    /**
     * Appends the closing </urlset> tag to the sitemap file.
     * Call once, after the crawl has finished.
     */
    public function closeFile()
    {
      file_put_contents($this->sitemap_output_file, '</urlset>', FILE_APPEND | LOCK_EX);
    }

    /**
     * Replaces the five XML special characters with their predefined entities.
     *
     * Works byte-wise, so it never drops the value on malformed UTF-8 input.
     *
     * @param string $text Raw text.
     * @return string Text that is safe to place inside an XML element.
     */
    private function escapeXml($text)
    {
      // strtr() with an array replaces in a single pass, so the "&" produced
      // by one entity is never escaped a second time.
      return strtr($text, array(
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&apos;',
      ));
    }
  }
  41.  
  // Create the sitemap-writing crawler and start a fresh output file.
  $generator = new SitemapGenerator();
  $generator->setSitemapOutputFile("sitemap.xml");

  // Start URL, plus the content-type and URL-filter rules for this crawl.
  $generator->setURL("www.php.net");
  $generator->addContentTypeReceiveRule("#text/html#");
  $generator->addURLFilterRule("#\.(jpg|jpeg|gif|png)$# i");

  // ... configure any further crawler options and rules here

  // Small page limit, for testing only.
  $generator->setPageLimit(10);

  // Crawl with 5 processes; call go() instead for a single-process crawl.
  $generator->goMultiProcessed(5);

  // Crawl finished: write the closing tag of the sitemap.
  $generator->closeFile();
  53. ?>
Advertisement
Add Comment
Please, Sign In to add comment