Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
- // Include the PHPCrawl main class
- include("libs/PHPCrawler.class.php");
/**
 * Crawler that records the URL of every handled document in an XML sitemap file.
 *
 * Call setSitemapOutputFile() before starting the crawl (it writes the XML
 * header and the opening urlset tag) and closeFile() after the crawl has
 * finished (it writes the closing urlset tag).
 */
class SitemapGenerator extends PHPCrawler
{
    /** @var string Path of the sitemap file that entries are appended to. */
    protected $sitemap_output_file;

    /**
     * Sets the sitemap output file and starts it with the XML prolog.
     *
     * An existing file at the given path is deleted first, so the sitemap
     * always starts empty.
     *
     * @param string $file Path of the sitemap file to create.
     */
    public function setSitemapOutputFile($file)
    {
        $this->sitemap_output_file = $file;

        if (file_exists($this->sitemap_output_file)) {
            unlink($this->sitemap_output_file);
        }

        file_put_contents(
            $this->sitemap_output_file,
            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" .
            "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\r\n",
            FILE_APPEND | LOCK_EX
        );
    }

    /**
     * Appends one url entry for the given document to the sitemap file and
     * prints a progress line.
     *
     * @param object $DocInfo Document information; only its `url` property is read here.
     */
    public function handleDocumentInfo($DocInfo)
    {
        // Line break and URL formatting depend on the output channel:
        // plain text on the command line, HTML otherwise.
        if (PHP_SAPI == "cli") {
            $lb = "\n";
            $shown_url = $DocInfo->url;
        } else {
            $lb = "<br />";
            // Crawled URLs are untrusted input; escape them before emitting HTML.
            $shown_url = htmlspecialchars($DocInfo->url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        }

        echo "Adding " . $shown_url . " to sitemap file" . $lb;

        // Sitemap values must be entity-escaped; a raw "&" in a query string
        // would otherwise make the whole file malformed XML.
        $loc = htmlspecialchars($DocInfo->url, ENT_QUOTES | ENT_XML1 | ENT_SUBSTITUTE, 'UTF-8');

        // LOCK_EX: the crawl may be run in several processes (goMultiProcessed),
        // so appends are serialized to keep entries from interleaving.
        file_put_contents(
            $this->sitemap_output_file,
            " <url>\r\n" .
            " <loc>" . $loc . "</loc>\r\n" .
            " </url>\r\n",
            FILE_APPEND | LOCK_EX
        );

        flush();
    }

    /**
     * Finishes the sitemap by appending the closing urlset tag.
     *
     * Call this once, after the crawl has completed.
     */
    public function closeFile()
    {
        file_put_contents($this->sitemap_output_file, '</urlset>', FILE_APPEND | LOCK_EX);
    }
}
// Create the sitemap-writing crawler and start a fresh output file.
$sitemapCrawler = new SitemapGenerator();
$sitemapCrawler->setSitemapOutputFile("sitemap.xml");

// Start URL plus the content-type and URL rules for the crawl
// (the URL filter rule presumably excludes common image files).
$sitemapCrawler->setURL("www.php.net");
$sitemapCrawler->addContentTypeReceiveRule("#text/html#");
$sitemapCrawler->addURLFilterRule("#\.(jpg|jpeg|gif|png)$# i");

// ... apply all other options and rules to the crawler

// Small page limit, intended for test runs only.
$sitemapCrawler->setPageLimit(10);

// Run with 5 processes; call go() instead for a single-process crawl.
$sitemapCrawler->goMultiProcessed(5);

// Write the closing tag once crawling is done.
$sitemapCrawler->closeFile();
- ?>
Advertisement
Add Comment
Please, Sign In to add comment