Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
- use WSS\Alfresco\AlfrescoUtils;
- /**
- * @author wichmann
- *
- */
- class WSS_Site {
- const SITEMAP_XML_FILENAME = "sitemap.xml";
- /**
- * The Units that shall not get a sitemap in any case (they may be set to "online" ...)
- * @var array
- */
- private static $UNWANTED_UNITS = array('comuser','test','training','user');
- private $unit_key_filter;
- private $output_type = "XML"; // HTML|XML default = XML
- private $used_downloads;
/**
 * Creates a sitemap generator, optionally restricted to a single unit.
 *
 * When a unit key is given (e.g. "tubing" for www.schott.com/tubing/), only one
 * sitemap is created, covering all pages located below that unit.
 *
 * @param string $unit_key_filter just the unit key, no path
 */
public function __construct($unit_key_filter = null)
{
    $this->unit_key_filter = $unit_key_filter;
}
/**
 * Rebuilds the per-unit sitemap files and the sitemap index in the document root.
 *
 * For every unit returned by WSS_Unit_Table::findAll() the unit's sitemap.xml is
 * regenerated and rewritten only if its content changed. Sitemap files of units
 * that are not relevant are deleted. Afterwards the URIs configured under
 * 'sitemap.index-additions.uri' are appended (if they exist or redirect) and the
 * index is written to <DOCUMENT_ROOT>/sitemap.xml.
 *
 * Side effect: $this->unit_key_filter is overwritten with each processed unit key
 * and keeps the key of the last relevant unit when the method returns.
 *
 * @return string the XML of the sitemap index
 */
public function create_sitemap_index()
{
    $doc = new DOMDocument('1.0', 'UTF-8');
    $root = $doc->createElementNS("http://www.sitemaps.org/schemas/sitemap/0.9", 'sitemapindex');
    $doc->appendChild($root);

    $units = WSS_Unit_Table::instance()->findAll(null, true, WSS_Config::isUs());
    foreach ($units as $unit) {
        $unitPath = $unit->createUnitPath();
        if ($unitPath == '') {
            $unitPath = WSS_Path_Model::createUnitLanguagePath($unit->unit_key, $unit->unit_key);
        }
        $sitemapPath = $unitPath . '/' . self::SITEMAP_XML_FILENAME;
        $sitemapAbsolutePath = $_SERVER['DOCUMENT_ROOT'] . $sitemapPath;

        if (!$this->isUnitRelevant($unit->unit_key)) {
            // Units that must not be indexed lose a sitemap left over from earlier runs.
            if (file_exists($sitemapAbsolutePath)) {
                unlink($sitemapAbsolutePath);
            }
            continue;
        }

        $this->unit_key_filter = $unit->unit_key;

        // The old sitemap file may not exist yet (or may be unreadable).
        $oldSitemap = '';
        if (file_exists($sitemapAbsolutePath)) {
            $oldSitemap = file_get_contents($sitemapAbsolutePath);
            if ($oldSitemap === false) {
                $oldSitemap = '';
            }
        }

        // Only touch the file when the content changed so its mtime stays meaningful.
        $newSitemap = $this->create_sitemap();
        if (strcmp($oldSitemap, $newSitemap) != 0) {
            file_put_contents($sitemapAbsolutePath, $newSitemap);
        }

        // file_exists() above cached the stat data of this path; drop it so that
        // filemtime() reports the time of the write that may just have happened.
        clearstatcache();
        $modified = file_exists($sitemapAbsolutePath) ? filemtime($sitemapAbsolutePath) : false;
        // addSitemapIndexElement() omits <lastmod> when no value is available.
        $lastmod = ($modified === false) ? null : WSS_Date::unix_timestamp_to_w3c($modified);

        $root->appendChild(
            $this->addSitemapIndexElement($doc, WSS_Url::create($sitemapPath)->render(true), $lastmod)
        );
    }
    clearstatcache();

    foreach (WSS_Config::instance()->get('sitemap.index-additions.uri') as $sitemapUri) {
        if (WSS_Url::exists($sitemapUri) || WSS_Url::isRedirect($sitemapUri)) {
            $root->appendChild($this->addSitemapIndexElement($doc, $sitemapUri));
        }
    }

    $doc->formatOutput = true;
    $indexSitemap = $doc->saveXML();
    file_put_contents($_SERVER['DOCUMENT_ROOT'] . '/' . self::SITEMAP_XML_FILENAME, $indexSitemap);
    return $indexSitemap;
}
/**
 * Builds one <sitemap> element for the sitemap index.
 *
 * @param DOMDocument $doc     document used as element factory
 * @param string      $loc     URL of the referenced sitemap
 * @param string      $lastmod optional modification date; <lastmod> is omitted when empty
 * @return DOMElement the new, not yet attached <sitemap> element
 */
private function addSitemapIndexElement(DOMDocument $doc, $loc, $lastmod = null)
{
    $sitemap = $doc->createElement('sitemap');

    // DOMDocument::createElement() does not escape its value argument, so a bare
    // "&" (e.g. in a query string) would raise a warning and truncate the URL.
    // double_encode=false keeps values that are already entity-encoded unchanged.
    $escapedLoc = htmlspecialchars($loc, ENT_NOQUOTES | ENT_XML1 | ENT_SUBSTITUTE, 'UTF-8', false);
    $sitemap->appendChild($doc->createElement('loc', $escapedLoc));

    if (!empty($lastmod)) {
        $sitemap->appendChild($doc->createElement('lastmod', $lastmod));
    }
    return $sitemap;
}
/**
 * Renders the complete sitemap for the current unit filter.
 *
 * The requested output type is applied on every call. Previously a single call
 * with "HTML" switched the instance to HTML permanently, so later calls asking
 * for XML (including those made by create_sitemap_index()) still produced HTML.
 *
 * @param string $output_type "HTML" for an HTML list page; any other value yields XML
 * @return string the sitemap document
 */
public function create_sitemap($output_type = 'XML')
{
    // Remember the type for create_sitemap_entry() and create_sitemap_entries(),
    // which read $this->output_type while building the individual entries.
    $this->output_type = ($output_type == "HTML") ? "HTML" : "XML";

    if ($this->output_type == "HTML") {
        // HTML OUTPUT
        $sitemap = "<html>\n";
        $sitemap .= "<head>\n";
        $sitemap .= "<title>Sitemap</title>\n";
        $sitemap .= "</head>\n";
        $sitemap .= "<body>\n";
        $sitemap .= "<ul>";
        $sitemap .= $this->create_sitemap_entries();
        $sitemap .= "</ul>";
        $sitemap .= "</body>\n";
        $sitemap .= "</html>\n";
    } else {
        // XML OUTPUT
        $sitemap = "<?xml version='1.0' encoding='UTF-8'?>\n";
        $sitemap .= "<urlset xmlns='http://www.sitemaps.org/schemas/sitemap/0.9'>\n";
        $sitemap .= $this->create_sitemap_entries();
        $sitemap .= "</urlset>\n";
    }
    return $sitemap;
}
/**
 * Builds the video sitemap and writes it to <DOCUMENT_ROOT>/video_sitemap.xml.
 *
 * Videos found below the corporate unit language paths are collected first, then
 * videos from all other paths. Every video is listed only once; the bookkeeping
 * for that is done by createVideoSitemapForPath().
 *
 * @return void
 */
public function createVideoSitemap()
{
    $doc = new DOMDocument('1.0', 'UTF-8');
    $urlSet = $doc->createElementNS('http://www.sitemaps.org/schemas/sitemap/0.9', 'urlset');
    // The entries use "video:"-prefixed elements; without this declaration the
    // prefix is unbound and the generated document is not namespace-valid.
    $urlSet->setAttributeNS(
        'http://www.w3.org/2000/xmlns/',
        'xmlns:video',
        'http://www.google.com/schemas/sitemap-video/1.1'
    );
    $doc->appendChild($urlSet);

    // We add videos only once to the sitemap.
    $collectedVideos = array();

    // First collect corporate videos
    $corporateUnits = WSS_Unit_Language_Table::instance()
        ->findAll(array(WSS_Unit_Row::TYPE_CORPORATE), true, WSS_Config::isUs());
    foreach ($corporateUnits as $unit) {
        $collectedVideos = $this->createVideoSitemapForPath($unit->createUnitLanguagePath(), $doc, $collectedVideos);
    }

    // Then collect other videos ('%' is handed to the page part lookup as path pattern)
    $this->createVideoSitemapForPath('%', $doc, $collectedVideos);

    $doc->formatOutput = true;
    $content = $doc->saveXML();
    $sitemapPath = $_SERVER['DOCUMENT_ROOT'] . '/video_sitemap.xml';
    if (file_put_contents($sitemapPath, $content) === false) {
        WSS_Logger::warn('XML site: Video sitemap could not be written: ' . $sitemapPath);
    }
}
/**
 * Collects the tubing product selector URLs and optionally writes them as a sitemap.
 *
 * Only active when WSS_Config::isSchott() and WSS_Config::isCom() are both true.
 * The language list is read from the selector master settings endpoint; for each
 * language the product catalogue is fetched and one entry per product plus one
 * "filter--list" entry is produced.
 *
 * @param boolean $saveXmlFile when true and at least one entry was collected, the XML is
 *                             written to /tubing/english/product_selector/sitemap.xml
 * @return array list of arrays with key 'loc' and, for products, 'lastmod'
 *               (the product's modificationDate as delivered by the catalogue)
 */
public function createTubingSelectorSitemap($saveXmlFile = true)
{
    $products = [];
    if (!WSS_Config::isSchott() || !WSS_Config::isCom()) {
        return $products;
    }

    $doc = new DOMDocument('1.0', 'UTF-8');
    $urlSet = $doc->appendChild($doc->createElementNS('http://www.sitemaps.org/schemas/sitemap/0.9', 'urlset'));

    // The query string must read "&region=all"; the former "®ion=all" was a
    // mis-decoded "&reg" entity and did not pass a region parameter at all.
    $masterUrl = WSS_Config::getSchemeAndHost() . '/common/tubing/selector-rest-master-settings.php?language=english&region=all';
    $rawMasterContent = file_get_contents($masterUrl);
    $master = $rawMasterContent ? json_decode($rawMasterContent) : null;

    // isset() also covers an undecodable or unexpectedly shaped response.
    if (isset($master->masterSettings->language_list)) {
        // Increase timeout for fetching selector meta data:
        // As of 11.06. 2018 necessary on preview servers. This might be because of additionally set dl categories.
        $ctx = stream_context_create(array(
            'http' => array(
                'timeout' => 180, // Seconds
            ),
            'https' => array(
                'timeout' => 180, // Seconds
            ),
        ));

        foreach ($master->masterSettings->language_list as $lang) {
            $catalogueUrl = WSS_Config::getSchemeAndHost() . '/common/tubing/selector-rest-catalogue.php?language=' . $lang . '&region=all';
            $rawCatalogueContent = file_get_contents($catalogueUrl, false, $ctx);
            $catalogue = $rawCatalogueContent ? json_decode($rawCatalogueContent) : null;

            if (is_array($catalogue) || is_object($catalogue)) {
                foreach ($catalogue as $product) {/* @see Tubing_Selector_CatalogueJson */
                    $path = '/tubing/english/product_selector/#!/region--all/lang--' . $lang . '/product--' . $product->productId . '/';
                    $entry = $this->createEntry($doc, $path, WSS_Date::unix_timestamp_to_w3c($product->modificationDate));
                    // createEntry() returns null for paths that are filtered out.
                    if ($entry !== null) {
                        $urlSet->appendChild($entry);
                    }
                    $products[] = ['loc' => $path, 'lastmod' => $product->modificationDate];
                }
            } else {
                WSS_Logger::warn('XML site: Tubing product meta data call failed: ' . $catalogueUrl);
            }

            // The list view of the selector is added for every language.
            $path = '/tubing/english/product_selector/#!/region--all/lang--' . $lang . '/filter--list/';
            $entry = $this->createEntry($doc, $path);
            if ($entry !== null) {
                $urlSet->appendChild($entry);
            }
            $products[] = ['loc' => $path];
        }
    } else {
        WSS_Logger::warn('XML site: Tubing product master data call failed: ' . $masterUrl);
    }

    $doc->formatOutput = true;
    if ($saveXmlFile && count($products) > 0) {
        file_put_contents($_SERVER['DOCUMENT_ROOT'] . '/tubing/english/product_selector/sitemap.xml', $doc->saveXML());
    }
    return $products;
}
/**
 * Adds the video library pages found below a path to the video sitemap document.
 *
 * For every online page containing a 'media_videolibrary.slf' block a <url> entry
 * is created, and each 'media_videolibrary_item.slf' item that has a title, a
 * thumbnail and a non-empty video asset is attached as <video:video>. A video is
 * skipped if its asset id or its title was already collected. The <url> entry is
 * appended to the document's <urlset> only if at least one video was attached.
 *
 * @param string $path path prefix (a '%' wildcard is appended for the lookup)
 * @param DOMDocument $doc sitemap document that already contains the <urlset> root
 * @param array $collectedVideos map of asset id => title of videos added so far
 * @return array the collected videos including the newly added ones
 */
private function createVideoSitemapForPath($path, DOMDocument $doc, array $collectedVideos)
{
    $is_us = WSS_Config::isUs();
    $seitenTable = WSS_Pagepart_Table::instance();

    foreach ($seitenTable->findByBlock('media_videolibrary.slf', $path . '%') as $seiten) {
        $status = $seiten->findPage();
        if (!$status->is_online($is_us)) {
            continue;
        }

        // Kept separate from $path so the method parameter is not overwritten.
        $pagePath = $status->create_path();
        $url = $this->createEntry($doc, $pagePath, WSS_Date::mysql_timestamp_to_w3c($status->cdate));
        // might be null if filtered
        if ($url === null) {
            continue;
        }

        // Counted explicitly: a DOMNodeList object is always truthy, so checking the
        // result of getElementsByTagName() cannot tell whether videos were added.
        $addedVideos = 0;

        foreach ($seitenTable->findByBlock('media_videolibrary_item.slf', $seiten->URLDIR, $seiten->URLFILE) as $itemSeiten) {
            $title = null;
            $thumbnail_loc = null;
            $videoAsset = WSS_Asset_Null::newInstance();
            $teaser = '';

            $seitenTitle = $seitenTable->findByPathAndBaustein($pagePath, $itemSeiten->getChildBaustein('Title', $itemSeiten->selfID));
            if ($seitenTitle != null) {
                $title = $seitenTitle->fetchContent();
            }
            $seitencontent_loc = $seitenTable->findByPathAndBaustein($pagePath, $itemSeiten->getChildBaustein('Video', $itemSeiten->selfID));
            if ($seitencontent_loc != null) {
                $videoAsset = WSS_Asset_ManagerFactory::create()->find($seitencontent_loc->alfresco_id);
            }
            $seitenthumbnail_loc = $seitenTable->findByPathAndBaustein($pagePath, $itemSeiten->getChildBaustein('Image', $itemSeiten->selfID));
            if ($seitenthumbnail_loc != null) {
                $thumbnail_loc = $seitenthumbnail_loc->IncludeFile;
            }
            $seitenTeaser = $seitenTable->findByPathAndBaustein($pagePath, $itemSeiten->getChildBaustein('Teaser', $itemSeiten->selfID));
            if ($seitenTeaser != null) {
                $teaser = $seitenTeaser->fetchContent();
            }

            // Items lacking a title, a thumbnail or a video asset are not listed.
            if ($title == '' || $thumbnail_loc == '' || $videoAsset->isEmpty()) {
                continue;
            }

            // do not add video twice
            // We also do not add videos with same title because Google does
            // not like this, although this may cause loss of some really different videos
            if (isset($collectedVideos[$videoAsset->getId()])
                || in_array($title, $collectedVideos, true)) {
                continue;
            }
            $collectedVideos[$videoAsset->getId()] = $title;

            $video = $url->appendChild($doc->createElement('video:video'));
            $titleElement = $video->appendChild($doc->createElement('video:title'));
            $titleElement->appendChild($doc->createCDATASection($title));

            // Fall back to the title when the item has no teaser text.
            $description = ($teaser == '') ? $title : $teaser;
            $descriptionElement = $video->appendChild($doc->createElement('video:description'));
            $descriptionElement->appendChild($doc->createCDATASection($description));

            $video->appendChild($doc->createElement('video:thumbnail_loc', WSS_Url::create($thumbnail_loc)->render(true)));
            $video->appendChild($doc->createElement('video:content_loc', $videoAsset->getDocument()));
            $duration = $videoAsset->getDuration();
            if (!empty($duration)) {
                $video->appendChild($doc->createElement('video:duration', $duration));
            }
            $addedVideos++;
        }

        // Only add video page if it contains new videos
        if ($addedVideos > 0) {
            $doc->getElementsByTagName('urlset')->item(0)->appendChild($url);
        }
    }
    return $collectedVideos;
}
/**
 * Concatenates all entry groups of the sitemap for the current unit filter.
 *
 * Page and download entries are always included. News entries are added for a
 * corporate unit, job entries for a corporate or "so" unit. The abbe downloads
 * are added for 'advanced_optics'; the tubing selector entries for 'tubing' or
 * whenever the output type is HTML.
 *
 * @return string the rendered entries in the current output type
 */
private function create_sitemap_entries()
{
    $sitemap = '';
    $sitemap .= $this->create_sitemap_page_entries();
    $sitemap .= $this->create_sitemap_downloads_entries();

    // fetchFromCache() may yield null for an unknown or missing unit key (see
    // isUnitRelevant()); the unit specific groups are skipped in that case
    // instead of failing with a call on null.
    $unit = WSS_Unit_Table::fetchFromCache($this->unit_key_filter);
    if ($unit != null) {
        if ($unit->is_corporate()) {
            $sitemap .= $this->create_sitemap_news_entries();
        }
        if ($unit->is_corporate() || $unit->is_so()) {
            $sitemap .= $this->create_sitemap_jobs_entries($unit);
        }
    }

    //TODO still required after Alf migration?
    if ($this->unit_key_filter == 'advanced_optics') {
        $sitemap .= $this->create_abbe_downloads_entries();
    } else if ($this->unit_key_filter == 'tubing' || $this->output_type == "HTML") {
        $sitemap .= $this->createTubingSelectorEntries();
    }
    return $sitemap;
}
/**
 * Creates a detached <url> element for a path, or nothing if the path is filtered.
 *
 * @param DOMDocument $doc document used as element factory
 * @param string $path path of the page; checked with isPathRelevant()
 * @param string $lastmod optional value for <lastmod>; omitted when not set
 * @return DOMElement|NULL the <url> element, or null when the path is not relevant
 */
private function createEntry(DOMDocument $doc, $path, $lastmod = null)
{
    if (!$this->isPathRelevant($path)) {
        return null;
    }

    $location = $this->getLocLink($path);
    $entry = $doc->createElement('url');
    $entry->appendChild($doc->createElement('loc', $location));
    if ($lastmod != NULL) {
        $entry->appendChild($doc->createElement('lastmod', $lastmod));
    }
    return $entry;
}
/**
 * Tells whether entries of the given unit may appear in a sitemap.
 *
 * The corporate pseudo key is mapped to 'english' (used for corporate download
 * links). Units listed in $UNWANTED_UNITS are rejected, as are units that cannot
 * be fetched from the cache or are not online.
 *
 * @param string $unitKey
 * @return boolean
 */
public function isUnitRelevant($unitKey)
{
    // handling corporate download links
    $lookupKey = ($unitKey == WSS_Unit_Row::CORPORATE_UNIT_PSEUDO_KEY) ? 'english' : $unitKey;

    if (in_array($lookupKey, static::$UNWANTED_UNITS)) {
        return false;
    }

    $unit = WSS_Unit_Table::fetchFromCache($lookupKey);
    return !($unit == null || !$unit->is_online());
}
/**
 * Tells whether a path may appear in a sitemap.
 *
 * A path is rejected when it matches the regular expression configured under
 * 'visibility.search-engine-restricted'; otherwise the decision is delegated to
 * isUnitRelevant() for the unit directory derived from the path.
 *
 * @param string $path path or URL, may contain a query string or fragment
 * @return boolean
 */
public function isPathRelevant($path)
{
    // Check for excluded paths
    $offlinePathes = WSS_Config::instance()->get('visibility.search-engine-restricted');
    if ($offlinePathes != '' && preg_match($offlinePathes, $path) == 1) {
        return false;
    }

    // parse_url() returns false for seriously malformed URLs and omits the 'path'
    // key when the input has none; hand over null in both cases instead of
    // indexing a non-array or a missing key.
    $pathParts = parse_url($path);
    $urlPath = (is_array($pathParts) && isset($pathParts['path'])) ? $pathParts['path'] : null;

    $unitKey = WSS_Path_Model::newInstance($urlPath)->getUnitDirectory();
    return $this->isUnitRelevant($unitKey);
}
/**
 * Produces the escaped location string for a sitemap entry.
 *
 * @param string $path path, or a complete URL when $isAbsolutePath is true
 * @param boolean $isAbsolutePath when true $path is used as is, otherwise it is
 *                                rendered through WSS_Url
 * @return string the location after WSS_String::specialChars()
 */
private function getLocLink($path, $isAbsolutePath = false)
{
    if ($isAbsolutePath) {
        $link = $path;
    } else {
        $link = WSS_Url::create($path)->render(true);
    }
    return WSS_String::specialChars($link);
}
/**
 * Renders a single sitemap entry in the current output type.
 *
 * @param string $path may contain query string
 * @param string $lastmod W3C format like "yyyy-MM-dd'T'HH:mm:ss'+01:00'"; only used for XML output
 * @param string $unitKey given for Alfresco downloads. If set, relevance is decided by
 *                        this unit key instead of by the path.
 * @param boolean $isAbsolutePath
 * @return string the entry markup, or an empty string if the entry is filtered out
 */
public function create_sitemap_entry($path, $lastmod, $unitKey = null, $isAbsolutePath = false)
{
    // Filter by path unless an explicit unit key was supplied.
    $isRelevant = ($unitKey == null)
        ? $this->isPathRelevant($path)
        : $this->isUnitRelevant($unitKey);
    if (!$isRelevant) {
        return '';
    }

    $loc = $this->getLocLink($path, $isAbsolutePath);

    // HTML OUTPUT
    if ($this->output_type == "HTML") {
        return "<li><a href=\"" . $loc . "\">" . $loc . "</a></li>\n";
    }

    // XML OUTPUT
    $lastmodLine = ($lastmod != NULL) ? "<lastmod>" . $lastmod . "</lastmod>\n" : '';
    return "<url>\n" . "<loc>" . $loc . "</loc>\n" . $lastmodLine . "</url>\n";
}
/**
 * Renders the sitemap entries for downloads.
 *
 * On first use the downloads of all units are queried and cached in
 * $this->used_downloads, keyed by download path; later calls reuse that cache.
 * Each entry is filtered by the unit key derived from the asset's QName path.
 *
 * @return string the rendered entries
 */
private function create_sitemap_downloads_entries()
{
    if (!isset($this->used_downloads)) {
        $this->used_downloads = [];
        foreach (WSS_Unit_Table::instance()->findAll(null, true, WSS_Config::isUs()) as $unit) {
            //TODO not all dls are added due to Alf limit of 1000 search results
            // Disabled alternative: a single query over all units via
            // WSS_Asset_XPath::newInstanceDefault()->toOnlineDownloadFolderRecursiveAllUnits()
            $downloads = WSS_Asset_ManagerFactory::create()->createQuery()
                ->setRegionBlacklist()
                ->setPath(WSS_Asset_XPath::newInstance($unit->unit_key)->toOnlineDownloadFolderRecursive())
                ->getResultList();
            foreach ($downloads as $download) { /* @var $download WSS_Asset_Base */
                $this->used_downloads[$download->downloadPath()] = $download;
            }
        }
    }

    $entries = "";
    foreach ($this->used_downloads as $downloadPath => $asset) {
        $modified = date("c", strtotime($asset->getModified()));
        $assetUnitKey = AlfrescoUtils::unitKeyFromQNamePath($asset->getQnamePath());
        $entries .= $this->create_sitemap_entry($downloadPath, $modified, $assetUnitKey);
    }
    return $entries;
}
/**
 * Renders the sitemap entries for job postings of a unit.
 *
 * For every language of the unit whose jobs page is online, the jobs delivered by
 * the 'WSSSuccessFactorsService' service are listed with absolute URLs.
 *
 * @param WSS_Unit_Row $unit
 * @return string the rendered entries
 */
private function create_sitemap_jobs_entries(WSS_Unit_Row $unit)
{
    $entries = '';
    foreach ($unit->findLanguages() as $language) {
        $jobsPagePath = WSS_Unit_Path::getByUnitLanguagePath(
            WSS_Unit_Path::JOBS,
            $unit->getUnitPath() . '/' . $language
        );
        if (!WSS_Page_Table::instance()->isOnlinePath($jobsPagePath)) {
            continue;
        }

        // Languages without a known locale fall back to en_US.
        $locale = WSS_Language::getLocale($language);
        if ($locale == null) {
            $locale = 'en_US';
        }

        $jobs = WSS_Bootstrap::getService('WSSSuccessFactorsService')->createSitemapJobs($locale, $jobsPagePath);
        foreach ($jobs as $job) {
            $jobUrl = WSS_Config::getSchemeAndHost() . $job->link;
            $entries .= $this->create_sitemap_entry($jobUrl, date('c', $job->timestamp), null, true);
        }
    }
    return $entries;
}
/**
 * Renders the sitemap entries for news items.
 *
 * The selection is online-filtered for WSS_News_Table::MAIN_UNIT_KEY and includes
 * archived news.
 *
 * @return string the rendered entries
 */
private function create_sitemap_news_entries()
{
    $selection = WSS_News_Selection::create();
    $selection->setIsOnlineFiltered(WSS_News_Table::MAIN_UNIT_KEY);
    $selection->setShowArchivedNews(true);

    $entries = '';
    foreach ($selection->fetch() as $newsItem) {
        $itemLink = $newsItem->getItemLink();
        $entries .= $this->create_sitemap_entry($itemLink, $newsItem->get_newsdate());
    }
    return $entries;
}
/**
 * Renders the sitemap entries for the pages below the current unit filter.
 *
 * The visitor content is always listed; for the US configuration the
 * online-filtered pages are listed in addition.
 *
 * @return string the rendered entries
 */
private function create_sitemap_page_entries()
{
    $pageTable = new WSS_Page_Table();
    $isUS = WSS_Config::isUs();
    $unitRoot = '/' . $this->unit_key_filter;

    $entries = "" . $this->addWSS_Page_Row($pageTable->findVisitorContent($unitRoot, $isUS));
    if ($isUS) {
        $entries .= $this->addWSS_Page_Row($pageTable->findOnlineFiltered($unitRoot));
    }
    return $entries;
}
/**
 * Renders the entries for those pages of a list that are relevant for crawlers.
 *
 * @param array|Traversable $list page rows offering isRelevantForCrawler()
 * @return string the rendered entries
 */
private function addWSS_Page_Row($list)
{
    $entries = "";
    foreach ($list as $pageRow) {
        if (!$pageRow->isRelevantForCrawler()) {
            continue;
        }
        $entries .= $this->create_sitemap_page_entry($pageRow);
    }
    return $entries;
}
/**
 * Renders the sitemap entry of a single page, using its cdate as last modification.
 *
 * @param WSS_Page_Row $status_Settings
 * @return string the rendered entry
 */
private function create_sitemap_page_entry(WSS_Page_Row $status_Settings)
{
    $pagePath = $status_Settings->create_path();
    $lastmod = WSS_Date::mysql_timestamp_to_w3c($status_Settings->cdate);
    return $this->create_sitemap_entry($pagePath, $lastmod);
}
/**
 * Renders the sitemap entries for the abbe datasheet downloads.
 *
 * The download paths come from abbe_collect_downloads(), defined in the included
 * datasheets.inc. The file modification time is used as <lastmod>; when a listed
 * file cannot be found the entry is still rendered, but without <lastmod>.
 *
 * @return string the rendered entries
 */
private function create_abbe_downloads_entries()
{
    // require does not work for unit tests
    require_once($_SERVER['DOCUMENT_ROOT'] . '/advanced_optics/abbe_pdf/datasheets.inc');

    $pages = "";
    foreach (abbe_collect_downloads() as $path) {
        $absolute_path = $_SERVER['DOCUMENT_ROOT'] . $path;
        // filemtime() warns and returns false for a missing file; avoid feeding
        // that into the date conversion.
        $modified = is_file($absolute_path) ? filemtime($absolute_path) : false;
        $lastmod = ($modified === false) ? null : WSS_Date::unix_timestamp_to_w3c($modified);
        $pages .= $this->create_sitemap_entry($path, $lastmod);
    }
    return $pages;
}
/**
 * Renders the sitemap entries for the tubing product selector.
 *
 * The entries are taken from createTubingSelectorSitemap() without writing its
 * XML file; entries lacking a 'lastmod' value are rendered with an empty one.
 *
 * @return string the rendered entries
 */
private function createTubingSelectorEntries()
{
    $entries = '';
    foreach ($this->createTubingSelectorSitemap(false) as $product) {
        if (empty($product['lastmod'])) {
            $lastmod = '';
        } else {
            $lastmod = WSS_Date::unix_timestamp_to_w3c($product['lastmod']);
        }
        $entries .= $this->create_sitemap_entry($product['loc'], $lastmod);
    }
    return $entries;
}
- }
- ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement