Advertisement
Guest User

Untitled

a guest
Aug 19th, 2019
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 17.89 KB | None | 0 0
  1. <?php
  2.  
  3. use WSS\Alfresco\AlfrescoUtils;
  4.  
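  /**
   * Generates the sitemap index, the per-unit sitemaps (XML or HTML), the
   * video sitemap and the tubing product selector sitemap, and writes the
   * XML files below the document root.
   *
   * @author wichmann
   */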
  9. class WSS_Site {
  10.  
  /** File name used for the sitemap index and for every per-unit sitemap. */
  const SITEMAP_XML_FILENAME = 'sitemap.xml';

  /**
   * Unit keys that never get a sitemap, even if the unit is set to "online".
   * @var array
   */
  private static $UNWANTED_UNITS = ['comuser', 'test', 'training', 'user'];

  /** @var string|null key of the unit the sitemap is generated for */
  private $unit_key_filter;

  /** @var string output format: "HTML" or "XML" (default) */
  private $output_type = 'XML';

  /** @var array|null download assets keyed by download path, collected lazily on first use */
  private $used_downloads;
  23.  
  /**
   * Creates a sitemap generator, optionally bound to a single unit.
   *
   * When a unit key is given (e.g. "tubing" for www.schott.com/tubing/), the
   * sitemap is meant to contain only the pages located below that unit.
   *
   * @param string $unit_key_filter just the unit key, no path
   */
  public function __construct($unit_key_filter = null) {
      $this->unit_key_filter = $unit_key_filter;
  }
  33.  
  /**
   * Regenerates the sitemap of every relevant unit plus the sitemap index.
   *
   * For each unit returned by WSS_Unit_Table::findAll() the unit's sitemap.xml
   * is rebuilt and rewritten only when its content changed; for units that are
   * not relevant an existing sitemap file is deleted. Afterwards the URIs
   * configured under "sitemap.index-additions.uri" are appended (when they
   * exist or redirect) and the index is written to DOCUMENT_ROOT/sitemap.xml.
   *
   * Side effect: $this->unit_key_filter is left set to the last relevant unit.
   *
   * @return string XML of the sitemap index
   */
  public function create_sitemap_index() {
      $doc = new DOMDocument('1.0', 'UTF-8');
      $root = $doc->createElementNS("http://www.sitemaps.org/schemas/sitemap/0.9", 'sitemapindex');
      $doc->appendChild($root);

      $units = WSS_Unit_Table::instance()->findAll(null, true, WSS_Config::isUs());
      foreach ($units as $unit) {
          $unitPath = $unit->createUnitPath();
          if ($unitPath == '') {
              $unitPath = WSS_Path_Model::createUnitLanguagePath($unit->unit_key, $unit->unit_key);
          }
          $sitemapPath = $unitPath . '/' . self::SITEMAP_XML_FILENAME;
          $sitemapAbsolutePath = $_SERVER['DOCUMENT_ROOT'] . $sitemapPath;

          if (!$this->isUnitRelevant($unit->unit_key)) {
              // Units without a sitemap must not keep a stale file around.
              if (file_exists($sitemapAbsolutePath)) {
                  unlink($sitemapAbsolutePath);
              }
              continue;
          }

          $this->unit_key_filter = $unit->unit_key;
          // old sitemap file may not exist yet
          if (file_exists($sitemapAbsolutePath)) {
              $oldSitemap = file_get_contents($sitemapAbsolutePath);
          } else {
              $oldSitemap = '';
          }
          $newSitemap = $this->create_sitemap();
          // Only touch the file when the content changed so that its
          // modification time stays meaningful as <lastmod>.
          if (strcmp($oldSitemap, $newSitemap) != 0) {
              file_put_contents($sitemapAbsolutePath, $newSitemap);
          }

          // file_exists() above may have cached the stat data of the old file;
          // drop it so filemtime() reports the time of the file just written.
          clearstatcache(true, $sitemapAbsolutePath);
          $root->appendChild($this->addSitemapIndexElement(
              $doc,
              WSS_Url::create($sitemapPath)->render(true),
              WSS_Date::unix_timestamp_to_w3c(filemtime($sitemapAbsolutePath))
          ));
      }

      clearstatcache();
      foreach (WSS_Config::instance()->get('sitemap.index-additions.uri') as $sitemapUri) {
          if (WSS_Url::exists($sitemapUri) || WSS_Url::isRedirect($sitemapUri)) {
              $root->appendChild($this->addSitemapIndexElement($doc, $sitemapUri));
          }
      }

      $doc->formatOutput = true;
      $indexSitemap = $doc->saveXML();
      file_put_contents($_SERVER['DOCUMENT_ROOT'] . '/' . self::SITEMAP_XML_FILENAME, $indexSitemap);

      return $indexSitemap;
  }
  82.  
  /**
   * Builds one <sitemap> element for the sitemap index.
   *
   * The element is created in $doc but not attached; the caller appends it.
   *
   * @param DOMDocument $doc document used to create the nodes
   * @param string $loc URL of the referenced sitemap
   * @param string|null $lastmod modification date; the <lastmod> child is omitted when empty
   * @return DOMElement
   */
  private function addSitemapIndexElement(DOMDocument $doc, $loc, $lastmod = null) {
      $entry = $doc->createElement('sitemap');
      $locNode = $doc->createElement('loc', $loc);
      $entry->appendChild($locNode);
      if (empty($lastmod)) {
          return $entry;
      }
      $lastmodNode = $doc->createElement('lastmod', $lastmod);
      $entry->appendChild($lastmodNode);
      return $entry;
  }
  91.  
  /**
   * Renders the complete sitemap for the current unit filter as a string.
   *
   * The chosen format is stored in $this->output_type because
   * create_sitemap_entries() and create_sitemap_entry() read it while the
   * individual entries are generated.
   *
   * @param string $output_type "HTML" for a simple HTML list; any other value yields XML
   * @return string the HTML page or the XML <urlset> document
   */
  public function create_sitemap( $output_type = 'XML' ) {
      // Honour the argument on every call. Previously the format could only be
      // switched to HTML, so an instance that had rendered HTML once kept
      // producing HTML even for create_sitemap('XML') or create_sitemap_index().
      $this->output_type = ($output_type == "HTML") ? "HTML" : "XML";

      if ($this->output_type == "HTML") {
          // HTML OUTPUT
          $sitemap = "<html>\n";
          $sitemap .= "<head>\n";
          $sitemap .= "<title>Sitemap</title>\n";
          $sitemap .= "</head>\n";
          $sitemap .= "<body>\n";
          $sitemap .= "<ul>";
          $sitemap .= $this->create_sitemap_entries();
          $sitemap .= "</ul>";
          $sitemap .= "</body>\n";
          $sitemap .= "</html>\n";
          return $sitemap;
      }

      // XML OUTPUT
      $sitemap = "<?xml version='1.0' encoding='UTF-8'?>\n";
      $sitemap .= "<urlset xmlns='http://www.sitemaps.org/schemas/sitemap/0.9'>\n";
      $sitemap .= $this->create_sitemap_entries();
      $sitemap .= "</urlset>\n";
      return $sitemap;
  }
  121.  
  /**
   * Builds the video sitemap and writes it to DOCUMENT_ROOT/video_sitemap.xml.
   *
   * Videos found below the corporate unit language paths are collected first,
   * then the videos of all other paths; createVideoSitemapForPath() skips
   * videos that were already collected.
   *
   * @return void
   */
  public function createVideoSitemap() {
      $doc = new DOMDocument('1.0', 'UTF-8');
      $urlSet = $doc->createElementNS('http://www.sitemaps.org/schemas/sitemap/0.9', 'urlset');
      // createVideoSitemapForPath() emits <video:...> elements; without this
      // declaration the "video" prefix is unbound and the document is not
      // namespace-well-formed XML.
      $urlSet->setAttributeNS(
          'http://www.w3.org/2000/xmlns/',
          'xmlns:video',
          'http://www.google.com/schemas/sitemap-video/1.1'
      );
      $doc->appendChild($urlSet);

      // We add videos only once to the sitemap.
      $collectedVideos = array();
      // First collect corporate videos
      $corporateUnits = WSS_Unit_Language_Table::instance()
          ->findAll(array(WSS_Unit_Row::TYPE_CORPORATE), true, WSS_Config::isUs());
      foreach ($corporateUnits as $unit) {
          $collectedVideos = $this->createVideoSitemapForPath($unit->createUnitLanguagePath(), $doc, $collectedVideos);
      }
      // Then collect other videos
      $this->createVideoSitemapForPath('%', $doc, $collectedVideos);

      $doc->formatOutput = true;
      $content = $doc->saveXML();
      $sitemapPath = $_SERVER['DOCUMENT_ROOT'] . '/video_sitemap.xml';
      file_put_contents($sitemapPath, $content);
  }
  140.  
  /**
   * Collects the URLs of the tubing product selector and optionally writes
   * them to /tubing/english/product_selector/sitemap.xml below the document root.
   *
   * Only active when WSS_Config::isSchott() and WSS_Config::isCom() are both
   * true; otherwise an empty list is returned. The languages are read from the
   * selector master settings endpoint, the products from the catalogue
   * endpoint of each language. Failed or undecodable responses are logged as
   * warnings and skipped.
   *
   * @param boolean $saveXmlFile whether to write the XML file (only done when at least one URL was collected)
   * @return array list of arrays with key 'loc' and, for products, 'lastmod' (modification date as delivered by the catalogue)
   */
  public function createTubingSelectorSitemap($saveXmlFile = true) {
      $products = [];
      if (!WSS_Config::isSchott() || !WSS_Config::isCom()) {
          return $products;
      }

      $doc = new DOMDocument('1.0', 'UTF-8');
      $urlSet = $doc->appendChild($doc->createElementNS('http://www.sitemaps.org/schemas/sitemap/0.9', 'urlset'));

      $masterUrl = WSS_Config::getSchemeAndHost().'/common/tubing/selector-rest-master-settings.php?language=english&region=all';
      $rawMasterContent = file_get_contents($masterUrl);
      // Treat an undecodable or incomplete response like a failed call instead
      // of dereferencing null.
      $master = $rawMasterContent ? json_decode($rawMasterContent) : null;
      $hasLanguages = isset($master->masterSettings->language_list)
          && (is_array($master->masterSettings->language_list) || is_object($master->masterSettings->language_list));

      if ($hasLanguages) {
          // Increase timeout for fetching selector meta data:
          // As of 11.06. 2018 necessary on preview servers. This might be because of additionally set dl categories.
          $ctx = stream_context_create(array('http'=>
              array(
                  'timeout' => 180, // Seconds
              ),
              'https'=>
              array(
                  'timeout' => 180, // Seconds
              ),
          ));
          foreach ($master->masterSettings->language_list as $lang) {
              $catalogueUrl = WSS_Config::getSchemeAndHost().'/common/tubing/selector-rest-catalogue.php?language='.$lang. '&region=all';
              $rawCatalogueContent = file_get_contents($catalogueUrl, false, $ctx);
              $catalogue = $rawCatalogueContent ? json_decode($rawCatalogueContent) : null;
              if (is_array($catalogue) || is_object($catalogue)) {
                  foreach ($catalogue as $product) {/* @see Tubing_Selector_CatalogueJson */
                      $path = '/tubing/english/product_selector/#!/region--all/lang--' . $lang . '/product--' . $product->productId . '/';
                      $entry = $this->createEntry($doc, $path, WSS_Date::unix_timestamp_to_w3c($product->modificationDate));
                      // createEntry() returns null for filtered paths; appendChild(null) would fail.
                      if ($entry !== null) {
                          $urlSet->appendChild($entry);
                      }
                      $products[] = ['loc' => $path, 'lastmod' => $product->modificationDate];
                  }
              } else {
                  WSS_Logger::warn('XML site: Tubing product meta data call failed: '.$catalogueUrl);
              }
              $path = '/tubing/english/product_selector/#!/region--all/lang--'.$lang.'/filter--list/';
              $entry = $this->createEntry($doc, $path);
              if ($entry !== null) {
                  $urlSet->appendChild($entry);
              }
              $products[] = ['loc' => $path];
          }
      } else {
          WSS_Logger::warn('XML site: Tubing product master data call failed: '.$masterUrl);
      }

      $doc->formatOutput = true;
      if ($saveXmlFile && count($products) > 0) {
          file_put_contents($_SERVER['DOCUMENT_ROOT'] . '/tubing/english/product_selector/sitemap.xml', $doc->saveXML());
      }
      return $products;
  }
  187.  
  /**
   * Adds all online video library pages below a path to the video sitemap.
   *
   * For every page part using the block "media_videolibrary.slf" a <url> entry
   * is created and filled with one <video:video> element per library item that
   * has a title, a thumbnail and a non-empty video asset. A video is skipped
   * when its asset id or its title was already collected. The <url> entry is
   * attached to the <urlset> only if at least one video was added to it.
   *
   * @param string $path path prefix; '%' is appended before it is passed to findByBlock()
   * @param DOMDocument $doc document containing the <urlset> root
   * @param array $collectedVideos already collected videos (asset id => title)
   * @return array collected videos including the newly added ones
   */
  private function createVideoSitemapForPath($path, DOMDocument $doc, array $collectedVideos) {

      $is_us = WSS_Config::isUs();
      $seitenTable = WSS_Pagepart_Table::instance();
      foreach ($seitenTable->findByBlock('media_videolibrary.slf', $path.'%') as $seiten) {
          $status = $seiten->findPage();
          if (!$status->is_online($is_us)) {
              continue;
          }
          // Separate variable: the $path parameter is no longer overwritten.
          $pagePath = $status->create_path();
          $url = $this->createEntry($doc, $pagePath, WSS_Date::mysql_timestamp_to_w3c($status->cdate));
          // might be null if filtered
          if ($url === null) {
              continue;
          }

          $addedVideos = 0;
          foreach ($seitenTable->findByBlock('media_videolibrary_item.slf', $seiten->URLDIR, $seiten->URLFILE) as $itemSeiten) {
              $title = null;
              $thumbnail_loc = null;
              $videoAsset = WSS_Asset_Null::newInstance();
              $Teaser = '';

              $seitenTitle = $this->findVideoItemPart($seitenTable, $pagePath, $itemSeiten, 'Title');
              if ($seitenTitle != null) {
                  $title = $seitenTitle->fetchContent();
              }
              $seitencontent_loc = $this->findVideoItemPart($seitenTable, $pagePath, $itemSeiten, 'Video');
              if ($seitencontent_loc != null) {
                  $videoAsset = WSS_Asset_ManagerFactory::create()->find($seitencontent_loc->alfresco_id);
              }
              $seitenthumbnail_loc = $this->findVideoItemPart($seitenTable, $pagePath, $itemSeiten, 'Image');
              if ($seitenthumbnail_loc != null) {
                  $thumbnail_loc = $seitenthumbnail_loc->IncludeFile;
              }
              $seitenTeaser = $this->findVideoItemPart($seitenTable, $pagePath, $itemSeiten, 'Teaser');
              if ($seitenTeaser != null) {
                  $Teaser = $seitenTeaser->fetchContent();
              }

              if ($title == '' || $thumbnail_loc == '' || $videoAsset->isEmpty()) {
                  continue;
              }
              // do not add video twice
              // We also do not add videos with same title because Google does
              // not like this, although this may cause loss of some really different videos
              if (isset($collectedVideos[$videoAsset->getId()])
                  || in_array($title, $collectedVideos, true)) {
                  continue;
              }
              $collectedVideos[$videoAsset->getId()] = $title;

              $video = $url->appendChild($doc->createElement('video:video'));
              $titleElement = $video->appendChild($doc->createElement('video:title'));
              $titleElement->appendChild($doc->createCDATASection($title));
              // Fall back to the title when the item has no teaser text.
              $description = ($Teaser == '') ? $title : $Teaser;
              $descriptionElement = $video->appendChild($doc->createElement('video:description'));
              $descriptionElement->appendChild($doc->createCDATASection($description));
              $video->appendChild($doc->createElement('video:thumbnail_loc', WSS_Url::create($thumbnail_loc)->render(true)));
              $video->appendChild($doc->createElement('video:content_loc', $videoAsset->getDocument()));
              if (!empty($videoAsset->getDuration())) {
                  $video->appendChild($doc->createElement('video:duration', $videoAsset->getDuration()));
              }
              $addedVideos++;
          }

          // Only add video page if it contains new videos. The former check
          // tested the DOMNodeList object itself, which is always truthy.
          if ($addedVideos > 0) {
              $doc->getElementsByTagName('urlset')->item(0)->appendChild($url);
          }
      }

      return $collectedVideos;
  }

  /**
   * Looks up one child page part (e.g. 'Title', 'Video') of a video library item.
   *
   * @param object $seitenTable page part table used for the lookup
   * @param string $pagePath path of the page that contains the item
   * @param object $itemSeiten the video library item page part
   * @param string $childName name of the child block
   * @return mixed result of findByPathAndBaustein(); compared against null by the caller
   */
  private function findVideoItemPart($seitenTable, $pagePath, $itemSeiten, $childName) {
      return $seitenTable->findByPathAndBaustein(
          $pagePath,
          $itemSeiten->getChildBaustein($childName, $itemSeiten->selfID)
      );
  }
  267.  
  /**
   * Concatenates all entries of the sitemap for the current unit filter.
   *
   * Page and download entries are always added. News entries are added for
   * corporate units, job entries for corporate and "so" units. ABBE downloads
   * are added for the unit "advanced_optics"; tubing selector entries for the
   * unit "tubing" and for every HTML sitemap.
   *
   * @return string entries in the current output format
   */
  private function create_sitemap_entries() {
      $sitemap = '';
      $sitemap .= $this->create_sitemap_page_entries();
      $sitemap .= $this->create_sitemap_downloads_entries();

      $unit = WSS_Unit_Table::fetchFromCache($this->unit_key_filter);
      // fetchFromCache() may yield null (isUnitRelevant() checks for that as
      // well); without a unit there are no news or job entries to add.
      if ($unit != null) {
          $isCorporate = $unit->is_corporate();
          if ($isCorporate) {
              $sitemap .= $this->create_sitemap_news_entries();
          }
          if ($isCorporate || $unit->is_so()) {
              $sitemap .= $this->create_sitemap_jobs_entries($unit);
          }
      }

      //TODO still required after Alf migration?
      if ($this->unit_key_filter == 'advanced_optics') {
          $sitemap .= $this->create_abbe_downloads_entries();
      } else if ($this->unit_key_filter == 'tubing' || $this->output_type == "HTML") {
          $sitemap .= $this->createTubingSelectorEntries();
      }

      return $sitemap;
  }
  /**
   * Creates a detached <url> element for the given path.
   *
   * @param DOMDocument $doc document used to create the nodes
   * @param string $path path of the page; checked with isPathRelevant()
   * @param string $lastmod optional modification date, added as <lastmod> when set
   * @return DOMElement|NULL the element, or null when the path is not relevant
   */
  private function createEntry( DOMDocument $doc, $path, $lastmod = null) {
      if (!$this->isPathRelevant($path)) {
          return null;
      }
      $entry = $doc->createElement('url');
      $entry->appendChild($doc->createElement('loc', $this->getLocLink($path)));
      if ($lastmod != NULL) {
          $entry->appendChild($doc->createElement('lastmod', $lastmod));
      }
      return $entry;
  }
  306.  
  /**
   * Tells whether a unit may appear in a sitemap.
   *
   * The corporate pseudo key is mapped to the unit "english". A unit is
   * relevant when it is not listed in $UNWANTED_UNITS, can be fetched from
   * the unit cache and is online.
   *
   * @param string $unitKey
   * @return boolean
   */
  public function isUnitRelevant( $unitKey ) {
      // handling corporate download links
      $effectiveKey = ($unitKey == WSS_Unit_Row::CORPORATE_UNIT_PSEUDO_KEY) ? 'english' : $unitKey;

      if (in_array($effectiveKey, static::$UNWANTED_UNITS)) {
          return false;
      }

      $unit = WSS_Unit_Table::fetchFromCache($effectiveKey);
      if ($unit == null) {
          return false;
      }
      return $unit->is_online() ? true : false;
  }
  321.  
  /**
   * Tells whether a path may appear in a sitemap.
   *
   * A path is rejected when it matches the pattern configured under
   * "visibility.search-engine-restricted"; otherwise the decision is taken by
   * isUnitRelevant() for the unit directory of the path.
   *
   * @param string $path path or URL; only its path component is used to find the unit
   * @return boolean
   */
  public function isPathRelevant( $path ) {
      // Check for excluded paths
      $restrictedPattern = WSS_Config::instance()->get('visibility.search-engine-restricted');
      $isRestricted = $restrictedPattern != '' && preg_match($restrictedPattern, $path) == 1;
      if ($isRestricted) {
          return false;
      }

      $urlParts = parse_url($path);
      $pathModel = WSS_Path_Model::newInstance($urlParts['path']);
      return $this->isUnitRelevant($pathModel->getUnitDirectory());
  }
  332.  
  /**
   * Produces the value for a <loc> element or link.
   *
   * @param string $path path, or a complete URL when $isAbsolutePath is true
   * @param boolean $isAbsolutePath true to use $path as is instead of rendering it through WSS_Url
   * @return string the link after WSS_String::specialChars()
   */
  private function getLocLink($path, $isAbsolutePath = false) {
      if ($isAbsolutePath) {
          $link = $path;
      } else {
          $link = WSS_Url::create($path)->render(true);
      }
      return WSS_String::specialChars($link);
  }
  /**
   * Renders a single sitemap entry in the current output format.
   *
   * @param string $path may contain a query string
   * @param string $lastmod W3C format like "yyyy-MM-dd'T'HH:mm:ss'+01:00'"; only used for XML output
   * @param string $unitKey given for Alfresco downloads. If set, only this unit key is used for filtering.
   * @param boolean $isAbsolutePath true when $path already is a complete URL
   * @return string an <li> (HTML) or <url> (XML) fragment, or '' when the entry is filtered out
   */
  public function create_sitemap_entry( $path, $lastmod, $unitKey = null, $isAbsolutePath = false )
  {
      // Filter by unit key when one is given, by path otherwise.
      $isRelevant = ($unitKey == null)
          ? $this->isPathRelevant($path)
          : $this->isUnitRelevant($unitKey);
      if (!$isRelevant) {
          return '';
      }

      $loc = $this->getLocLink($path, $isAbsolutePath);

      if ($this->output_type == "HTML") {
          // HTML OUTPUT
          return "<li><a href=\"" . $loc . "\">" . $loc . "</a></li>\n";
      }

      // XML OUTPUT
      $xml = "<url>\n" . "<loc>" . $loc . "</loc>\n";
      if ($lastmod != NULL) {
          $xml .= "<lastmod>" . $lastmod . "</lastmod>\n";
      }
      return $xml . "</url>\n";
  }
  376.  
  /**
   * Renders the sitemap entries for download assets.
   *
   * On the first call the downloads of all units are queried and stored in
   * $this->used_downloads, keyed by download path; later calls reuse that
   * list. Each entry is filtered by the unit key derived from the asset's
   * QName path.
   *
   * @return string entries in the current output format
   */
  private function create_sitemap_downloads_entries() {
      if (!isset($this->used_downloads)) {
          $this->used_downloads = [];
          foreach (WSS_Unit_Table::instance()->findAll(null, true, WSS_Config::isUs()) as $unit) {
              //TODO not all dls are added due to Alf limit of 1000 search results
              $query = WSS_Asset_ManagerFactory::create()->createQuery()
                  ->setRegionBlacklist()
                  ->setPath(WSS_Asset_XPath::newInstance($unit->unit_key)->toOnlineDownloadFolderRecursive());
                  //->setPath(WSS_Asset_XPath::newInstanceDefault()->toOnlineDownloadFolderRecursiveAllUnits())
              foreach ($query->getResultList() as $download) { /* @var $download WSS_Asset_Base */
                  $this->used_downloads[$download->downloadPath()] = $download;
              }
          }
      }

      $entries = "";
      foreach ($this->used_downloads as $downloadPath => $asset) {
          $modified = date("c", strtotime($asset->getModified()));
          $unitKey = AlfrescoUtils::unitKeyFromQNamePath($asset->getQnamePath());
          $entries .= $this->create_sitemap_entry($downloadPath, $modified, $unitKey);
      }
      return $entries;
  }
  403.  
  /**
   * Renders the sitemap entries for the job offers of a unit.
   *
   * For every language of the unit whose jobs page is online, the jobs are
   * requested from the "WSSSuccessFactorsService" service and added as
   * absolute links. The locale falls back to 'en_US' when none is found for
   * the language.
   *
   * @param WSS_Unit_Row $unit
   * @return string entries in the current output format
   */
  private function create_sitemap_jobs_entries(WSS_Unit_Row $unit) {
      $entries = '';
      foreach ($unit->findLanguages() as $language) {
          $jobsPagePath = WSS_Unit_Path::getByUnitLanguagePath(WSS_Unit_Path::JOBS, $unit->getUnitPath().'/'.$language);
          if (!WSS_Page_Table::instance()->isOnlinePath($jobsPagePath)) {
              continue;
          }

          $locale = WSS_Language::getLocale($language);
          if ($locale == null) {
              $locale = 'en_US';
          }

          $jobs = WSS_Bootstrap::getService('WSSSuccessFactorsService')->createSitemapJobs($locale, $jobsPagePath);
          foreach ($jobs as $job) {
              $jobUrl = WSS_Config::getSchemeAndHost().$job->link;
              $entries .= $this->create_sitemap_entry($jobUrl, date('c', $job->timestamp), null, true);
          }
      }
      return $entries;
  }
  419.  
  /**
   * Renders the sitemap entries for news items.
   *
   * The selection is online-filtered for WSS_News_Table::MAIN_UNIT_KEY and
   * includes archived news.
   *
   * @return string entries in the current output format
   */
  private function create_sitemap_news_entries() {
      $selection = WSS_News_Selection::create();
      $selection->setIsOnlineFiltered(WSS_News_Table::MAIN_UNIT_KEY);
      $selection->setShowArchivedNews(true);

      $entries = '';
      foreach ($selection->fetch() as $newsItem) {
          $itemLink = $newsItem->getItemLink();
          $entries .= $this->create_sitemap_entry($itemLink, $newsItem->get_newsdate());
      }
      return $entries;
  }
  433.  
  /**
   * Renders the sitemap entries for the pages below the current unit.
   *
   * The visitor content below "/<unit key>" is always added; when
   * WSS_Config::isUs() is true, the result of findOnlineFiltered() for the
   * same path is added as well.
   *
   * @return string entries in the current output format
   */
  private function create_sitemap_page_entries() {
      $pageTable = new WSS_Page_Table();
      $isUS = WSS_Config::isUs();
      $unitPath = '/'.$this->unit_key_filter;

      $entries = "" . $this->addWSS_Page_Row($pageTable->findVisitorContent($unitPath, $isUS));
      if ($isUS) {
          $entries .= $this->addWSS_Page_Row($pageTable->findOnlineFiltered($unitPath));
      }
      return $entries;
  }
  445.  
  /**
   * Renders the entries for those pages of a list that are relevant for crawlers.
   *
   * @param array|Traversable $list page rows; each must offer isRelevantForCrawler()
   * @return string entries in the current output format
   */
  private function addWSS_Page_Row($list) {
      $entries = "";
      foreach ($list as $pageRow) {
          if (!$pageRow->isRelevantForCrawler()) {
              continue;
          }
          $entries .= $this->create_sitemap_page_entry($pageRow);
      }
      return $entries;
  }
  455.  
  /**
   * Renders the sitemap entry for one page, using its cdate as modification date.
   *
   * @param WSS_Page_Row $status_Settings the page
   * @return string entry in the current output format, '' when filtered out
   */
  private function create_sitemap_page_entry( WSS_Page_Row $status_Settings ) {
      $pagePath = $status_Settings->create_path();
      $lastmod = WSS_Date::mysql_timestamp_to_w3c($status_Settings->cdate);
      return $this->create_sitemap_entry($pagePath, $lastmod);
  }
  460.  
  /**
   * Renders the sitemap entries for the ABBE datasheet downloads.
   *
   * The paths come from abbe_collect_downloads(), defined in the included
   * datasheets.inc; the modification time of each file below the document
   * root is used as modification date.
   *
   * @return string entries in the current output format
   */
  private function create_abbe_downloads_entries()
  {
      // require does not work for unit tests
      require_once($_SERVER['DOCUMENT_ROOT'].'/advanced_optics/abbe_pdf/datasheets.inc');

      $entries = "";
      foreach (abbe_collect_downloads() as $downloadPath) {
          $modifiedAt = filemtime($_SERVER['DOCUMENT_ROOT'].$downloadPath);
          $entries .= $this->create_sitemap_entry(
              $downloadPath,
              WSS_Date::unix_timestamp_to_w3c($modifiedAt)
          );
      }
      return $entries;
  }
  477.  
  /**
   * Renders the sitemap entries for the tubing product selector URLs.
   *
   * Uses createTubingSelectorSitemap(false), i.e. no XML file is written.
   * Entries without a 'lastmod' value get an empty modification date.
   *
   * @return string entries in the current output format
   */
  private function createTubingSelectorEntries()
  {
      $entries = '';
      foreach ($this->createTubingSelectorSitemap(false) as $product) {
          if (empty($product['lastmod'])) {
              $lastmod = '';
          } else {
              $lastmod = WSS_Date::unix_timestamp_to_w3c($product['lastmod']);
          }
          $entries .= $this->create_sitemap_entry($product['loc'], $lastmod);
      }
      return $entries;
  }
  486.  
  487. }
  488.  
  489.  
  490. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement