sitemap = $sitemap;
	}

	/**
	 * Flag the current request as a bad sitemap so that redirect() turns it
	 * into a 404. Used to stop the display of empty sitemaps or invalid requests.
	 *
	 * @param bool $bool Is this a bad request. True or false.
	 */
	function set_bad_sitemap( $bool ) {
		$this->bad_sitemap = (bool) $bool;
	}

	/**
	 * Initialize sitemaps. Registers the `sitemap` and `sitemap_n` query vars and
	 * the rewrite rules that map sitemap URLs onto them.
	 */
	function init() {
		$GLOBALS['wp']->add_query_var( 'sitemap' );
		$GLOBALS['wp']->add_query_var( 'sitemap_n' );
		add_rewrite_rule( 'sitemap_index\.xml$', 'index.php?sitemap=1', 'top' );
		add_rewrite_rule( '([^/]+?)-sitemap([0-9]+)?\.xml$', 'index.php?sitemap=$matches[1]&sitemap_n=$matches[2]', 'top' );
	}

	/**
	 * Hijack requests for potential sitemaps.
	 *
	 * Does nothing when the `sitemap` query var is empty. Otherwise builds the
	 * requested sitemap and either marks the query as a 404 (bad sitemap) or
	 * outputs the sitemap and terminates the request.
	 */
	function redirect() {
		$type = get_query_var( 'sitemap' );
		if ( empty( $type ) )
			return;

		$this->build_sitemap( $type );

		// 404 for invalid or empty sitemaps
		if ( $this->bad_sitemap ) {
			$GLOBALS['wp_query']->is_404 = true;
			return;
		}

		$this->output();
		die();
	}

	/**
	 * Attempt to build the requested sitemap. Sets $bad_sitemap if this isn't
	 * for the root sitemap, a post type, a taxonomy, or a type for which a
	 * `wpseo_do_sitemap_{$type}` action is registered.
	 *
	 * @param string $type The requested sitemap's identifier.
	 */
	function build_sitemap( $type ) {
		$type = apply_filters( 'wpseo_build_sitemap_post_type', $type );

		if ( $type == 1 )
			$this->build_root_map();
		else if ( post_type_exists( $type ) )
			$this->build_post_type_map( $type );
		else if ( $tax = get_taxonomy( $type ) )
			$this->build_tax_map( $tax );
		else if ( has_action( 'wpseo_do_sitemap_' . $type ) )
			do_action( 'wpseo_do_sitemap_' . $type );
		else
			$this->bad_sitemap = true;
	}

	/**
	 * Build the root sitemap -- example.com/sitemap_index.xml -- which lists sub-sitemaps
	 * for other content types.
	 *
	 * Post types with more than 1000 published posts are split into numbered
	 * sub-sitemaps of 1000 entries each.
	 */
	function build_root_map() {
		global $wpdb;

		$options = get_wpseo_options();

		$this->sitemap = '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
		$base          = $GLOBALS['wp_rewrite']->using_index_permalinks() ? 'index.php/' : '';

		// reference post type specific sitemaps
		foreach ( get_post_types( array( 'public' => true ) ) as $post_type ) {
			if ( $post_type == 'attachment' )
				continue;

			if ( isset( $options['post_types-' . $post_type . '-not_in_sitemap'] ) && $options['post_types-' . $post_type . '-not_in_sitemap'] )
				continue;

			$count = $wpdb->get_var( $wpdb->prepare( "SELECT COUNT(ID) FROM $wpdb->posts WHERE post_type = %s AND post_status = 'publish' LIMIT 1", $post_type ) );

			// don't include post types with no posts
			if ( !$count )
				continue;

			$n = ( $count > 1000 ) ? (int) ceil( $count / 1000 ) : 1;
			for ( $i = 0; $i < $n; $i++ ) {
				// Numbered suffix only when the post type needs more than one sub-sitemap.
				$suffix = ( $n > 1 ) ? $i + 1 : '';

				if ( empty( $suffix ) || $suffix == $n ) {
					// Single or last sub-sitemap: use the newest modification date.
					$date = $this->get_last_modified( $post_type );
				} else {
					// Intermediate sub-sitemap: modification date of the last post on that page.
					$date = $wpdb->get_var( $wpdb->prepare( "SELECT post_modified_gmt FROM $wpdb->posts WHERE post_status = 'publish' AND post_type = %s ORDER BY post_modified_gmt ASC LIMIT 1 OFFSET %d", $post_type, $i * 1000 + 999 ) );
					$date = date( 'c', strtotime( $date ) );
				}

				$this->sitemap .= '<sitemap>' . "\n";
				$this->sitemap .= '<loc>' . home_url( $base . $post_type . '-sitemap' . $suffix . '.xml' ) . '</loc>' . "\n";
				$this->sitemap .= '<lastmod>' . htmlspecialchars( $date ) . '</lastmod>' . "\n";
				$this->sitemap .= '</sitemap>' . "\n";
			}
		}

		// reference taxonomy specific sitemaps
		foreach ( get_taxonomies( array( 'public' => true ) ) as $tax ) {
			if ( in_array( $tax, array( 'link_category', 'nav_menu', 'post_format' ), true ) )
				continue;

			if ( isset( $options['taxonomies-' . $tax . '-not_in_sitemap'] ) && $options['taxonomies-' . $tax . '-not_in_sitemap'] )
				continue;

			// don't include taxonomies with no terms
			if ( !$wpdb->get_var( $wpdb->prepare( "SELECT term_id FROM $wpdb->term_taxonomy WHERE taxonomy = %s AND count != 0 LIMIT 1", $tax ) ) )
				continue;

			// Retrieve the post_types that are registered to this taxonomy and then retrieve last modified date for all of those combined.
			$taxobj = get_taxonomy( $tax );
			$date   = $this->get_last_modified( $taxobj->object_type );

			$this->sitemap .= '<sitemap>' . "\n";
			$this->sitemap .= '<loc>' . home_url( $base . $tax . '-sitemap.xml' ) . '</loc>' . "\n";
			$this->sitemap .= '<lastmod>' . htmlspecialchars( $date ) . '</lastmod>' . "\n";
			$this->sitemap .= '</sitemap>' . "\n";
		}

		// allow other plugins to add their sitemaps to the index
		$this->sitemap .= apply_filters( 'wpseo_sitemap_index', '' );
		$this->sitemap .= '</sitemapindex>';
	}

	/**
	 * Build a sub-sitemap for a specific post type -- example.com/post_type-sitemap.xml
	 *
	 * Reads the `sitemap_n` query var to decide which page of 1000 posts to emit.
	 * Sets $bad_sitemap when the post type is excluded or nothing would be output.
	 *
	 * @param string $post_type Registered post type's slug
	 */
	function build_post_type_map( $post_type ) {
		global $wpdb;

		$options = get_wpseo_options();

		$excluded = isset( $options['post_types-' . $post_type . '-not_in_sitemap'] ) && $options['post_types-' . $post_type . '-not_in_sitemap'];
		if ( $excluded || in_array( $post_type, array( 'revision', 'nav_menu_item', 'attachment' ), true ) ) {
			$this->bad_sitemap = true;
			return;
		}

		$output   = '';
		$archive  = false;
		$front_id = get_option( 'page_on_front' );

		if ( !$front_id && ( $post_type == 'post' || $post_type == 'page' ) ) {
			// No static front page: the home URL itself is the first entry.
			$output .= $this->sitemap_url( array(
				'loc' => home_url( '/' ),
				'pri' => 1,
				'chf' => 'daily',
			) );
		} else if ( $front_id && $post_type == 'post' ) {
			// Static front page: list the dedicated posts page, if one is configured.
			$page_for_posts = get_option( 'page_for_posts' );
			if ( $page_for_posts ) {
				$output .= $this->sitemap_url( array(
					'loc' => get_permalink( $page_for_posts ),
					'pri' => 1,
					'chf' => 'daily',
				) );
			}
		}

		if ( function_exists( 'get_post_type_archive_link' ) ) {
			$archive = get_post_type_archive_link( $post_type );
			if ( $archive ) {
				$output .= $this->sitemap_url( array(
					'loc' => $archive,
					'pri' => 0.8,
					'chf' => 'weekly',
					'mod' => $this->get_last_modified( $post_type ), // get_lastpostmodified( 'gmt', $post_type ) #17455
				) );
			}
		}

		// Quote the post type through wpdb instead of interpolating it raw into the SQL.
		$type_clause = $wpdb->prepare( 'post_type = %s', $post_type );

		$join_filter  = apply_filters( 'wpseo_typecount_join', '', $post_type );
		$where_filter = apply_filters( 'wpseo_typecount_where', '', $post_type );
		$typecount    = $wpdb->get_var( "SELECT COUNT(ID) FROM $wpdb->posts {$join_filter} WHERE post_status = 'publish' AND post_password = '' AND {$type_clause} {$where_filter}" );

		if ( $typecount == 0 && empty( $archive ) ) {
			$this->bad_sitemap = true;
			return;
		}

		// Let's flush the object cache so we're not left with garbage from other plugins
		wp_cache_flush();

		$stackedurls = array();

		$steps  = 25;
		$n      = (int) get_query_var( 'sitemap_n' );
		$offset = ( $n > 1 ) ? ( $n - 1 ) * 1000 : 0;
		$total  = $offset + 1000;
		if ( $total > $typecount )
			$total = $typecount;

		// Host used to keep only images served from this site; loop-invariant, so computed once.
		$host          = str_replace( 'www.', '', parse_url( get_bloginfo( 'url' ), PHP_URL_HOST ) );
		$has_thumbnail = function_exists( 'get_the_post_thumbnail' );

		// We grab post_date, post_name, post_author and post_status too so we can throw these objects into get_permalink, which saves a get_post call for each permalink.
		while ( $total > $offset ) {

			$join_filter  = apply_filters( 'wpseo_posts_join', '', $post_type );
			$where_filter = apply_filters( 'wpseo_posts_where', '', $post_type );

			// Optimized query per this thread: http://wordpress.org/support/topic/plugin-wordpress-seo-by-yoast-performance-suggestion
			// Also see http://explainextended.com/2009/10/23/mysql-order-by-limit-performance-late-row-lookups/
			$posts = $wpdb->get_results( "SELECT l.ID, post_content, post_name, post_author, post_parent, post_modified_gmt, post_date, post_date_gmt
			FROM (
				SELECT ID FROM $wpdb->posts {$join_filter}
						WHERE post_status = 'publish'
						AND	post_password = ''
						AND {$type_clause}
						{$where_filter}
						ORDER BY post_modified ASC
						LIMIT $steps OFFSET $offset ) o
				JOIN $wpdb->posts l
				ON l.ID = o.ID
				ORDER BY l.ID" );

			$offset = $offset + $steps;

			foreach ( $posts as $p ) {
				// Fill in the fields the query skipped so get_permalink() accepts the row as a post.
				$p->post_type   = $post_type;
				$p->post_status = 'publish';
				$p->filter      = 'sample';

				$include = wpseo_get_value( 'sitemap-include', $p->ID );
				if ( wpseo_get_value( 'meta-robots-noindex', $p->ID ) && $include != 'always' )
					continue;
				if ( $include == 'never' )
					continue;

				$redirect = wpseo_get_value( 'redirect', $p->ID );
				if ( $redirect && strlen( $redirect ) > 0 )
					continue;

				$url = array();

				$url['mod'] = ( isset( $p->post_modified_gmt ) && $p->post_modified_gmt != '0000-00-00 00:00:00' ) ? $p->post_modified_gmt : $p->post_date_gmt;
				$url['chf'] = 'weekly';
				$url['loc'] = get_permalink( $p );

				$canonical = wpseo_get_value( 'canonical', $p->ID );
				if ( $canonical && $canonical != '' && $canonical != $url['loc'] ) {
					// Let's assume that if a canonical is set for this page and it's different from the URL of this post, that page is either
					// already in the XML sitemap OR is on an external site, either way, we shouldn't include it here.
					continue;
				}

				if ( isset( $options['trailingslash'] ) && $options['trailingslash'] && $p->post_type != 'post' )
					$url['loc'] = trailingslashit( $url['loc'] );

				$pri = wpseo_get_value( 'sitemap-prio', $p->ID );
				if ( is_numeric( $pri ) )
					$url['pri'] = $pri;
				elseif ( $p->post_parent == 0 && $p->post_type == 'page' )
					$url['pri'] = 0.8;
				else
					$url['pri'] = 0.6;

				if ( $p->ID == $front_id )
					$url['pri'] = 1.0;

				$url['images'] = array();

				// Sources already added for this post; the image list itself is a plain list,
				// so duplicates have to be tracked separately.
				$seen_images = array();

				$content = $p->post_content;
				if ( $has_thumbnail ) {
					// Prepend the featured image so it is picked up by the <img> scan below.
					$content = '<p>' . get_the_post_thumbnail( $p->ID, 'full' ) . '</p>' . $content;
				}

				if ( preg_match_all( '/<img [^>]+>/', $content, $matches ) ) {
					foreach ( $matches[0] as $img ) {
						if ( !preg_match( '/src=("|\')([^"|\']+)("|\')/', $img, $match ) )
							continue;

						$src = $match[2];
						if ( strpos( $src, 'http' ) !== 0 ) {
							// Only root-relative paths can be turned into absolute URLs.
							if ( $src[0] != '/' )
								continue;
							$src = get_bloginfo( 'url' ) . $src;
						}

						// Skip images hosted elsewhere.
						if ( strpos( $src, $host ) === false )
							continue;

						// Skip sources that esc_url() would have to alter.
						if ( $src != esc_url( $src ) )
							continue;

						if ( isset( $seen_images[$src] ) )
							continue;
						$seen_images[$src] = true;

						$image = array(
							'src' => apply_filters( 'wpseo_xml_sitemap_img_src', $src, $p )
						);

						if ( preg_match( '/title=("|\')([^"\']+)("|\')/', $img, $match ) )
							$image['title'] = str_replace( array( '-', '_' ), ' ', $match[2] );

						if ( preg_match( '/alt=("|\')([^"\']+)("|\')/', $img, $match ) )
							$image['alt'] = str_replace( array( '-', '_' ), ' ', $match[2] );

						$image           = apply_filters( 'wpseo_xml_sitemap_img', $image, $p );
						$url['images'][] = $image;
					}
				}

				if ( strpos( $p->post_content, '[gallery' ) !== false ) {
					$attachments = get_children( array( 'post_parent' => $p->ID, 'post_status' => 'inherit', 'post_type' => 'attachment', 'post_mime_type' => 'image' ) );
					foreach ( $attachments as $att_id => $attachment ) {
						$src = wp_get_attachment_image_src( $att_id, 'large', false );
						// wp_get_attachment_image_src() returns false when no image is available.
						if ( !$src )
							continue;

						$image = array(
							'src' => apply_filters( 'wpseo_xml_sitemap_img_src', $src[0], $p )
						);

						if ( $alt = get_post_meta( $att_id, '_wp_attachment_image_alt', true ) )
							$image['alt'] = $alt;

						$image['title'] = $attachment->post_title;

						$image           = apply_filters( 'wpseo_xml_sitemap_img', $image, $p );
						$url['images'][] = $image;
					}
				}

				$url['images'] = apply_filters( 'wpseo_sitemap_urlimages', $url['images'], $p->ID );

				// Never list the same location twice within one sitemap.
				if ( !in_array( $url['loc'], $stackedurls ) ) {
					$output       .= $this->sitemap_url( $url );
					$stackedurls[] = $url['loc'];
				}

				// Clear the post_meta cache for the post, as we no longer need it now.
				wp_cache_delete( $p->ID, 'post_meta' );
			}
		}

		if ( empty( $output ) ) {
			$this->bad_sitemap = true;
			return;
		}

		$this->sitemap  = '<urlset xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" ';
		$this->sitemap .= 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" ';
		$this->sitemap .= 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
		$this->sitemap .= $output;

		// Filter to allow adding extra URLs, only do this on the first XML sitemap, not on all.
		// The first page is requested without a number (0) or, for paginated types, as number 1.
		if ( $n <= 1 )
			$this->sitemap .= apply_filters( 'wpseo_sitemap_' . $post_type . '_content', '' );

		$this->sitemap .= '</urlset>';
	}

	/**
	 * Build a sub-sitemap for a specific taxonomy -- example.com/tax-sitemap.xml
	 *
	 * Sets $bad_sitemap when the taxonomy is excluded, its terms cannot be
	 * retrieved, or no term produces an entry.
	 *
	 * @param object $taxonomy Taxonomy object as returned by get_taxonomy()
	 */
	function build_tax_map( $taxonomy ) {
		global $wpdb;

		$options = get_wpseo_options();

		$excluded = isset( $options['taxonomies-' . $taxonomy->name . '-not_in_sitemap'] ) && $options['taxonomies-' . $taxonomy->name . '-not_in_sitemap'];
		// Compare the taxonomy's name, not the object itself, against the excluded slugs.
		if ( $excluded || in_array( $taxonomy->name, array( 'link_category', 'nav_menu', 'post_format' ), true ) ) {
			$this->bad_sitemap = true;
			return;
		}

		$terms = get_terms( $taxonomy->name, array( 'hide_empty' => true ) );
		if ( is_wp_error( $terms ) || empty( $terms ) ) {
			$this->bad_sitemap = true;
			return;
		}

		$output = '';
		foreach ( $terms as $c ) {
			$url = array();

			$include = wpseo_get_term_meta( $c, $c->taxonomy, 'sitemap_include' );
			if ( wpseo_get_term_meta( $c, $c->taxonomy, 'noindex' ) && $include != 'always' )
				continue;
			if ( $include == 'never' )
				continue;

			$url['loc'] = wpseo_get_term_meta( $c, $c->taxonomy, 'canonical' );
			if ( !$url['loc'] ) {
				$url['loc'] = get_term_link( $c, $c->taxonomy );
				// get_term_link() returns a WP_Error when the term link cannot be built.
				if ( is_wp_error( $url['loc'] ) )
					continue;
				if ( isset( $options['trailingslash'] ) && $options['trailingslash'] )
					$url['loc'] = trailingslashit( $url['loc'] );
			}

			// Terms with more posts get a higher priority.
			if ( $c->count > 10 ) {
				$url['pri'] = 0.6;
			} else if ( $c->count > 3 ) {
				$url['pri'] = 0.4;
			} else {
				$url['pri'] = 0.2;
			}

			// Grab last modified date
			$sql = $wpdb->prepare(
				"SELECT MAX(p.post_date) AS lastmod
					FROM	$wpdb->posts AS p
					INNER JOIN $wpdb->term_relationships AS term_rel
					ON		term_rel.object_id = p.ID
					INNER JOIN $wpdb->term_taxonomy AS term_tax
					ON		term_tax.term_taxonomy_id = term_rel.term_taxonomy_id
					AND		term_tax.taxonomy = %s
					AND		term_tax.term_id = %d
					WHERE	p.post_status = 'publish'
					AND		p.post_password = ''",
				$c->taxonomy,
				$c->term_id
			);
			$url['mod'] = $wpdb->get_var( $sql );
			$url['chf'] = 'weekly';
			$output    .= $this->sitemap_url( $url );
		}

		if ( empty( $output ) ) {
			$this->bad_sitemap = true;
			return;
		}

		$this->sitemap  = '<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" ';
		$this->sitemap .= 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" ';
		$this->sitemap .= 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
		$this->sitemap .= $output . '</urlset>';
	}

	/**
	 * Spit out the generated sitemap and relevant headers and encoding information.
	 *
	 * NOTE(review): the XML declaration and the trailing XML comments were
	 * reconstructed; confirm their exact wording against the shipped plugin.
	 */
	function output() {
		// Prevent the search engines from indexing the XML Sitemap.
		header( 'X-Robots-Tag: noindex, follow', true );
		header( 'Content-Type: text/xml' );
		echo '<?xml version="1.0" encoding="' . get_bloginfo( 'charset' ) . '"?>' . "\n";
		echo $this->sitemap;
		echo "\n" . '<!-- XML Sitemap generated by WordPress SEO -->';

		if ( defined( 'WP_DEBUG' ) && WP_DEBUG )
			echo "\n" . '<!-- Built in ' . timer_stop() . ' seconds | peak memory ' . memory_get_peak_usage() . ' bytes -->';
	}

	/**
	 * Build the <url> tag for a given URL.
	 *
	 * Recognised keys: `loc` (location), `mod` (modification date, optional --
	 * the current time is used when absent), `chf` (change frequency),
	 * `pri` (priority) and `images` (optional list of arrays with `src` and
	 * optional `title` / `alt`).
	 *
	 * @param array $url Array of parts that make up this entry
	 * @return string
	 */
	function sitemap_url( $url ) {
		if ( isset( $url['mod'] ) )
			$date = mysql2date( "Y-m-d\TH:i:s+00:00", $url['mod'] );
		else
			$date = date( 'c' );

		$output = "\t<url>\n";
		// Escape the location for XML; existing entities are left alone to avoid double-encoding.
		$output .= "\t\t<loc>" . htmlspecialchars( $url['loc'], ENT_COMPAT, 'UTF-8', false ) . "</loc>\n";
		$output .= "\t\t<lastmod>" . $date . "</lastmod>\n";
		$output .= "\t\t<changefreq>" . $url['chf'] . "</changefreq>\n";
		// Some locales render floats with a decimal comma; the sitemap needs a point.
		$output .= "\t\t<priority>" . str_replace( ',', '.', $url['pri'] ) . "</priority>\n";

		if ( isset( $url['images'] ) && count( $url['images'] ) > 0 ) {
			foreach ( $url['images'] as $img ) {
				$output .= "\t\t<image:image>\n";
				$output .= "\t\t\t<image:loc>" . esc_html( $img['src'] ) . "</image:loc>\n";
				if ( isset( $img['title'] ) )
					$output .= "\t\t\t<image:title>" . _wp_specialchars( html_entity_decode( $img['title'], ENT_QUOTES, get_bloginfo('charset') ) ) . "</image:title>\n";
				if ( isset( $img['alt'] ) )
					$output .= "\t\t\t<image:caption>" . _wp_specialchars( html_entity_decode( $img['alt'], ENT_QUOTES, get_bloginfo('charset') ) ) . "</image:caption>\n";
				$output .= "\t\t</image:image>\n";
			}
		}

		$output .= "\t</url>\n";
		return $output;
	}

	/**
	 * Notify search engines of the updated sitemap.
	 *
	 * Google and Bing are always pinged; Yahoo! and Ask only when the matching
	 * `xml_ping_yahoo` / `xml_ping_ask` entries of the `wpseo_xml` option are set.
	 */
	function ping_search_engines() {
		$options    = get_option( 'wpseo_xml' );
		$base       = $GLOBALS['wp_rewrite']->using_index_permalinks() ? 'index.php/' : '';
		$sitemapurl = urlencode( home_url( $base . 'sitemap_index.xml' ) );

		// Always ping Google and Bing, optionally ping Ask and Yahoo!
		wp_remote_get( 'http://www.google.com/webmasters/tools/ping?sitemap=' . $sitemapurl );
		wp_remote_get( 'http://www.bing.com/webmaster/ping.aspx?sitemap=' . $sitemapurl );

		if ( isset( $options['xml_ping_yahoo'] ) && $options['xml_ping_yahoo'] )
			wp_remote_get( 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=3usdTDLV34HbjQpIBuzMM1UkECFl5KDN7fogidABihmHBfqaebDuZk1vpLDR64I-&url=' . $sitemapurl );

		if ( isset( $options['xml_ping_ask'] ) && $options['xml_ping_ask'] )
			wp_remote_get( 'http://submissions.ask.com/ping?sitemap=' . $sitemapurl );
	}

	/**
	 * Make a request for the sitemap index so as to cache it before the arrival of the search engines.
	 */
	function hit_sitemap_index() {
		$base = $GLOBALS['wp_rewrite']->using_index_permalinks() ? 'index.php/' : '';
		$url  = home_url( $base . 'sitemap_index.xml' );
		wp_remote_get( $url );
	}

	/**
	 * Hook into redirect_canonical to stop trailing slashes on sitemap.xml URLs
	 *
	 * @param string $redirect The redirect URL currently determined.
	 * @return bool|string False for sitemap requests, otherwise $redirect unchanged.
	 */
	function canonical( $redirect ) {
		$sitemap = get_query_var( 'sitemap' );
		if ( !empty( $sitemap ) )
			return false;

		return $redirect;
	}

	/**
	 * Get the modification date for the last modified post in the given post type(s).
	 *
	 * Per-type results are cached in the `timeinfo` object-cache group.
	 *
	 * @param array|string $post_types Post type, or post types, to get the last modification date for
	 * @return string Most recent modification date across all given types, in W3C format
	 */
	function get_last_modified( $post_types ) {
		global $wpdb;

		if ( !is_array( $post_types ) )
			$post_types = array( $post_types );

		$result = 0;
		foreach ( $post_types as $post_type ) {
			$key  = 'lastpostmodified:gmt:' . $post_type;
			$date = wp_cache_get( $key, 'timeinfo' );
			if ( !$date ) {
				$date = $wpdb->get_var( $wpdb->prepare( "SELECT post_modified_gmt FROM $wpdb->posts WHERE post_status = 'publish' AND post_type = %s ORDER BY post_modified_gmt DESC LIMIT 1", $post_type ) );
				if ( $date )
					wp_cache_set( $key, $date, 'timeinfo' );
			}

			// A post type without published posts yields no date; leave $result untouched then.
			$timestamp = $date ? strtotime( $date ) : false;
			if ( $timestamp > $result )
				$result = $timestamp;
		}

		// Transform to W3C Date format.
		$result = date( 'c', $result );
		return $result;
	}

}

global $wpseo_sitemaps;
$wpseo_sitemaps = new WPSEO_Sitemaps();