SHARE
TWEET

PostSaver.php

a guest Feb 5th, 2019 141 in 72 days
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. <?php
  2. /**
  3.  * Created by PhpStorm.
  4.  * User: turgutsaricam
  5.  * Date: 24/08/16
  6.  * Time: 22:09
  7.  */
  8.  
  9. namespace WPCCrawler\Objects\Crawling\Savers;
  10.  
  11.  
  12. use WP_User_Query;
  13. use WPCCrawler\Exceptions\DuplicatePostException;
  14. use WPCCrawler\Exceptions\StopSavingException;
  15. use WPCCrawler\Factory;
  16. use WPCCrawler\Objects\Crawling\Bot\PostBot;
  17. use WPCCrawler\Objects\Crawling\Data\PostData;
  18. use WPCCrawler\PostDetail\PostDetailsService;
  19. use WPCCrawler\PostDetail\PostSaverData;
  20. use WPCCrawler\Objects\Enums\ErrorType;
  21. use WPCCrawler\Objects\Enums\InformationMessage;
  22. use WPCCrawler\Objects\Enums\InformationType;
  23. use WPCCrawler\Objects\File\MediaFile;
  24. use WPCCrawler\Objects\File\MediaService;
  25. use WPCCrawler\Objects\Informing\Information;
  26. use WPCCrawler\Objects\Informing\Informer;
  27. use WPCCrawler\Objects\Settings\SettingsImpl;
  28. use WPCCrawler\Objects\Traits\ErrorTrait;
  29. use WPCCrawler\Objects\Traits\SettingsTrait;
  30. use WPCCrawler\Utils;
  31.  
  32. class PostSaver extends AbstractSaver {
  33.    
  34.     use SettingsTrait;
  35.     use ErrorTrait;
  36.  
  37.     private static $DEBUG = false;
  38.  
  39.     /** @var string Stores ID of the site for which the last post crawl was performed. */
  40.     public $optionLastCrawledSiteId = '_wpcc_last_crawled_site_id';
  41.  
  42.     /** @var string Stores ID of the site for which the last post recrawl was performed */
  43.     public $optionLastRecrawledSiteId = '_wpcc_last_recrawled_site_id';
  44.  
  45.     /** @var string Stores source URLs as an array. Each inserted post will have this meta. */
  46.     private $postMetaSourceUrls = '_wpcc_source_urls';
  47.  
  48.     /** @var string Stores first page URL of the target post. Each inserted post will have this meta. */
  49.     private $postMetaPostFirstPageUrl = '_wpcc_post_url';
  50.  
  51.     /*
  52.      *
  53.      */
  54.  
  55.     /** @var string Prefix that will be added to the meta keys used in regular crawling task */
  56.     public $cronCrawlPostMetaPrefix = '_cron';
  57.  
  58.     /** @var string Prefix that will be added to the meta keys used in recrawl task */
  59.     public $cronRecrawlPostMetaPrefix = '_cron_recrawl';
  60.  
  61.     /*
  62.      * DUPLICATE CHECK TYPES
  63.      */
  64.  
  65.     const DUPLICATE_CHECK_URL       = 'url';
  66.     const DUPLICATE_CHECK_TITLE     = 'title';
  67.     const DUPLICATE_CHECK_CONTENT   = 'content';
  68.  
  69.     /*
  70.      *
  71.      */
  72.  
  73.     /** @var PostData */
  74.     private $data;
  75.  
  76.     /** @var bool Stores whether the current task is a recrawl task or not. */
  77.     private $isRecrawl = false;
  78.    
  79.     /*
  80.      *
  81.      */
  82.    
  83.     /** @var string|null */
  84.     private $nextPageUrl = null;
  85.    
  86.     /** @var array|null */
  87.     private $nextPageUrls = null;
  88.  
  89.     /** @var bool */
  90.     private $isFirstPage = false;
  91.  
  92.     /** @var null|object */
  93.     private $urlTuple = null;
  94.  
  95.     /** @var string|null */
  96.     private $urlToCrawl = null;
  97.    
  98.     /** @var int|null */
  99.     private $postId = null;
  100.  
  101.     /** @var int|null */
  102.     private $draftPostId = null;
  103.  
  104.     /** @var int|null */
  105.     private $siteIdToCheck = null;
  106.  
  107.     /** @var bool */
  108.     private $updateLastCrawled = false;
  109.    
  110.     /** @var string|null */
  111.     private $postUrl = null;
  112.    
  113.     /** @var PostBot|null */
  114.     private $bot = null;
  115.    
  116.     /** @var bool */
  117.     private $contentExists = true;
  118.  
  119.     /**
  120.      * Update (recrawl) a post of a URL tuple.
  121.      *
  122.      * @param object $urlTuple A row in wpcc_urls table
  123.      * @return null
  124.      */
  125.     public function executePostRecrawl($urlTuple) {
  126.         $this->setRequestMade(false);
  127.         $this->clearErrors();
  128.  
  129.         // Do not proceed if the URL tuple is not found or it does not have a saved post ID.
  130.         if(!$urlTuple || !$urlTuple->saved_post_id) return null;
  131.  
  132.         $this->isRecrawl = true;
  133.  
  134.         $siteIdToCheck = $urlTuple->post_id;
  135.  
  136.         // Get settings for the site ID
  137.         $settings = get_post_meta($siteIdToCheck);
  138.  
  139.         $this->setSettings($settings, Factory::postService()->getSingleMetaKeys());
  140.  
  141.         $prefix             = $this->getCronPostMetaPrefix();
  142.         $lastRecrawledUrlId = $this->getSetting($prefix . '_last_crawled_url_id');
  143.         $nextPageUrl        = $this->getSetting($prefix . '_post_next_page_url');
  144.         $nextPageUrls       = $this->getSetting($prefix . '_post_next_page_urls');
  145.         $draftPostId        = $this->getSetting($prefix . '_post_draft_id');
  146.  
  147.         // If the post with saved_post_id does not exist, make URL tuple's saved_post_id null, and stop.
  148.         $post = get_post($lastRecrawledUrlId && $draftPostId ? $draftPostId : $urlTuple->saved_post_id);
  149.         if(!$post) {
  150.             Factory::databaseService()->updateUrlSavedPostId($lastRecrawledUrlId, null);
  151.  
  152.             // Otherwise, make variables null to continue with the URL tuple.
  153.             $lastRecrawledUrlId = null;
  154.             $nextPageUrl = null;
  155.             $nextPageUrls = null;
  156.             $draftPostId = null;
  157.         }
  158.  
  159.         $this->savePost(
  160.             $siteIdToCheck,
  161.             $settings,
  162.             // If there is a draft post ID, it means that post is not finished to be saved. So, use URL ID of the draft
  163.             // post instead of the ID of the current URL tuple.
  164.             $lastRecrawledUrlId && $draftPostId ? $lastRecrawledUrlId : $urlTuple->id,
  165.             true,
  166.             $nextPageUrl,
  167.             $nextPageUrls,
  168.             $lastRecrawledUrlId && $draftPostId ? $draftPostId : $urlTuple->saved_post_id
  169.         );
  170.     }
  171.  
  172.     /**
  173.      * Save a post for a site. This method does two things:
  174.      * <li>Save a post's next page if there is a post that has pages and has not yet saved completely.</li>
  175.      * <li>Save an unsaved post.</li>
  176.      *
  177.      * @param int  $siteIdToCheck Site ID for which a post will be saved
  178.      */
  179.     public function executePostSave($siteIdToCheck) {
  180.         $this->setRequestMade(false);
  181.         $this->clearErrors();
  182.  
  183.         if(!$siteIdToCheck) return;
  184.  
  185.         $this->isRecrawl = false;
  186.  
  187.         // Get settings for the site ID
  188.         $settings = get_post_meta($siteIdToCheck);
  189.  
  190.         $this->setSettings($settings, Factory::postService()->getSingleMetaKeys());
  191.  
  192.         $prefix             = $this->getCronPostMetaPrefix();
  193.         $lastCrawledUrlId   = $this->getSetting($prefix . '_last_crawled_url_id');
  194.         $nextPageUrl        = $this->getSetting($prefix . '_post_next_page_url');
  195.         $nextPageUrls       = $this->getSetting($prefix . '_post_next_page_urls');
  196.         $draftPostId        = $this->getSetting($prefix . '_post_draft_id');
  197.  
  198.         $this->savePost($siteIdToCheck, $settings, $lastCrawledUrlId, true, $nextPageUrl, $nextPageUrls, $draftPostId);
  199.     }
  200.  
  201.     /*
  202.      *
  203.      */
  204.  
  205.     /**
  206.      * Save a post to the database. This method does 3 things:
  207.      * <ul>
  208.      * <li> If a urlId is supplied, saves its post URL to the database. This is used to save a post manually. Just pick
  209.      * an ID from the database.</li>
  210.      * <li> If there are only siteIdToCheck and its settings, then a URL will be found by using CRON settings and saved
  211.      * to the database.</li>
  212.      * <li> If there are urlId, nextPageUrl(s) and draftPostId, then a next page will be saved for the specified urlId.</li>
  213.      * </ul>
  214.      *
            * The URL tuple is locked before the target URL is crawled. When the method runs to its end, the lock flag is
            * set back to false while the saved/recrawled status of the URL tuple is updated.
            *
            * {@link StopSavingException} and {@link DuplicatePostException} thrown by the individual steps are caught in
            * this method. In both cases null is returned and the statements after the try-catch block are not executed.
            *
  215.      * @param int         $siteIdToCheck     Site ID which the post belongs to, to get the settings for crawling
  216.      * @param array       $settings          Settings for siteIdToCheck
  217.      * @param null|int    $urlId             ID of a URL tuple from wpcc_urls table
  218.      * @param bool        $updateLastCrawled True if you want to update CRON options about last crawled site, false
  219.      *                                       otherwise
  220.      * @param null|string $nextPageUrl       Next page URL for the post, if exists
  221.      * @param null|array  $nextPageUrls      All next page URLs for the post, if exists
  222.      * @param null|int    $draftPostId       ID of a post which is used to save content for this post, for previous
  223.      *                                       pages
  224.      * @return int|null Post ID, or null if the post is not saved
  225.      */
  226.     public function savePost($siteIdToCheck, $settings, $urlId = null, $updateLastCrawled = false,
  227.                              $nextPageUrl = null, $nextPageUrls = null, $draftPostId = null) {
  228.  
               // Use the given settings only if no settings have been set for this instance yet.
  229.         if(!$this->getSettings()) $this->setSettings($settings, Factory::postService()->getSingleMetaKeys());
  230.  
  231.         // Initialize instance variables
               // NOTE(review): urlToCrawl is documented as string|null, but it is initialized with false here.
  232.         $this->urlToCrawl           = false;
  233.         $this->isFirstPage          = true;
  234.         $this->nextPageUrls         = $nextPageUrls;
  235.         $this->nextPageUrl          = $nextPageUrl;
  236.         $this->draftPostId          = $draftPostId;
  237.         $this->siteIdToCheck        = $siteIdToCheck;
  238.         $this->updateLastCrawled    = $updateLastCrawled;
  239.        
  240.         if(static::$DEBUG) {
  241.             var_dump('Last Crawled Url ID: ' . $urlId);
  242.             var_dump('Next Page URL: ' . $this->nextPageUrl);
  243.             var_dump('Next Page URLs:');
  244.             var_dump($this->nextPageUrls);
  245.             var_dump('Draft Post ID: ' . $this->draftPostId);
  246.         }
  247.  
  248.         try {
  249.             // Prepare $this->isFirstPage, $this->urlTuple, and $this->urlToCrawl
  250.             $this->prepareUrlTupleToCrawl($urlId);
  251.  
  252.             // Lock the URL tuple so that it won't be selected as the URL to crawl again during saving process
  253.             Factory::databaseService()->updateUrlSavedStatus($this->urlTuple->id, $this->urlTuple->is_saved, $this->urlTuple->saved_post_id, $this->urlTuple->update_count, true);
  254.  
  255.             $mainSiteUrl    = $this->getSetting('_main_page_url');
  256.             $this->postUrl  = Utils::prepareUrl($mainSiteUrl, $this->urlToCrawl);
  257.  
  258.             // Create a new bot
  259.             $this->bot = new PostBot($settings, $this->siteIdToCheck);
  260.  
  261.             // Prepare the post data
  262.             $this->preparePostData();
  263.  
  264.             // Prepare next page URL
  265.             $this->prepareNextPageUrl();
  266.  
  267.             // Check content existence
  268.             $this->checkAndReactToContentExistence();
  269.  
  270.             // Prepare the post data and store it in the PostData instance
  271.             $this->data->setWpPostData($this->createWPPostData());
  272.  
  273.             // Check if the post is duplicate and, if so, handle the situation.
  274.             $this->handleIfDuplicate();
  275.  
  276.             // Insert the prepared post data into the database.
  277.             $this->insertPostData();
  278.  
  279.             // Set post's category if it belongs to a custom taxonomy
  280.             $this->saveCategories();
  281.  
  282.             // Delete already-existing attachments when updating a post.
  283.             $this->maybeDeleteAttachments();
  284.  
  285.             // Save featured image
  286.             $this->saveFeaturedImage();
  287.  
  288.             // Save meta keywords
  289.             $this->saveMetaKeywords();
  290.  
  291.             // Save meta description
  292.             $this->saveMetaDescription();
  293.  
  294.             // Save attachments
  295.             $galleryAttachmentIds = $this->saveAttachments();
  296.  
  297.             /*
  298.              * SAVE REGISTERED POST DETAILS
  299.              */
  300.  
  301.             // Create the data that will be used by the savers
  302.             $saverData = new PostSaverData(
  303.                 $this,
  304.                 $this->postId,
  305.                 $this->data,
  306.                 $this->isRecrawl,
  307.                 $this->isFirstPage,
  308.                 $this->urlTuple,
  309.                 $galleryAttachmentIds
  310.             );
  311.  
  312.             // Save registered post details
  313.             PostDetailsService::getInstance()->save($this->bot, $saverData);
  314.  
  315.             /*
  316.              *
  317.              */
  318.  
  319.             // Save custom meta. This should be done at last to allow the user to override some previously-set post meta values.
  320.             $this->saveCustomMeta();
  321.  
  322.             // Save custom taxonomies. This should be done at last to allow the user to override some previously-set taxonomy values.
  323.             $this->saveCustomTaxonomies();
  324.            
  325.         } catch (StopSavingException $e) {
  326.             // If the saving operation must be stopped, return null.
  327.             return null;
  328.  
  329.         } catch(DuplicatePostException $e) {
                   // $saverData is defined only if the exception was thrown after the PostSaverData was created in the
                   // try block. Otherwise, null is passed.
  330.             $this->onDuplicatePostException($e, isset($saverData) ? $saverData : null);
  331.  
  332.             // Return.
  333.             return null;
  334.         }
  335.  
  336.         /*
  337.          *
  338.          */
  339.  
  340.         // Save related meta
               // When there is a next page URL, the ID of the URL tuple and the ID of the post (as the draft post ID) are
               // passed along with it. When there is no next page URL, null and an empty string are passed instead.
  341.         if($this->updateLastCrawled)
  342.             $this->updateLastCrawled($this->siteIdToCheck, $this->nextPageUrl ? $this->urlTuple->id : null, $this->nextPageUrl, $this->nextPageUrls, $this->nextPageUrl ? $this->postId : '');
  343.  
  344.         // Save post URL as post meta
  345.         if($this->isFirstPage && $this->postId && isset($this->urlTuple->url))
  346.             update_post_meta($this->postId, $this->postMetaPostFirstPageUrl, $this->urlTuple->url);
  347.  
  348.         // Update saved_at if this is the first page and the URL tuple does not have a saved_post_id
  349.         if($this->isFirstPage && $this->postId && !$this->urlTuple->saved_post_id) {
  350.             Factory::databaseService()->updateUrlPostSavedAt($this->urlTuple->id, $this->postId, $this->data->getDateCreated());
  351.         }
  352.  
  353.         // If this is the last page, tidy up things.
  354.         if(!$this->nextPageUrl) {
  355.  
  356.             // Set this URL as saved
  357.             if(!$this->isRecrawl) {
  358.                 Factory::databaseService()->updateUrlSavedStatus(
  359.                     $this->urlTuple->id,
  360.                     true,
  361.                     $this->postId ? $this->postId : null,
  362.                     $this->urlTuple->update_count,
  363.                     false
  364.                 );
  365.  
  366.             // Otherwise, set this URL as recrawled
  367.             } else {
  368.                 Factory::databaseService()->updateUrlRecrawledStatus($this->urlTuple->id, $this->urlTuple->update_count + 1, false);
  369.             }
  370.  
  371.         // Otherwise, remove the lock so that the next page can be saved. Also, make this URL not saved so that it won't
  372.         // be selected as a URL that needs to be crawled for post crawling event.
  373.         } else {
  374.             Factory::databaseService()->updateUrlSavedStatus($this->urlTuple->id, false, $this->postId ? $this->postId : null, $this->urlTuple->update_count, false);
  375.         }
  376.  
  377.         if(static::$DEBUG) {
  378.             var_dump('Last Crawled Url ID: '    . $this->urlTuple->id);
  379.             var_dump('Category ID: '            . $this->urlTuple->category_id);
  380.             var_dump('Next Page URL: '          . $this->nextPageUrl);
  381.             var_dump('Next Page URLs:');
  382.             var_dump($this->nextPageUrls);
  383.             var_dump('Draft Post ID: '          . ($this->nextPageUrl ? $this->postId : ''));
  384.         }
  385.  
  386.         return $this->postId;
  387.     }
  388.  
  389.     /**
  390.      * Handles what happens when there is a duplicate post.
  391.      *
  392.      * @param DuplicatePostException $e
  393.      * @param null|PostSaverData $saverData
  394.      * @since 1.8.0
  395.      */
  396.     private function onDuplicatePostException(DuplicatePostException $e, $saverData) {
  397.         // There is a duplicate post.
  398.         $duplicateId = $e->getCode();
  399.  
  400.         /**
  401.          * Fires just after a post is decided to be duplicate. At this point, no new post is inserted to the database
  402.          * and the saved files are not deleted yet.
  403.          *
  404.          * @param int $siteIdToCheck    ID of the site
  405.          * @param int $duplicatePostId  Found duplicate post ID
  406.          * @param PostData $data        Data retrieved from the target post URL
  407.          * @param string $postUrl       URL of the post
  408.          * @param PostSaver $this       PostSaver itself
  409.          * @since 1.6.3
  410.          */
  411.         do_action('wpcc/post/after_decided_duplicate', $this->siteIdToCheck, $duplicateId, $this->data, $this->postUrl, $this);
  412.  
  413.         // Make the factories delete the things they are concerned with. Make them delete only if there is a
  414.         // saver data. If saver data does not exist, it means they did not save anything, since their savers were
  415.         // not called.
  416.         if ($saverData) {
  417.             PostDetailsService::getInstance()->delete($this->bot->getSettingsImpl(), $saverData);
  418.         }
  419.  
  420.         // If there is a PostData, delete the attachments.
  421.         if ($this->data) $this->data->deleteAttachments();
  422.  
  423.         // If there is a post saved, delete it from the database. If there is a different draft post ID, delete it as well.
  424.         $postIds = array_unique([$this->postId, $this->draftPostId]);
  425.         foreach($postIds as $postId) $this->deletePost($postId);
  426.  
  427.         // If there are gallery attachment IDs, delete them as well.
  428.         if ($saverData && $saverData->getGalleryAttachmentIds()) {
  429.             foreach($saverData->getGalleryAttachmentIds() as $mediaId) wp_delete_post($mediaId, true);
  430.         }
  431.  
  432.         $this->resetLastCrawled($this->siteIdToCheck);
  433.  
  434.         // Set this URL as saved so that this won't be tried to be saved again and unlock it.
  435.         Factory::databaseService()->updateUrlSavedStatus($this->urlTuple->id, true, null, $this->urlTuple->update_count, false);
  436.  
  437.         /*
  438.          * Notify the user
  439.          */
  440.  
  441.         $msg0 = _wpcc('A duplicate post has been found.');
  442.  
  443.         $msg1 = sprintf(
  444.             _wpcc('Current URL: %1$s, Duplicate post ID: %2$s, Duplicate post title: %3$s, Site ID: %4$s.'),
  445.             $this->postUrl,
  446.             $duplicateId,
  447.             get_the_title($duplicateId),
  448.             $this->siteIdToCheck
  449.         );
  450.  
  451.         $msg2 = _wpcc('The URL is not saved and it is marked as saved so that it will not be tried again.');
  452.  
  453.         $info = Information::fromInformationMessage(
  454.             InformationMessage::DUPLICATE_POST,
  455.             implode(' ', [$msg0, $msg1, $msg2]),
  456.             InformationType::INFO
  457.         );
  458.  
  459.         Informer::add($info->setException($e)->addAsLog());
  460.     }
  461.  
  462.     /**
  463.      * Delete post media, thumbnail and the post itself with ID
  464.      *
  465.      * @param int $postId ID of the post to be deleted
  466.      * @since 1.8.0
  467.      */
  468.     private function deletePost($postId) {
  469.         if (!$postId) return;
  470.  
  471.         // Delete the thumbnail
  472.         Utils::deletePostThumbnail($postId);
  473.  
  474.         // Delete the attachments
  475.         foreach(get_attached_media('image', $postId) as $mediaPost) wp_delete_post($mediaPost->ID);
  476.  
  477.         // Delete the post without sending it to trash.
  478.         wp_delete_post($postId, true);
  479.     }
  480.  
  481.     /**
  482.      * Assigns {@link urlToCrawl}, {@link isFirstPage} and {@link urlTuple} instance variables, considering whether
  483.      * this is a recrawl or not.
  484.      *
  485.      * @param int|null $lastCrawledUrlId
  486.      * @throws StopSavingException
  487.      */
  488.     private function prepareUrlTupleToCrawl($lastCrawledUrlId) {
  489.         global $wpdb;
  490.  
  491.         // Decide what we're doing. Crawling a next page for the same post, or a new post?
  492.         if($this->nextPageUrl && $lastCrawledUrlId) {
  493.             // We're getting a next page for a post.
  494.             $this->isFirstPage = false;
  495.  
  496.             $query = "SELECT * FROM " . Factory::databaseService()->getDbTableUrlsName() . " WHERE id = %d";
  497.             $results = $wpdb->get_results($wpdb->prepare($query, $lastCrawledUrlId));
  498.  
  499.             // If the URL is not found, then reset the cron options for this site and stop.
  500.             if (empty($results)) {
  501.                 error_log(
  502.                     "WPCC - There are a next page URL and a last crawled URL ID, but the URL does not exist in database."
  503.                     . " URL ID: " . $lastCrawledUrlId
  504.                     . ", Next Page URL: " . $this->nextPageUrl
  505.                 );
  506.  
  507.                 if($this->updateLastCrawled) {
  508.                     $this->resetLastCrawled($this->siteIdToCheck);
  509.  
  510.                 } else {
  511.                     error_log("WPCC - CRON settings for last-crawled are not reset. This may cause a loop where no post will be saved.");
  512.                 }
  513.  
  514.                 $this->addError(ErrorType::URL_TUPLE_NOT_EXIST);
  515.                 Informer::add(Information::fromInformationMessage(
  516.                     InformationMessage::URL_TUPLE_NOT_EXIST,
  517.                     null,
  518.                     InformationType::ERROR
  519.                 )->addAsLog());
  520.  
  521.                 // Stop crawling
  522.                 throw new StopSavingException();
  523.             }
  524.  
  525.             // Get the URL tuple we will work on
  526.             $this->urlTuple = $results[0];
  527.  
  528.             // Set the page url we should crawl
  529.             $this->urlToCrawl = $this->nextPageUrl;
  530.  
  531.         } else {
  532.             // We're getting a specified post or a random-ish one
  533.             $this->urlTuple = $lastCrawledUrlId ? Factory::databaseService()->getUrlById($lastCrawledUrlId) : null;
  534.  
  535.             if(!$this->urlTuple || (!$this->isRecrawl && $this->urlTuple->is_saved)) {
  536.                 // We're getting a new post. Let's find a URL tuple to save.
  537.                 $this->urlTuple = $this->getUrlTupleToCrawl($this->siteIdToCheck, $lastCrawledUrlId);
  538.  
  539.                 // If no URL is found, then reset the cron options for this site and stop.
  540.                 if($this->urlTuple === null) {
  541.                     error_log("WPCC - No URL is found in the database."
  542.                         . " Site ID to check: " . ($this->siteIdToCheck ? $this->siteIdToCheck : 'does not exist')
  543.                         . ", Last Crawled URL ID: " . ($lastCrawledUrlId ? $lastCrawledUrlId : 'does not exist')
  544.                     );
  545.  
  546.                     if($this->updateLastCrawled) {
  547.                         $this->resetLastCrawled($this->siteIdToCheck);
  548.  
  549.                     } else {
  550.                         error_log("WPCC - CRON settings for last-crawled are not reset. This may cause a loop where no post will be saved.");
  551.                     }
  552.  
  553.                     $this->addError(ErrorType::URL_TUPLE_NOT_EXIST);
  554.                     Informer::add(Information::fromInformationMessage(
  555.                         InformationMessage::URL_TUPLE_NOT_EXIST,
  556.                         null,
  557.                         InformationType::ERROR
  558.                     )->addAsLog());
  559.  
  560.                     // Stop crawling
  561.                     throw new StopSavingException();
  562.                 }
  563.             }
  564.  
  565.             // Set the page url we should crawl
  566.             $this->urlToCrawl = $this->urlTuple->url;
  567.  
  568.         }
  569.  
  570.         if(static::$DEBUG) var_dump($this->urlTuple);
  571.  
  572.         // Do not proceed if this URL tuple is locked.
  573.         if($this->urlTuple->is_locked) {
  574.             $this->addError(ErrorType::URL_LOCKED);
  575.             Informer::add(Information::fromInformationMessage(
  576.                 InformationMessage::URL_LOCKED,
  577.                 null,
  578.                 InformationType::ERROR
  579.             )->addAsLog());
  580.  
  581.             // Stop crawling
  582.             throw new StopSavingException();
  583.         }
  584.     }
  585.  
  586.     /**
  587.      * Sends a request to the target URL, retrieves a PostData, and assigns it to {@link data}.
  588.      *
  589.      * @throws StopSavingException
  590.      */
  591.     private function preparePostData() {
  592.         $this->data = $this->bot->crawlPost($this->postUrl);
  593.         $this->setRequestMade(true);
  594.  
  595.         // If there is an error with the connection, reset last crawled and set this URL as saved. By this way,
  596.         // this URL won't be tried again in the future.
  597.         if($this->data === null) {
  598.             $this->resetLastCrawled($this->siteIdToCheck);
  599.  
  600.             $this->addError(ErrorType::URL_COULD_NOT_BE_FETCHED);
  601.             Informer::add(Information::fromInformationMessage(
  602.                 InformationMessage::URL_COULD_NOT_BE_FETCHED,
  603.                 $this->postUrl,
  604.                 InformationType::ERROR
  605.             )->addAsLog());
  606.  
  607.             // If the URL tuple does not have a post, delete it.
  608.             if(!$this->urlTuple->saved_post_id) {
  609.                 Factory::databaseService()->deleteUrl($this->urlTuple->id);
  610.  
  611.                 // Write an error
  612.                 error_log("WPCC - The URL cannot be fetched (" . $this->postUrl . "). There was a connection error. The URL is
  613.                    deleted.");
  614.  
  615.                 // Stop saving
  616.                 throw new StopSavingException();
  617.             }
  618.  
  619.             // Set this URL as saved
  620.             Factory::databaseService()->updateUrlSavedStatus($this->urlTuple->id, true, $this->urlTuple->saved_post_id, $this->urlTuple->update_count, false);
  621.  
  622.             // If this is a recrawl, mark this URL as recrawled so that it won't be tried again and again.
  623.             if($this->isRecrawl) {
  624.                 Factory::databaseService()->updateUrlRecrawledStatus($this->urlTuple->id, $this->urlTuple->update_count + 1, false);
  625.             }
  626.  
  627.             // Write an error
  628.             error_log("WPCC - The URL cannot be fetched (" . $this->postUrl . "). There was a connection error. The URL is
  629.                marked as saved now. Last crawled settings are reset.");
  630.  
  631.             // Stop saving
  632.             throw new StopSavingException();
  633.         }
  634.  
  635.     }
  636.  
  637.     /**
  638.      * Prepares {@link nextPageUrl} and {@link nextPageUrls}
  639.      */
  640.     private function prepareNextPageUrl() {
  641.         // Reset next page variables and assign them according to the data.
  642.         $this->nextPageUrl = '';
  643.  
  644.         // If the post should be paginated, get the next page's URL (or URLs) and store it as option
  645.         if($this->data->isPaginate()) {
  646.             if($this->data->getNextPageUrl()) {
  647.                 // The post has a next page URL on each page.
  648.                 $this->nextPageUrl = $this->data->getNextPageUrl();
  649.  
  650.             } else if($this->data->getAllPageUrls()) {
  651.  
  652.                 if(static::$DEBUG) var_dump("All page URLs are found.");
  653.  
  654.                 // If there is no next page URLs, then this is the first time we crawl this post.
  655.                 // First, save all page URLs.
  656.                 if(!$this->nextPageUrls || empty($this->nextPageUrls)) {
  657.                     if(static::$DEBUG) var_dump('Next Page URLs is false or empty. Get them from the data.');
  658.                     // The post has all URLs for pages in a page.
  659.                     $this->nextPageUrls = $this->data->getAllPageUrls();
  660.  
  661.                     // Check if the urls array contains the current page. If so, remove the current page.
  662.                     foreach ($this->nextPageUrls as $key => &$mUrl) {
  663.                         if ($mUrl["data"] == $this->postUrl) {
  664.                             unset($this->nextPageUrls[$key]);
  665.                             if(static::$DEBUG) var_dump("Unset " . $mUrl);
  666.                         }
  667.                     }
  668.  
  669.                     // Reset the keys of the array
  670.                     $this->nextPageUrls = array_values(array_map(function($url) {
  671.                         return $url["data"];
  672.                     }, $this->nextPageUrls));
  673.                 }
  674.  
  675.                 if(static::$DEBUG) var_dump("Next Page URLs: ");
  676.                 if(static::$DEBUG) var_dump($this->nextPageUrls);
  677.  
  678.                 // Get the next page URL.
  679.                 if(!empty($this->nextPageUrls)) {
  680.                     if(static::$DEBUG) var_dump("Next page URLs is not empty. Find next page URL.");
  681.                     if(static::$DEBUG) var_dump("Current URL is: " . $this->urlToCrawl);
  682.  
  683.                     // We have next page URLs. Find the next page URL.
  684.                     $currentUrlPos = false;
  685.                     foreach ($this->nextPageUrls as $key => $url) {
  686.                         if(static::$DEBUG) var_dump("Possible Current URL: " . $url);
  687.  
  688.                         if ($url == $this->urlToCrawl) {
  689.                             $currentUrlPos = $key;
  690.  
  691.                             if(static::$DEBUG) var_dump("Current URL pos is found as " . $currentUrlPos . ", which is " . $url);
  692.  
  693.                             break;
  694.                         }
  695.                     }
  696.  
  697.                     // If current URL is found among next page URLs, and it is not the last URL, then we can get the next
  698.                     // URL as next page URL.
  699.                     if ($currentUrlPos !== false && $currentUrlPos < sizeof($this->nextPageUrls) - 1) {
  700.                         if(static::$DEBUG) var_dump("Current URL position is valid: " . $currentUrlPos . ". Get the next item in the list.");
  701.                         $this->nextPageUrl = $this->nextPageUrls[$currentUrlPos + 1];
  702.  
  703.                         // If current URL is not found among next page URLs, then get the first URL as next page URL.
  704.                     } else if($currentUrlPos === false) {
  705.                         if(static::$DEBUG) var_dump("Current URL Position is false. Get the first URL in the list.");
  706.                         $this->nextPageUrl = $this->nextPageUrls[0];
  707.                     }
  708.  
  709.                     // Otherwise, next page URL will be empty, since it is not assigned.
  710.  
  711.                     // Also, since there is no next page, reset all next pages.
  712.                     if(!$this->nextPageUrl) $this->nextPageUrls = [];
  713.                 }
  714.  
  715.             }
  716.         }
  717.     }
  718.  
  719.     /**
  720.      * Checks the content existence and, if it does not exist, sets next page URLs as null. Sets the value of
  721.      * {@link contentExists}.
  722.      */
  723.     private function checkAndReactToContentExistence() {
  724.         // Sometimes, next pages may be empty due to a malfunction of the site. Scenario is that the post does not have
  725.         // content on the next page, but there is a link on the page indicating there is a next page. In this case,
  726.         // the crawler cannot find any content in the next page. If this is the case, do not continue to next pages.
  727.         $this->contentExists = true;
  728.  
  729.         // Get main post template
  730.         $templateMain = $this->getSetting('_post_template_main');
  731.         $clearedTemplateMain = $templateMain;
  732.  
  733.         // Remove short codes
  734.         // First get predefined short codes
  735.         $allShortCodes = Factory::postService()->getPredefinedShortCodes();
  736.  
  737.         // Now get user-defined short codes
  738.         $shortCodeSelectors = $this->getSetting('_post_custom_content_shortcode_selectors');
  739.         if($shortCodeSelectors) {
  740.             foreach ($shortCodeSelectors as $selector) {
  741.                 if (isset($selector["short_code"]) && $selector["short_code"]) {
  742.                     $allShortCodes[] = "[" . $selector["short_code"] . "]";
  743.                 }
  744.             }
  745.         }
  746.  
  747.         // Now remove them from the original raw template
  748.         foreach($allShortCodes as $shortCode) {
  749.             $clearedTemplateMain = str_replace($shortCode, "", $clearedTemplateMain);
  750.         }
  751.  
  752.         if(static::$DEBUG) var_dump("Cleared Template Main:" . $clearedTemplateMain);
  753.         if(static::$DEBUG) var_dump("Original Template Main: " . $templateMain);
  754.         if(static::$DEBUG) var_dump($allShortCodes);
  755.         if(static::$DEBUG) var_dump(mb_strlen($this->data->getTemplate()) <= mb_strlen($clearedTemplateMain));
  756.  
  757.         // Now, check if the prepared template's length is greater than that of short-codes-removed template. So, if
  758.         // the prepared template's length is less, it means the page is empty. Hence, we do not have any variables in
  759.         // the page.
  760.         if (!$this->data->getTemplate() || mb_strlen($this->data->getTemplate()) <= mb_strlen($clearedTemplateMain)) {
  761.             $this->nextPageUrl = null;
  762.             $this->nextPageUrls = null;
  763.             $this->contentExists = false;
  764.         }
  765.     }
  766.  
  767.     /**
  768.      * Prepares post data array that contains the required WordPress post variables and their values, using {@link data}.
  769.      * @return array Prepared post data array
  770.      */
  771.     private function createWPPostData() {
  772.         // Get general settings
  773.         // If this site has different settings, then use them.
  774.         if($this->getSetting('_do_not_use_general_settings')) {
  775.             $allowComments  = $this->getSetting('_wpcc_allow_comments');
  776.             $postStatus     = $this->getSetting('_wpcc_post_status');
  777.             $postType       = $this->getSetting('_wpcc_post_type');
  778.             $postAuthor     = $this->getSetting('_wpcc_post_author');
  779.             $tagLimit       = $this->getSetting('_wpcc_post_tag_limit');
  780.             $postPassword   = $this->getSetting('_wpcc_post_password');
  781.  
  782.             // Otherwise, go on with general settings.
  783.         } else {
  784.             $allowComments  = get_option('_wpcc_allow_comments');
  785.             $postStatus     = get_option('_wpcc_post_status');
  786.             $postType       = get_option('_wpcc_post_type');
  787.             $postAuthor     = get_option('_wpcc_post_author');
  788.             $tagLimit       = get_option('_wpcc_post_tag_limit', 0);
  789.             $postPassword   = get_option('_wpcc_post_password');
  790.         }
  791.  
  792.         // Prepare the data
  793.         if($this->data->getPreparedTags() && $tagLimit && ((int) $tagLimit) > 0 && sizeof($this->data->getPreparedTags()) > $tagLimit) {
  794.             $this->data->setPreparedTags(array_slice($this->data->getPreparedTags(), 0, $tagLimit));
  795.         }
  796.  
  797.         // Check if we have a draft post ID to edit
  798.         $content = '';
  799.         $sourceUrls = [];
  800.         $post = null;
  801.  
  802.         if($this->draftPostId && $post = get_post($this->draftPostId)) {
  803.  
  804.             if(!$this->isFirstPage) {
  805.  
  806.                 $content = $post->post_content;
  807.                 if(!empty($content)) {
  808.                     $content = $content . "<!--nextpage-->";
  809.                 }
  810.  
  811.                 // Get source URLs
  812.                 $sourceUrls = get_post_meta($this->draftPostId, $this->postMetaSourceUrls, true);
  813.  
  814.                 if(!$sourceUrls) $sourceUrls = [];
  815.             }
  816.         }
  817.  
  818.         // Append current source URL
  819.         $sourceUrls[] = $this->postUrl;
  820.  
  821.         /*
  822.          * PREPARE POST DATA
  823.          */
  824.  
  825.         // If post author is not set, then set the first administrator as post author.
  826.         if(!$postAuthor) {
  827.             $userQuery = new WP_User_Query([
  828.                 'role'      => 'Administrator',
  829.                 'fields'    =>  'ID',
  830.                 'number'    =>  1
  831.             ]);
  832.             $postAuthor = $userQuery->get_results()[0];
  833.         }
  834.  
  835.         $postData = [
  836.             'ID' => $this->draftPostId ? $this->draftPostId : 0,
  837.             // If there is a next page to append to this post, then make this post's status draft no matter what.
  838.             // Otherwise, go on with the settings.
  839.             'post_status'       => $this->nextPageUrl           ? 'draft'       : ($postStatus ? $postStatus : 'draft'),
  840.             'post_type'         => post_type_exists($postType)  ? $postType     : 'post',
  841.             'post_password'     => $postPassword                ? $postPassword : '',
  842.             'post_category'     => [$this->urlTuple->category_id],
  843.             'meta_input'        => [
  844.                 // Store the source URLs just in case
  845.                 $this->postMetaSourceUrls       =>  $sourceUrls
  846.             ],
  847.         ];
  848.  
  849.         // If this is the first page of the newly created post.
  850.         if(!$this->isRecrawl && $this->isFirstPage) {
  851.             // Set the date
  852.             $postDate = $this->data->getDateCreated();
  853.             $postData["post_date"] = $postDate;
  854.  
  855.             // Set the slug if there exists one
  856.             if ($this->data->getSlug()) $postData['post_name'] = $this->data->getSlug();
  857.         }
  858.  
  859.         // If content exists, append in to the content of the original post
  860.         if($this->contentExists) {
  861.             $postData['post_content'] = $content . $this->data->getTemplate();
  862.  
  863.             // Otherwise, do not change the content.
  864.         } else if($post) {
  865.             $postData['post_content'] = $post->post_content;
  866.         }
  867.  
  868.         // If this is the first page, set other required data
  869.         if($this->isFirstPage || !$post) {
  870.             $postData = array_merge($postData, [
  871.                 'post_author'       => $postAuthor,
  872.                 'post_title'        => $this->data->getTitle()        ? $this->data->getTitle() : '',
  873.                 'post_excerpt'      => $this->data->getExcerpt()      ? $this->data->getExcerpt()["data"] : '',
  874.                 'comment_status'    => $allowComments                 ? 'open' : 'closed',
  875.                 'tags_input'        => $this->data->getPreparedTags() ? $this->data->getPreparedTags() : ''
  876.             ]);
  877.  
  878.             if($post) {
  879.                 $postData = array_merge($postData, [
  880.                     'post_date'         => $post->post_date,
  881.                     'post_date_gmt'     => $post->post_date_gmt,
  882.                     'post_name'         => $post->post_name,
  883.                     'guid'              => $post->guid,
  884.                 ]);
  885.             }
  886.  
  887.         // Set everything from the current found post. Even if this is an update, WP requires some variables again.
  888.         } else if($post) {
  889.             $postData = array_merge($postData, [
  890.                 'post_author'       => $post->post_author,
  891.                 'post_title'        => $post->post_title,
  892.                 'post_excerpt'      => $post->post_excerpt,
  893.                 'comment_status'    => $post->comment_status,
  894.                 'post_date'         => $post->post_date,
  895.                 'post_date_gmt'     => $post->post_date_gmt,
  896.                 'post_name'         => $post->post_name,
  897.                 'guid'              => $post->guid,
  898.             ]);
  899.         }
  900.  
  901.         return $postData;
  902.     }
  903.  
  904.     /**
  905.      * Checks if the post is duplicate. If it is, deletes its attachments, deletes the draft post, resets last-crawled
  906.      * CRON metas, marks the URL tuple as saved.
  907.      *
  908.      * @throws DuplicatePostException If the post is duplicate and saving process should no longer continue
  909.      */
  910.     private function handleIfDuplicate() {
  911.         // No need to do this when recrawling.
  912.         if ($this->isRecrawl) return;
  913.  
  914.         // Try to find a duplicate post
  915.         $duplicatePostId = $this->isDuplicate($this->urlTuple->url, $this->data->getWpPostData(), $this->isFirstPage, !$this->nextPageUrl);
  916.  
  917.         // If none, stop.
  918.         if (!$duplicatePostId) return;
  919.  
  920.         // This is a duplicate post. Throw a duplicate post exception.
  921.         throw new DuplicatePostException(_wpcc("A duplicate post is found."), $duplicatePostId);
  922.     }
  923.  
  924.     /**
  925.      * Inserts given post data into the database. This also sets {@link postId} as the inserted post's ID.
  926.      *
  927.      * @throws StopSavingException
  928.      */
  929.     private function insertPostData() {
  930.         // Get the post data
  931.         $postData = $this->data->getWpPostData();
  932.  
  933.         /**
  934.          * Modify post data before it is saved to the database.
  935.          *
  936.          * @param array $postData       The data that will be passed to wp_insert_post function.
  937.          * @param PostData $data        Data retrieved from the target post page's source code
  938.          * @param PostBot $bot          PostBot object used to retrieve the data from the target page
  939.          * @param PostSaver $this       PostSaver itself
  940.          * @param int $siteIdToCheck    ID of the site that stores the settings
  941.          * @param string $postUrl       URL of the post
  942.          * @param array $urlTuple       An array containing info about the URL. This array is retrieved from the URL table.
  943.          *                              Hence, it has all the columns and their values in that table.
  944.          * @param bool isRecrawl        True if this is fired for a recrawl.
  945.          *
  946.          * @return array|null $postData Modified post data. Return null if you do not want to save the post.
  947.          * @since 1.6.3
  948.          */
  949.         $postData = apply_filters('wpcc/post/wp-post', $postData, $this->data, $this->bot, $this, $this->siteIdToCheck, $this->postUrl, $this->urlTuple, $this->isRecrawl);
  950.  
  951.         // If the post data is null, do not save the post.
  952.         if($postData === null) throw new StopSavingException();
  953.  
  954.         /**
  955.          * Fires just before a post is inserted/updated.
  956.          *
  957.          * @param array $postData       Data that will be used to create/update a post in the database. If 'ID' key has
  958.          *                              a valid integer value, this means this is fired for an update.
  959.          * @param PostData $data        Data retrieved from the target site according to the site settings
  960.          * @param PostSaver $this       PostSaver itself
  961.          * @param int $siteIdToCheck    ID of the site for which the post is retrieved
  962.          * @param string $postUrl       URL of the post
  963.          * @param array $urlTuple       An array containing the URL data. The keys are columns of the DB table storing the URLs.
  964.          * @param bool $isRecrawl       True if this is a recrawl.
  965.          * @param bool $isFirstPage     True if this is the first page of the post
  966.          * @since 1.6.3
  967.          */
  968.         do_action('wpcc/post/before_save', $postData, $this->data, $this, $this->siteIdToCheck, $this->postUrl, $this->urlTuple, $this->isRecrawl, $this->isFirstPage);
  969.  
  970.         //
  971.  
  972.         $this->postId = wp_insert_post($postData);
  973.  
  974.         //
  975.  
  976.         /**
  977.          * Fires just after a post is inserted/updated.
  978.          *
  979.          * @param array $postData       Data that was used to create/update a post in the database. If 'ID' key has
  980.          *                              a valid integer value, this means this is fired for an update.
  981.          * @param PostData $data        Data retrieved from the target site according to the site settings
  982.          * @param PostSaver $this       PostSaver itself
  983.          * @param int $siteIdToCheck    ID of the site for which the post is retrieved
  984.          * @param string $postUrl       URL of the post
  985.          * @param array $urlTuple       An array containing the URL data. The keys are columns of the DB table storing the URLs.
  986.          * @param bool $isRecrawl       True if this is a recrawl.
  987.          * @param int $postId           ID of the saved post
  988.          * @param bool $isFirstPage     True if this is the first page of the post
  989.          * @since 1.6.3
  990.          */
  991.         do_action('wpcc/post/after_save', $postData, $this->data, $this, $this->siteIdToCheck, $this->postUrl, $this->urlTuple, $this->isRecrawl, $this->postId, $this->isFirstPage);
  992.  
  993.         if($this->draftPostId && $this->postId != $this->draftPostId) {
  994.             error_log("Draft post ID ({$this->draftPostId}) and inserted post ID ({$this->postId}) are different.");
  995.         }
  996.  
  997.         if(static::$DEBUG) var_dump("Inserted Post ID: " . $this->postId);
  998.  
  999.         // Set the WP post data to PostData, since $postData might have been modified
  1000.         $this->data->setWpPostData($postData);
  1001.     }
  1002.  
  1003.     /**
  1004.      * Sets the custom post taxonomy if the post's category belongs to a custom category taxonomy.
  1005.      *
  1006.      * @since 1.8.0
  1007.      */
  1008.     private function saveCategories() {
  1009.         // Do this only in the first page
  1010.         if (!$this->isFirstPage) return;
  1011.  
  1012.         // Get the categories
  1013.         $categories = Utils::getCategories($this->getSettingsImpl());
  1014.  
  1015.         // Find the selected category's taxonomy
  1016.         $taxonomy = null;
  1017.         foreach($categories as $categoryItem) {
  1018.             $id = Utils::array_get($categoryItem, 'id');
  1019.             if (!$id) continue;
  1020.  
  1021.             if ($id == $this->urlTuple->category_id) {
  1022.                 $taxonomy = Utils::array_get($categoryItem, 'taxonomy');
  1023.                 break;
  1024.             }
  1025.         }
  1026.  
  1027.         // If a taxonomy is not found, use the default WP category taxonomy
  1028.         if (!$taxonomy) $taxonomy = 'category';
  1029.  
  1030.         // Set the categories under the defined taxonomy
  1031.         $this->insertAndSetPostCategories($taxonomy);
  1032.     }
  1033.  
  1034.     /**
  1035.      * Sets the category of the post
  1036.      *
  1037.      * @param string $catTaxonomy Category taxonomy
  1038.      * @since 1.8.0
  1039.      */
  1040.     private function insertAndSetPostCategories($catTaxonomy = 'category') {
  1041.         // If this is a recrawl, remove already-existing categories.
  1042.         if ($this->isRecrawl) {
  1043.             wp_set_post_terms($this->postId, [], $catTaxonomy, false);
  1044.         }
  1045.  
  1046.         // Define the category taxonomy and get the category names that should be added as the post's categories.
  1047.         $categoryNames = $this->data->getCategoryNames();
  1048.  
  1049.         // Get the post category defined in the category map
  1050.         $term = get_term_by('id', $this->urlTuple->category_id, $catTaxonomy);
  1051.         $mainCatTermId = $term && isset($term->term_id) ? $term->term_id : null;
  1052.  
  1053.         // If there is no category name, set the main category ID as the category ID specified in the category map and
  1054.         // stop.
  1055.         if (!$categoryNames) {
  1056.             if($mainCatTermId !== null) {
  1057.                 wp_set_post_terms($this->postId, $this->urlTuple->category_id, $catTaxonomy, false);
  1058.             }
  1059.  
  1060.             return;
  1061.         }
  1062.  
  1063.         // Get whether the user wants to use the category ID defined in the category map or not
  1064.         $doNotAddCategoryDefinedInMap = $this->getSettingForCheckbox('_post_category_do_not_add_category_in_map');
  1065.  
  1066.         // Insert/retrieve the category term IDs.
  1067.         $categoryIds = $this->insertPostCategories($categoryNames, $catTaxonomy, $doNotAddCategoryDefinedInMap ? null : $mainCatTermId);
  1068.  
  1069.         // If there is no category, stop.
  1070.         if (!$categoryIds) return;
  1071.  
  1072.         // Set the category IDs of the post
  1073.         $result = wp_set_post_terms($this->postId, $categoryIds, $catTaxonomy, false);
  1074.         if (is_wp_error($result)) {
  1075.             $info = Informer::addError(_wpcc('Category IDs could not be assigned to the post.'));
  1076.             if (is_a($result, \WP_Error::class)) {
  1077.                 /** @var \WP_Error $result */
  1078.                 $info->setDetails($info->getDetails() . ' ' . $result->get_error_message());
  1079.             }
  1080.  
  1081.             $info->addAsLog();
  1082.         }
  1083.     }
  1084.  
  1085.     /**
  1086.      * Inserts/retrieves product categories considering the settings.
  1087.      *
  1088.      * @param array    $categoryNames Category names to be set as product's category, possibly retrieved from
  1089.      *                                {@link WooCommerceDetailData::getCategoryNames()}. See
  1090.      *                                {@link WooCommerceDetailData::getCategoryNames()} for details.
  1091.      * @param string   $catTaxonomy   Taxonomy name to which the categories inserted. Possible 'product_cat'
  1092.      * @param int|null $mainCatTermId Category ID that will be the parent of the inserted categories. Null if you do
  1093.      *                                not want to set a parent to the to-be-inserted categories.
  1094.      * @return array Array of taxonomy IDs that can be assigned to the product
  1095.      * @since 1.8.0
  1096.      */
  1097.     private function insertPostCategories($categoryNames, $catTaxonomy, $mainCatTermId) {
  1098.         // Insert/retrieve the category taxonomies
  1099.         $categoryIds = [];
  1100.         foreach($categoryNames as $catNameValue) {
  1101.             // If the category name value is not an array, make it an array to keep the algorithm simple.
  1102.             if (!is_array($catNameValue)) $catNameValue = [$catNameValue];
  1103.  
  1104.             // We need to add all categories hierarchically.
  1105.  
  1106.             // Store the parent term ID.
  1107.             $parentTermId = $mainCatTermId;
  1108.  
  1109.             $isError = false;
  1110.             $hierarchicalCatIds = [];
  1111.  
  1112.             // Add the categories one by one
  1113.             foreach($catNameValue as $catName) {
  1114.                 $args = $parentTermId !== null ? ['parent' => $parentTermId] : [];
  1115.                 $termId = Utils::insertTerm($catName, $catTaxonomy, $args);
  1116.  
  1117.                 // If a term ID could not be retrieved, stop.
  1118.                 if ($termId === null) {
  1119.                     $isError = true;
  1120.                     break;
  1121.                 }
  1122.  
  1123.                 // Add the term ID to the hierarchical category IDs
  1124.                 $hierarchicalCatIds[] = $termId;
  1125.  
  1126.                 // Set this term ID as the previous term ID so that it can be set as the next category's parent.
  1127.                 $parentTermId = $termId;
  1128.             }
  1129.  
  1130.             // If there was an error, it means at least one of the categories could not be inserted. In this case,
  1131.             // do not set successfully-retrieved category IDs as the category of the post, since the user wants
  1132.             // all of the categories.
  1133.             if ($isError) continue;
  1134.  
  1135.             if ($hierarchicalCatIds) $categoryIds = array_merge($categoryIds, $hierarchicalCatIds);
  1136.  
  1137.         }
  1138.  
  1139.         return $categoryIds;
  1140.     }
  1141.  
  1142.     /*
  1143.      *
  1144.      */
  1145.  
  1146.     /**
  1147.      * Deletes already-existing attachments when updating the post, and when this is the first page of the post.
  1148.      */
  1149.     private function maybeDeleteAttachments() {
  1150.         // Do this only when this is the first page, we are updating the post, and a post ID exists
  1151.         if(!$this->isFirstPage || !$this->isRecrawl || !$this->postId) return;
  1152.  
  1153.         // Delete already-attached media
  1154.         $alreadyAttachedMedia = get_attached_media('image', $this->postId);
  1155.         foreach($alreadyAttachedMedia as $mediaPost) {
  1156.             wp_delete_post($mediaPost->ID);
  1157.         }
  1158.  
  1159.         // Delete the already existing thumbnail of the post
  1160.         Utils::deletePostThumbnail($this->postId);
  1161.     }
  1162.  
  1163.     /**
  1164.      * Saves featured image of the post
  1165.      */
  1166.     private function saveFeaturedImage() {
  1167.         // If this is not the first page or the post ID does not exist, stop.
  1168.         if(!$this->isFirstPage || !$this->postId) return;
  1169.  
  1170.         // Get the thumbnail image file path
  1171.         $mediaFile = null;
  1172.         if($this->urlTuple->thumbnail_url) {
  1173.             $thumbnailUrl = $this->urlTuple->thumbnail_url;
  1174.  
  1175.             // If there is no thumbnail image URL, stop.
  1176.             if (!$thumbnailUrl) return;
  1177.  
  1178.             // Prepare the thumbnail URL
  1179.             try {
  1180.                 $thumbnailUrl = $this->bot->resolveUrl($thumbnailUrl);
  1181.             } catch (\Exception $e) {
  1182.                 Informer::addError(_wpcc('URL could not be resolved') . ' - ' . $thumbnailUrl)->addAsLog();
  1183.             }
  1184.  
  1185.             // Save the featured image
  1186.             $file = MediaService::getInstance()->saveMedia($thumbnailUrl, $this->getSetting("_wpcc_http_user_agent", null));
  1187.             if (!$file) return;
  1188.  
  1189.             $mediaFile = new MediaFile($thumbnailUrl, $file['file']);
  1190.  
  1191.         } else if($this->data->getThumbnailData()) {
  1192.             $mediaFile = $this->data->getThumbnailData();
  1193.         }
  1194.  
  1195.         // If there is no file, stop.
  1196.         if (!$mediaFile) return;
  1197.  
  1198.         // Save as attachment and get the attachment id.
  1199.         try {
  1200.             $thumbnailAttachmentId = MediaService::getInstance()->insertMedia($this->postId, $mediaFile);
  1201.         } catch (\Exception $e) {
  1202.             Informer::addError(_wpcc('Media file does not have a local path.'))->addAsLog();
  1203.             return;
  1204.         }
  1205.  
  1206.         // Set the media ID
  1207.         $mediaFile->setMediaId($thumbnailAttachmentId);
  1208.  
  1209.         // Set this attachment as post thumbnail
  1210.         set_post_thumbnail($this->postId, $thumbnailAttachmentId);
  1211.     }
  1212.  
  1213.     /**
  1214.      * Saves meta keywords
  1215.      */
  1216.     private function saveMetaKeywords() {
  1217.         // If this is not the first page or the post ID does not exist, stop.
  1218.         if(!$this->isFirstPage || !$this->postId) return;
  1219.  
  1220.         if(!$this->data->getMetaKeywords()) return;
  1221.  
  1222.         $key = get_option('_wpcc_meta_keywords_meta_key');
  1223.         if (!$key) return;
  1224.  
  1225.         Utils::savePostMeta($this->postId, $key, $this->data->getMetaKeywords(), true);
  1226.     }
  1227.  
  1228.     /**
  1229.      * Saves meta description
  1230.      */
  1231.     private function saveMetaDescription() {
  1232.         // If this is not the first page or the post ID does not exist, stop.
  1233.         if(!$this->isFirstPage || !$this->postId) return;
  1234.  
  1235.         if(!$this->data->getMetaDescription()) return;
  1236.  
  1237.         $key = get_option('_wpcc_meta_description_meta_key');
  1238.         if(!$key) return;
  1239.  
  1240.         Utils::savePostMeta($this->postId, $key, $this->data->getMetaDescription(), true);
  1241.     }
  1242.  
  1243.     /**
  1244.      * Saves attachments
  1245.      *
  1246.      * @return array Gallery attachment IDs
  1247.      */
  1248.     private function saveAttachments() {
  1249.         if(!$this->postId || !$this->data->getAttachmentData()) return [];
  1250.  
  1251.         $galleryAttachmentIds = [];
  1252.  
  1253.         foreach($this->data->getAttachmentData() as $mediaFile) {
  1254.             // Insert the media
  1255.             try {
  1256.                 $attachmentId = MediaService::getInstance()->insertMedia($this->postId, $mediaFile);
  1257.             } catch (\Exception $e) {
  1258.                 Informer::addError(_wpcc('Media file does not have a local path.'))->addAsLog();
  1259.                 continue;
  1260.             }
  1261.  
  1262.             // Set the media ID
  1263.             $mediaFile->setMediaId($attachmentId);
  1264.  
  1265.             if($mediaFile->isGalleryImage()) {
  1266.                 $galleryAttachmentIds[] = $attachmentId;
  1267.             }
  1268.  
  1269.         }
  1270.  
  1271.         // Add srcset attributes to media elements in the content.
  1272.         $this->setMediaSrcSetsInContent();
  1273.  
  1274.         return $galleryAttachmentIds;
  1275.     }
  1276.  
  1277.     /**
  1278.      * Updates the post content such that media elements in the content have srcset attributes.
  1279.      *
  1280.      * @since 1.8.0
  1281.      */
  1282.     private function setMediaSrcSetsInContent() {
  1283.         // Change the template by adding srcset attributes.
  1284.         $oldTemplate = $this->addMediaSrcSetsToTemplate();
  1285.  
  1286.         // If there was no change, no need to continue.
  1287.         if ($oldTemplate === false) return;
  1288.  
  1289.         // Update the post content
  1290.         $this->updatePostContentForCurrentTemplate($oldTemplate);
  1291.     }
  1292.  
  /**
   * Adds a "srcset" attribute to the media elements found in the current template of {@link $data}.
   *
   * Nothing is changed when WordPress' srcset function is not available, when there is no attachment data or when
   * the template is empty. Attachments without a media ID or without a srcset value are skipped.
   *
   * @return string|false The template as it was before the modification if the template has changed. Otherwise,
   *                      false.
   * @since 1.8.0
   */
  private function addMediaSrcSetsToTemplate() {
      // Without the srcset function or without any attachment, there is nothing to add.
      if (!function_exists('wp_get_attachment_image_srcset') || !$this->data->getAttachmentData()) {
          return false;
      }

      $originalTemplate = $this->data->getTemplate();
      if (!$originalTemplate) {
          return false;
      }

      // The template is manipulated through a dummy crawler created from it.
      $templateCrawler = $this->bot->createDummyCrawler($originalTemplate);

      foreach($this->data->getAttachmentData() as $attachment) {
          // A srcset can be created only for the media that has an ID.
          $attachmentId = $attachment->getMediaId();
          $srcSetValue  = $attachmentId ? wp_get_attachment_image_srcset($attachmentId) : false;
          if (!$srcSetValue) {
              continue;
          }

          // Let the bot find the element of this media and assign the srcset to it.
          $this->bot->modifyMediaElement(
              $templateCrawler,
              $attachment,
              function(MediaFile $file, \DOMElement $mediaElement) use (&$srcSetValue) {
                  $mediaElement->setAttribute('srcset', $srcSetValue);
              }
          );
      }

      $modifiedTemplate = $this->bot->getContentFromDummyCrawler($templateCrawler);

      // Report "no change" when the crawler's output is the same as the input.
      if ($modifiedTemplate === $originalTemplate) {
          return false;
      }

      // Store the modified template and hand the previous one back to the caller.
      $this->data->setTemplate($modifiedTemplate);

      return $originalTemplate;
  }
  1338.  
  /**
   * Writes the current template of {@link $data} ({@link PostData::getTemplate()}) into the content of the saved
   * post and mirrors the new content in the WP post data kept in {@link $data}.
   *
   * For the first page, the post content becomes the current template. For the other pages, if the WP post data
   * has a non-empty 'post_content', occurrences of the old template in it are replaced with the current template.
   *
   * @param string $oldTemplate The template as it was before the modification. It is searched in the existing
   *                            content and replaced with the current template of {@link $data}.
   * @since 1.8.0
   */
  private function updatePostContentForCurrentTemplate($oldTemplate) {
      // The content of a post that does not exist cannot be updated.
      if (!$this->postId) {
          return;
      }

      $postDataArray = $this->data->getWpPostData();

      // Only the pages after the first one have previous content that the template was appended to.
      $previousContent = $this->isFirstPage
          ? null
          : Utils::array_get($postDataArray, 'post_content', null);

      // Swap the old template with the new one inside the previous content so that the rest of the content stays
      // as it is. When there is no previous content, the template itself is the content.
      $updatedContent = $previousContent
          ? str_replace($oldTemplate, $this->data->getTemplate(), $previousContent)
          : $this->data->getTemplate();

      wp_update_post([
          'ID'           => $this->postId,
          'post_content' => $updatedContent
      ]);

      // Keep the WP post data of the PostData in sync with the stored post.
      $postDataArray['post_content'] = $updatedContent;
      $this->data->setWpPostData($postDataArray);
  }
  1378.  
  /**
   * Saves the custom post meta found in {@link $data} for the current post.
   *
   * Each custom meta item is an array with "meta_key" and "data" keys and an optional "multiple" key. When
   * "multiple" is truthy, the value (or each item of the value, if it is an array) is added as a separate,
   * non-unique meta row. Otherwise, the value is stored with update_post_meta().
   *
   * When the first page of a post is being recrawled, the old values of all involved meta keys are deleted once,
   * before any new value is added. Deleting inside the saving loop would remove the values that were just added by
   * a previous item having the same meta key.
   */
  private function saveCustomMeta() {
      if (!$this->postId) return;

      $customMeta = $this->data->getCustomMeta();
      if (!$customMeta) return;

      // Delete old meta values first when updating. Do this only when the first page is being crawled, and only
      // once per meta key.
      if ($this->isFirstPage && $this->isRecrawl) {
          $metaKeys = array_unique(array_map(function($metaData) {
              return $metaData["meta_key"];
          }, $customMeta));

          foreach($metaKeys as $metaKey) {
              delete_post_meta($this->postId, $metaKey);
          }
      }

      foreach($customMeta as $metaData) {
          $metaValue = $metaData["data"];
          $metaKey   = $metaData["meta_key"];

          // If it must not be saved as multiple, save it as a single post meta.
          if (!isset($metaData["multiple"]) || !$metaData["multiple"]) {
              update_post_meta($this->postId, $metaKey, $metaValue);
              continue;
          }

          // A non-array value is added directly. Each item of an array value is added separately. An empty array
          // adds nothing.
          $values = is_array($metaValue) ? $metaValue : [$metaValue];
          foreach($values as $value) {
              add_post_meta($this->postId, $metaKey, $value, false);
          }
      }
  }
  1417.  
  /**
   * Saves the custom taxonomy values found in {@link $data} as terms of the current post.
   *
   * Each custom taxonomy item is an array with "taxonomy" and "data" keys and an optional "append" key that is
   * passed to wp_set_post_terms(). When the first page of a post is being recrawled, the post's existing term
   * relationships for the involved taxonomies are deleted first.
   *
   * @since 1.8.0
   */
  private function saveCustomTaxonomies() {
      if (!$this->postId) return;

      $customTaxonomies = $this->data->getCustomTaxonomies();
      if (!$customTaxonomies) return;

      // Remove the old terms when updating. This is done only while the first page is being crawled.
      if ($this->isFirstPage && $this->isRecrawl) {
          $taxonomyNames = array_unique(array_map(function($item) {
              return $item['taxonomy'];
          }, $customTaxonomies));

          wp_delete_object_term_relationships($this->postId, $taxonomyNames);
      }

      foreach($customTaxonomies as $item) {
          $taxonomy = $item['taxonomy'];
          $append   = isset($item['append']) && $item['append'];

          // Treat a single value as a list that has one item.
          $rawValues = is_array($item['data']) ? $item['data'] : [$item['data']];

          // Insert every value as a term and keep only the IDs of the terms that could be inserted.
          $insertedTermIds = array_values(array_filter(array_map(function($rawValue) use ($taxonomy) {
              return Utils::insertTerm($rawValue, $taxonomy);
          }, $rawValues)));

          // Assign the terms to the post, if there are any.
          if ($insertedTermIds) {
              wp_set_post_terms($this->postId, $insertedTermIds, $taxonomy, $append);
          }
      }
  }
  1457.  
  /**
   * Stores the state of the last post-crawling event as post meta of the site and, if no draft post is given,
   * marks the site as the last (re)crawled one.
   *
   * The meta keys are prefixed with the prefix returned by {@link getCronPostMetaPrefix()}.
   *
   * @param int         $siteId           Last updated site ID
   * @param int         $lastCrawledUrlId ID of the URL from the urls table which is crawled
   * @param string|null $nextPageUrl      Next page URL
   * @param array|null  $nextPageUrls     Next page URLs
   * @param int|null    $draftPostId      Draft post ID
   */
  private function updateLastCrawled($siteId, $lastCrawledUrlId, $nextPageUrl, $nextPageUrls, $draftPostId) {
      // Meta keys of the current task (crawl or recrawl) start with this prefix.
      $metaPrefix = $this->getCronPostMetaPrefix();

      $metaValues = [
          '_last_crawled_url_id' => $lastCrawledUrlId,
          '_post_next_page_url'  => $nextPageUrl,
          '_post_next_page_urls' => $nextPageUrls,
          '_post_draft_id'       => $draftPostId ?: '',
      ];

      foreach($metaValues as $keySuffix => $metaValue) {
          Utils::savePostMeta($siteId, $metaPrefix . $keySuffix, $metaValue, true);
      }

      Utils::savePostMeta($siteId, $metaPrefix . '_last_crawled_at', current_time('mysql'), true);

      // A draft post ID means a paged post is still being crawled. In that case, the last crawled site ID is not
      // changed so that all pages of the current post are crawled before a post from another site is retrieved.
      if ($draftPostId) {
          return;
      }

      $optionName = $this->isRecrawl ? $this->optionLastRecrawledSiteId : $this->optionLastCrawledSiteId;
      update_option($optionName, $siteId, false);
  }
  1481.  
  /**
   * Stores the given site ID in the option named by {@link $optionLastRecrawledSiteId}. The option is saved with
   * autoload disabled.
   *
   * @param int $siteId ID of the site that will be stored as the last recrawled site
   */
  public function updateLastRecrawledSiteId($siteId) {
      $optionName = $this->optionLastRecrawledSiteId;

      update_option($optionName, $siteId, false);
  }
  1490.  
  /**
   * Resets the CRON metas about the last-crawled URL by calling {@link updateLastCrawled()} with null for every
   * value other than the site ID.
   *
   * @param int $siteId ID of the site
   */
  public function resetLastCrawled($siteId) {
      $this->updateLastCrawled(
          $siteId,
          null, // Last crawled URL ID
          null, // Next page URL
          null, // Next page URLs
          null  // Draft post ID
      );
  }
  1499.  
  /**
   * Get a URL tuple to crawl. This method is good for crawling URLs uniformly, by getting a URL from a different
   * category than the category of the last crawled URL.
   *
   * When $lastCrawledUrlId is null, the most recently saved URL of the site is used as the last crawled URL. If
   * the site has no saved URL, the first found URL that needs to be saved is returned.
   *
   * The categories having an unsaved URL are retrieved ordered by their IDs so that "the next category" is well
   * defined between calls.
   *
   * @param int      $siteId           Site ID for which a URL tuple will be retrieved
   * @param int|null $lastCrawledUrlId Last crawled URL id from urls table
   * @return null|object Null if no URL that needs to be saved is found. Otherwise, the found URL tuple as object.
   */
  public function getUrlTupleToCrawl($siteId, $lastCrawledUrlId) {
      global $wpdb;
      $tableName = Factory::databaseService()->getDbTableUrlsName();

      // Condition matching the URLs that are not saved yet and not locked
      $unsavedCondition = "is_saved = FALSE AND is_locked = FALSE AND saved_post_id IS NULL";

      // If last crawled URL id is null, then get the first URL that needs to be saved.
      if ($lastCrawledUrlId === null) {
          // Get the last crawled URL ID instead of getting the first found URL ID that needs saving.
          $query = "SELECT * FROM $tableName WHERE is_saved = TRUE AND is_locked = FALSE AND saved_post_id IS NOT NULL AND post_id = %d ORDER BY saved_at DESC LIMIT 1";
          $results = $wpdb->get_results($wpdb->prepare($query, $siteId));

          // Then, if a URL is found, call this method with that URL ID so that another URL ID from a different
          // category can be found.
          if (!empty($results)) return $this->getUrlTupleToCrawl($siteId, $results[0]->id);

          // Otherwise, if there is no last crawled URL, get the first URL that needs to be saved.
          $query = "SELECT * FROM $tableName WHERE $unsavedCondition AND post_id = %d LIMIT 1";
          $results = $wpdb->get_results($wpdb->prepare($query, $siteId));

          return empty($results) ? null : $results[0];
      }

      // Get the last crawled URL as object from the table
      $query = "SELECT * FROM $tableName WHERE id = %d";
      $results = $wpdb->get_results($wpdb->prepare($query, $lastCrawledUrlId));

      // If the URL is not found in the table, then get the first URL that needs to be saved or return null.
      // Recalling this method with a null last crawled URL ID will do the job.
      if (empty($results)) {
          return $this->getUrlTupleToCrawl($siteId, null);
      }

      // Get reference category ID and try to get a URL for the next category.
      $referenceCategoryId = $results[0]->category_id;

      // Find all categories with an unsaved URL for the target site ID. The explicit order keeps the positions of
      // the categories stable, since the row order of a query without ORDER BY is not guaranteed.
      $query = "SELECT DISTINCT category_id FROM $tableName WHERE $unsavedCondition AND post_id = %d ORDER BY category_id";
      $categoryIds = $wpdb->get_results($wpdb->prepare($query, $siteId));

      // If there is no category, it means there is no URL to be saved. Return null.
      if (empty($categoryIds)) return null;

      // Find the position of the reference category among the categories having an unsaved URL.
      $referenceCategoryPos = null;
      foreach($categoryIds as $key => $categoryIdObject) {
          if ($categoryIdObject->category_id == $referenceCategoryId) {
              $referenceCategoryPos = $key;
              break;
          }
      }

      // If the reference category is not found, get the first category in the list.
      // If the reference category is the last item in the list, get the first category in the list.
      // Otherwise, get the category next to the reference category.
      $isWrapAround = $referenceCategoryPos === null || $referenceCategoryPos == sizeof($categoryIds) - 1;
      $targetCategoryId = $isWrapAround
          ? $categoryIds[0]->category_id
          : $categoryIds[$referenceCategoryPos + 1]->category_id;

      // Now, get a URL that needs to be saved and belongs to the target site ID and target category ID.
      $query = "SELECT * FROM $tableName WHERE post_id = %d AND category_id = %d AND $unsavedCondition LIMIT 1";
      $results = $wpdb->get_results($wpdb->prepare($query, [$siteId, $targetCategoryId]));

      // The results are expected to be non-empty here. Still, guard against an empty result (e.g. if the table has
      // changed between the two queries) instead of reading an undefined offset.
      return empty($results) ? null : $results[0];
  }
  1579.  
  /**
   * Check if a post is duplicate considering the current settings set by {@link SettingsTrait::setSettings}.
   *
   * The enabled checks are read from the "_duplicate_check_types" setting. For the first page, the URL is compared
   * with the first-page-URL meta of the other posts (with and without a trailing slash) and then the title is
   * compared among the posts having the same post type. For the last page, the content is compared among the posts
   * having the same post type. When a duplicate is found, an error and an information message are added.
   *
   * @param string     $url         URL of the post
   * @param array|null $postData    An array having keys named as columns in wp_posts table. And their values, of
   *                                course.
   * @param bool       $isFirstPage True if this check is done for the first page of the post.
   * @param bool       $isLastPage  True if this check is done for the last page of the post.
   * @return false|int Previously saved post ID if this is a duplicate. Otherwise, false.
   */
  public function isDuplicate($url, $postData, $isFirstPage, $isLastPage) {
      // The checks are done only for the first and the last pages.
      if (!$isFirstPage && !$isLastPage) {
          return false;
      }

      // ID of the current post, which is excluded from the matches. 0 is used when there is no ID.
      $excludedPostId = Utils::array_get($postData, "ID") ?: 0;

      // The enabled check types are stored under 0 key of the setting.
      $checkTypeSetting = $this->getSetting('_duplicate_check_types');
      if (!$checkTypeSetting || !isset($checkTypeSetting[0])) {
          return false;
      }

      $enabledChecks = $checkTypeSetting[0];
      $checkUrl      = isset($enabledChecks[self::DUPLICATE_CHECK_URL]);
      $checkTitle    = isset($enabledChecks[self::DUPLICATE_CHECK_TITLE]);
      $checkContent  = isset($enabledChecks[self::DUPLICATE_CHECK_CONTENT]);

      global $wpdb;

      $duplicateId = null;

      // URL and title are checked for the first page.
      if ($isFirstPage) {
          if ($checkUrl && $url) {
              // Match the URL both with and without a trailing slash
              $query = "SELECT post_id
                   FROM {$wpdb->postmeta}
                   WHERE meta_key = '{$this->postMetaPostFirstPageUrl}'
                     AND (meta_value = %s OR meta_value = %s)
                     AND post_id <> %d;
               ";
              $duplicateId = $wpdb->get_var(
                  $wpdb->prepare($query, trailingslashit($url), untrailingslashit($url), $excludedPostId)
              );
          }

          // The title is checked only if the URL check did not find anything.
          if (!$duplicateId && $checkTitle && $postData) {
              $title = Utils::array_get($postData, "post_title");
              $type  = Utils::array_get($postData, "post_type");

              $query = "SELECT ID FROM {$wpdb->posts} WHERE post_title = %s AND post_type = %s AND ID <> %d";
              $duplicateId = $wpdb->get_var($wpdb->prepare($query, $title, $type, $excludedPostId));
          }
      }

      // The content is checked for the last page, if no duplicate is found until now.
      if (!$duplicateId && $isLastPage && $checkContent && $postData) {
          $content = Utils::array_get($postData, "post_content");
          $type    = Utils::array_get($postData, "post_type");

          $query = "SELECT ID FROM {$wpdb->posts} WHERE post_content = %s AND post_type = %s AND ID <> %d";
          $duplicateId = $wpdb->get_var($wpdb->prepare($query, $content, $type, $excludedPostId));
      }

      if (!$duplicateId) {
          return false;
      }

      // Report the found duplicate as an error.
      $this->addError(ErrorType::DUPLICATE_POST, $duplicateId);
      Informer::add(Information::fromInformationMessage(
          InformationMessage::DUPLICATE_POST,
          _wpcc("Post ID") . ": {$duplicateId}",
          InformationType::ERROR
      )->addAsLog());

      return $duplicateId;
  }
  1658.  
  /**
   * Get the prefix of the post meta keys under which the data of the current task is stored. The recrawl prefix is
   * returned when {@link $isRecrawl} is truthy. Otherwise, the crawl prefix is returned.
   *
   * @see $cronRecrawlPostMetaPrefix
   * @see $cronCrawlPostMetaPrefix
   * @return string
   */
  private function getCronPostMetaPrefix() {
      if ($this->isRecrawl) {
          return $this->cronRecrawlPostMetaPrefix;
      }

      return $this->cronCrawlPostMetaPrefix;
  }
  1668.  
  /**
   * Sets the value of {@link $isRecrawl}.
   *
   * @param bool $isRecrawl See {@link isRecrawl}
   */
  public function setIsRecrawl($isRecrawl) {
      $this->isRecrawl = $isRecrawl;
  }
  1675.  
  1676.     /*
  1677.      * STATIC METHODS
  1678.      */
  1679.  
  /**
   * Get duplicate check types prepared to be shown in a select element. The URL, title and content options are
   * merged with the duplicate options retrieved from {@link PostDetailsService}, if there are any.
   *
   * @param array $settings Post settings
   * @return array Returns an array with "values" and "defaults" keys, both of which has an array value. The
   *               key-description pairs are stored under "values" key. "defaults" stores key-defaultValue pairs.
   */
  public static function getDuplicateCheckOptionsForSelect($settings) {
      // Descriptions of the built-in options
      $values = [
          PostSaver::DUPLICATE_CHECK_URL     => _wpcc("URL"),
          PostSaver::DUPLICATE_CHECK_TITLE   => _wpcc("Title"),
          PostSaver::DUPLICATE_CHECK_CONTENT => _wpcc("Content"),
      ];

      // Default values of the built-in options
      $defaults = [
          PostSaver::DUPLICATE_CHECK_URL     => 1,
          PostSaver::DUPLICATE_CHECK_TITLE   => 1,
          PostSaver::DUPLICATE_CHECK_CONTENT => 0,
      ];

      // Add the options coming from the post details
      $postSettings  = new SettingsImpl($settings, Factory::postService()->getSingleMetaKeys());
      $detailOptions = PostDetailsService::getInstance()->getDuplicateOptions($postSettings);
      if ($detailOptions) {
          $values   = array_merge($values, $detailOptions["values"]);
          $defaults = array_merge($defaults, $detailOptions["defaults"]);
      }

      return [
          "values"   => $values,
          "defaults" => $defaults,
      ];
  }
  1711.  
  1712.     /*
  1713.      * GETTERS
  1714.      */
  1715.  
  /**
   * Get the value of {@link $nextPageUrl}, which is the next page URL found in {@link savePost()} method.
   *
   * @return null|string
   */
  public function getNextPageUrl() {
      return $this->nextPageUrl;
  }
  1724.  
  /**
   * Get the value of {@link $nextPageUrls}, which stores the next page URLs found in {@link savePost()} method.
   * This returns a non-null value only if the post has all page URLs in a single page.
   *
   * @return array|null
   */
  public function getNextPageUrls() {
      return $this->nextPageUrls;
  }
  1734.    
  1735. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top