Source of file SwiftypeSiteTreeCrawlerExtension.php
Size: 9,482 Bytes - Last Modified: 2021-12-24T05:17:12+00:00
/var/www/docs.ssmods.com/process/src/src/Extensions/SwiftypeSiteTreeCrawlerExtension.php
<?php

namespace Ichaber\SSSwiftype\Extensions;

use Ichaber\SSSwiftype\Service\SwiftypeCrawler;
use SilverStripe\CMS\Model\SiteTree;
use SilverStripe\CMS\Model\SiteTreeExtension;
use SilverStripe\Control\Director;
use SilverStripe\Core\Config\Config;
use SilverStripe\Versioned\Versioned;

/**
 * Class SwiftypeSiteTreeCrawlerExtension
 *
 * Collates the LIVE Urls of a page around write/publish/unpublish and asks Swiftype to reindex them.
 *
 * Collated Urls are stored on the extension instance, keyed by getOwnerKey(), so that Urls belonging to different
 * owner records never get mixed up (keeping in mind that Extensions are singleton).
 *
 * Config:
 *  - clear_cache_disabled: when truthy, the collated Urls for the owner are NOT cleared after a publish/unpublish
 *    (useful for unit testing, or any other reason you might have to disable it)
 *
 * @package Ichaber\SSSwiftype\Extensions
 * @property SiteTree|$this $owner
 */
class SwiftypeSiteTreeCrawlerExtension extends SiteTreeExtension
{
    /**
     * Urls to crawl
     *
     * array keyed by getOwnerKey, each value being a list of absolute Urls for that owner
     *
     * @var array
     */
    private $urlsToCrawl = [];

    /**
     * Replace the full set of collated Urls (for all owners)
     *
     * @param array $urls array keyed by getOwnerKey
     */
    public function setUrlsToCrawl(array $urls)
    {
        $this->urlsToCrawl = $urls;
    }

    /**
     * @return array array keyed by getOwnerKey
     */
    public function getUrlsToCrawl(): array
    {
        return $this->urlsToCrawl;
    }

    /**
     * We need to collate Urls before we write, just in case an author has changed the Page's Url Segment. If they
     * have, then we need to request Swiftype to reindex both the old Url (which should then be marked by Swiftype
     * as a 404), and the new Url
     */
    public function onBeforeWrite(): void
    {
        $this->collateUrls();
    }

    /**
     * After a publish has occurred, we can collate and process immediately (no need to split things out like during
     * an unpublish)
     *
     * @param SiteTree|mixed $original
     * @return void
     */
    public function onAfterPublish(&$original): void
    {
        $this->collateUrls();
        $this->processCollatedUrls();

        // It's important that we clear the cache after we have finished requesting reindex from Swiftype
        $this->clearCacheUnlessDisabled();
    }

    /**
     * We need to collate the Urls to be purged *before* we complete the unpublish action (otherwise, the LIVE Urls
     * will no longer be available, since the page is now unpublished)
     */
    public function onBeforeUnpublish(): void
    {
        $this->collateUrls();
    }

    /**
     * After the unpublish has completed, we can now request Swiftype to reindex the Urls that we collated
     */
    public function onAfterUnpublish(): void
    {
        $this->processCollatedUrls();

        // It's important that we clear the cache after we have finished requesting reindex from Swiftype
        $this->clearCacheUnlessDisabled();
    }

    /**
     * You may need to clear the cache at some point during your particular process
     *
     * Reset all Urls for any/all objects that might be in the cache (keeping in mind that Extensions are singleton,
     * so the Urls to crawl could be accessed via singleton and could contain Urls for many owner objects)
     *
     * We don't use flushCache (which is called from DataObject) because this is called between write and un/publish,
     * and we need our cache to persist through these states
     */
    public function clearCacheAll(): void
    {
        $this->setUrlsToCrawl([]);
    }

    /**
     * You may need to clear the cache at some point during your particular process
     *
     * Reset only the Urls related to this particular owner object (keeping in mind that Extensions are singleton,
     * so the Urls to crawl could be accessed via singleton and could contain Urls for many owner objects)
     *
     * We don't use flushCache (which is called from DataObject) because this is called between write and un/publish,
     * and we need our cache to persist through these states
     */
    public function clearCacheSingle(): void
    {
        $key = $this->getOwnerKey();

        // Nothing for us to do here, the owner has no key (it has not been written to the DB)
        if ($key === null) {
            return;
        }

        $urls = $this->getUrlsToCrawl();

        // Nothing for us to do here, we hold no Urls for this owner
        if (!array_key_exists($key, $urls)) {
            return;
        }

        // Remove this key and its Urls
        unset($urls[$key]);

        $this->setUrlsToCrawl($urls);
    }

    /**
     * Collate Urls to crawl
     *
     * Extensions are singleton, so we use the owner key to make sure that we're only processing Urls directly related
     * to the desired record.
     *
     * You might need to collate more than one URL per Page (maybe you're using Fluent or another translation module).
     * This is the method you will want to override in order to add that additional logic.
     */
    public function collateUrls(): void
    {
        // Grab any existing Urls so that we can add to it
        $urls = $this->getUrlsToCrawl();

        // Set us to a LIVE stage/reading_mode. $urls is captured by reference because the callback cannot return a
        // value through withVersionContext()
        $this->withVersionContext(function () use (&$urls) {
            /** @var SiteTree $owner */
            $owner = $this->getOwner();
            $key = $this->getOwnerKey();

            // We can't do anything if we don't have a key to use
            if ($key === null) {
                return;
            }

            // Create a new container for this key
            if (!array_key_exists($key, $urls)) {
                $urls[$key] = [];
            }

            // Grab the absolute live link without ?stage=Live appended
            $link = $owner->getAbsoluteLiveLink(false);

            // If this record is not published, or we're unable to get a "Live Link" (for whatever reason), then there
            // is nothing more we can do here
            if (!$link) {
                return;
            }

            // Nothing for us to do here, the Link is already being tracked (strict comparison, so that two different
            // Urls can never be treated as duplicates through type juggling)
            if (in_array($link, $urls[$key], true)) {
                return;
            }

            // Add our base URL to this key
            $urls[$key][] = $link;
        });

        // Update the Urls we have stored for indexing
        $this->setUrlsToCrawl($urls);
    }

    /**
     * Send requests to Swiftype to reindex each of the Urls that we have previously collated for this owner
     */
    protected function processCollatedUrls(): void
    {
        $key = $this->getOwnerKey();

        // We can't do anything if we don't have a key to process
        if ($key === null) {
            return;
        }

        $urls = $this->getUrlsToCrawl();

        // There are no Urls for this particular key (this also covers there being no Urls at all)
        if (!array_key_exists($key, $urls)) {
            return;
        }

        // Force the reindexing of each URL we collated
        foreach ($urls[$key] ?? [] as $url) {
            $this->forceSwiftypeIndex($url);
        }
    }

    /**
     * Request that Swiftype reindexes a single Url
     *
     * @param string $updateUrl absolute Url to reindex
     * @return bool true without sending anything when in a dev environment, otherwise the result of the crawler's
     * send()
     */
    protected function forceSwiftypeIndex(string $updateUrl): bool
    {
        // We don't reindex dev environments
        if (Director::isDev()) {
            return true;
        }

        $crawler = SwiftypeCrawler::create();

        return $crawler->send($updateUrl);
    }

    /**
     * Key used to group collated Urls by owner record: the owner's ClassName (backslashes removed) followed by its ID
     *
     * @return string|null null when the owner has not yet been written to the DB
     */
    protected function getOwnerKey(): ?string
    {
        $owner = $this->owner;

        // Can't generate a key if the owner has not yet been written to the DB
        if (!$owner->isInDB()) {
            return null;
        }

        return str_replace('\\', '', $owner->ClassName . $owner->ID);
    }

    /**
     * Clear the collated Urls for this owner, unless that has been disabled through the clear_cache_disabled config
     * (useful for unit testing, or any other reason you might have to disable it)
     */
    private function clearCacheUnlessDisabled(): void
    {
        if (Config::inst()->get(static::class, 'clear_cache_disabled')) {
            return;
        }

        $this->clearCacheSingle();
    }

    /**
     * Sets the version context to Live as that's what crawlers will (normally) see
     *
     * The main function is to suppress the ?stage=Live querystring. LeftAndMain will set the default
     * reading mode to 'DRAFT' when initialising so to counter this we need to re-set the default
     * reading mode back to LIVE
     *
     * The original default reading mode, reading mode and stage are put back once the callback has finished, even if
     * the callback throws.
     *
     * @param callable $callback invoked with no arguments while the version context is LIVE
     */
    private function withVersionContext(callable $callback): void
    {
        Versioned::withVersionedMode(static function () use ($callback) {
            // Grab our current stage and reading mode
            $originalDefaultReadingMode = Versioned::get_default_reading_mode();
            $originalReadingMode = Versioned::get_reading_mode();
            $originalStage = Versioned::get_stage();

            // Set our stage and reading mode to LIVE
            Versioned::set_default_reading_mode('Stage.' . Versioned::LIVE);
            Versioned::set_reading_mode('Stage.' . Versioned::LIVE);
            Versioned::set_stage(Versioned::LIVE);

            try {
                // Process whatever callback was provided
                $callback();
            } finally {
                // Always put the default reading mode back. This must not depend on whether a reading mode was set
                // before we started, otherwise LIVE would be left as the default for the rest of the request
                Versioned::set_default_reading_mode($originalDefaultReadingMode);

                // Set us back to the original stage and reading mode
                if ($originalReadingMode) {
                    Versioned::set_reading_mode($originalReadingMode);
                }

                if ($originalStage) {
                    Versioned::set_stage($originalStage);
                }
            }
        });
    }
}