Source of file ElasticaService.php
Size: 20,681 Bytes - Last Modified: 2021-12-23T10:58:34+00:00
/var/www/docs.ssmods.com/process/src/src/SilverStripe/Elastica/ElasticaService.php
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694 | <?php namespace SilverStripe\Elastica; use Elastica\Client; use Elastica\Query; use Elastica\Search; /** * A service used to interact with elastic search. */ class ElasticaService { /** * @var \Elastica\Document[] */ protected $buffer = array(); /** * @var bool controls whether indexing operations are buffered or not */ protected $buffered = false; /** * @var \Elastica\Client Elastica Client object */ private $client; /** * @var string index name */ private $indexName; /** * The code of the locale being indexed or searched. * * @var string e.g. th_TH, en_US */ private $locale; /** * Mapping of DataObject ClassName and whether it is in the SiteTree or not. * * @var array; */ private static $site_tree_classes = array(); /** * Counter used to for testing, records indexing requests. * * @var int */ public static $indexing_request_ctr = 0; /** * Array of highlighted fields, e.g. Title, Title.standard. If this is empty then the * ShowHighlight field of SearchableField is used to determine which fields to highlight. * * @var array */ private $highlightedFields = array(); /** * The number of documents to index currently for this locale. * * @var int The number of documents left to index */ private $nDocumentsToIndexForLocale = 0; /* Set the highlight fields for subsequent searches */ public function setHighlightedFields($newHighlightedFields) { $this->highlightedFields = $newHighlightedFields; } /* Enable this to allow test classes not to be ignored when indexing */ public $test_mode = false; /** * @param \Elastica\Client $client * @param string $newIndexName Name of the new index */ public function __construct(Client $client, $newIndexName) { $this->client = $client; $this->indexName = $newIndexName; // Check for default flag value, which uses the root path as the index name if ($newIndexName == 'USE_WWW_PATH_AS_NAME') { $name = \Director::baseFolder(); $name = str_replace('/', '-', $name); $name = substr($name, 1); $name = strtolower($name); $this->indexName = $name; } $this->locale = \i18n::default_locale(); } public function setTestMode($newTestMode) { $this->test_mode = $newTestMode; } /** * @return \Elastica\Client */ public function getClient() { return $this->client; } /** * @return \Elastica\Index */ public function getIndex() { $index = $this->getClient()->getIndex($this->getLocaleIndexName()); return $index; } public function setLocale($newLocale) { $this->locale = $newLocale; } public function getIndexName() { return $this->indexName; } private function getLocaleIndexName() { $name = $this->indexName.'-'.$this->locale; $name = strtolower($name); $name = str_replace('-', '_', $name); return $name; } /** * Performs a search query and returns a result list. * * @param \Elastica\Query|string|array $query * @param string|array $types List of comma separated SilverStripe classes to search, or blank for all * * @return \Elastica\ResultList */ public function search($query, $types = '') { $query = Query::create($query); // may be a string if (is_string($types)) { $types = explode(',', $types); } $data = $query->toArray(); $query->MoreLikeThis = isset($data['query']['more_like_this']); $search = new Search(new Client()); // get results from all shards, this makes test repeatable if ($this->test_mode) { $search->setOption('search_type', Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH); } $search->addIndex($this->getLocaleIndexName()); $this->addTypesToSearch($search, $types, $query); $highlights = $this->getHighlightingConfig(); $this->addExtractedQueryTermsForMoreLikeThis($query, $highlights); $query->setHighlight($highlights); $search->addIndex($this->getLocaleIndexName()); if (!empty($types)) { foreach ($types as $type) { $search->addType($type); } } $params = $search->getOptions(); $searchResults = $search->search($query, $params); if (isset($this->MoreLikeThisTerms)) { $searchResults->MoreLikeThisTerms = $this->MoreLikeThisTerms; } return $searchResults; } /** * @param Query $query */ private function addExtractedQueryTermsForMoreLikeThis($query, &$highlights) { if ($query->MoreLikeThis) { $termsMatchingQuery = array(); foreach ($this->MoreLikeThisTerms as $field => $terms) { $termQuery = array('multi_match' => array( 'query' => implode(' ', $terms), 'type' => 'most_fields', 'fields' => array($field), )); $termsMatchingQuery[$field] = array('highlight_query' => $termQuery); } $highlights['fields'] = $termsMatchingQuery; } } /** * @param Search $search * @param Query $query */ private function addTypesToSearch(&$search, $types, $query) { // If the query is a 'more like this' we can get the terms used for searching by performing // an extra query, in this case a query validation with explain and rewrite turned on $this->checkForTermsMoreLikeThis($query, $search); if (!empty($types)) { foreach ($types as $type) { $search->addType($type); } } } private function getHighlightingConfig() { $highlightsCfg = \Config::inst()->get('Elastica', 'Highlights'); $preTags = $highlightsCfg['PreTags']; $postTags = $highlightsCfg['PostTags']; $fragmentSize = $highlightsCfg['Phrase']['FragmentSize']; $nFragments = $highlightsCfg['Phrase']['NumberOfFragments']; $stringFields = $this->highlightedFields; $usingProvidedHighlightFields = true; if (sizeof($stringFields) == 0) { $filter = array('Type' => 'string', 'ShowHighlights' => true); $stringFields = \SearchableField::get()->filter($filter)->map('Name')->toArray(); $usingProvidedHighlightFields = false; } $highlightFields = array(); foreach ($stringFields as $name) { // Add the stemmed and the unstemmed for now $fieldName = $name; if (!$usingProvidedHighlightFields) { $fieldName .= '.standard'; } $highlightFields[$fieldName] = array( 'fragment_size' => $fragmentSize, 'number_of_fragments' => $nFragments, 'no_match_size' => 200, ); } $highlights = array( 'pre_tags' => array($preTags), 'post_tags' => array($postTags), 'fields' => $highlightFields, ); return $highlights; } private function checkForTermsMoreLikeThis($elasticaQuery, $search) { if ($elasticaQuery->MoreLikeThis) { $path = $search->getPath(); $termData = array(); $data = $elasticaQuery->toArray(); $termData['query'] = $data['query']; $path = str_replace('_search', '_validate/query', $path); $params = array('explain' => true, 'rewrite' => true); if ($this->test_mode) { $params['search_type'] = Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH; } $response = $this->getClient()->request( $path, \Elastica\Request::GET, $termData, $params ); $rData = $response->getData(); $terms = null; // keep in scope if (isset($rData['explanations'])) { $explanation = $rData['explanations'][0]['explanation']; $terms = ElasticaUtil::parseSuggestionExplanation($explanation); } if (isset($terms)) { $this->MoreLikeThisTerms = $terms; } } } /** * Ensure that the index is present. */ protected function ensureIndex() { $index = $this->getIndex(); if (!$index->exists()) { $this->createIndex(); } } /** * Ensure that there is a mapping present. * * @param \Elastica\Type Type object * @param SilverStripe\Elastica\Searchable DataObject that implements Searchable * * @return \Elastica\Mapping Mapping object */ protected function ensureMapping(\Elastica\Type $type, \DataObject $record) { $mapping = $type->getMapping(); if ($mapping == array()) { $this->ensureIndex(); $mapping = $record->getElasticaMapping(); $type->setMapping($mapping); $mapping = $mapping->toArray(); } return $mapping; } /** * Either creates or updates a record in the index. * * @param Searchable $record */ public function index($record) { $document = $record->getElasticaDocument(); $typeName = $record->getElasticaType(); if ($this->buffered) { if (array_key_exists($typeName, $this->buffer)) { $this->buffer[$typeName][] = $document; } else { $this->buffer[$typeName] = array($document); } } else { $index = $this->getIndex(); $type = $index->getType($typeName); $this->ensureMapping($type, $record); $type->addDocument($document); $index->refresh(); ++self::$indexing_request_ctr; } } /** * Begins a bulk indexing operation where documents are buffered rather than * indexed immediately. */ public function startBulkIndex() { $this->buffered = true; } public function listIndexes($trace) { $command = "curl 'localhost:9200/_cat/indices?v'"; exec($command, $op); ElasticaUtil::message("\n++++ $trace ++++\n"); ElasticaUtil::message(print_r($op, 1)); ElasticaUtil::message("++++ /{$trace} ++++\n\n"); return $op; } /** * Ends the current bulk index operation and indexes the buffered documents. */ public function endBulkIndex() { $index = $this->getIndex(); foreach ($this->buffer as $type => $documents) { $amount = 0; foreach (array_keys($this->buffer) as $key) { $amount += sizeof($this->buffer[$key]); } $index->getType($type)->addDocuments($documents); $index->refresh(); ElasticaUtil::message("\tAdding $amount documents to the index\n"); if (isset($this->StartTime)) { $elapsed = microtime(true) - $this->StartTime; $timePerDoc = ($elapsed) / ($this->nDocumentsIndexed); $documentsRemaining = $this->nDocumentsToIndexForLocale - $this->nDocumentsIndexed; $eta = ($documentsRemaining) * $timePerDoc; $hours = (int) ($eta / 3600); $minutes = (int) (($eta - $hours * 3600) / 60); $seconds = (int) (0.5 + $eta - $minutes * 60 - $hours * 3600); $etaHR = "{$hours}h {$minutes}m {$seconds}s"; ElasticaUtil::message("ETA to completion of indexing $this->locale ($documentsRemaining documents): $etaHR"); } ++self::$indexing_request_ctr; } $this->buffered = false; $this->buffer = array(); } /** * Deletes a record from the index. * * @param Searchable $record */ public function remove($record) { $index = $this->getIndex(); $type = $index->getType($record->getElasticaType()); $type->deleteDocument($record->getElasticaDocument()); $index->refresh(); } /** * Creates the index and the type mappings. */ public function define() { $index = $this->getIndex(); # Recreate the index if ($index->exists()) { $index->delete(); } $this->createIndex(); foreach ($this->getIndexedClasses() as $class) { $sng = singleton($class); $mapping = $sng->getElasticaMapping(); $mapping->setType($index->getType($sng->getElasticaType())); $mapping->send(); } } /** * Refresh an array of records in the index. * * @param array $records */ protected function refreshRecords($records) { foreach ($records as $record) { if ($record->showRecordInSearch()) { $this->index($record); } } } /** * Get a List of all records by class. Get the "Live data" If the class has the "Versioned" extension. * * @param string $class Class Name * @param int $pageSize Optional page size, only a max of this number of records returned * @param int $page Page number to return * * @return \DataList $records */ protected function recordsByClassConsiderVersioned($class, $pageSize = 0, $page = 0) { $offset = $page * $pageSize; if ($class::has_extension('Versioned')) { if ($pageSize > 0) { $records = \Versioned::get_by_stage($class, 'Live')->limit($pageSize, $offset); } else { $records = \Versioned::get_by_stage($class, 'Live'); } } else { if ($pageSize > 0) { $records = $class::get()->limit($pageSize, $offset); } else { $records = $class::get(); } } return $records; } /** * Refresh the records of a given class within the search index. * * @param string $class Class Name */ protected function refreshClass($class) { $nRecords = $this->recordsByClassConsiderVersioned($class)->count(); $batchSize = 500; $pages = $nRecords / $batchSize + 1; for ($i = 0; $i < $pages; ++$i) { $this->startBulkIndex(); $pagedRecords = $this->recordsByClassConsiderVersioned($class, $batchSize, $i); $this->nDocumentsIndexed += $pagedRecords->count(); $batch = $pagedRecords->toArray(); $this->refreshRecords($batch); $this->endBulkIndex(); } } /** * Re-indexes each record in the index. */ public function refresh() { $this->StartTime = microtime(true); $classes = $this->getIndexedClasses(); //Count the number of documents for this locale $amount = 0; foreach ($classes as $class) { $amount += $this->recordsByClassConsiderVersioned($class)->count(); } $this->nDocumentsToIndexForLocale = $amount; $this->nDocumentsIndexed = 0; foreach ($this->getIndexedClasses() as $classname) { ElasticaUtil::message("Indexing class $classname"); $inSiteTree = null; if (isset(self::$site_tree_classes[$classname])) { $inSiteTree = self::$site_tree_classes[$classname]; } else { $inSiteTree = SearchableHelper::isInSiteTree($classname); self::$site_tree_classes[$classname] = $inSiteTree; } if ($inSiteTree) { // this prevents the same item being indexed twice due to class inheritance if ($classname === 'SiteTree') { $this->refreshClass($classname); } // Data objects } else { $this->refreshClass($classname); } } ElasticaUtil::message("Completed indexing documents for locale $this->locale\n"); } /** * Reset the current index. */ public function reset() { $index = $this->getIndex(); $index->delete(); $this->createIndex(); } private function createIndex() { $index = $this->getIndex(); $settings = $this->getIndexSettingsForCurrentLocale()->generateConfig(); $index->create($settings, true); } /** * Get the index settings for the current locale. * * @return IndexSettings index settings for the current locale */ public function getIndexSettingsForCurrentLocale() { $result = null; $indexSettings = \Config::inst()->get('Elastica', 'indexsettings'); if (isset($indexSettings[$this->locale])) { $settingsClassName = $indexSettings[$this->locale]; $result = \Injector::inst()->create($settingsClassName); } else { throw new \Exception('ERROR: No index settings are provided for locale '.$this->locale."\n"); } return $result; } /** * Gets the classes which are indexed (i.e. have the extension applied). * * @return array */ public function getIndexedClasses() { $classes = array(); $whitelist = array('SearchableTestPage', 'SearchableTestFatherPage', 'SearchableTestGrandFatherPage', 'FlickrPhotoTO', 'FlickrTagTO', 'FlickrPhotoTO', 'FlickrAuthorTO', 'FlickrSetTO', ); foreach (\ClassInfo::subclassesFor('DataObject') as $candidate) { $instance = singleton($candidate); $interfaces = class_implements($candidate); // Only allow test classes in testing mode if (isset($interfaces['TestOnly'])) { if (in_array($candidate, $whitelist)) { if (!$this->test_mode) { continue; } } else { // If it's not in the test whitelist we definitely do not want to know continue; } } if ($instance->hasExtension('SilverStripe\\Elastica\\Searchable')) { $classes[] = $candidate; } } return $classes; } /** * Get the number of indexing requests made. Used for testing bulk indexing. * * @return int indexing request counter */ public function getIndexingRequestCtr() { return self::$indexing_request_ctr; } /** * Get the term vectors in the index for the provided Searchable is_object. * * @param Searchable $searchable An object that implements Searchable * * @return array array of field name to terms indexed */ public function getTermVectors($searchable) { $params = array(); $fieldMappings = $searchable->getElasticaMapping()->getProperties(); $fields = array_keys($fieldMappings); $allFields = array(); foreach ($fields as $field) { array_push($allFields, $field); $mapping = $fieldMappings[$field]; if (isset($mapping['fields'])) { $subFields = array_keys($mapping['fields']); foreach ($subFields as $subField) { $name = $field.'.'.$subField; array_push($allFields, $name); } } } sort($allFields); $data = array( 'fields' => $allFields, 'offsets' => true, 'payloads' => true, 'positions' => true, 'term_statistics' => true, 'field_statistics' => true, ); $path = $this->getIndex()->getName().'/'.$searchable->ClassName.'/'.$searchable->ID.'/_termvector'; $response = $this->getClient()->request( $path, \Elastica\Request::GET, $data, $params ); $data = $response->getData(); return isset($data['term_vectors']) ? $data['term_vectors'] : array(); } } |