Source of file DocumentWriter.php
Size: 8,773 Bytes - Last Modified: 2021-12-23T10:29:30+00:00
/var/www/docs.ssmods.com/process/src/thirdparty/Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id: DocumentWriter.php 20096 2010-01-06 02:05:09Z bkarwin $
 */

/**
 * Zend_Search_Lucene_Index_SegmentWriter
 */
require_once 'Zend/Search/Lucene/Index/SegmentWriter.php';

/**
 * Segment writer that accumulates documents one at a time.
 *
 * Terms and their per-document positions are collected in memory by
 * addDocument() and handed to the parent writer's dictionary routines
 * when the segment is closed.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter extends Zend_Search_Lucene_Index_SegmentWriter
{
    /**
     * Term Dictionary
     * Array of the Zend_Search_Lucene_Index_Term objects, keyed by the term's key()
     * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
     * (NOTE(review): $_termDictionaryInfos is not declared in this file - confirm it still exists)
     *
     * @var array
     */
    protected $_termDictionary;

    /**
     * Documents, which contain the term
     *
     * Layout: term key => document number => list of term positions.
     *
     * @var array
     */
    protected $_termDocs;

    /**
     * Object constructor.
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param string $name
     */
    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name)
    {
        parent::__construct($directory, $name);

        $this->_termDocs       = array();
        $this->_termDictionary = array();
    }

    /**
     * Adds a document to this segment.
     *
     * Every indexed field contributes its terms to the in-memory dictionary and
     * one norm byte for this document. An indexed field that produces no terms
     * (no tokens, or an empty value) is registered as non-indexed and
     * non-tokenized instead.
     *
     * @param Zend_Search_Lucene_Document $document
     * @throws Zend_Search_Lucene_Exception if a field requests term vector storage
     */
    public function addDocument(Zend_Search_Lucene_Document $document)
    {
        /**
         * Zend_Search_Lucene_Search_Similarity
         */
        include_once 'Zend/Search/Lucene/Search/Similarity.php';

        $storedFields = array();
        $docNorms     = array();
        $similarity   = Zend_Search_Lucene_Search_Similarity::getDefault();

        foreach ($document->getFieldNames() as $fieldName) {
            $field = $document->getField($fieldName);

            if ($field->storeTermVector) {
                /**
                 * @todo term vector storing support
                 */
                include_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
            }

            if ($field->isIndexed) {
                $termCount = $field->isTokenized
                           ? $this->_indexTokenizedField($field)
                           : $this->_indexUntokenizedField($field);

                if ($termCount === 0) {
                    // Field contains empty value. Treat it as non-indexed and non-tokenized.
                    // Work on a clone so the caller's document is left untouched.
                    $field = clone $field;
                    $field->isIndexed = $field->isTokenized = false;
                } else {
                    $docNorms[$field->name] = chr(
                        $similarity->encodeNorm(
                            $similarity->lengthNorm($field->name, $termCount)
                            * $document->boost
                            * $field->boost
                        )
                    );
                }
            }

            if ($field->isStored) {
                $storedFields[] = $field;
            }

            $this->addField($field);
        }

        foreach ($this->_fields as $fieldName => $field) {
            if (!$field->isIndexed) {
                continue;
            }

            $isNewNormField = !isset($this->_norms[$fieldName]);
            $hasDocNorm     = isset($docNorms[$fieldName]);

            if ($isNewNormField || !$hasDocNorm) {
                // Norm byte used for a document that has no terms in this field
                $emptyNorm = chr($similarity->encodeNorm($similarity->lengthNorm($fieldName, 0)));
            }

            if ($isNewNormField) {
                // First norm for this field: back-fill one byte for each of the
                // $this->_docCount documents counted so far.
                $this->_norms[$fieldName] = str_repeat($emptyNorm, $this->_docCount);
            }

            $this->_norms[$fieldName] .= $hasDocNorm ? $docNorms[$fieldName] : $emptyNorm;
        }

        $this->addStoredFields($storedFields);
    }

    /**
     * Runs the default analyzer over a tokenized field and records every token.
     *
     * @param Zend_Search_Lucene_Field $field
     * @return integer number of tokens produced by the analyzer
     */
    private function _indexTokenizedField(Zend_Search_Lucene_Field $field)
    {
        /**
         * Zend_Search_Lucene_Analysis_Analyzer
         */
        include_once 'Zend/Search/Lucene/Analysis/Analyzer.php';

        $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
        $analyzer->setInput($field->value, $field->encoding);

        $position   = 0;
        $tokenCount = 0;
        while (($token = $analyzer->nextToken()) !== null) {
            $tokenCount++;

            $term      = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name);
            $position += $token->getPositionIncrement();

            $this->_addTermPosition($term, $position);
        }

        return $tokenCount;
    }

    /**
     * Records the whole UTF-8 value of an untokenized field as one term at position 0.
     *
     * @param Zend_Search_Lucene_Field $field
     * @return integer 1 if a term was recorded, 0 if the field value is empty
     */
    private function _indexUntokenizedField(Zend_Search_Lucene_Field $field)
    {
        $fieldUtf8Value = $field->getUtf8Value();

        // A loose "== ''" test gives different answers for numeric zero before and
        // after PHP 8; a string cast with a strict test behaves the same everywhere
        // and still treats null, false and '' as empty.
        if ((string) $fieldUtf8Value === '') {
            return 0;
        }

        $this->_addTermPosition(new Zend_Search_Lucene_Index_Term($fieldUtf8Value, $field->name), 0);

        return 1;
    }

    /**
     * Registers the term if it is new and appends a position for the current document.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @param integer $position
     */
    private function _addTermPosition(Zend_Search_Lucene_Index_Term $term, $position)
    {
        $termKey = $term->key();

        if (!isset($this->_termDictionary[$termKey])) {
            // New term
            $this->_termDictionary[$termKey] = $term;
            $this->_termDocs[$termKey]       = array();
            $this->_termDocs[$termKey][$this->_docCount] = array();
        } elseif (!isset($this->_termDocs[$termKey][$this->_docCount])) {
            // Existing term, but new term entry
            $this->_termDocs[$termKey][$this->_docCount] = array();
        }

        $this->_termDocs[$termKey][$this->_docCount][] = $position;
    }

    /**
     * Dump Term Dictionary (.tis) and Term Dictionary Index (.tii) segment files
     *
     * Terms are passed to addTerm() in string order of their keys.
     */
    protected function _dumpDictionary()
    {
        ksort($this->_termDictionary, SORT_STRING);

        $this->initializeDictionaryFiles();

        foreach ($this->_termDictionary as $termId => $term) {
            $this->addTerm($term, $this->_termDocs[$termId]);
        }

        $this->closeDictionaryFiles();
    }

    /**
     * Close segment, write it to disk and return segment info
     *
     * @return Zend_Search_Lucene_Index_SegmentInfo|null null when no documents were added
     */
    public function close()
    {
        if ($this->_docCount == 0) {
            return null;
        }

        $this->_dumpFNM();
        $this->_dumpDictionary();
        $this->_generateCFS();

        /**
         * Zend_Search_Lucene_Index_SegmentInfo
         */
        include_once 'Zend/Search/Lucene/Index/SegmentInfo.php';

        return new Zend_Search_Lucene_Index_SegmentInfo(
            $this->_directory,
            $this->_name,
            $this->_docCount,
            -1,
            null,
            true,
            true
        );
    }
}