Source of file Analyzer.php
Size: 5,428 Bytes - Last Modified: 2021-12-23T10:29:30+00:00
/var/www/docs.ssmods.com/process/src/thirdparty/Zend/Search/Lucene/Analysis/Analyzer.php
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id: Analyzer.php 20096 2010-01-06 02:05:09Z bkarwin $
 */


/**
 * User land classes and interfaces turned on by Zend/Search/Analyzer.php file inclusion.
 */
/**
 * @todo Section should be removed with ZF 2.0 release as obsolete
 *
 * The whole section is skipped when the
 * ZEND_SEARCH_LUCENE_COMMON_ANALYZER_PROCESSED constant is already defined.
 */
if (!defined('ZEND_SEARCH_LUCENE_COMMON_ANALYZER_PROCESSED')) {
    /** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8 */
    include_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php';

    /** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive */
    include_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8/CaseInsensitive.php';

    /** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num */
    include_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php';

    /** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive */
    include_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num/CaseInsensitive.php';

    /** Zend_Search_Lucene_Analysis_Analyzer_Common_Text */
    include_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php';

    /** Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive */
    include_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php';

    /** Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum */
    include_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php';

    /** Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum_CaseInsensitive */
    include_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php';
}


/**
 * An Analyzer is used to analyze text.
 * It thus represents a policy for extracting index terms from text.
 *
 * Note:
 * Lucene Java implementation is oriented to streams. It provides effective work
 * with huge documents (more than 20Mb).
 * But the engine itself is not oriented to such documents.
 * Thus Zend_Search_Lucene analysis API works with data strings and sets (arrays).
 *
 * Concrete analyzers implement reset() and nextToken(); tokenize() and
 * setInput() are built on top of those two methods.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Analysis_Analyzer
{
    /**
     * The Analyzer implementation used by default.
     *
     * Stays unset until setDefault() is called or getDefault() creates the
     * fallback instance.
     *
     * @var Zend_Search_Lucene_Analysis_Analyzer
     */
    private static $_defaultImpl;

    /**
     * Input string
     *
     * @var string
     */
    protected $_input = null;

    /**
     * Input string encoding
     *
     * @var string
     */
    protected $_encoding = '';

    /**
     * Tokenize text to terms
     * Returns array of Zend_Search_Lucene_Analysis_Token objects
     *
     * Tokens are returned in UTF-8 (internal Zend_Search_Lucene encoding)
     *
     * The input is installed through setInput() (which also resets the
     * stream), then nextToken() is drained until it returns null.
     *
     * @param string $data
     * @param string $encoding  Encoding of $data; stored as given in $_encoding
     * @return array
     */
    public function tokenize($data, $encoding = '')
    {
        $this->setInput($data, $encoding);

        $tokenList = array();
        while (($nextToken = $this->nextToken()) !== null) {
            $tokenList[] = $nextToken;
        }

        return $tokenList;
    }


    /**
     * Tokenization stream API
     * Set input
     *
     * Stores the data and its encoding, then calls reset() so the next
     * nextToken() call starts from the beginning of the new input.
     *
     * @param string $data
     * @param string $encoding  Encoding of $data; stored as given in $_encoding
     */
    public function setInput($data, $encoding = '')
    {
        $this->_input    = $data;
        $this->_encoding = $encoding;
        $this->reset();
    }

    /**
     * Reset token stream
     */
    abstract public function reset();

    /**
     * Tokenization stream API
     * Get next token
     * Returns null at the end of stream
     *
     * Tokens are returned in UTF-8 (internal Zend_Search_Lucene encoding)
     *
     * @return Zend_Search_Lucene_Analysis_Token|null
     */
    abstract public function nextToken();


    /**
     * Set the default Analyzer implementation used by indexing code.
     *
     * @param Zend_Search_Lucene_Analysis_Analyzer $analyzer
     */
    public static function setDefault(Zend_Search_Lucene_Analysis_Analyzer $analyzer)
    {
        self::$_defaultImpl = $analyzer;
    }


    /**
     * Return the default Analyzer implementation used by indexing code.
     *
     * When no analyzer has been registered through setDefault(), a
     * Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive
     * instance is created, remembered and returned.
     *
     * @return Zend_Search_Lucene_Analysis_Analyzer
     */
    public static function getDefault()
    {
        if (!self::$_defaultImpl instanceof Zend_Search_Lucene_Analysis_Analyzer) {
            // Load the fallback class only when it is actually needed, so a
            // registered custom analyzer never triggers a file lookup (or an
            // include warning when the file is not on the include path).
            /** Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive */
            include_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php';

            self::$_defaultImpl = new Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive();
        }

        return self::$_defaultImpl;
    }
}