Source of file Taggable.php
Size: 21,164 Bytes - Last Modified: 2022-01-13T10:01:16+00:00
/var/www/docs.ssmods.com/process/src/code/Taggable.php
<?php

/**
 * Taggable
 *
 * DataExtension that adds comma separated `Tags` and `MetaKeywords` text fields
 * to its owner, can (re)generate both from the owner's Title and Content when
 * the owner is written, and offers static helpers for looking up tagged content.
 */
class Taggable extends DataExtension
{
    // secret stuff
    // ------------

    /**
     * Per-request memo shared by the static lookup helpers below.
     * @var array
     */
    protected static $cache = [];

    // Framework
    // ---------

    public static $default_num_page_items = 10;

    /**
     * Memoised link of the first TagPage found (see getTagPageLink()).
     * @var string|null
     */
    protected static $tags_page_link = null;

    private static $db = array(
        'Tags' => 'Text',
        'MetaKeywords' => 'Text',
        'ReGenerateTags' => 'Boolean',
        'ReGenerateKeywords' => 'Boolean',
        'RestrictToKnownTags' => 'Boolean',
        'TreatHashTagsAsKnownTags' => 'Boolean',
        'BlockScrape' => 'Boolean',
    );

    private static $defaults = array(
        'ReGenerateTags' => true,
        'ReGenerateKeywords' => true,
        'TreatHashTagsAsKnownTags' => true,
        'RestrictToKnownTags' => false
    );

    private static $indexes = array(
        'Tags' => array(
            'type' => 'fulltext',
            'value' => '"Tags"'
        )
    );

    /**
     * Adds the tag fields to the CMS form.
     *
     * The fields go into a "Metadata" tab when the form has a tab set, otherwise
     * they are pushed straight onto the field list.
     *
     * Original author note: these fields do not display in model admin; also,
     * where is updateCMSFields_forPopup?
     *
     * @param FieldList $fields
     * @return void
     */
    public function updateCMSFields(FieldList $fields)
    {
        $fields->removeByName('BlockScrape');

        // instanceof is null-safe: fieldByName() may find nothing, and passing
        // that result to get_class() is an error on current PHP versions
        if ($fields->fieldByName('Root.Main') instanceof TabSet) {
            $fields->addFieldsToTab('Root.Main.Metadata', $this->getTagFields());
        } elseif ($fields->fieldByName('Root') instanceof TabSet) {
            $fields->addFieldsToTab('Root.Metadata', $this->getTagFields());
        } else {
            // no tab set at all: append to the flat list
            foreach ($this->getTagFields() as $field) {
                $fields->push($field);
            }
        }
    }

    // static Methods
    // ---------------

    /**
     * Words that are never accepted as tags.
     *
     * @return array list of lowercase words
     */
    protected static function get_blacklisted_words()
    {
        return array(
            'of', 'a', 'the', 'and', 'an', 'or', 'nor',
            'but', 'is', 'if', 'then', 'else', 'when',
            'at', 'from', 'by', 'on', 'off', 'for',
            'in', 'out', 'over', 'to', 'into', 'with',
            'also', 'back', 'well', 'big', 'where',
            'why', 'who', 'which', 'it', 'be', 'so', 'far', 'one',
            'our', 'we', 'only', 'they', 'this', 'i', 'do', 'there', 'just', 'that'
        );
    }

    /**
     * Splits a comma separated string into tags, dropping blacklisted words.
     *
     * Surrounding whitespace and the punctuation characters `,.!?` are removed
     * BEFORE the blacklist check, so "The." is recognised as "the". Entries that
     * end up empty are skipped.
     *
     * @param string $str comma separated tags
     * @return array list of cleaned tags (original letter case is kept)
     */
    public static function str_to_tags($str)
    {
        $blacklist = static::get_blacklisted_words();
        $out = array();

        foreach (explode(',', (string) $str) as $tag) {
            $tag = trim(trim(trim($tag), ',.!?'));
            if ($tag === '' || in_array(strtolower($tag), $blacklist, true)) {
                continue;
            }
            $out[] = $tag;
        }

        return $out;
    }

    // Actual Methods
    // --------------

    /**
     * Returns the owner's own getIncludeInDump() list (when the owner defines
     * that method and it yields a non-empty array) with 'TagURLStr' added.
     *
     * @return array
     */
    public function getIncludeInDump()
    {
        $includeInDump = array();

        if (method_exists($this->owner, 'getIncludeInDump')) {
            $fromOwner = $this->owner->getIncludeInDump();
            if (!empty($fromOwner) && is_array($fromOwner)) {
                $includeInDump = $fromOwner;
            }
        }

        $includeInDump[] = 'TagURLStr';

        return array_unique($includeInDump);
    }

    /**
     * Builds the CMS fields for tags, keywords and the regeneration switches.
     *
     * @return FieldList
     */
    protected function getTagFields()
    {
        $fields = new FieldList(
            LiteralField::create('BlockScrapeTitle', '<p>Block tag and meta keywords generation</p>'),
            SelectionGroup::create('BlockScrape', [
                new SelectionGroup_Item(
                    true,
                    [],
                    'Yes'
                ),
                new SelectionGroup_Item(
                    false,
                    [
                        new CheckboxField('ReGenerateTags', 'Regenerate tags on save'),
                        new CheckboxField('ReGenerateKeywords', 'Regenerate keywords on save'),
                        new CheckboxField('RestrictToKnownTags', 'Restrict to known terms when regenerating'),
                        new CheckboxField('TreatHashTagsAsKnownTags', 'Treat hash tags as known tags'),
                    ],
                    'No'
                ),
            ])->addExtraClass('field'),
            new TextField('MetaKeywords', 'Meta Keywords (comma separated)'),
            new TextField('Tags', 'Tags (comma separated)')
        );

        return $fields;
    }

    // need to get these to work properly

    /**
     * @return array the owner's Tags split on commas
     */
    public function getExplodedTags()
    {
        return static::explode_tags($this->owner->Tags);
    }

    /**
     * Stores tags on the owner; an array is joined with commas, anything else
     * is assigned unchanged.
     *
     * @param array|string $tags
     * @return void
     */
    public function setExplodedTags($tags)
    {
        $this->owner->Tags = is_array($tags)
            ? implode(',', array_map('trim', $tags))
            : $tags;
    }

    /**
     * @return string|null the owner's tags rendered as links, null when it has no tags
     * @throws Exception when no TagPage exists (raised by tags2Links())
     */
    public function getTagURLStr()
    {
        return $this->owner->Tags
            ? self::tags2Links($this->owner->Tags)
            : null;
    }

    /**
     * Extracts the distinct hashtags from a string, in order of first appearance.
     *
     * @param string $str text to scan
     * @return array list of hashtags including the leading '#'
     */
    public static function extract_hash_tags($str)
    {
        $matches = array();
        preg_match_all('/(#\w+)/u', (string) $str, $matches);

        // nothing matched (or the pattern could not be applied to the input)
        if (empty($matches[0])) {
            return array();
        }

        return array_values(array_unique($matches[0]));
    }

    /**
     * Converts a comma separated string of tags into an array of trimmed tags.
     * An array argument is returned untouched.
     *
     * @param array|string $tags
     * @return array
     */
    public static function explode_tags($tags)
    {
        if (is_array($tags)) {
            return $tags;
        }

        return array_map('trim', explode(',', (string) $tags));
    }

    /**
     * Cache proxy for DataObjectHelper::getExtendedClasses('Taggable').
     *
     * @return mixed whatever the helper returns (iterated as a list of class names by getTaggedWith())
     */
    protected static function extended_classes()
    {
        $key = 'extended_classes';

        // array_key_exists so that an empty result is memoised too
        if (!array_key_exists($key, static::$cache)) {
            static::$cache[$key] = DataObjectHelper::getExtendedClasses('Taggable');
        }

        return static::$cache[$key];
    }

    /**
     * Cache proxy for DataObjectHelper::getTableForClass().
     *
     * @param string $className
     * @return mixed whatever the helper returns (used as a table name by getTaggedWith())
     */
    protected static function table_for_class($className)
    {
        $key = 'table_for_class' . $className;

        if (!array_key_exists($key, static::$cache)) {
            static::$cache[$key] = DataObjectHelper::getTableForClass($className);
        }

        return static::$cache[$key];
    }

    /**
     * Cache proxy for DataObjectHelper::getExtensionTableForClassWithProperty().
     *
     * @param string $className
     * @param string $prop property name
     * @return mixed whatever the helper returns (used as a table name by getTaggedWith())
     */
    protected static function extension_table_for_class_with_property($className, $prop)
    {
        $key = 'extension_table_for_class_with_property' . $className . $prop;

        if (!array_key_exists($key, static::$cache)) {
            static::$cache[$key] = DataObjectHelper::getExtensionTableForClassWithProperty($className, $prop);
        }

        return static::$cache[$key];
    }

    /**
     * Cache proxy for the DataList of all Tag records.
     *
     * @return DataList
     */
    protected static function all_tags()
    {
        $tKey = 'full-tag-list';

        if (!isset(static::$cache[$tKey])) {
            static::$cache[$tKey] = new DataList('Tag');
        }

        return static::$cache[$tKey];
    }

    /**
     * Cache proxy for the titles of all Tag records.
     *
     * @return array list of Tag titles
     */
    protected static function all_tag_arr()
    {
        $tKey = 'full-tag-list-arr';

        if (!array_key_exists($tKey, static::$cache)) {
            $titles = array();
            foreach (static::all_tags() as $tag) {
                $titles[] = $tag->Title;
            }
            static::$cache[$tKey] = $titles;
        }

        return static::$cache[$tKey];
    }

    /**
     * Flattens an argument into a string made of [A-Za-z0-9_] only.
     *
     * The mapping is lossy (every other character becomes '_'), which is why
     * the lookup methods below build their cache keys with cache_key() instead.
     *
     * @param mixed $arg scalar or array of scalars
     * @return string
     */
    protected static function safe_args($arg)
    {
        if (is_array($arg)) {
            $arg = implode('_', $arg);
        }

        return preg_replace('/[^A-Za-z0-9]/', '_', (string) $arg);
    }

    /**
     * Builds a cache key that is unique for the given argument list.
     *
     * @param string $prefix usually the calling function name
     * @param array $args the arguments the result depends on
     * @return string
     */
    protected static function cache_key($prefix, array $args)
    {
        return $prefix . '_' . md5(serialize($args));
    }

    /**
     * Returns a DataList of $className filtered by tags.
     *
     * @param string $className the name of the class to get
     * @param array|string $tags list of tags or a comma separated string
     * @param string $where an additional SQL fragment appended to the where clause;
     *                      it may start with AND / OR, otherwise it is AND-ed
     * @param string $lookupMode 'AND' = tagged with all tags, 'OR' = tagged with at least one
     * @return DataList the data list containing the tagged content
     * @throws Exception when $lookupMode is neither 'AND' nor 'OR'
     */
    public static function tagged_with($className, $tags, $where = '', $lookupMode = 'OR')
    {
        // validate args
        if ($lookupMode != 'AND' && $lookupMode != 'OR') {
            throw new Exception('Invalid lookupMode supplied');
        }

        // hashed key: distinct arguments such as "ID > 5" / "ID < 5" must not share a slot
        $key = static::cache_key(__FUNCTION__, func_get_args());

        // cache hit?
        if (!isset(static::$cache[$key])) {

            // sanity check
            if (!is_array($tags)) {
                $tags = static::explode_tags($tags);
            }

            // build tag filter
            // NOTE(review): tags are SQL-escaped but not regex-escaped, so a tag
            // containing regex metacharacters changes the pattern - confirm intent.
            // NOTE(review): an empty $tags array yields "()" - confirm callers never pass one.
            $tagFilters = array();
            foreach ($tags as $tag) {
                $tagFilters[] = 'Tags REGEXP \'(^|,| )+' . Convert::raw2sql($tag) . '($|,| )+\'';
            }
            $tWhere = implode(' ' . $lookupMode . ' ', $tagFilters);

            // allow for AND / OR to be supplied in the $where
            $where = (string) $where;
            $firstWord = explode(' ', strtoupper(trim($where)))[0];
            if ($where && $firstWord != 'AND' && $firstWord != 'OR') {
                $where = 'AND (' . $where . ')';
            }

            // compile complete where and store this DataList for later
            $where = '(' . $tWhere . ') ' . $where;
            static::$cache[$key] = DataList::create($className)->where($where);
        }

        // return the cached value
        return static::$cache[$key];
    }

    /**
     * ye olde getTaggedWith method - hopefully superseded by the tagged_with method.
     *
     * Runs one UNION query over the tables of all Taggable classes and loads the
     * matching records one by one.
     *
     * @param array|string $tags list of tags or a comma separated string
     * @param string|null $filterSql extra SQL condition appended to the query
     * @param integer $start offset of the first row
     * @param integer $limit maximum number of rows
     * @param string $lookupMode if AND then you get content tagged with all provided tags,
     *                           if OR then you get content tagged with at least one of them
     * @return ArrayList the records, with ->unlimitedRowCount set to the total match count
     * @throws Exception when $lookupMode is neither 'AND' nor 'OR'
     */
    public static function getTaggedWith($tags, $filterSql = null, $start = 0, $limit = 40, $lookupMode = 'OR')
    {
        // hashed key, see tagged_with()
        $key = static::cache_key(__FUNCTION__, func_get_args());

        // cache hit?
        if (!isset(static::$cache[$key])) {

            // clean up input
            if (!is_array($tags)) {
                $tags = static::explode_tags($tags);
            }
            if ($lookupMode != 'AND' && $lookupMode != 'OR') {
                throw new Exception('Invalid lookupMode supplied');
            }

            // Set some vars
            $classes = static::extended_classes();
            $set = new ArrayList;
            $db = AbcDB::getInstance();
            $sql = '';
            $tables = $joins = $filter = $where = array();

            // Build Query Data
            foreach ($classes as $className) {

                // Fetch Class Data
                $table = static::table_for_class($className);
                $extTable = static::extension_table_for_class_with_property($className, 'Tags');

                // $tables we are working with
                if ($table) {
                    $tables[$table] = $table;
                }

                // join
                if ($table && $extTable && $table != $extTable) {
                    $joins[$table][] = $extTable;
                } elseif ($extTable) {
                    $tables[$extTable] = $extTable;
                }

                // Where
                if ($table) {
                    $where[$table][] = "LOWER(" . $table . ".ClassName) = '" . strtolower($className) . "'";
                }

                // Tag filter
                // Should be REGEX so we don't get partial matches
                if ($extTable) {
                    foreach ($tags as $tag) {
                        $filter[$table][] = $extTable . ".Tags REGEXP '(^|,| )+" . Convert::raw2sql($tag) . "($|,| )+'";
                    }
                }
            }

            // Build Query
            // NOTE(review): only tables that have a join are queried, so a class whose
            // Tags column lives on its own table is skipped - confirm this is intended.
            foreach ($tables as $table) {
                if (!array_key_exists($table, $joins)) {
                    continue;
                }

                // Prepare Where Statement
                $uWhere = array_unique($where[$table]);
                $uFilter = array_unique($filter[$table]);

                // this lookupMode injection will prob break something in AND mode
                $wSql = "(" . implode(' OR ', $uWhere) . ") AND (" . implode(' ' . $lookupMode . ' ', $uFilter) . ")";

                // Make the rest of the SQL; only the first SELECT carries SQL_CALC_FOUND_ROWS
                if ($sql) {
                    $sql .= "UNION ALL" . "\n\n";
                }
                $rowCountSQL = !$sql ? "SQL_CALC_FOUND_ROWS " : "";
                $sql .= "SELECT " . $rowCountSQL . $table . ".ClassName, " . $table . ".ID" . "\n";
                $sql .= "FROM " . $table . "\n";

                // join
                foreach (array_unique($joins[$table]) as $j) {
                    $sql .= " LEFT JOIN " . $j . " ON " . $table . ".ID = " . $j . ".ID" . "\n";
                }

                // Add the WHERE statement
                $sql .= "WHERE " . $wSql . "\n\n";
            }

            // nothing to select from: do not send a bare "LIMIT" to the database
            if ($sql === '') {
                $set->unlimitedRowCount = 0;
                static::$cache[$key] = $set;

                return $set;
            }

            // Add Global Filter to Query
            // NOTE(review): this is appended after the whole UNION, so with more than one
            // SELECT it presumably produces invalid SQL - verify against real callers.
            if ($filterSql) {
                $sql .= (count($tables) == 1 ? "AND " : "WHERE ") . $filterSql;
            }

            // Add Limits to Query (forced to non-negative integers, they go into the SQL verbatim)
            $sql .= " LIMIT " . max(0, (int) $start) . "," . max(0, (int) $limit);

            // Get Data
            $result = $db->query($sql);
            $rows = $result ? $result->fetchAll(PDO::FETCH_OBJ) : array();

            // read the total straight away: FOUND_ROWS() refers to the latest SELECT,
            // and the record loading below may issue further queries
            $set->unlimitedRowCount = $db->query('SELECT FOUND_ROWS() AS total')->fetch(PDO::FETCH_OBJ)->total;

            // Convert to DOs
            foreach ($rows as $entry) {
                $entry = (object) $entry;

                // loading each record by id is slower than populating a fresh
                // instance from the row, but more reliable (relations are available)
                $dO = DataObject::get_by_id($entry->ClassName, $entry->ID);

                // skip rows whose record could not be loaded
                if ($dO) {
                    $set->push($dO);
                }
            }

            static::$cache[$key] = $set;
        }

        return static::$cache[$key];
    }

    /**
     * Renders a comma separated tag string as links to the tags page.
     *
     * Each tag is URL-encoded inside the href (so "#tag" is not treated as a
     * fragment) and HTML-escaped in the link text. Empty entries are skipped.
     *
     * @param string $strTags comma separated tags
     * @return string the links joined with ', '
     * @throws Exception when there is no TagPage
     */
    public static function tags2Links($strTags)
    {
        // find the url of the tags page
        if (!$tagsPageURL = self::getTagPageLink()) {
            throw new Exception('There is no page of type TagPage in the site tree');
        }

        $links = array();
        foreach (explode(',', (string) $strTags) as $tag) {
            $tag = trim($tag);
            if ($tag === '') {
                continue;
            }
            $links[] = "<a href='" . $tagsPageURL . "tag/" . rawurlencode($tag) . "'>"
                . htmlspecialchars($tag, ENT_QUOTES, 'UTF-8')
                . "</a>";
        }

        return implode(', ', $links);
    }

    /**
     * Returns the link of the first TagPage, memoised for later calls.
     *
     * @return string|false false when no TagPage exists
     */
    public static function getTagPageLink()
    {
        if (!self::$tags_page_link) {
            if (!$tagsPage = DataObject::get_one('TagPage')) {
                return false;
            }
            self::$tags_page_link = $tagsPage->Link();
        }

        return self::$tags_page_link;
    }

    /**
     * @return mixed the owner's Link() when the owner defines that method, otherwise false
     */
    public function getAssociatedLink()
    {
        if (method_exists($this->owner, 'Link')) {
            return $this->owner->Link();
        }

        return false;
    }

    /**
     * Returns the result of the first of getAssociatedImage(), getAddImage()
     * or Image() that the owner defines.
     *
     * @return mixed false when the owner defines none of them
     */
    public function getAssociatedImage()
    {
        if (method_exists($this->owner, 'getAssociatedImage')) {
            return $this->owner->getAssociatedImage();
        }
        if (method_exists($this->owner, 'getAddImage')) {
            return $this->owner->getAddImage();
        }
        if (method_exists($this->owner, 'Image')) {
            return $this->owner->Image();
        }

        return false;
    }

    // onBeforeWrite
    // ----------------------------------------------------------------------------

    /**
     * Fills in Tags / MetaKeywords before the owner is written.
     *
     * - nothing happens when BlockScrape is set
     * - when there are no tags and no regeneration is requested, existing
     *   MetaKeywords are copied into Tags
     * - otherwise a tag list is generated (see generateTagList()) and written to
     *   Tags (when regenerating or empty) and MetaKeywords (when regenerating)
     * - empty MetaKeywords fall back to Tags, and both fields are lowercased
     *
     * @return void
     */
    public function onBeforeWrite()
    {
        // call the parent onBeforeWrite
        parent::onBeforeWrite();

        $owner = $this->owner;

        // do nothing if block scrape is set
        if ($owner->BlockScrape) {
            return;
        }

        // add some tags if there are none or we are forcing a refresh
        if (!$owner->Tags || $owner->ReGenerateTags || $owner->ReGenerateKeywords) {

            if (!empty($owner->MetaKeywords) && !$owner->ReGenerateTags && !$owner->ReGenerateKeywords) {
                // there are keywords, no tags and no forced refresh: use the keywords
                $owner->Tags = $owner->MetaKeywords;
            } else {
                // there were no meta keywords or we are forcing a refresh
                $tags = implode(', ', $this->generateTagList());

                // update tags if there are none or we are doing a forced update
                if ($owner->ReGenerateTags || !$owner->Tags) {
                    $owner->Tags = $tags;
                }

                // keywords are only overwritten on a forced update;
                // empty keywords are reconciled with the tags just below
                if ($owner->ReGenerateKeywords) {
                    $owner->MetaKeywords = $tags;
                }
            }
        }

        // add meta keywords if there are none
        if (!$owner->MetaKeywords && $owner->Tags) {
            $owner->MetaKeywords = $owner->Tags;
        }

        // lowercase
        $owner->Tags = strtolower((string) $owner->Tags);
        $owner->MetaKeywords = strtolower((string) $owner->MetaKeywords);
    }

    /**
     * Generates the tag list for the owner from its Title and Content.
     *
     * With RestrictToKnownTags only titles of existing Tag records that occur in
     * the text are candidates; otherwise every word of the text is. Occurrences
     * in the Title count three times. The 15 heaviest candidates longer than 3
     * bytes are kept, and hashtags are appended unless tags are restricted and
     * TreatHashTagsAsKnownTags is off.
     *
     * @return array list of unique lowercase tags
     */
    protected function generateTagList()
    {
        $owner = $this->owner;
        $exclude = static::get_blacklisted_words();
        $title = (string) $owner->Title;
        $content = (string) $owner->Content;
        $words = array();

        if ($owner->RestrictToKnownTags) {
            // look at the existing tags; strip the markup once, not once per tag
            $plainTitle = strip_tags($title);
            $plainContent = strip_tags($content);

            foreach (static::all_tag_arr() as $tag) {
                $tag = (string) $tag;
                if ($tag === '') {
                    continue;
                }

                // title weighting x3
                if (stripos($plainTitle, $tag) !== false) {
                    $words[] = $tag;
                    $words[] = $tag;
                    $words[] = $tag;
                }

                // add the content
                if (stripos($plainContent, $tag) !== false) {
                    $words[] = $tag;
                }
            }
        } else {
            // analyse the text, title weighting x3
            $titlePieces = static::split_words($title);
            $words = array_merge($titlePieces, $titlePieces, $titlePieces, static::split_words($content));
        }

        // generate weightings
        $parsed = array();
        foreach ($words as $word) {
            $word = strtolower(trim(html_entity_decode(strval($word))));
            $word = trim($word, ',.!?');

            if ($word && !in_array($word, $exclude, true) && substr($word, 0, 1) != '&' && strlen($word) > 3) {
                $parsed[$word] = isset($parsed[$word]) ? $parsed[$word] + 1 : 1;
            }
        }

        // sort by weight and extract the top 15
        // (array_slice() renumbers integer keys, so all-digit words come out as
        // short indexes and are rejected by the length check below)
        arsort($parsed);
        $sample = array_keys(array_slice($parsed, 0, 15));

        // check again
        $dChecked = array();
        foreach ($sample as $value) {
            $value = strval($value);
            if (!empty($value) && strlen($value) > 3) {
                $dChecked[] = $value;
            }
        }

        // append any hashtags
        if (!$owner->RestrictToKnownTags || $owner->TreatHashTagsAsKnownTags) {
            // scan the plain text only: markup such as href="#top", colour codes or
            // numeric entities like &#8217; would otherwise be taken for hashtags
            $plain = html_entity_decode(
                strip_tags(str_replace('<', ' <', $title . ' ' . $content)),
                ENT_QUOTES,
                'UTF-8'
            );
            $hashTags = array_map('strtolower', static::extract_hash_tags($plain));

            // a hashtag may already be in the list as an ordinary word
            $dChecked = array_values(array_unique(array_merge($dChecked, $hashTags)));
        }

        return $dChecked;
    }

    /**
     * Splits HTML into its whitespace separated words.
     *
     * @param string $html
     * @return array list of non-empty words
     */
    protected static function split_words($html)
    {
        // put a space in front of every tag so that text from adjacent elements
        // ("<p>foo</p><p>bar</p>") is not glued together once the tags are gone
        $text = strip_tags(str_replace('<', ' <', (string) $html));

        return preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
    }
}