Source of file ReviewInternalLinks.php
Size: 8,900 Bytes - Last Modified: 2021-12-23T10:42:12+00:00
/var/www/docs.ssmods.com/process/src/src/Tasks/ReviewInternalLinks.php
<?php

namespace Sunnysideup\MigrateData\Tasks;

use SilverStripe\CMS\Model\SiteTree;
use SilverStripe\Core\Environment;
use SilverStripe\ORM\DataList;
use SilverStripe\ORM\DB;

/**
 * Lists the links (<a href>) and images (<img src>) found in page content so
 * that imported content can be reviewed, and rewrites known-bad legacy asset
 * paths directly in the database while doing so.
 *
 * Request variables read by run():
 *  - ids:  comma separated list of SiteTree IDs to review (takes precedence);
 *  - type: "a" or "img" to restrict the review to one kind of tag;
 *  - page: a page number (batches of 500 records ordered by ID) or "all"
 *          (the first 5000 records ordered by ID). Without it a random
 *          sample is reviewed.
 *
 * Output is echoed as an HTML table: one row per reviewed record followed by
 * a summary of all links and of all replacements made.
 */
class ReviewInternalLinks extends MigrateDataTaskBase
{
    protected $title = 'Review Internal Links';

    protected $description = 'Goes through all the imported content and reviews internal links';

    /** @var int|string falsy = review both tags, otherwise "a" or "img" */
    protected $type = 0;

    /** @var int running number of records printed so far */
    protected $count = 0;

    /** @var int number of records loaded per query in page mode */
    protected $step = 10;

    /** @var int not read anywhere in this class */
    protected $filter = 10;

    /** @var array map of URL => ['count' => int, 'type' => 'A'|'IMG'] */
    protected $allLinks = [];

    /** @var array list of ['from' => string, 'to' => string, 'result' => string] */
    protected $replacements = [];

    /** @var string[] names of the fields whose HTML is inspected */
    protected $fieldsToTest = [
        'Content',
    ];

    /**
     * Optional ORM filter applied to the reviewed records (read through the
     * config API in run()).
     *
     * NOTE(review): SilverStripe's config API normally only picks up
     * `private static` properties - confirm whether this default is actually
     * reachable or whether the value is expected to come from YAML only.
     *
     * @var array
     */
    protected static $filtered_class_names = [];

    /**
     * Echoes the review table for the records selected by the request.
     *
     * @param \SilverStripe\Control\HTTPRequest $request
     */
    public function run($request)
    {
        Environment::increaseTimeLimitTo();
        Environment::increaseMemoryLimitTo();

        $tableHTML = '
            <table>
                <thead>
                    <tr>
                        <th>Count</th>
                        <th>Links</th>
                        <th>Title</th>
                        <th>Content</th>
                    </tr>
                </thead>
                <tbody>
        ';

        $requestedType = $request->getVar('type');
        if ($requestedType && is_string($requestedType)) {
            $this->type = $requestedType;
        }

        $rawIds = $request->getVar('ids');
        if ($rawIds) {
            echo $tableHTML;
            $ids = $this->parseRequestedIds($rawIds);
            // Filtering against an empty set is pointless (and rejected by some
            // ORM versions), so only query when at least one valid ID is left.
            if ([] !== $ids) {
                $objects = SiteTree::get()->sort('ID', 'ASC')->filter(['ID' => $ids]);
                foreach ($objects as $object) {
                    $this->printFields($object);
                }
            }
        } else {
            $randomSampleSize = 50;
            $page = $request->getVar('page');
            $start = 0;
            if ('all' === $page) {
                $random = false;
                $limit = 5000;
                $step = 10;
                $heading = '';
            } elseif ($page) {
                // Clamp to 1 so that "0" or garbage never yields a negative offset.
                $pageNumber = max(1, (int) $page);
                $random = false;
                $limit = 500;
                $step = max(1, (int) $this->step);
                $start = $limit * ($pageNumber - 1);
                $heading = '<h1>Page: ' . $pageNumber . '</h1>';
            } else {
                // One single batch holding the whole random sample.
                $random = true;
                $limit = $randomSampleSize;
                $step = $randomSampleSize;
                $heading = '<h1>Random Selection</h1>';
            }

            echo '
                By default ' . $randomSampleSize . ' random pages are loaded.
            ';
            // The heading is printed before the table: a heading inside <tbody> is invalid HTML.
            echo $heading;
            echo $tableHTML;

            $filter = $this->config()->get('filtered_class_names');
            for ($offset = 0; $offset < $limit; $offset += $step) {
                $objects = SiteTree::get();
                if (! empty($filter)) {
                    $objects = $objects->filter($filter);
                }
                if ($random) {
                    $objects = $objects->sort(DB::get_conn()->random())->limit($step);
                } else {
                    $objects = $objects->sort('ID', 'ASC')->limit($step, $offset + $start);
                }

                $found = 0;
                foreach ($objects as $object) {
                    $this->printFields($object);
                    ++$found;
                }
                if ($found < $step) {
                    // A short batch means there is nothing left to load.
                    break;
                }
            }
        }

        ksort($this->allLinks);
        $linksAll = [];
        foreach ($this->allLinks as $url => $details) {
            $linksAll[] = $this->escapeForReport($url) . ' | ' . $details['count'];
        }
        echo '
                    <tr>
                        <th>---</th>
                        <th>---</th>
                        <th>Full List of Links</th>
                        <th>
                            <ul>
                                <li>
                                ' . implode('</li><li>', $linksAll) . '
                                </li>
                            </ul>
                        </th>
                    </tr>
        ';

        $replacementsAll = [];
        foreach ($this->replacements as $details) {
            $replacementsAll[] = 'FR: ' . $this->escapeForReport($details['from']) .
                '<br />TO: ' . $this->escapeForReport($details['to']) .
                '<br />RS: ' . $this->escapeForReport($details['result']) .
                '<br /><br />';
        }
        echo '
                    <tr>
                        <th>---</th>
                        <th>---</th>
                        <th>Full List of Replacements</th>
                        <th>
                            <ul>
                                <li>
                                ' . implode('</li><li>', $replacementsAll) . '
                                </li>
                            </ul>
                        </th>
                    </tr>
        ';
        echo '</tbody></table>';
    }

    /**
     * Echoes one table row for the given record, listing every link / image
     * URL found in the fields named in $fieldsToTest, and adds those URLs to
     * the overall tally in $allLinks.
     *
     * @param SiteTree $object record to review (needs CMSEditLink(), Link(), Title and ID)
     */
    public function printFields($object)
    {
        ++$this->count;
        $links = [];
        $tagsToReview = [
            'a' => 'href',
            'img' => 'src',
        ];
        foreach ($this->fieldsToTest as $field) {
            if (empty($object->{$field})) {
                continue;
            }
            $dom = $this->loadHtmlDocument((string) $object->{$field});
            if (null === $dom) {
                $links[] = 'Error 2 in ' . $field;

                continue;
            }
            foreach ($tagsToReview as $tagName => $attribute) {
                if ($this->type && $tagName !== strtolower((string) $this->type)) {
                    continue;
                }
                // array_replace keeps the URL keys, later finds overwrite earlier ones.
                $links = array_replace(
                    $links,
                    $this->collectTagLinks($dom, $tagName, $attribute, $object, $field)
                );
            }
        }

        $items = [];
        foreach ($links as $link) {
            $items[] = $this->escapeForReport($link);
        }

        echo '<tr>
            <td>' . $this->count . '</td>
            <td>
                <h6><a href="' . $this->escapeForReport($object->CMSEditLink()) . '">CMS</a></h6>
                <h6><a href="' . $this->escapeForReport($object->Link()) . '">Site</a></h6>
            </td>
            <td>
                ' . $this->escapeForReport($object->Title) . '
            </td>
            <td>
                <ul>
                    <li>
                    ' . implode('</li><li>', $items) . '
                    </li>
                </ul>
            </td>
        </tr>';
    }

    /**
     * Intentionally empty: all work of this task happens in run().
     */
    protected function performMigration()
    {
    }

    /**
     * Turns the raw "ids" request value into a list of unique positive integers.
     *
     * @param mixed $raw comma separated string or array of IDs
     *
     * @return int[]
     */
    private function parseRequestedIds($raw): array
    {
        $parts = is_array($raw) ? $raw : explode(',', (string) $raw);
        $ids = [];
        foreach ($parts as $part) {
            if (! is_scalar($part)) {
                continue;
            }
            $id = (int) $part;
            if ($id > 0) {
                $ids[$id] = $id;
            }
        }

        return array_values($ids);
    }

    /**
     * Escapes a value for output inside the HTML report.
     *
     * @param mixed $value
     */
    private function escapeForReport($value): string
    {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }

    /**
     * Parses an HTML fragment into a DOMDocument.
     *
     * @return null|\DOMDocument null when the fragment could not be parsed
     */
    private function loadHtmlDocument(string $html)
    {
        $dom = new \DOMDocument();
        // Collect libxml warnings internally (real-world HTML is rarely valid)
        // instead of silencing everything with @.
        $previous = libxml_use_internal_errors(true);
        // Non-ASCII characters are turned into numeric entities so that
        // loadHTML does not misread UTF-8 input as ISO-8859-1.
        $encoded = mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
        $loaded = $dom->loadHTML($encoded, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        return $loaded ? $dom : null;
    }

    /**
     * Collects the URLs held in $attribute of every <$tagName> element,
     * tallies them in $allLinks and runs the legacy-path clean-up on each.
     *
     * @param SiteTree $object record the HTML belongs to
     *
     * @return array map of URL => "URL | TAG | field" description
     */
    private function collectTagLinks(\DOMDocument $dom, string $tagName, string $attribute, $object, string $field): array
    {
        $label = strtoupper($tagName);
        $found = [];
        foreach ($dom->getElementsByTagName($tagName) as $node) {
            $url = $node->getAttribute($attribute);
            $this->cleanupLittleMistake($object, $field, $url);
            $found[$url] = $url . ' | ' . $label . ' | ' . $field;
            if (! isset($this->allLinks[$url])) {
                $this->allLinks[$url] = [
                    'count' => 0,
                    'type' => $label,
                ];
            }
            ++$this->allLinks[$url]['count'];
        }

        return $found;
    }

    /**
     * Replaces a legacy "assets/oldsite/..." URL with its "images/..."
     * equivalent in the stored field of the given record (draft, live and
     * versions tables) and logs the change in $replacements.
     *
     * @param SiteTree $object record whose stored field is rewritten
     * @param string   $field  field containing the URL
     * @param string   $url    URL as found in the field
     */
    private function cleanupLittleMistake($object, string $field, string $url)
    {
        // Only fields with a known base table can be rewritten.
        $tablesByField = [
            'Content' => 'SiteTree',
        ];
        if (! isset($tablesByField[$field])) {
            return;
        }
        $table = $tablesByField[$field];

        // Most specific needle first.
        $oldNeedles = [
            'assets/oldsite/assets/' => 'images/assets/',
            'assets/oldsite/' => 'images/',
        ];
        // In the versions table the record is identified by RecordID; ID is the
        // identifier of the version row itself.
        $idColumnByStage = [
            '' => 'ID',
            '_Live' => 'ID',
            '_Versions' => 'RecordID',
        ];

        foreach ($oldNeedles as $oldNeedle => $newNeedle) {
            if (false === strpos($url, $oldNeedle)) {
                continue;
            }
            $replacementURL = str_replace($oldNeedle, $newNeedle, $url);
            foreach ($idColumnByStage as $stage => $idColumn) {
                // Values are bound as parameters so that quotes in a URL cannot break the SQL.
                DB::prepared_query(
                    'UPDATE "' . $table . $stage . '"
                    SET "' . $field . '" = REPLACE("' . $field . '", ?, ?)
                    WHERE "' . $idColumn . '" = ?',
                    [$url, $replacementURL, (int) $object->ID]
                );
            }

            // Fresh lookup, so the logged result reflects the update just made.
            $updated = SiteTree::get()->byID($object->ID);
            $this->replacements[] = [
                'from' => $url,
                'to' => $replacementURL,
                'result' => $updated ? $updated->{$field} : '',
            ];

            // Both needles yield the same replacement for a matching URL, so
            // one pass is enough and avoids duplicate log entries.
            break;
        }
    }
}