Source of file class_WikiRetriever.php
Size: 3,898 Bytes - Last Modified: 2021-12-23T10:07:55+00:00
/var/www/docs.ssmods.com/process/src/thirdparty/wikiparser-1.0/class_WikiRetriever.php
<?php
/* WikiRetriever
 * Version 1.0
 * Copyright 2005, Steve Blinch
 * http://code.blitzaffe.com
 *
 * This class fetches one or more documents from Wikipedia (or any other MediaWiki wiki),
 * translates them into HTML, and returns them.
 *
 *
 * EXAMPLE
 *
 * //
 * // Simple WikiRetriever usage example
 * //
 * require_once('class_WikiRetriever.php');
 *
 * // The URL is used verbatim as a prefix: "Special:Export" is appended to it
 * // without inserting any separator, so it must already end with one.
 * $url = "http://www.yourwikisite.com/wiki/";
 * $titles = array("First_article","Second_article");
 * $wiki = new WikiRetriever();
 * if (!$wiki->retrieve($url,$titles)) {
 *     die("Error: $wiki->error");
 * } else {
 *     var_dump($wiki->pages);
 * }
 *
 *
 * LICENSE
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 *
 */

// The constructor instantiates HTTPRetriever. Load the libcurlemu copy only when the
// class has not been defined yet and the file can be resolved, so that neither an
// already-loaded class nor an absent file aborts the include.
// NOTE(review): confirm where HTTPRetriever is expected to come from in this deployment.
if (!class_exists('HTTPRetriever', false)
    && stream_resolve_include_path("libcurlemu/class_HTTPRetriever.php") !== false) {
    require_once("libcurlemu/class_HTTPRetriever.php");
}
require_once("class_XMLParser.php");
//require_once(dirname(__FILE__)."/class_WikiWrapper.php");
require_once(dirname(__FILE__)."/class_WikiParser.php");

/**
 * Fetches pages through a MediaWiki "Special:Export" endpoint and converts their
 * wiki markup to HTML with WikiParser.
 *
 * After a successful retrieve() the results are available in $pages; after a failed
 * one the reason is available in $error.
 */
class WikiRetriever
{
    /**
     * Depth at which following redirects stops and the page HTML is replaced by
     * the text "Redirection limit exceeded".
     */
    const MAX_REDIRECT_DEPTH = 10;

    /** @var HTTPRetriever HTTP client used to POST to Special:Export. */
    public $http;

    /** @var XMLParser Parser whose xmlize() turns the export XML into nested arrays. */
    public $xml;

    /** @var string|null URL prefix given to the last retrieve() call. */
    public $wikiurl;

    /** @var mixed Raw result of XMLParser::xmlize() for the last parsed response. */
    public $xmldata;

    /**
     * @var array List of pages from the last parsed response; each entry is an array
     *            with the keys 'title', 'timestamp', 'text' and 'html'.
     */
    public $pages = array();

    /** @var string|null Message describing the last HTTP failure, if any. */
    public $error;

    /** @var int Redirect depth at which this instance is operating (0 = top level). */
    public $_recursionlevel = 0;

    /**
     * Creates the HTTP client (with its "caching" setting at 3600) and the XML parser.
     */
    public function __construct()
    {
        $this->http = new HTTPRetriever();
        $this->http->caching = 3600;

        $this->xml = new XMLParser();
    }

    /**
     * Exports the given titles from the wiki and parses the response into $pages.
     *
     * @param string       $wikiurl URL prefix; "Special:Export" is appended verbatim.
     * @param string|array $titles  One title or a list of titles.
     * @param int          $level   Redirect depth of this call; callers normally omit it.
     *
     * @return bool True when the HTTP request succeeded, false otherwise (see $error).
     */
    public function retrieve($wikiurl, $titles, $level = 0)
    {
        $this->_recursionlevel = $level;
        $this->wikiurl = $wikiurl;

        // Left disabled on purpose: presumably so that prefixes which must not end in
        // "/" (for example ".../index.php?title=") keep working -- verify before enabling.
        //if (substr($wikiurl,-1)!='/') $wikiurl .= '/';

        if (!is_array($titles)) {
            $titles = array($titles);
        }

        $values = array(
            'action'  => 'submit',
            'pages'   => implode("\r\n", $titles),
            'curonly' => 'true',
        );

        $query = $this->http->make_query_string($values);
        if (!$this->http->post($wikiurl . "Special:Export", $query)) {
            $this->error = "HTTP request error: #{$this->http->result_code}: {$this->http->result_text}";
            return false;
        }

        $this->parse_xml($this->http->response);
        return true;
    }

    /**
     * Retrieves the pages named by a newline-separated list of article URLs.
     *
     * The title of each URL is whatever follows its last "/"; a line without any "/"
     * is taken as a title as-is. Blank lines and URLs ending in "/" are skipped.
     *
     * @param string $wikiurl URL prefix, passed through to retrieve().
     * @param string $urls    Article URLs, one per line.
     *
     * @return bool Result of retrieve().
     */
    public function retrieve_urls($wikiurl, $urls)
    {
        $titles = array();

        foreach (explode("\n", $urls) as $url) {
            $url = trim($url);
            if ($url === '') {
                continue;
            }

            // strrpos() yields false when there is no "/"; using that in arithmetic
            // would silently cut off the first character of the title.
            $slash = strrpos($url, '/');
            $title = ($slash === false) ? $url : (string) substr($url, $slash + 1);

            if ($title !== '') {
                $titles[] = $title;
            }
        }

        return $this->retrieve($wikiurl, $titles);
    }

    /**
     * Parses a Special:Export XML document into $pages.
     *
     * A document that contains no mediawiki/page elements leaves $pages empty.
     *
     * @param string $xml Export XML as returned by the wiki.
     *
     * @return bool Always true.
     */
    public function parse_xml($xml)
    {
        $this->xmldata = $this->xml->xmlize($xml);
        $this->pages = array();

        // An error page or an export without matches has no page list to iterate.
        if (!isset($this->xmldata['mediawiki']['#']['page'])
            || !is_array($this->xmldata['mediawiki']['#']['page'])) {
            return true;
        }

        foreach ($this->xmldata['mediawiki']['#']['page'] as $page) {
            $node = (isset($page['#']) && is_array($page['#'])) ? $page['#'] : array();

            $pagedata = array(
                'title' => isset($node['title'][0]['#'])
                    ? $node['title'][0]['#']
                    : null,
                'timestamp' => isset($node['revision'][0]['#']['timestamp'][0]['#'])
                    ? $node['revision'][0]['#']['timestamp'][0]['#']
                    : null,
                'text' => isset($node['revision'][0]['#']['text'][0]['#'])
                    ? $node['revision'][0]['#']['text'][0]['#']
                    : '',
            );
            $pagedata['html'] = $this->wiki_html($pagedata['text'], $pagedata['title']);

            $this->pages[] = $pagedata;
        }

        return true;
    }

    /**
     * Converts wiki markup to HTML, following a redirect reported by the parser.
     *
     * When the parser reports a redirect, the target is fetched with a fresh
     * WikiRetriever one level deeper and the HTML of its first page is returned.
     * The depth is tracked per redirect chain, so several redirecting pages in one
     * export do not count against each other.
     *
     * @param string $text  Wiki markup of the page.
     * @param string $title Title of the page.
     *
     * @return string|null HTML for the page, the text "Redirection limit exceeded"
     *                     once MAX_REDIRECT_DEPTH is reached, or null when the
     *                     redirect target could not be retrieved.
     */
    public function wiki_html($text, $title)
    {
        $parser = new WikiParser();
        $output = $parser->parse($text, $title);

        if (!$parser->redirect) {
            return $output;
        }

        // handle redirection
        $nextLevel = $this->_recursionlevel + 1;
        if ($nextLevel >= self::MAX_REDIRECT_DEPTH) {
            return "Redirection limit exceeded";
        }

        $sub = new WikiRetriever();
        $sub->retrieve($this->wikiurl, $parser->redirect, $nextLevel);

        return isset($sub->pages[0]['html']) ? $sub->pages[0]['html'] : null;

        /*
        //$text = preg_replace('/\[\[([^\|\]]*?\|)?([^\]]+?)\]\]/i','\\2',$text);

        $parser = new Parser();
        $options = new WikiWrapperOptions();
        $title = new Title();
        $content = $parser->parse($text,$title,$options);
        return $content->mText;
        */
    }
}