Web Page Extractor
Example showing how to extract data from a Yahoo search page.
PHP Source:
<?php
/**
* Accessing a Remote HTML Page Example
*/
require_once "raxan/pdi/autostart.php";
//Raxan::config('debug',true);
class WebPageExtractor extends RaxanWebPage {
protected $query = 'pre vs iphone';
protected $url = 'http://search.yahoo.com/search?p=';
function _init() {
$this->title('Web Page Extractor');
$this->loadCSS('master');
$this->content('<div class="container c30 prepend-top"><form method="post">'.
'<h2 class="bottom">Web Page Extractor</h2>'.
'<p>Extract search results from a yahoo search page</p>'.
'<label>Web Search:</label><br />'.
'<input type="text" name="query" value="'.$this->query.'" /> '.
'<input type="submit" value="Search" />'.
'</form><div id="result" class="pad"></div></div>'
);
$this->preserveFormContent = true;
}
function _load() {
$q = $this->clientRequest()->text('query');
if ($q) $this->query = $q;
}
function _prerender(){
// load remote html page
$url = $this->url.urlencode($this->query);
$search = new RaxanWebPage($url);
// find the search titles (h3)
$titles = $search['h3']; $html = '';
foreach($titles->get() as $node){
$html.= P($node)->html()."<br />\n"; //
}
$this['#result']->html($html);
}
}
?>