/**
 * Download an RSS feed and save one record per feed item.
 *
 * For every <item> in the feed, the text of its <guid> element is taken,
 * every occurrence of "TEXT" in it is replaced by "DATA", and the result is
 * saved through scraperwiki::save() together with the sector and the current
 * timestamp. Records are keyed on ('sector', 'url').
 *
 * Progress and memory usage are printed as the function runs. If the feed
 * cannot be downloaded, the cURL error is printed and nothing is saved.
 *
 * @param string $url    Address of the RSS feed to download.
 * @param string $sector Label stored with each record and printed in the log.
 *
 * @return void
 */
function scrapeTEDRSS($url, $sector)
{
    print $url . " " . $sector . "\n";

    $curl = curl_init($url);
    if ($curl === false) {
        // Without a handle there is nothing to fetch or to ask for an error.
        print "curl_init failed\n";
        return;
    }

    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
    // NOTE(review): peer certificate verification is switched off, so the
    // feed host is not authenticated. Kept as-is to preserve behaviour;
    // enable it once a CA bundle is known to be available.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    // Abort the whole transfer after 20 seconds.
    // Further knobs that could be tried if transfers still hang:
    // CURLOPT_CONNECTTIMEOUT (seconds allowed for connecting) and
    // CURLOPT_LOW_SPEED_LIMIT / CURLOPT_LOW_SPEED_TIME (abort after a
    // period below a given transfer speed).
    curl_setopt($curl, CURLOPT_TIMEOUT, 20);

    $xml = curl_exec($curl);
    // Printed unconditionally (an empty line on success) to keep the log format.
    print curl_error($curl) . "\n";
    // Release the handle now instead of holding it through the slow loop below.
    curl_close($curl);
    unset($curl);

    if ($xml === false) {
        // Transfer failed: the error was printed above and there is nothing to parse.
        return;
    }

    $dom = new simple_html_dom();
    $dom->load($xml);
    $items = $dom->find("item");

    foreach ($items as $item) {
        $guid = $item->find("guid");
        if (empty($guid[0])) {
            // An item without a <guid> has no notice URL; skip it rather
            // than saving a record with an empty url.
            continue;
        }

        $noticeURL = str_replace("TEXT", "DATA", $guid[0]->plaintext);
        // This figure divides by 10^6 while the final one divides by 2^20;
        // both are labelled "MB". Left unchanged to keep the log output stable.
        print $noticeURL . " " . $sector . " " . memory_get_usage() / 1000000 . "MB";
        echo "\n";

        $record = array('time' => microtime(true), 'sector' => $sector, 'url' => $noticeURL);
        scraperwiki::save(array('sector', 'url'), $record);
        // Pause one second between items.
        sleep(1);
    }

    // Explicitly run the parser's destructor before dropping the references;
    // presumably this lets it free its node tree early — confirm against the
    // simple_html_dom version in use.
    $dom->__destruct();
    unset($items);
    unset($dom);
    unset($xml);
    print memory_get_usage() / 1024 / 1024 . "MB\n";
}