/**
 * Builds the crawl request sets, one per usable entry in $this->_sitemaps.
 *
 * For every configured sitemap the XML is loaded either over HTTP (when the
 * entry has a 'sitemap_url') or from disk (when it has a 'relative_path',
 * tried first below the Magento root folder and then as given; in the latter
 * case only a ".txt" URL list is accepted and converted via convertTxtToXml()).
 * The <loc> value of every child element is collected and turned into request
 * batches according to the replace strategy:
 *
 *  - 3: 'sitemap', 'compare-url' and 'current-url' options are all set; each
 *       batch holds the URL rewritten by replaceUrl() for 'current-url' and
 *       for 'compare-url'.
 *  - 1: batch holds the URL rewritten by replaceUrlByParse() with the
 *       sitemap's 'base_url'.
 *  - 2: batch holds that rewritten URL followed by the original URL.
 *  - anything else: batch holds only the original URL.
 *
 * Strategies 1 and 2 (or any other value) come from askReplaceOrCompare(),
 * which is only consulted when the sitemap has a 'base_url' that
 * matchUrls() reports as not matching the first sitemap URL.
 *
 * Sitemaps that cannot be loaded, cannot be parsed or contain no URL are
 * reported on $output and left out of the result, so every returned set has
 * a non-empty 'requests' list.
 *
 * @param InputInterface  $input  Passed through to askReplaceOrCompare().
 * @param OutputInterface $output Receives error messages for skipped sitemaps.
 *
 * @return array List of sets, each array('metadata' => <sitemap config>,
 *               'requests' => <list of batches>); a batch is a list of one
 *               or two URLs.
 */
protected function prepareRequests(InputInterface $input, OutputInterface $output)
{
    $requestSetCollection = array();

    foreach ($this->_sitemaps as $sitemap) {
        $xml = null;

        if (isset($sitemap['sitemap_url'])) {
            // Sitemap is fetched over HTTP.
            $sitemapUrl = $sitemap['sitemap_url'];

            if (!$this->validateUrl($sitemapUrl)) {
                $output->writeln('<error>The URL: ' . $sitemapUrl . ' is not valid.</error>');
                continue;
            }

            try {
                $curl = $this->getCurl();
                $curl->get($sitemapUrl);
                $statusCode = (int) $curl->http_status_code;
                $responseBody = $curl->response;
            } catch (\Exception $e) {
                $output->writeln('<error>An error occured while getting the sitemap: ' . $e->getMessage() . '</error>');
                continue;
            }

            // A non-200 answer used to be swallowed silently and produced an empty set.
            if ($statusCode !== 200) {
                $output->writeln(
                    '<error>Could not get the sitemap ' . $sitemapUrl . ' (HTTP status ' . $statusCode . ').</error>'
                );
                continue;
            }

            try {
                $xml = new \Varien_Simplexml_Element($responseBody);
            } catch (\Exception $e) {
                $output->writeln('<error>' . $e->getMessage() . ' ' . $sitemapUrl . '</error>');
                continue;
            }
        } elseif (isset($sitemap['relative_path'])) {
            // Sitemap is read from disk.
            $relativePath = $sitemap['relative_path'];
            $magentoPath = $this->_magentoRootFolder . $relativePath;

            try {
                if (file_exists($magentoPath)) {
                    $xml = new \Varien_Simplexml_Element(file_get_contents($magentoPath));
                } elseif (file_exists($relativePath)) {
                    // Path outside the Magento root: only a plain txt list of URLs is
                    // supported, converted to the sitemap XML structure (hypernode internal).
                    // pathinfo() avoids passing explode()'s result by reference to end().
                    if (pathinfo($relativePath, PATHINFO_EXTENSION) !== 'txt') {
                        $output->writeln('<error>Only a txt url list is currently supported for absolute paths.</error>');
                        continue;
                    }
                    $xml = new \Varien_Simplexml_Element(
                        $this->convertTxtToXml(file_get_contents($relativePath))
                    );
                } else {
                    $output->writeln('<error>The sitemap file ' . $relativePath . ' could not be found.</error>');
                    continue;
                }
            } catch (\Exception $e) {
                $output->writeln('<error>' . $e->getMessage() . ' ' . $relativePath . '</error>');
                continue;
            }
        } else {
            $output->writeln('<error>The sitemap has neither a sitemap_url nor a relative_path.</error>');
            continue;
        }

        // Collect the URLs as plain strings; entries without a usable <loc> are dropped.
        $urls = array();
        foreach ($xml->children() as $child) {
            $loc = trim((string) $child->loc);
            if ($loc !== '') {
                $urls[] = $loc;
            }
        }

        if (!$urls) {
            // Nothing to crawl; also protects the $urls[0] lookup below.
            $output->writeln('<error>The sitemap does not contain any URLs.</error>');
            continue;
        }

        // Finding out which replace strategy to use.
        $baseUrl = isset($sitemap['base_url']) ? $sitemap['base_url'] : null;
        $replace = false;
        if (!empty($this->_options['sitemap'])
            && !empty($this->_options['compare-url'])
            && !empty($this->_options['current-url'])
        ) {
            // Replace and compare.
            $replace = 3;
        } elseif ($baseUrl) {
            $match = $this->matchUrls($baseUrl, $urls[0]);
            if (!$match['status']) {
                $replace = $this->askReplaceOrCompare($input, $output, $baseUrl, $urls[0]);
            }
        }

        $limit = isset($this->_options['limit']) ? $this->_options['limit'] : null;

        $requestSet = array(
            'metadata' => $sitemap,
            'requests' => array(),
        );

        foreach ($urls as $url) {
            // switch compares loosely, exactly like the former "==" chain.
            switch ($replace) {
                case 1:
                    $requestBatch = array(
                        $this->replaceUrlByParse($url, $baseUrl),
                    );
                    break;
                case 2:
                    // Side by side: rewritten URL (left), original URL (right).
                    $requestBatch = array(
                        $this->replaceUrlByParse($url, $baseUrl),
                        $url,
                    );
                    break;
                case 3:
                    $requestBatch = array(
                        $this->replaceUrl($url, $this->_options['current-url']),
                        $this->replaceUrl($url, $this->_options['compare-url']),
                    );
                    break;
                default:
                    // No replace, just crawl.
                    $requestBatch = array($url);
                    break;
            }

            $requestSet['requests'][] = $requestBatch;

            // Stop once the optional batch limit has been reached.
            if ($limit && count($requestSet['requests']) >= $limit) {
                break;
            }
        }

        $requestSetCollection[] = $requestSet;
    }

    return $requestSetCollection;
}