/**
 * Render the SEO metrics report for the page named by the "url" request param.
 *
 * Fetches the page and parses it into a DOM, then gathers: robots.txt /
 * sitemap presence, the HTTP status of the https:// variant of the host,
 * W3C validator headers, Google PageSpeed desktop/mobile scores, JSON-LD
 * types, title/meta/heading statistics, focus-keyword occurrence counts
 * (from the comma separated "keywords" request param) and readability
 * scores. The results are passed to the `_seo_metrics.twig` template; if the
 * URL is not usable or the page cannot be parsed, the `_error` template is
 * rendered instead.
 *
 * The templates path is pointed at the plugin's own templates while this
 * runs and is put back afterwards. `$this->parsingDom` guards against the
 * action being re-entered while a page is being analysed.
 *
 * @return void
 */
public function actionRenderMetrics()
{
    if (!$this->parsingDom) {
        $this->parsingDom = true;
        $oldPath = method_exists(craft()->templates, 'getTemplatesPath') ? craft()->templates->getTemplatesPath() : craft()->path->getTemplatesPath();
        $newPath = craft()->path->getPluginsPath() . 'seomatic/templates';
        method_exists(craft()->templates, 'setTemplatesPath') ? craft()->templates->setTemplatesPath($newPath) : craft()->path->setTemplatesPath($newPath);
        /* -- Render the SEOmatic display preview template */
        $url = urldecode(craft()->request->getParam('url'));
        /* -- parse_url() can fail or omit the host even for strings that look absolute, so check its result */
        $urlParts = UrlHelper::isAbsoluteUrl($url) ? parse_url($url) : false;
        if (is_array($urlParts) && !empty($urlParts['host'])) {
            $scheme = isset($urlParts['scheme']) ? $urlParts['scheme'] : "http";
            /* -- An explicit port must be separated from the host by a colon */
            $portSuffix = isset($urlParts['port']) ? ":" . $urlParts['port'] : "";
            $rootUrl = $scheme . "://" . $urlParts['host'] . $portSuffix . "/";
            $keywordsParam = urldecode(craft()->request->getParam('keywords'));
            $keywordsKeys = explode(",", $keywordsParam);
            $keywords = array();
            $userAgent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13';
            /* -- Silly work-around for what appears to be a file_get_contents bug with https -> http://stackoverflow.com/questions/10524748/why-im-getting-500-error-when-using-file-get-contents-but-works-in-a-browser */
            $opts = array('http' => array('header' => "User-Agent:" . $userAgent . "\r\n"));
            $context = stream_context_create($opts);
            $dom = HtmlDomParser::file_get_html($url, false, $context);
            if ($dom) {
                $textStatistics = new TS\TextStatistics();
                /* -- See if robots.txt exists */
                $hasRobotsTxt = false;
                $hasSitemap = false;
                $sitemapUrl = rtrim($rootUrl, '/') . "/sitemap.xml";
                $foundSitemapUrl = "";
                $robotsUrl = rtrim($rootUrl, '/') . "/robots.txt";
                $robots = @file_get_contents($robotsUrl, false, $context);
                if ($robots !== false) {
                    $hasRobotsTxt = true;
                    $searchStr = 'Sitemap';
                    foreach (explode("\n", $robots) as $line) {
                        $line = ltrim($line);
                        /* -- Only accept lines that start with the directive (in any letter case) */
                        if (stripos($line, $searchStr) === 0) {
                            $candidate = trim(ltrim(substr($line, strlen($searchStr)), ": \t"));
                            /* -- The value comes from a remote file; only fetch it if it is an http(s) URL */
                            if ($candidate !== "" && UrlHelper::isAbsoluteUrl($candidate)) {
                                $foundSitemapUrl = $candidate;
                            }
                        }
                    }
                }
                /* -- Check to see if a sitemap exists: first the one advertised in robots.txt, then the default location */
                $sitemapCandidates = array($sitemapUrl);
                if ($foundSitemapUrl) {
                    array_unshift($sitemapCandidates, $foundSitemapUrl);
                }
                foreach (array_unique($sitemapCandidates) as $candidateUrl) {
                    /* -- Reading a single byte is enough to know the resource can be fetched */
                    $siteMapContents = @file_get_contents($candidateUrl, false, $context, 0, 1);
                    if ($siteMapContents !== false) {
                        $hasSitemap = true;
                        break;
                    }
                }
                /* -- See if the site is https */
                $sslReturnCode = 0;
                $sslUrl = "https" . "://" . $urlParts['host'] . $portSuffix . '/';
                $ch = curl_init($sslUrl);
                curl_setopt($ch, CURLOPT_NOBODY, true);
                curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
                $open_basedir = ini_get('open_basedir');
                if (empty($open_basedir)) {
                    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
                }
                curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
                curl_exec($ch);
                $sslReturnCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
                curl_close($ch);
                /* -- Check to see if the page is valid */
                $validatorUrl = "https://validator.w3.org/check?uri=" . urlencode($url) . "&output=json";
                $ch = curl_init();
                curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
                curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
                curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
                /* -- The values we want are response headers, so they must be included in the returned string */
                curl_setopt($ch, CURLOPT_HEADER, true);
                curl_setopt($ch, CURLOPT_URL, $validatorUrl);
                $validatorResult = curl_exec($ch);
                curl_close($ch);
                $validatorValues = array('Status' => "", 'Errors' => "", 'Warnings' => "");
                if ($validatorResult) {
                    foreach (array_keys($validatorValues) as $field) {
                        $searchStr = "X-W3C-Validator-" . $field . ": ";
                        /* -- Header names are case-insensitive */
                        $pos = stripos($validatorResult, $searchStr);
                        if ($pos !== false) {
                            $pos += strlen($searchStr);
                            $eol = strpos($validatorResult, "\n", $pos);
                            $value = ($eol === false) ? substr($validatorResult, $pos) : substr($validatorResult, $pos, $eol - $pos);
                            /* -- trim() also drops the "\r" of a CRLF line ending */
                            $validatorValues[$field] = trim($value);
                        }
                    }
                }
                $validatorStatus = $validatorValues['Status'];
                $validatorErrors = $validatorValues['Errors'];
                $validatorWarnings = $validatorValues['Warnings'];
                /* -- From here on $validatorUrl is the human-readable link handed to the template */
                $validatorUrl = "https://validator.w3.org/check?uri=" . urlencode($url);
                /* -- Check Google Pagespeed insights for desktop */
                $pagespeedDesktopScore = "";
                $pagespeedDesktopUrl = "https://www.googleapis.com/pagespeedonline/v2/runPagespeed?url=" . urlencode($url) . "&strategy=desktop";
                $ch = curl_init();
                curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
                curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
                curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
                curl_setopt($ch, CURLOPT_URL, $pagespeedDesktopUrl);
                $pagespeedDesktopResult = curl_exec($ch);
                curl_close($ch);
                $pageSpeedPageStats = array();
                if ($pagespeedDesktopResult) {
                    $pagespeedJson = json_decode($pagespeedDesktopResult, true);
                    if ($pagespeedJson) {
                        if (!empty($pagespeedJson['pageStats'])) {
                            $pageSpeedPageStats = $pagespeedJson['pageStats'];
                            /* -- Default any missing byte counts to 0 and add them all up */
                            $totalResponseBytes = 0;
                            $bytesKeys = array('htmlResponseBytes', 'cssResponseBytes', 'imageResponseBytes', 'javascriptResponseBytes', 'otherResponseBytes');
                            foreach ($bytesKeys as $bytesKey) {
                                if (empty($pageSpeedPageStats[$bytesKey])) {
                                    $pageSpeedPageStats[$bytesKey] = 0;
                                }
                                $totalResponseBytes += $pageSpeedPageStats[$bytesKey];
                            }
                            $pageSpeedPageStats['totalResponseBytes'] = $totalResponseBytes;
                        }
                        if (isset($pagespeedJson['responseCode']) && ($pagespeedJson['responseCode'] == "200" || $pagespeedJson['responseCode'] == "301" || $pagespeedJson['responseCode'] == "302")) {
                            if (isset($pagespeedJson['ruleGroups']['SPEED']['score'])) {
                                $pagespeedDesktopScore = intval($pagespeedJson['ruleGroups']['SPEED']['score']);
                            }
                        }
                    }
                }
                /* -- From here on $pagespeedDesktopUrl is the human-readable link handed to the template */
                $pagespeedDesktopUrl = "https://developers.google.com/speed/pagespeed/insights/?url=" . urlencode($url) . "&tab=desktop";
                /* -- Check Google Pagespeed insights for mobile */
                $pagespeedMobileScore = "";
                $pagespeedMobileUsability = "";
                $pagespeedMobileUrl = "https://www.googleapis.com/pagespeedonline/v2/runPagespeed?url=" . urlencode($url) . "&strategy=mobile";
                $ch = curl_init();
                curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
                curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
                curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
                curl_setopt($ch, CURLOPT_URL, $pagespeedMobileUrl);
                $pagespeedMobileResult = curl_exec($ch);
                curl_close($ch);
                if ($pagespeedMobileResult) {
                    $pagespeedJson = json_decode($pagespeedMobileResult, true);
                    if ($pagespeedJson) {
                        if (isset($pagespeedJson['responseCode']) && ($pagespeedJson['responseCode'] == "200" || $pagespeedJson['responseCode'] == "301" || $pagespeedJson['responseCode'] == "302")) {
                            if (isset($pagespeedJson['ruleGroups']['SPEED']['score'])) {
                                $pagespeedMobileScore = intval($pagespeedJson['ruleGroups']['SPEED']['score']);
                            }
                            if (isset($pagespeedJson['ruleGroups']['USABILITY']['score'])) {
                                $pagespeedMobileUsability = intval($pagespeedJson['ruleGroups']['USABILITY']['score']);
                            }
                        }
                    }
                }
                $pagespeedMobileUrl = "https://developers.google.com/speed/pagespeed/insights/?url=" . urlencode($url) . "&tab=mobile";
                /* -- Scrape for JSON-LD before we remove the <script> tags */
                $jsonLdTypes = array();
                foreach ($dom->find('script[type=application/ld+json]') as $elem) {
                    $jsonArray = json_decode($elem->innertext, true);
                    if (isset($jsonArray['@type'])) {
                        /* -- "@type" may be a single string or a list of strings; flatten so array_unique() only sees strings */
                        foreach ((array)$jsonArray['@type'] as $jsonLdType) {
                            if (is_string($jsonLdType)) {
                                array_push($jsonLdTypes, $jsonLdType);
                            }
                        }
                    }
                }
                $jsonLdTypes = array_unique($jsonLdTypes);
                /* -- Remove inline <script> and <style> tags, and then strip the DOM down */
                foreach ($dom->find('style') as $element) {
                    $element->outertext = '';
                }
                foreach ($dom->find('script') as $element) {
                    $element->outertext = '';
                }
                $strippedDom = html_entity_decode($dom->plaintext);
                // $strippedDom = preg_replace('@[^0-9a-z\.\!]+@i', ', ', $strippedDom);
                $strippedDom = stripslashes($strippedDom);
                $htmlDom = html_entity_decode($dom->outertext);
                // $htmlDom = preg_replace('@[^0-9a-z\.\!]+@i', '', $htmlDom);
                /* -- SEO statistics */
                $titleTag = "";
                $elem = $dom->find('title', 0);
                /* -- A page without a <title> must not abort the whole report */
                if ($elem) {
                    $titleTag = html_entity_decode($elem->plaintext);
                }
                $titleLength = strlen($titleTag);
                $metaDescriptionTag = "";
                $metaDescriptionLength = 0;
                $elem = $dom->find('meta[name=description]', 0);
                if ($elem) {
                    $metaDescriptionTag = html_entity_decode($elem->content);
                    $metaDescriptionLength = strlen($metaDescriptionTag);
                }
                $metaTwitterTag = "";
                $elem = $dom->find('meta[name=twitter:card],meta[property=twitter:card]', 0);
                if ($elem) {
                    $metaTwitterTag = html_entity_decode($elem->content);
                }
                $metaOpenGraphTag = "";
                $elem = $dom->find('meta[property=og:type],meta[property=og:url],meta[property=og:title]', 0);
                if ($elem) {
                    $metaOpenGraphTag = html_entity_decode($elem->content);
                }
                $hasRelPublisherTag = false;
                $elem = $dom->find('link[rel=publisher]', 0);
                if ($elem) {
                    $hasRelPublisherTag = true;
                }
                $emptyImageAlts = count($dom->find('img[!alt]'));
                $h1Tags = count($dom->find('h1'));
                $h2Tags = count($dom->find('h2'));
                $h3Tags = count($dom->find('h3'));
                $h4Tags = count($dom->find('h4'));
                $h5Tags = count($dom->find('h5'));
                $totalHTags = $h1Tags + $h2Tags + $h3Tags + $h4Tags + $h5Tags;
                /* -- Headings are "effective" with exactly one h1, at least three headings, and no skipped levels */
                $effectiveHTags = true;
                if ($h1Tags != 1) {
                    $effectiveHTags = false;
                }
                if ($totalHTags < 3) {
                    $effectiveHTags = false;
                }
                if ($h2Tags == 0 && ($h3Tags || $h4Tags || $h5Tags)) {
                    $effectiveHTags = false;
                }
                if ($h3Tags == 0 && ($h4Tags || $h5Tags)) {
                    $effectiveHTags = false;
                }
                if ($h4Tags == 0 && $h5Tags) {
                    $effectiveHTags = false;
                }
                /* -- Guard the division: report 0 when there is no markup to compare the text against */
                $markupLength = strlen($htmlDom) - strlen($strippedDom);
                $textToHtmlRatio = ($markupLength > 0) ? (strlen($strippedDom) / $markupLength * 100) : 0;
                $strippedDom = preg_replace('/\\s+/', ' ', $strippedDom);
                /* -- Extract the page keywords, and clean them up a bit */
                $pageKeywords = craft()->seomatic->extractKeywords($strippedDom);
                $pageKeywords = str_replace(",,", ",", $pageKeywords);
                $pageKeywords = str_replace(" ,", ",", $pageKeywords);
                $pageKeywords = str_replace(" .", ".", $pageKeywords);
                $pageKeywords = preg_replace('/\\.+/', '.', $pageKeywords);
                $pageKeywords = preg_replace('/,+/', ',', $pageKeywords);
                $pageKeywords = str_replace(",.,", ",", $pageKeywords);
                $pageKeywords = html_entity_decode($pageKeywords, ENT_COMPAT, 'UTF-8');
                /* -- Focus keywords */
                foreach ($keywordsKeys as $keywordsKey) {
                    $keywordsKey = trim($keywordsKey);
                    if (strlen($keywordsKey)) {
                        $needle = strtolower($keywordsKey);
                        /* -- The "H1" count covers both h1 and h2 headings */
                        $appearsInH1Tag = 0;
                        foreach ($dom->find('h1') as $element) {
                            $appearsInH1Tag += substr_count(strtolower($element->plaintext), $needle);
                        }
                        foreach ($dom->find('h2') as $element) {
                            $appearsInH1Tag += substr_count(strtolower($element->plaintext), $needle);
                        }
                        $appearsInImgTag = 0;
                        foreach ($dom->find('img') as $element) {
                            $appearsInImgTag += substr_count(strtolower($element->alt), $needle);
                        }
                        $appearsInAhrefTag = 0;
                        foreach ($dom->find('a') as $element) {
                            $appearsInAhrefTag += substr_count(strtolower($element->plaintext), $needle);
                        }
                        $keywords[$keywordsKey] = array(
                            'appearsInTitleTag' => substr_count(strtolower($titleTag), $needle),
                            'appearsInUrl' => substr_count(strtolower($url), $needle),
                            'appearsInMetaDescriptionTag' => substr_count(strtolower($metaDescriptionTag), $needle),
                            'appearsInH1Tag' => $appearsInH1Tag,
                            'appearsInAhrefTag' => $appearsInAhrefTag,
                            'appearsInImgTag' => $appearsInImgTag,
                            'appearsInPageKeywords' => substr_count(strtolower($pageKeywords), $needle),
                            'appearsOnWebPage' => substr_count(strtolower($strippedDom), $needle),
                        );
                    }
                }
                /* -- Text statistics */
                $wordCount = $textStatistics->wordCount($strippedDom);
                /* -- floor() returns a float, so cast before comparing; always report at least one minute */
                $readingTime = (int)floor($wordCount / 200);
                if ($readingTime < 1) {
                    $readingTime = 1;
                }
                $fleschKincaidReadingEase = $textStatistics->fleschKincaidReadingEase($strippedDom);
                $fleschKincaidGradeLevel = $textStatistics->fleschKincaidGradeLevel($strippedDom);
                $gunningFogScore = $textStatistics->gunningFogScore($strippedDom);
                $colemanLiauIndex = $textStatistics->colemanLiauIndex($strippedDom);
                $smogIndex = $textStatistics->smogIndex($strippedDom);
                $automatedReadabilityIndex = $textStatistics->automatedReadabilityIndex($strippedDom);
                $vars = array(
                    'titleTag' => $titleTag,
                    'titleLength' => $titleLength,
                    'metaDescriptionTag' => $metaDescriptionTag,
                    'metaDescriptionLength' => $metaDescriptionLength,
                    'metaTwitterTag' => $metaTwitterTag,
                    'metaOpenGraphTag' => $metaOpenGraphTag,
                    'hasRelPublisherTag' => $hasRelPublisherTag,
                    'jsonLdTypes' => $jsonLdTypes,
                    'hasRobotsTxt' => $hasRobotsTxt,
                    'hasSitemap' => $hasSitemap,
                    'emptyImageAlts' => $emptyImageAlts,
                    'validatorUrl' => $validatorUrl,
                    'validatorStatus' => $validatorStatus,
                    'validatorErrors' => $validatorErrors,
                    'validatorWarnings' => $validatorWarnings,
                    'pageSpeedPageStats' => $pageSpeedPageStats,
                    'pagespeedDesktopScore' => $pagespeedDesktopScore,
                    'pagespeedDesktopUrl' => $pagespeedDesktopUrl,
                    'pagespeedMobileScore' => $pagespeedMobileScore,
                    'pagespeedMobileUsability' => $pagespeedMobileUsability,
                    'pagespeedMobileUrl' => $pagespeedMobileUrl,
                    'sslReturnCode' => $sslReturnCode,
                    'h1Tags' => $h1Tags,
                    'h2Tags' => $h2Tags,
                    'h3Tags' => $h3Tags,
                    'h4Tags' => $h4Tags,
                    'h5Tags' => $h5Tags,
                    'effectiveHTags' => $effectiveHTags,
                    'textToHtmlRatio' => $textToHtmlRatio,
                    'wordCount' => $wordCount,
                    'readingTime' => $readingTime,
                    'pageKeywords' => $pageKeywords,
                    'keywords' => $keywords,
                    'fleschKincaidReadingEase' => $fleschKincaidReadingEase,
                    'fleschKincaidGradeLevel' => $fleschKincaidGradeLevel,
                    'gunningFogScore' => $gunningFogScore,
                    'colemanLiauIndex' => $colemanLiauIndex,
                    'smogIndex' => $smogIndex,
                    'automatedReadabilityIndex' => $automatedReadabilityIndex,
                );
                //$htmlText = craft()->templates->render('_seo_metrics.twig', $vars);
                $this->renderTemplate('_seo_metrics.twig', $vars);
            } else {
                $this->renderTemplate('_error', array('errorMessage' => "Error parsing the DOM. Is this a valid, publicly accessible URL?"));
            }
        } else {
            $this->renderTemplate('_error', array('errorMessage' => "Error loading the webpage. Is this a valid, publicly accessible URL?"));
        }
        /* -- Put the templates path back the way we found it */
        method_exists(craft()->templates, 'setTemplatesPath') ? craft()->templates->setTemplatesPath($oldPath) : craft()->path->setTemplatesPath($oldPath);
    }
    $this->parsingDom = false;
}