Goose\Modules\Extractors\MetaExtractor::cleanTitle PHP Method

cleanTitle() private method

Ported from python-goose https://github.com/grangier/python-goose/ by Xavier Grangier
private cleanTitle ( string $title ) : string
$title string
return string
    /**
     * Strip site branding from an extracted title.
     *
     * Removes the OpenGraph site name (case-sensitive match) and the article
     * domain (case-insensitive match) from the title, then drops any splitter
     * tokens listed in self::$SPLITTER_CHARS that are left dangling at either
     * end of the title.
     *
     * Ported from python-goose https://github.com/grangier/python-goose/ by Xavier Grangier
     *
     * @param string $title Raw title
     *
     * @return string Cleaned title, or '' when nothing is left
     */
    private function cleanTitle($title)
    {
        $openGraph = $this->article()->getOpenGraph();

        // Check if we have the site name in OpenGraph data
        if (isset($openGraph['site_name'])) {
            $title = str_replace($openGraph['site_name'], '', $title);
        }

        // Try to remove the domain from the title (case-insensitively)
        $domain = $this->article()->getDomain();
        if ($domain) {
            $title = str_ireplace($domain, '', $title);
        }

        // Split the title into whitespace-separated words, e.g.
        //   TechCrunch | my wonderful article
        //   my wonderful article | TechCrunch
        // PREG_SPLIT_NO_EMPTY makes a blank title produce an empty array
        // instead of [''], so the guard below really detects empty titles.
        $titleWords = preg_split('@[\\s]+@', trim($title), -1, PREG_SPLIT_NO_EMPTY);

        // Covers both a blank title and preg_split() returning false on error
        if (empty($titleWords)) {
            return '';
        }

        // Remove every trailing word that is a splitter. Several can pile up
        // when both the site name and the domain were stripped above
        // ("title | Site - site.com" becomes "title | -").
        while (!empty($titleWords) && in_array(end($titleWords), self::$SPLITTER_CHARS, true)) {
            array_pop($titleWords);
        }

        // Same for leading words that are splitters
        while (!empty($titleWords) && in_array(reset($titleWords), self::$SPLITTER_CHARS, true)) {
            array_shift($titleWords);
        }

        // Rebuild the title
        return trim(implode(' ', $titleWords));
    }