Pimcore\Document\Adapter\LibreOffice::getText PHP Method

getText() public method

public getText ( null $page = null, null $path = null ) : boolean | string
$page null
$path null
return boolean | string
    /**
     * Extracts the text content of a document.
     *
     * When a page is requested, or the parent adapter reports the file type as
     * supported, the document is converted to PDF and the parent adapter
     * extracts the text. Otherwise LibreOffice's plain-text export is used for
     * the whole document, falling back to the PDF route if that export does
     * not produce an output file.
     *
     * @param mixed       $page Page selector for per-page extraction; any truthy value
     *                          forces the PDF route (presumably a page number — confirm
     *                          against the parent adapter).
     * @param string|null $path Document to read; when empty, $this->path is used.
     *
     * @return bool|string The extracted text, or an empty string when the file has no
     *                     extension. Non-string results can only originate from
     *                     parent::getText().
     */
    public function getText($page = null, $path = null)
    {
        $path = $path ? $this->preparePath($path) : $this->path;

        if ($page || parent::isFileTypeSupported($path)) {
            // for per page extraction we have to convert the document to PDF and extract the text via ghostscript
            return parent::getText($page, $this->getPdf($path));
        }

        if (!File::getFileExtension($path)) {
            // default empty string
            return "";
        }

        // if we want to get the text of the whole document, we can use libreoffices text export feature
        $cmd = self::getLibreOfficeCli() . " --headless --nologo --nofirststartwizard --norestore --convert-to txt:Text --outdir " . escapeshellarg(PIMCORE_TEMPORARY_DIRECTORY) . " " . escapeshellarg($path);
        $out = Console::exec($cmd, null, 240);
        Logger::debug("LibreOffice Output was: " . $out);

        // LibreOffice writes "<name without last extension>.txt" into the output directory.
        // pathinfo() is used instead of a regex built from the extension, so that regex
        // metacharacters or a differently-cased extension cannot leave the original
        // filename in place (which could make us read and delete the source document).
        $tmpName = PIMCORE_TEMPORARY_DIRECTORY . "/" . pathinfo($path, PATHINFO_FILENAME) . ".txt";

        if (file_exists($tmpName)) {
            $text = file_get_contents($tmpName);
            $text = \Pimcore\Tool\Text::convertToUTF8($text);
            // the export is only an intermediate artifact, remove it once it has been read
            unlink($tmpName);

            return $text;
        }

        // the text export produced no file, fall back to the PDF based extraction
        $message = "Couldn't convert document to text: " . $path . " with the command: '" . $cmd . "' - now trying to get the text out of the PDF ...";
        Logger::error($message);

        return parent::getText(null, $this->getPdf($path));
    }