PHP_CodeSniffer_File::tokenizeString PHP Method

tokenizeString() public static method

Starts by using token_get_all() but does a lot of extra processing to insert information about the context of the token.
public static tokenizeString ( string $string, object $tokenizer, string $eolChar = '\n', integer $tabWidth = null, string $encoding = null ) : array
$string string The string to tokenize.
$tokenizer object A tokenizer class to use to tokenize the string.
$eolChar string The EOL character to use for splitting strings.
$tabWidth integer The number of spaces each tab represents.
$encoding string The charset of the sniffed file.
return array
    /**
     * Converts a string of source code into an annotated token array.
     *
     * The raw tokens come from $tokenizer->tokenizeString(). The position,
     * token, parenthesis-nesting, scope and level map builders of this class
     * are then run over them, in that order, before the tokenizer gets a
     * final chance to post-process the result via processAdditional().
     * None of the builders' return values are used here, so they presumably
     * annotate $tokens by reference - confirm against their signatures.
     *
     * @param string $string    The string to tokenize.
     * @param object $tokenizer A tokenizer class to use to tokenize the string.
     * @param string $eolChar   The EOL character to use for splitting strings.
     *                          NOTE(review): the default is a single-quoted
     *                          literal (a backslash followed by "n"), not a
     *                          newline character; confirm that callers always
     *                          pass the real EOL character.
     * @param int    $tabWidth  The number of spaces each tab represents.
     *                          PHP_CODESNIFFER_TAB_WIDTH is used when null.
     * @param string $encoding  The charset of the sniffed file.
     *                          PHP_CODESNIFFER_ENCODING is used when null.
     *
     * @return array
     * @throws PHP_CodeSniffer_Exception If the tokenizer has skipMinified set
     *                                   to true and the string averages more
     *                                   than 100 characters per line.
     */
    public static function tokenizeString($string, $tokenizer, $eolChar = '\\n', $tabWidth = null, $encoding = null)
    {
        // Only tokenizers that declare skipMinified and set it to true get
        // the minified-content guard.
        $guardMinified = false;
        if (property_exists($tokenizer, 'skipMinified') === true) {
            $guardMinified = ($tokenizer->skipMinified === true);
        }

        if ($guardMinified === true) {
            // Minified files often have a very large number of characters
            // per line and cause issues when tokenizing, so a high average
            // line length is used as the signal to bail out.
            $charCount = strlen($string);
            $lineCount = (substr_count($string, $eolChar) + 1);
            if (($charCount / $lineCount) > 100) {
                throw new PHP_CodeSniffer_Exception('File appears to be minified and cannot be processed');
            }
        }

        $tokens = $tokenizer->tokenizeString($string, $eolChar);

        // Fall back to the PHP_CODESNIFFER_* constants when no explicit
        // value was supplied.
        $tabWidth = ($tabWidth === null) ? PHP_CODESNIFFER_TAB_WIDTH : $tabWidth;
        $encoding = ($encoding === null) ? PHP_CODESNIFFER_ENCODING : $encoding;

        // Layer the context information on top of the raw tokens.
        self::_createPositionMap($tokens, $tokenizer, $eolChar, $encoding, $tabWidth);
        self::_createTokenMap($tokens, $tokenizer, $eolChar);
        self::_createParenthesisNestingMap($tokens, $tokenizer, $eolChar);
        self::_createScopeMap($tokens, $tokenizer, $eolChar);
        self::_createLevelMap($tokens, $tokenizer, $eolChar);

        // Allow the tokenizer to do additional processing if required.
        $tokenizer->processAdditional($tokens, $eolChar);

        return $tokens;
    }

Usage Example

Example #1
0
 /**
  * Processes this test, when one of its tokens is encountered.
  *
  * Gathers the run of consecutive tokens that share the code of the token
  * at $stackPtr, strips their comment markers, tokenizes the remaining text
  * and adds a "Found" warning when the percentage of tokens that look like
  * code is greater than $this->maxPercentage.
  *
  * PHP error reporting is switched off while the comment text is tokenized
  * and is restored on every exit path, including when the tokenizer throws
  * an exception this method does not handle itself.
  *
  * @param PHP_CodeSniffer_File $phpcsFile The file being scanned.
  * @param int                  $stackPtr  The position of the current token
  *                                        in the stack passed in $tokens.
  *
  * @return void
  */
 public function process(PHP_CodeSniffer_File $phpcsFile, $stackPtr)
 {
     $tokens = $phpcsFile->getTokens();
     // Process whole comment blocks at once, so skip all but the first token.
     if ($stackPtr > 0 && $tokens[$stackPtr]['code'] === $tokens[$stackPtr - 1]['code']) {
         return;
     }
     // Ignore comments at the end of code blocks.
     if (substr($tokens[$stackPtr]['content'], 0, 6) === '//end ') {
         return;
     }
     // For PHP files the comment text is wrapped in open/close tags so the
     // tokenizer treats it as PHP code.
     $content = '';
     if ($phpcsFile->tokenizerType === 'PHP') {
         $content = '<?php ';
     }
     for ($i = $stackPtr; $i < $phpcsFile->numTokens; $i++) {
         // Stop at the first token that is of a different type to the
         // token this run started on.
         if ($tokens[$stackPtr]['code'] !== $tokens[$i]['code']) {
             break;
         }
         /*
             Trim as much off the comment as possible so we don't
             have additional whitespace tokens or comment tokens
         */
         $tokenContent = trim($tokens[$i]['content']);
         if (substr($tokenContent, 0, 2) === '//') {
             $tokenContent = substr($tokenContent, 2);
         }
         if (substr($tokenContent, 0, 1) === '#') {
             $tokenContent = substr($tokenContent, 1);
         }
         if (substr($tokenContent, 0, 3) === '/**') {
             $tokenContent = substr($tokenContent, 3);
         }
         if (substr($tokenContent, 0, 2) === '/*') {
             $tokenContent = substr($tokenContent, 2);
         }
         if (substr($tokenContent, -2) === '*/') {
             $tokenContent = substr($tokenContent, 0, -2);
         }
         if (substr($tokenContent, 0, 1) === '*') {
             $tokenContent = substr($tokenContent, 1);
         }
         $content .= $tokenContent . $phpcsFile->eolChar;
     }
     //end for
     $content = trim($content);
     if ($phpcsFile->tokenizerType === 'PHP') {
         $content .= ' ?>';
     }
     // Quite a few comments use multiple dashes, equals signs etc
     // to frame comments and licence headers.
     $content = preg_replace('/[-=*]+/', '-', $content);
     // Because we are not really parsing code, the tokenizer can throw all sorts
     // of errors that don't mean anything, so ignore them.
     $oldErrors = ini_get('error_reporting');
     ini_set('error_reporting', 0);
     try {
         $stringTokens = PHP_CodeSniffer_File::tokenizeString($content, $phpcsFile->tokenizer, $phpcsFile->eolChar);
     } catch (PHP_CodeSniffer_Exception $e) {
         // We couldn't check the comment, so ignore it.
         ini_set('error_reporting', $oldErrors);
         return;
     } catch (Exception $e) {
         // Anything else is unexpected: put error reporting back before
         // letting it propagate, so a failure here cannot leave reporting
         // switched off for the rest of the run.
         ini_set('error_reporting', $oldErrors);
         throw $e;
     }
     ini_set('error_reporting', $oldErrors);
     // Token codes that are counted as plain comment text rather than code.
     $emptyTokens = array(T_WHITESPACE => true, T_STRING => true, T_STRING_CONCAT => true, T_ENCAPSED_AND_WHITESPACE => true, T_NONE => true, T_COMMENT => true);
     $numTokens = count($stringTokens);
     /*
         We know what the first two and last two tokens should be
         (because we put them there) so ignore this comment if those
         tokens were not parsed correctly. It obviously means this is not
         valid code.
     */
     // First token is always the opening PHP tag. An empty token list is
     // treated the same way as a wrong first token.
     if (isset($stringTokens[0]) === false || $stringTokens[0]['code'] !== T_OPEN_TAG) {
         return;
     }
     // Last token is always the closing PHP tag, unless something went wrong.
     if (isset($stringTokens[$numTokens - 1]) === false || $stringTokens[$numTokens - 1]['code'] !== T_CLOSE_TAG) {
         return;
     }
     // Second last token is always whitespace or a comment, depending
     // on the code inside the comment.
     if ($phpcsFile->tokenizerType === 'PHP' && isset(PHP_CodeSniffer_Tokens::$emptyTokens[$stringTokens[$numTokens - 2]['code']]) === false) {
         return;
     }
     $numComment = 0;
     $numPossible = 0;
     $numCode = 0;
     for ($i = 0; $i < $numTokens; $i++) {
         if (isset($emptyTokens[$stringTokens[$i]['code']]) === true) {
             // Looks like comment.
             $numComment++;
         } else {
             if (in_array($stringTokens[$i]['code'], PHP_CodeSniffer_Tokens::$comparisonTokens) === true || in_array($stringTokens[$i]['code'], PHP_CodeSniffer_Tokens::$arithmeticTokens) === true || $stringTokens[$i]['code'] === T_GOTO_LABEL) {
                 // Commented out HTML/XML and other docs contain a lot of these
                 // characters, so it is best to not use them directly.
                 $numPossible++;
             } else {
                 // Looks like code.
                 $numCode++;
             }
         }
     }
     // We subtract 3 from the token number so we ignore the start/end tokens
     // and their surrounding whitespace. We take 2 off the number of code
     // tokens so we ignore the start/end tokens.
     if ($numTokens > 3) {
         $numTokens -= 3;
     }
     if ($numCode >= 2) {
         $numCode -= 2;
     }
     $percentCode = ceil($numCode / $numTokens * 100);
     if ($percentCode > $this->maxPercentage) {
         // Just in case.
         $percentCode = min(100, $percentCode);
         $error = 'This comment is %s%% valid code; is this commented out code?';
         $data = array($percentCode);
         $phpcsFile->addWarning($error, $stackPtr, 'Found', $data);
     }
 }
All Usage Examples Of PHP_CodeSniffer_File::tokenizeString