From 87c393639ec69f2076dbb5edc7bacdc95a956f94 Mon Sep 17 00:00:00 2001 From: jrfnl Date: Tue, 21 May 2024 18:12:21 +0200 Subject: [PATCH] Tokenizer/PHP: efficiency fix Reminder: the `PHP::processAdditional()` method walks _back_ from the end of the file to the beginning. The type handling retokenization layer is triggered for each `&`, `|` and `)` the tokenizer encounters. When something is recognized as a valid type declaration, the relevant tokens will all be retokenized in one go the first time the type handling layer is triggered, which means that - as the type tokens will have been retokenized already - the type handling layer will not be triggered again for any of the other type-related tokens in the type. However, if the type is *not* recognized as a valid type, the type handling layer will keep getting retriggered and will (correctly) keep concluding this is not a valid type. The change in this PR prevents the type handling layer from doing any work when it is retriggered on a token which was previously already seen and concluded to be either not part of a type or part of an invalid type. This should make the tokenizer marginally faster for complex types containing an error, like `(A&B|(C&D)|(E&F)`. --- src/Tokenizers/PHP.php | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Tokenizers/PHP.php b/src/Tokenizers/PHP.php index 60a0d41df8..7168231de6 100644 --- a/src/Tokenizers/PHP.php +++ b/src/Tokenizers/PHP.php @@ -2602,7 +2602,9 @@ protected function processAdditional() $this->createAttributesNestingMap(); - $numTokens = count($this->tokens); + $numTokens = count($this->tokens); + $lastSeenTypeToken = $numTokens; + for ($i = ($numTokens - 1); $i >= 0; $i--) { // Check for any unset scope conditions due to alternate IF/ENDIF syntax. 
if (isset($this->tokens[$i]['scope_opener']) === true @@ -3038,6 +3040,12 @@ protected function processAdditional() || $this->tokens[$i]['code'] === T_BITWISE_AND || $this->tokens[$i]['code'] === T_CLOSE_PARENTHESIS ) { + if ($lastSeenTypeToken < $i) { + // We've already examined this code to check if it is a type declaration and concluded it wasn't. + // No need to do it again. + continue; + } + /* Convert "|" to T_TYPE_UNION or leave as T_BITWISE_OR. Convert "&" to T_TYPE_INTERSECTION or leave as T_BITWISE_AND. @@ -3250,6 +3258,9 @@ protected function processAdditional() break; }//end for + // Remember the last token we examined as part of the (non-)"type declaration". + $lastSeenTypeToken = $x; + if ($confirmed === false && $suspectedType === 'property or parameter' && isset($this->tokens[$i]['nested_parenthesis']) === true