From 4e42f8b12e8dc57a15998f22d508a19cf3c856aa Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 5 Jul 2022 13:37:41 +0300 Subject: [PATCH] [SPARK-39677][SQL][DOCS] Fix args formatting of the regexp and like functions ### What changes were proposed in this pull request? In the PR, I propose to fix args formatting of some regexp functions by adding explicit new lines. That fixes the following items in arg lists. Before: Screenshot 2022-07-05 at 09 48 28 After: Screenshot 2022-07-05 at 11 06 13 ### Why are the changes needed? To improve readability of Spark SQL docs. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By building docs and checking manually: ``` $ SKIP_SCALADOC=1 SKIP_PYTHONDOC=1 SKIP_RDOC=1 bundle exec jekyll build ``` Closes #37082 from MaxGekk/fix-regexp-docs. Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../expressions/regexpExpressions.scala | 46 +++++++------------ 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 126e0b6dc1f1b..8d81305829691 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -84,16 +84,12 @@ abstract class StringRegexExpression extends BinaryExpression Arguments: * str - a string expression * pattern - a string expression. The pattern is a string which is matched literally, with - exception to the following special symbols: - - _ matches any one character in the input (similar to . in posix regular expressions) - + exception to the following special symbols:

+ _ matches any one character in the input (similar to . in posix regular expressions)\ % matches zero or more characters in the input (similar to .* in posix regular - expressions) - + expressions)

Since Spark 2.0, string literals are unescaped in our SQL parser. For example, in order - to match "\abc", the pattern should be "\\abc". - + to match "\abc", the pattern should be "\\abc".

When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, it falls back to Spark 1.6 behavior regarding string literal parsing. For example, if the config is enabled, the pattern to match "\abc" should be "\abc". @@ -189,7 +185,7 @@ case class Like(left: Expression, right: Expression, escapeChar: Char) copy(left = newLeft, right = newRight) } -// scalastyle:off line.contains.tab +// scalastyle:off line.contains.tab line.size.limit /** * Simple RegEx case-insensitive pattern matching function */ @@ -200,16 +196,12 @@ case class Like(left: Expression, right: Expression, escapeChar: Char) Arguments: * str - a string expression * pattern - a string expression. The pattern is a string which is matched literally and - case-insensitively, with exception to the following special symbols: - - _ matches any one character in the input (similar to . in posix regular expressions) - + case-insensitively, with exception to the following special symbols:

+ _ matches any one character in the input (similar to . in posix regular expressions)

% matches zero or more characters in the input (similar to .* in posix regular - expressions) - + expressions)

Since Spark 2.0, string literals are unescaped in our SQL parser. For example, in order - to match "\abc", the pattern should be "\\abc". - + to match "\abc", the pattern should be "\\abc".

When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, it falls back to Spark 1.6 behavior regarding string literal parsing. For example, if the config is enabled, the pattern to match "\abc" should be "\abc". @@ -237,7 +229,7 @@ case class Like(left: Expression, right: Expression, escapeChar: Char) """, since = "3.3.0", group = "predicate_funcs") -// scalastyle:on line.contains.tab +// scalastyle:on line.contains.tab line.size.limit case class ILike( left: Expression, right: Expression, @@ -574,12 +566,10 @@ case class StringSplit(str: Expression, regex: Expression, limit: Expression) Arguments: * str - a string expression to search for a regular expression pattern match. * regexp - a string representing a regular expression. The regex string should be a - Java regular expression. - + Java regular expression.

Since Spark 2.0, string literals (including regex patterns) are unescaped in our SQL parser. For example, to match "\abc", a regular expression for `regexp` can be - "^\\abc$". - + "^\\abc$".

There is a SQL config 'spark.sql.parser.escapedStringLiterals' that can be used to fallback to the Spark 1.6 behavior regarding string literal parsing. For example, if the config is enabled, the `regexp` that can match "\abc" is "^\abc$". @@ -783,12 +773,10 @@ abstract class RegExpExtractBase Arguments: * str - a string expression. * regexp - a string representing a regular expression. The regex string should be a - Java regular expression. - + Java regular expression.

Since Spark 2.0, string literals (including regex patterns) are unescaped in our SQL parser. For example, to match "\abc", a regular expression for `regexp` can be - "^\\abc$". - + "^\\abc$".

There is a SQL config 'spark.sql.parser.escapedStringLiterals' that can be used to fallback to the Spark 1.6 behavior regarding string literal parsing. For example, if the config is enabled, the `regexp` that can match "\abc" is "^\abc$". @@ -888,12 +876,10 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expressio Arguments: * str - a string expression. * regexp - a string representing a regular expression. The regex string should be a - Java regular expression. - + Java regular expression.

Since Spark 2.0, string literals (including regex patterns) are unescaped in our SQL parser. For example, to match "\abc", a regular expression for `regexp` can be - "^\\abc$". - + "^\\abc$".

There is a SQL config 'spark.sql.parser.escapedStringLiterals' that can be used to fallback to the Spark 1.6 behavior regarding string literal parsing. For example, if the config is enabled, the `regexp` that can match "\abc" is "^\abc$".