-
Notifications
You must be signed in to change notification settings - Fork 266
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[rel/3.8] Handle if RootNamespace contains invalid identifier characters (#5069)
Co-authored-by: Youssef1313 <youssefvictor00@gmail.com>
- Loading branch information
1 parent
1945ea7
commit 8026b02
Showing
5 changed files
with
223 additions
and
0 deletions.
There are no files selected for viewing
51 changes: 51 additions & 0 deletions
51
src/Platform/Microsoft.Testing.Platform.MSBuild/Tasks/NamespaceHelpers.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT license. See LICENSE file in the project root for full license information. | ||
|
||
namespace Microsoft.Testing.Platform.MSBuild; | ||
|
||
internal static class NamespaceHelpers
{
    /// <summary>
    /// Rewrites <paramref name="value"/> so that every dot-separated segment is made only of
    /// identifier characters, substituting an underscore for anything that is not allowed.
    /// </summary>
    /// <remarks>
    /// The rules applied, in order, to the trimmed input:
    /// <list type="bullet">
    /// <item><description>A surrogate pair is replaced by a single underscore.</description></item>
    /// <item><description>A character that is valid inside an identifier but not at its start (for example a digit)
    /// is kept and prefixed with an underscore when it begins a segment.</description></item>
    /// <item><description>A dot is kept only when it separates two segments: it must not be the first or the last
    /// character and must not directly follow another dot.</description></item>
    /// <item><description>Every other character is replaced by an underscore.</description></item>
    /// </list>
    /// </remarks>
    /// <param name="value">The raw namespace text. Leading and trailing white space is ignored.</param>
    /// <returns>The sanitized text; an empty string when the trimmed input is empty.</returns>
    internal static string ToSafeNamespace(string value)
    {
        const char invalidCharacterReplacement = '_';

        value = value.Trim();

        StringBuilder safeValueStr = new(value.Length);

        for (int i = 0; i < value.Length; i++)
        {
            if (i < value.Length - 1 && char.IsSurrogatePair(value[i], value[i + 1]))
            {
                safeValueStr.Append(invalidCharacterReplacement);

                // Skip both chars that make up this symbol.
                i++;
                continue;
            }

            // A segment starts at the very beginning of the output and right after each kept dot.
            bool isFirstCharacterOfIdentifier = safeValueStr.Length == 0 || safeValueStr[safeValueStr.Length - 1] == '.';
            bool isValidFirstCharacter = UnicodeCharacterUtilities.IsIdentifierStartCharacter(value[i]);
            bool isValidPartCharacter = UnicodeCharacterUtilities.IsIdentifierPartCharacter(value[i]);

            // A dot is a separator only when a non-empty segment precedes it and more input follows.
            // Requiring !isFirstCharacterOfIdentifier rejects both a leading dot and a dot that directly
            // follows another dot, so the result never contains an empty segment such as "a..b".
            bool isSegmentSeparator = !isFirstCharacterOfIdentifier && i < value.Length - 1 && value[i] == '.';

            if (isFirstCharacterOfIdentifier && !isValidFirstCharacter && isValidPartCharacter)
            {
                // This character cannot be at the beginning, but is good otherwise. Prefix it with something valid.
                safeValueStr.Append(invalidCharacterReplacement);
                safeValueStr.Append(value[i]);
            }
            else if ((isFirstCharacterOfIdentifier && isValidFirstCharacter) ||
                (!isFirstCharacterOfIdentifier && isValidPartCharacter) ||
                isSegmentSeparator)
            {
                // This character is allowed to be where it is.
                safeValueStr.Append(value[i]);
            }
            else
            {
                safeValueStr.Append(invalidCharacterReplacement);
            }
        }

        return safeValueStr.ToString();
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
160 changes: 160 additions & 0 deletions
160
src/Platform/Microsoft.Testing.Platform.MSBuild/Tasks/UnicodeCharacterUtilities.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT license. See LICENSE file in the project root for full license information. | ||
|
||
namespace Microsoft.Testing.Platform.MSBuild; | ||
|
||
/// <summary>
/// Helper methods that classify Unicode characters by the role they may play in an identifier.
/// </summary>
internal static partial class UnicodeCharacterUtilities
{
    /// <summary>
    /// Returns true if the Unicode character can be the first character of an identifier.
    /// </summary>
    /// <param name="ch">The Unicode character.</param>
    public static bool IsIdentifierStartCharacter(char ch)
    {
        // identifier-start-character:
        //   letter-character
        //   _ (the underscore character U+005F)
        if (ch <= '\u007F')
        {
            // ASCII range: only A-Z, a-z, and the underscore are valid.
            return ch is (>= 'A' and <= 'Z') or (>= 'a' and <= 'z') or '_';
        }

        // Beyond ASCII, accept any character whose Unicode category is one of the letter classes.
        UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(ch);
        return IsLetterChar(category);
    }

    /// <summary>
    /// Returns true if the Unicode character can appear inside an identifier.
    /// </summary>
    /// <param name="ch">The Unicode character.</param>
    public static bool IsIdentifierPartCharacter(char ch)
    {
        // identifier-part-character:
        //   letter-character
        //   decimal-digit-character
        //   connecting-character
        //   combining-character
        //   formatting-character
        if (ch <= '\u007F')
        {
            // ASCII range: A-Z, a-z, 0-9, and the underscore are valid.
            return ch is (>= 'A' and <= 'Z') or (>= 'a' and <= 'z') or (>= '0' and <= '9') or '_';
        }

        UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(ch);

        if (IsLetterChar(category) || IsDecimalDigitChar(category))
        {
            return true;
        }

        return IsConnectingChar(category)
            || IsCombiningChar(category)
            || IsFormattingChar(category);
    }

    /// <summary>
    /// Checks whether the name is a valid Unicode identifier: a start character followed by
    /// zero or more part characters.
    /// </summary>
    /// <param name="name">The candidate identifier; null and empty are rejected.</param>
    public static bool IsValidIdentifier(string? name)
    {
        if (name is null || name.Length == 0 || !IsIdentifierStartCharacter(name[0]))
        {
            return false;
        }

        // Index 0 was validated against the start rule above; the rest only need the part rule.
        int index = 1;
        while (index < name.Length)
        {
            if (!IsIdentifierPartCharacter(name[index]))
            {
                return false;
            }

            index++;
        }

        return true;
    }

    /// <summary>
    /// Returns true if the Unicode character is a formatting character (Unicode class Cf).
    /// </summary>
    /// <param name="ch">The Unicode character.</param>
    internal static bool IsFormattingChar(char ch)
    {
        // There are no formatting characters in the ASCII range, so skip the category lookup there.
        if (ch <= 127)
        {
            return false;
        }

        return IsFormattingChar(CharUnicodeInfo.GetUnicodeCategory(ch));
    }

    // letter-character:
    //   A Unicode character of classes Lu, Ll, Lt, Lm, Lo, or Nl
    //   A Unicode-escape-sequence representing a character of classes Lu, Ll, Lt, Lm, Lo, or Nl
    private static bool IsLetterChar(UnicodeCategory cat)
        => cat is UnicodeCategory.UppercaseLetter
            or UnicodeCategory.LowercaseLetter
            or UnicodeCategory.TitlecaseLetter
            or UnicodeCategory.ModifierLetter
            or UnicodeCategory.OtherLetter
            or UnicodeCategory.LetterNumber;

    // combining-character:
    //   A Unicode character of classes Mn or Mc
    //   A Unicode-escape-sequence representing a character of classes Mn or Mc
    private static bool IsCombiningChar(UnicodeCategory cat)
        => cat is UnicodeCategory.NonSpacingMark or UnicodeCategory.SpacingCombiningMark;

    // decimal-digit-character:
    //   A Unicode character of the class Nd
    //   A Unicode-escape-sequence representing a character of the class Nd
    private static bool IsDecimalDigitChar(UnicodeCategory cat)
        => cat is UnicodeCategory.DecimalDigitNumber;

    // connecting-character:
    //   A Unicode character of the class Pc
    //   A Unicode-escape-sequence representing a character of the class Pc
    private static bool IsConnectingChar(UnicodeCategory cat)
        => cat is UnicodeCategory.ConnectorPunctuation;

    /// <summary>
    /// Returns true if the Unicode category is the formatting class (Unicode class Cf).
    /// </summary>
    /// <param name="cat">The Unicode category.</param>
    private static bool IsFormattingChar(UnicodeCategory cat)
        // formatting-character:
        //   A Unicode character of the class Cf
        //   A Unicode-escape-sequence representing a character of the class Cf
        => cat is UnicodeCategory.Format;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters