Skip to content

Commit

Permalink
[rel/3.8] Handle if RootNamespace contains invalid identifier charact…
Browse files Browse the repository at this point in the history
…ers (#5069)

Co-authored-by: Youssef1313 <youssefvictor00@gmail.com>
  • Loading branch information
youssef-backport-bot and Youssef1313 authored Feb 19, 2025
1 parent 1945ea7 commit 8026b02
Show file tree
Hide file tree
Showing 5 changed files with 223 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

namespace Microsoft.Testing.Platform.MSBuild;

/// <summary>
/// Helpers for converting arbitrary text into a string that can be used as a namespace name
/// in generated source code.
/// </summary>
internal static class NamespaceHelpers
{
    /// <summary>
    /// Converts <paramref name="value"/> into a dot-separated sequence of valid identifiers.
    /// </summary>
    /// <remarks>
    /// The rules applied, in order, for each character of the trimmed input are:
    /// <list type="bullet">
    /// <item><description>A surrogate pair is replaced by a single underscore.</description></item>
    /// <item><description>A character that is valid inside an identifier but not at its start (e.g. a digit)
    /// is prefixed with an underscore when it would begin a segment.</description></item>
    /// <item><description>A '.' is kept as a segment separator only when it follows a non-empty segment and
    /// is not the last character; leading, trailing and consecutive dots are replaced by an underscore so
    /// that no segment is ever empty.</description></item>
    /// <item><description>Any other invalid character is replaced by an underscore.</description></item>
    /// </list>
    /// </remarks>
    /// <param name="value">The text to sanitize. Must not be null; leading and trailing whitespace is ignored.</param>
    /// <returns>The sanitized namespace, or an empty string when the trimmed input is empty.</returns>
    internal static string ToSafeNamespace(string value)
    {
        const char invalidCharacterReplacement = '_';

        value = value.Trim();

        StringBuilder safeValueStr = new(value.Length);

        for (int i = 0; i < value.Length; i++)
        {
            if (i < value.Length - 1 && char.IsSurrogatePair(value[i], value[i + 1]))
            {
                safeValueStr.Append(invalidCharacterReplacement);

                // Skip both chars that make up this symbol.
                i++;
                continue;
            }

            char current = value[i];

            // A new identifier segment starts at the very beginning or right after a '.' separator.
            bool isFirstCharacterOfIdentifier = safeValueStr.Length == 0 || safeValueStr[safeValueStr.Length - 1] == '.';
            bool isValidFirstCharacter = UnicodeCharacterUtilities.IsIdentifierStartCharacter(current);
            bool isValidPartCharacter = UnicodeCharacterUtilities.IsIdentifierPartCharacter(current);

            // A separator is only legal after a non-empty segment (so not first, and not right after
            // another '.') and when at least one more character follows it.
            bool isSegmentSeparator = !isFirstCharacterOfIdentifier && i < value.Length - 1 && current == '.';

            if (isFirstCharacterOfIdentifier && !isValidFirstCharacter && isValidPartCharacter)
            {
                // This character cannot be at the beginning, but is good otherwise. Prefix it with something valid.
                safeValueStr.Append(invalidCharacterReplacement);
                safeValueStr.Append(current);
            }
            else if ((isFirstCharacterOfIdentifier && isValidFirstCharacter) ||
                (!isFirstCharacterOfIdentifier && isValidPartCharacter) ||
                isSegmentSeparator)
            {
                // This character is allowed to be where it is.
                safeValueStr.Append(current);
            }
            else
            {
                safeValueStr.Append(invalidCharacterReplacement);
            }
        }

        return safeValueStr.ToString();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,11 @@ private static void GenerateCode(string language, string rootNamespace, ITaskIte

private static string GetSourceCode(string language, string rootNamespace, string extensionsFragments)
{
if (language != VBLanguageSymbol && !string.IsNullOrEmpty(rootNamespace))
{
rootNamespace = NamespaceHelpers.ToSafeNamespace(rootNamespace);
}

if (language == CSharpLanguageSymbol)
{
return string.IsNullOrEmpty(rootNamespace)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ private static void GenerateEntryPoint(string language, string rootNamespace, IT

private static string GetEntryPointSourceCode(string language, string rootNamespace)
{
if (language != VBLanguageSymbol && !string.IsNullOrEmpty(rootNamespace))
{
rootNamespace = NamespaceHelpers.ToSafeNamespace(rootNamespace);
}

if (language == CSharpLanguageSymbol)
{
return string.IsNullOrEmpty(rootNamespace)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

namespace Microsoft.Testing.Platform.MSBuild;

/// <summary>
/// Character classification helpers that follow the identifier grammar
/// (identifier-start-character / identifier-part-character).
/// </summary>
internal static partial class UnicodeCharacterUtilities
{
    /// <summary>
    /// Returns true if the Unicode character can be the first character of an identifier.
    /// </summary>
    /// <param name="ch">The Unicode character.</param>
    public static bool IsIdentifierStartCharacter(char ch)
    {
        // identifier-start-character:
        //   letter-character
        //   _ (the underscore character U+005F)
        if (ch > '\u007F')
        {
            // Outside ASCII, anything classified as a letter is accepted.
            return IsLetterChar(CharUnicodeInfo.GetUnicodeCategory(ch));
        }

        // Within ASCII, only a-z, A-Z and the underscore are valid.
        return ch is (>= 'a' and <= 'z') or (>= 'A' and <= 'Z') or '_';
    }

    /// <summary>
    /// Returns true if the Unicode character can appear inside an identifier.
    /// </summary>
    /// <param name="ch">The Unicode character.</param>
    public static bool IsIdentifierPartCharacter(char ch)
    {
        // identifier-part-character:
        //   letter-character
        //   decimal-digit-character
        //   connecting-character
        //   combining-character
        //   formatting-character
        if (ch <= '\u007F')
        {
            // Within ASCII, only a-z, A-Z, 0-9 and the underscore are valid.
            return ch is (>= 'a' and <= 'z') or (>= 'A' and <= 'Z') or (>= '0' and <= '9') or '_';
        }

        UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(ch);

        if (IsLetterChar(category) || IsDecimalDigitChar(category))
        {
            return true;
        }

        if (IsConnectingChar(category) || IsCombiningChar(category))
        {
            return true;
        }

        return IsFormattingChar(category);
    }

    /// <summary>
    /// Checks whether the whole string is a valid Unicode identifier: a start character
    /// followed by zero or more part characters.
    /// </summary>
    /// <param name="name">The candidate identifier; null and empty are rejected.</param>
    public static bool IsValidIdentifier(string? name)
    {
        if (name is null || name.Length == 0)
        {
            return false;
        }

        if (!IsIdentifierStartCharacter(name[0]))
        {
            return false;
        }

        // The first character was validated above, so continue from the second one.
        int index = 1;
        while (index < name.Length)
        {
            if (!IsIdentifierPartCharacter(name[index]))
            {
                return false;
            }

            index++;
        }

        return true;
    }

    /// <summary>
    /// Returns true if the Unicode character is a formatting character (Unicode class Cf).
    /// </summary>
    /// <param name="ch">The Unicode character.</param>
    internal static bool IsFormattingChar(char ch)
    {
        // There are no formatting characters in the ASCII range, so skip the category lookup.
        if (ch <= 127)
        {
            return false;
        }

        return IsFormattingChar(CharUnicodeInfo.GetUnicodeCategory(ch));
    }

    // letter-character:
    //   A Unicode character of classes Lu, Ll, Lt, Lm, Lo, or Nl
    //   A Unicode-escape-sequence representing a character of classes Lu, Ll, Lt, Lm, Lo, or Nl
    private static bool IsLetterChar(UnicodeCategory cat)
        => cat is UnicodeCategory.UppercaseLetter
            or UnicodeCategory.LowercaseLetter
            or UnicodeCategory.TitlecaseLetter
            or UnicodeCategory.ModifierLetter
            or UnicodeCategory.OtherLetter
            or UnicodeCategory.LetterNumber;

    // combining-character:
    //   A Unicode character of classes Mn or Mc
    //   A Unicode-escape-sequence representing a character of classes Mn or Mc
    private static bool IsCombiningChar(UnicodeCategory cat)
        => cat is UnicodeCategory.NonSpacingMark or UnicodeCategory.SpacingCombiningMark;

    // decimal-digit-character:
    //   A Unicode character of the class Nd
    //   A Unicode-escape-sequence representing a character of the class Nd
    private static bool IsDecimalDigitChar(UnicodeCategory cat)
        => cat is UnicodeCategory.DecimalDigitNumber;

    // connecting-character:
    //   A Unicode character of the class Pc
    //   A Unicode-escape-sequence representing a character of the class Pc
    private static bool IsConnectingChar(UnicodeCategory cat)
        => cat is UnicodeCategory.ConnectorPunctuation;

    /// <summary>
    /// Returns true if the Unicode category is the formatting category (Unicode class Cf).
    /// </summary>
    /// <param name="cat">The Unicode category.</param>
    private static bool IsFormattingChar(UnicodeCategory cat)
        // formatting-character:
        //   A Unicode character of the class Cf
        //   A Unicode-escape-sequence representing a character of the class Cf
        => cat is UnicodeCategory.Format;
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ public async Task Microsoft_Testing_Platform_Extensions_ShouldBe_Correctly_Regis
<OutputType>Exe</OutputType>
<!-- Do not warn about package downgrade. NuGet uses alphabetical sort as ordering so -dev or -ci are considered downgrades of -preview. -->
<NoWarn>$(NoWarn);NETSDK1201</NoWarn>
<RootNamespace>(MSBuild Tests)</RootNamespace>
</PropertyGroup>
<ItemGroup>
Expand Down

0 comments on commit 8026b02

Please sign in to comment.