diff --git a/Source/MoreDotNet.Test/Extensions/Common/ByteArrayExtensions/GetStringTests.cs b/Source/MoreDotNet.Test/Extensions/Common/ByteArrayExtensions/GetStringTests.cs
new file mode 100644
index 0000000..23282db
--- /dev/null
+++ b/Source/MoreDotNet.Test/Extensions/Common/ByteArrayExtensions/GetStringTests.cs
@@ -0,0 +1,128 @@
+namespace MoreDotNet.Tests.Extensions.Common.ByteArrayExtensions
+{
+    using System.Linq;
+    using System.Text;
+    using MoreDotNet.Extensions.Common;
+    using Xunit;
+
+    /// <summary>
+    /// Tests for the <c>GetString</c> extension method on byte arrays.
+    /// </summary>
+    public class GetStringTests
+    {
+        [Fact]
+        public void GetString_NullBuffer_ShouldReturnEmptyString()
+        {
+            byte[] buffer = null;
+            var result = buffer.GetString();
+
+            Assert.Equal(string.Empty, result);
+        }
+
+        [Fact]
+        public void GetString_ZeroLengthBuffer_ShouldReturnEmptyString()
+        {
+            var buffer = new byte[] { };
+            var result = buffer.GetString();
+
+            Assert.Equal(string.Empty, result);
+        }
+
+        [Fact]
+        public void GetString_UTF8_ShouldReturnProperString()
+        {
+            var buffer = GetBytesWithPreamble(Encoding.UTF8, "More Dot Net");
+            var result = buffer.GetString();
+
+            Assert.Equal("More Dot Net", result);
+        }
+
+        [Fact]
+        public void GetString_Unicode_ShouldReturnProperString()
+        {
+            var buffer = GetBytesWithPreamble(Encoding.Unicode, "More Dot Net");
+            var result = buffer.GetString();
+
+            Assert.Equal("More Dot Net", result);
+        }
+
+        [Fact]
+        public void GetString_BigEndianUnicode_ShouldReturnProperString()
+        {
+            var buffer = GetBytesWithPreamble(Encoding.BigEndianUnicode, "More Dot Net");
+            var result = buffer.GetString();
+
+            Assert.Equal("More Dot Net", result);
+        }
+
+        [Fact]
+        public void GetString_UTF32_ShouldReturnProperString()
+        {
+            var buffer = GetBytesWithPreamble(Encoding.UTF32, "More Dot Net");
+            var result = buffer.GetString();
+
+            Assert.Equal("More Dot Net", result);
+        }
+
+        [Fact]
+        public void GetString_UTF32BigEndian_ShouldReturnProperString()
+        {
+            var buffer = GetBytesWithPreamble(new UTF32Encoding(true, true), "More Dot Net");
+            var result = buffer.GetString();
+
+            Assert.Equal("More Dot Net", result);
+        }
+
+        [Fact]
+        public void GetString_UTF7_ShouldReturnProperString()
+        {
+            var buffer = GetBytesWithPreamble(Encoding.UTF7, "More Dot Net");
+            var result = buffer.GetString();
+
+            Assert.Equal("More Dot Net", result);
+        }
+
+        [Fact]
+        public void GetString_ANSI_Specified_ShouldReturnProperString()
+        {
+            var buffer = GetBytesWithPreamble(Encoding.Default, "More Dot Net");
+            var result = buffer.GetString();
+
+            Assert.Equal("More Dot Net", result);
+        }
+
+        [Fact]
+        public void GetString_NoEncodingSpecified_ShouldReturnProperString()
+        {
+            var buffer = new byte[] { 77, 111, 114, 101, 32, 68, 111, 116, 32, 78, 101, 116 };
+            var result = buffer.GetString();
+
+            Assert.Equal("More Dot Net", result);
+        }
+
+        [Fact]
+        public void GetString_UTF16WithLeadingZeros_ShouldReturnProperString()
+        {
+            // 30 bytes: a UTF-16 LE preamble (FF FE) followed by U+0000 and "Moare Dot Net".
+            // The first four bytes match the UTF-32 LE preamble, but the length is not a multiple of 4.
+            var buffer = new byte[] { 0xff, 0xfe, 0, 0, 77, 0, 111, 0, 97, 0, 114, 0, 101, 0, 32, 0, 68, 0, 111, 0, 116, 0, 32, 0, 78, 0, 101, 0, 116, 0 };
+            var result = buffer.GetString();
+
+            Assert.Equal("\0Moare Dot Net", result);
+        }
+
+        [Fact]
+        public void GetString_ASCII_ShouldReturnProperString()
+        {
+            var buffer = GetBytesWithPreamble(Encoding.ASCII, "More Dot Net");
+            var result = buffer.GetString();
+
+            Assert.Equal("More Dot Net", result);
+        }
+
+        /// <summary>
+        /// Encodes <paramref name="data"/> with <paramref name="encoding"/> and prepends that encoding's preamble bytes.
+        /// </summary>
+        private static byte[] GetBytesWithPreamble(Encoding encoding, string data) => encoding.GetPreamble().Concat(encoding.GetBytes(data)).ToArray();
+    }
+}
diff --git a/Source/MoreDotNet.Test/MoreDotNet.Tests.csproj b/Source/MoreDotNet.Test/MoreDotNet.Tests.csproj
index 45bf278..159f9e6 100644
--- a/Source/MoreDotNet.Test/MoreDotNet.Tests.csproj
+++ b/Source/MoreDotNet.Test/MoreDotNet.Tests.csproj
@@ -117,6 +117,7 @@
+
diff --git a/Source/MoreDotNet/Extensions/Common/ByteArrayExtensions.cs b/Source/MoreDotNet/Extensions/Common/ByteArrayExtensions.cs
index 2261a12..6fc8b0e 100644
--- a/Source/MoreDotNet/Extensions/Common/ByteArrayExtensions.cs
+++ b/Source/MoreDotNet/Extensions/Common/ByteArrayExtensions.cs
@@ -33,7 +33,19 @@ FE FF UTF-16 big endian
             {
                 encoding = Encoding.UTF8;
             }
-            else if (buffer[0] == 0xfe && buffer[1] == 0xff)
+
+            // FF FE 00 00 is the UTF-32 LE preamble, but it is also a UTF-16 LE preamble followed by U+0000.
+            // UTF-32 always uses 4 bytes per code unit, so only pick it when the length is a multiple of 4.
+            // The length checks come first so a buffer shorter than the preamble is never indexed out of range.
+            else if (buffer.Length >= 4 && buffer.Length % 4 == 0 && buffer[0] == 0xff && buffer[1] == 0xfe && buffer[2] == 0 && buffer[3] == 0)
+            {
+                encoding = Encoding.UTF32;
+            }
+            else if (buffer.Length >= 4 && buffer[0] == 0 && buffer[1] == 0 && buffer[2] == 0xfe && buffer[3] == 0xff)
+            {
+                encoding = new UTF32Encoding(true, true);
+            }
+            else if (buffer.Length >= 2 && buffer[0] == 0xff && buffer[1] == 0xfe)
             {
                 encoding = Encoding.Unicode;
             }
@@ -41,10 +53,6 @@ FE FF UTF-16 big endian
             {
                 encoding = Encoding.BigEndianUnicode; // utf-16be
             }
-            else if (buffer[0] == 0 && buffer[1] == 0 && buffer[2] == 0xfe && buffer[3] == 0xff)
-            {
-                encoding = Encoding.UTF32;
-            }
             else if (buffer[0] == 0x2b && buffer[1] == 0x2f && buffer[2] == 0x76)
             {
                 encoding = Encoding.UTF7;