aeron-io · mjpt777 · Jan 8, 2022 · Jan 4, 2022 · Jan 4, 2022 · Jan 4, 2022
diff --git a/sbe-tool/src/main/java/uk/co/real_logic/sbe/generation/golang/GolangGenerator.java b/sbe-tool/src/main/java/uk/co/real_logic/sbe/generation/golang/GolangGenerator.java
@@ -20,6 +20,7 @@
 import uk.co.real_logic.sbe.generation.CodeGenerator;
 import org.agrona.generation.OutputManager;
 import uk.co.real_logic.sbe.generation.Generators;
+import uk.co.real_logic.sbe.generation.java.JavaUtil;
 import uk.co.real_logic.sbe.ir.*;
 import org.agrona.Verify;
 
@@ -244,29 +245,31 @@ private void generateCharacterEncodingRangeCheck(
 
         if (null != characterEncoding)
         {
-            switch (token.encoding().characterEncoding())
+            if (JavaUtil.isAsciiEncoding(characterEncoding))
             {
-                case "ASCII":
-                    imports.peek().add("fmt");
-                    sb.append(String.format(
-                        "\tfor idx, ch := range %1$s {\n" +
-                        "\t\tif ch > 127 {\n" +
-                        "\t\t\treturn fmt.Errorf(\"%1$s[%%d]=%%d" +
-                        " failed ASCII validation\", idx, ch)\n" +
-                        "\t\t}\n" +
-                        "\t}\n",
-                        varName));
-                    break;
-
-                case "UTF-8":
-                    imports.peek().add("errors");
-                    imports.peek().add("unicode/utf8");
-                    sb.append(String.format(
-                        "\tif !utf8.Valid(%1$s[:]) {\n" +
-                        "\t\treturn errors.New(\"%1$s failed UTF-8 validation\")\n" +
-                        "\t}\n",
-                        varName));
-                    break;
+                imports.peek().add("fmt");
+                sb.append(String.format(
+                    "\tfor idx, ch := range %1$s {\n" +
+                    "\t\tif ch > 127 {\n" +
+                    "\t\t\treturn fmt.Errorf(\"%1$s[%%d]=%%d" +
+                    " failed ASCII validation\", idx, ch)\n" +
+                    "\t\t}\n" +
+                    "\t}\n",
+                    varName));
+            }
+            else if (JavaUtil.isUtf8Encoding(characterEncoding))
+            {
+                imports.peek().add("errors");
+                imports.peek().add("unicode/utf8");
+                sb.append(String.format(
+                    "\tif !utf8.Valid(%1$s[:]) {\n" +
+                    "\t\treturn errors.New(\"%1$s failed UTF-8 validation\")\n" +
+                    "\t}\n",
+                    varName));
+            }
+            else
+            {
+                throw new IllegalArgumentException("Unsupported encoding: " + characterEncoding);
             }
         }
     }
@@ -1836,7 +1839,7 @@ private void generateCompositePropertyElements(
         final String containingTypeName,
         final List<Token> tokens)
     {
-        for (int i = 0; i < tokens.size();)
+        for (int i = 0; i < tokens.size(); )
         {
             final Token token = tokens.get(i);
             final String propertyName = formatPropertyName(token.name());

diff --git a/sbe-tool/src/main/java/uk/co/real_logic/sbe/generation/java/JavaGenerator.java b/sbe-tool/src/main/java/uk/co/real_logic/sbe/generation/java/JavaGenerator.java
@@ -921,25 +921,16 @@ private void generateDataDecodeMethods(
                 indent + "        }\n\n" +
                 indent + "        final byte[] tmp = new byte[dataLength];\n" +
                 indent + "        buffer.getBytes(limit + headerLength, tmp, 0, dataLength);\n\n" +
-                indent + "        final String value;\n" +
-                indent + "        try\n" +
-                indent + "        {\n" +
-                indent + "            value = new String(tmp, \"%6$s\");\n" +
-                indent + "        }\n" +
-                indent + "        catch (final java.io.UnsupportedEncodingException ex)\n" +
-                indent + "        {\n" +
-                indent + "            throw new RuntimeException(ex);\n" +
-                indent + "        }\n\n" +
-                indent + "        return value;\n" +
+                indent + "        return new String(tmp, %6$s);\n" +
                 indent + "    }\n",
                 formatPropertyName(propertyName),
                 generateStringNotPresentCondition(token.version(), indent),
                 sizeOfLengthField,
                 PrimitiveType.UINT32 == lengthType ? "(int)" : "",
                 generateGet(lengthType, "limit", byteOrderStr),
-                characterEncoding);
+                charset(characterEncoding));
 
-            if (characterEncoding.contains("ASCII"))
+            if (isAsciiEncoding(characterEncoding))
             {
                 new Formatter(sb).format("\n" +
                     indent + "    public int get%1$s(final Appendable appendable)\n" +
@@ -1050,7 +1041,7 @@ private void generateCharArrayEncodeMethods(
     {
         final PrimitiveType lengthPutType = PrimitiveType.UINT32 == lengthType ? PrimitiveType.INT32 : lengthType;
 
-        if (characterEncoding.contains("ASCII"))
+        if (isAsciiEncoding(characterEncoding))
         {
             new Formatter(sb).format("\n" +
                 indent + "    public %1$s %2$s(final String value)\n" +
@@ -1099,16 +1090,8 @@ private void generateCharArrayEncodeMethods(
             new Formatter(sb).format("\n" +
                 indent + "    public %1$s %2$s(final String value)\n" +
                 indent + "    {\n" +
-                indent + "        final byte[] bytes;\n" +
-                indent + "        try\n" +
-                indent + "        {\n" +
-                indent + "            bytes = null == value || value.isEmpty() ?" +
-                " org.agrona.collections.ArrayUtil.EMPTY_BYTE_ARRAY : value.getBytes(\"%3$s\");\n" +
-                indent + "        }\n" +
-                indent + "        catch (final java.io.UnsupportedEncodingException ex)\n" +
-                indent + "        {\n" +
-                indent + "            throw new RuntimeException(ex);\n" +
-                indent + "        }\n\n" +
+                indent + "        final byte[] bytes = (null == value || value.isEmpty()) ?" +
+                " org.agrona.collections.ArrayUtil.EMPTY_BYTE_ARRAY : value.getBytes(%3$s);\n\n" +
                 indent + "        final int length = bytes.length;\n" +
                 indent + "        if (length > %4$d)\n" +
                 indent + "        {\n" +
@@ -1123,7 +1106,7 @@ private void generateCharArrayEncodeMethods(
                 indent + "    }\n",
                 className,
                 formatPropertyName(propertyName),
-                characterEncoding,
+                charset(characterEncoding),
                 maxLengthValue,
                 sizeOfLengthField,
                 generatePut(lengthPutType, "limit", "length", byteOrderStr));
@@ -2042,7 +2025,7 @@ private CharSequence generatePrimitiveArrayPropertyDecode(
                 fieldLength,
                 charset(encoding.characterEncoding()));
 
-            if (encoding.characterEncoding().contains("ASCII"))
+            if (isAsciiEncoding(encoding.characterEncoding()))
             {
                 new Formatter(sb).format("\n" +
                     indent + "    public int get%1$s(final Appendable value)\n" +
@@ -2240,7 +2223,7 @@ private void generateCharArrayEncodeMethods(
             fieldLength,
             offset);
 
-        if (encoding.characterEncoding().contains("ASCII"))
+        if (isAsciiEncoding(encoding.characterEncoding()))
         {
             new Formatter(sb).format("\n" +
                 indent + "    public %1$s %2$s(final String src)\n" +
@@ -2274,15 +2257,10 @@ private void generateCharArrayEncodeMethods(
                 indent + "            throw new IndexOutOfBoundsException(" +
                 "\"CharSequence too large for copy: byte length=\" + srcLength);\n" +
                 indent + "        }\n\n" +
-                indent + "        for (int i = 0; i < srcLength; ++i)\n" +
-                indent + "        {\n" +
-                indent + "            final char charValue = src.charAt(i);\n" +
-                indent + "            final byte byteValue = charValue > 127 ? (byte)'?' : (byte)charValue;\n" +
-                indent + "            buffer.putByte(offset + %4$d + i, byteValue);\n" +
-                indent + "        }\n\n" +
-                indent + "        for (int i = srcLength; i < length; ++i)\n" +
+                indent + "        buffer.putStringWithoutLengthAscii(offset + %4$d, src);\n\n" +
+                indent + "        for (int start = srcLength; start < length; ++start)\n" +
                 indent + "        {\n" +
-                indent + "            buffer.putByte(offset + %4$d + i, (byte)0);\n" +
+                indent + "            buffer.putByte(offset + %4$d + start, (byte)0);\n" +
                 indent + "        }\n\n" +
                 indent + "        return this;\n" +
                 indent + "    }\n",
@@ -2297,7 +2275,8 @@ private void generateCharArrayEncodeMethods(
                 indent + "    public %s %s(final String src)\n" +
                 indent + "    {\n" +
                 indent + "        final int length = %d;\n" +
-                indent + "        final byte[] bytes = null == src ? new byte[0] : src.getBytes(%s);\n" +
+                indent + "        final byte[] bytes = (null == src || src.isEmpty()) ?" +
+                " org.agrona.collections.ArrayUtil.EMPTY_BYTE_ARRAY : src.getBytes(%s);\n" +
                 indent + "        if (bytes.length > length)\n" +
                 indent + "        {\n" +
                 indent + "            throw new IndexOutOfBoundsException(" +
@@ -2387,7 +2366,7 @@ private static void generateCharacterEncodingMethod(
             sb.append("\n")
                 .append(indent).append("    public static String ").append(propName).append("CharacterEncoding()\n")
                 .append(indent).append("    {\n")
-                .append(indent).append("        return \"").append(characterEncoding).append("\";\n")
+                .append(indent).append("        return ").append(charsetName(characterEncoding)).append(";\n")
                 .append(indent).append("    }\n");
         }
     }
@@ -3537,7 +3516,7 @@ private void appendDecoderDisplay(
             }
             else
             {
-                if (characterEncoding.contains("ASCII") || characterEncoding.contains("ascii"))
+                if (isAsciiEncoding(characterEncoding))
                 {
                     append(sb, indent, "builder.append('\\'');");
                     append(sb, indent, formatGetterName(varDataToken.name()) + "(builder);");

diff --git a/sbe-tool/src/main/java/uk/co/real_logic/sbe/generation/java/JavaUtil.java b/sbe-tool/src/main/java/uk/co/real_logic/sbe/generation/java/JavaUtil.java
@@ -18,20 +18,19 @@
 import org.agrona.Strings;
 import uk.co.real_logic.sbe.PrimitiveType;
 import uk.co.real_logic.sbe.SbeTool;
+import uk.co.real_logic.sbe.ValidationUtil;
 import uk.co.real_logic.sbe.generation.Generators;
 import uk.co.real_logic.sbe.ir.Token;
-import uk.co.real_logic.sbe.ValidationUtil;
 
 import java.io.IOException;
 import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.EnumMap;
 import java.util.HashMap;
 import java.util.Map;
 
-import static java.lang.reflect.Modifier.STATIC;
-
 /**
  * Utilities for mapping between {@link uk.co.real_logic.sbe.ir.Ir} and the Java language.
  */
@@ -96,19 +95,33 @@ public String toString()
     /**
      * Indexes known charset aliases to the name of the instance in {@link StandardCharsets}.
      */
-    private static final Map<String, String> STD_CHARSETS = new HashMap<>();
+    static final HashMap<String, String> STD_CHARSETS = new HashMap<>();
 
     static
     {
         try
         {
             for (final Field field : StandardCharsets.class.getDeclaredFields())
             {
-                if (Charset.class.isAssignableFrom(field.getType()) && ((field.getModifiers() & STATIC) == STATIC))
+                if (Charset.class.isAssignableFrom(field.getType()) && Modifier.isStatic(field.getModifiers()) &&
+                    Modifier.isPublic(field.getModifiers()))
                 {
                     final Charset charset = (Charset)field.get(null);
-                    STD_CHARSETS.put(charset.name(), field.getName());
-                    charset.aliases().forEach((alias) -> STD_CHARSETS.put(alias, field.getName()));
+                    final String name = field.getName();
+                    String oldName = STD_CHARSETS.put(charset.name(), name);
+                    if (null != oldName)
+                    {
+                        throw new IllegalStateException("Duplicate charset alias: old=" + oldName + ", new=" + name);
+                    }
+                    for (final String alias : charset.aliases())
+                    {
+                        oldName = STD_CHARSETS.put(alias, name);
+                        if (null != oldName)
+                        {
+                            throw new IllegalStateException("Duplicate charset alias: old=" + oldName + ", new=" +
+                                alias);
+                        }
+                    }
                 }
             }
         }
@@ -207,10 +220,52 @@ public static String charset(final String encoding)
         }
         else
         {
-            return "java.nio.charset.Charset.forName(\"" + encoding + "\")";
+            final String canonicalName = Charset.isSupported(encoding) ? Charset.forName(encoding).name() : encoding;
+            return "java.nio.charset.Charset.forName(\"" + canonicalName + "\")";
         }
     }
 
+    /**
+     * Generate the Java expression that evaluates to the name of the {@link Charset} for an encoding.
+     *
+     * @param encoding as a string name (e.g. UTF-8).
+     * @return source code for the charset name: a {@link StandardCharsets} lookup or a quoted string literal.
+     */
+    public static String charsetName(final String encoding)
+    {
+        final String standardField = STD_CHARSETS.get(encoding);
+        if (null == standardField)
+        {
+            // Not a StandardCharsets constant: emit a literal, canonicalised when the JVM knows the charset.
+            final String literal = Charset.isSupported(encoding) ? Charset.forName(encoding).name() : encoding;
+            return "\"" + literal + "\"";
+        }
+
+        return "java.nio.charset.StandardCharsets." + standardField + ".name()";
+    }
+
+    /**
+     * Checks if the given encoding name or alias resolves to {@link StandardCharsets#US_ASCII}.
+     *
+     * @param encoding as a string name (e.g. ASCII).
+     * @return {@code true} if the encoding denotes the US-ASCII charset, {@code false} for any other name.
+     */
+    public static boolean isAsciiEncoding(final String encoding)
+    {
+        return "US_ASCII".equals(STD_CHARSETS.get(encoding));
+    }
+
+    /**
+     * Checks if the given encoding name or alias resolves to {@link StandardCharsets#UTF_8}.
+     *
+     * @param encoding as a string name (e.g. unicode-1-1-utf-8).
+     * @return {@code true} if the encoding denotes the UTF-8 charset, {@code false} for any other name.
+     */
+    public static boolean isUtf8Encoding(final String encoding)
+    {
+        return "UTF_8".equals(STD_CHARSETS.get(encoding));
+    }
+
     /**
      * Generate a literal value to be used in code generation.
      *

diff --git a/sbe-tool/src/main/java/uk/co/real_logic/sbe/generation/rust/RustGenerator.java b/sbe-tool/src/main/java/uk/co/real_logic/sbe/generation/rust/RustGenerator.java
@@ -20,6 +20,7 @@
 import uk.co.real_logic.sbe.PrimitiveType;
 import uk.co.real_logic.sbe.generation.CodeGenerator;
 import uk.co.real_logic.sbe.generation.Generators;
+import uk.co.real_logic.sbe.generation.java.JavaUtil;
 import uk.co.real_logic.sbe.ir.Encoding;
 import uk.co.real_logic.sbe.ir.Ir;
 import uk.co.real_logic.sbe.ir.Signal;
@@ -304,20 +305,15 @@ static void generateEncoderVarData(
 
             final String varDataType;
             final String toBytesFn;
-            switch (characterEncoding)
+            if (JavaUtil.isUtf8Encoding(characterEncoding))
             {
-                case "UTF-8":
-                {
-                    varDataType = "&str";
-                    toBytesFn = ".as_bytes()";
-                    break;
-                }
-                default:
-                {
-                    varDataType = "&[u8]";
-                    toBytesFn = "";
-                    break;
-                }
+                varDataType = "&str";
+                toBytesFn = ".as_bytes()";
+            }
+            else
+            {
+                varDataType = "&[u8]";
+                toBytesFn = "";
             }
 
             // function to write slice ... todo - handle character encoding ?
@@ -681,23 +677,20 @@ private static void generatePrimitiveConstantDecoder(
             indent(sb, level, "/// characterEncoding: '%s'\n", characterEncoding);
             indent(sb, level, "#[inline]\n");
 
-            switch (characterEncoding)
+            if (JavaUtil.isAsciiEncoding(characterEncoding))
             {
-                case "US-ASCII":
-                {
-                    indent(sb, level, "pub fn %s(&self) -> &'static [u8] {\n",
-                        formatFunctionName(name));
-                    indent(sb, level + 1, "b\"%s\"\n", rawConstValue);
-                    break;
-                }
-                case "UTF-8":
-                {
-                    indent(sb, level, "pub fn %s(&self) -> &'static str {\n", formatFunctionName(name));
-                    indent(sb, level + 1, "\"%s\"\n", rawConstValue);
-                    break;
-                }
-                default:
-                    throw new RuntimeException("Unable to handle " + characterEncoding);
+                indent(sb, level, "pub fn %s(&self) -> &'static [u8] {\n",
+                    formatFunctionName(name));
+                indent(sb, level + 1, "b\"%s\"\n", rawConstValue);
+            }
+            else if (JavaUtil.isUtf8Encoding(characterEncoding))
+            {
+                indent(sb, level, "pub fn %s(&self) -> &'static str {\n", formatFunctionName(name));
+                indent(sb, level + 1, "\"%s\"\n", rawConstValue);
+            }
+            else
+            {
+                throw new IllegalArgumentException("Unsupported encoding: " + characterEncoding);
             }
 
             indent(sb, level, "}\n\n");