diff --git a/release-notes/CREDITS-2.x b/release-notes/CREDITS-2.x index efd26101c1..f1ff2787dc 100644 --- a/release-notes/CREDITS-2.x +++ b/release-notes/CREDITS-2.x @@ -172,3 +172,8 @@ David Nault (dnault@github) * Reported #531: Non-blocking parser reports incorrect locations when fed with non-zero offset (2.10.0) + +Fabien Renaud (fabienrenaud@github) + * Reported, contributed fix for #533: UTF-8 BOM not accounted for in + `JsonLocation.getByteOffset()` + (2.10.0) diff --git a/release-notes/VERSION-2.x b/release-notes/VERSION-2.x index a27d2d77d8..ebeed10d0a 100644 --- a/release-notes/VERSION-2.x +++ b/release-notes/VERSION-2.x @@ -31,6 +31,8 @@ JSON library. #508: Add new exception type `InputCoercionException` to be used for failed coercions like overflow for `int` #527: Add simple module-info for JDK9+, using Moditect +#533: UTF-8 BOM not accounted for in JsonLocation.getByteOffset() + (contributed by Fabien R) 2.9.9 (16-May-2019) diff --git a/src/main/java/com/fasterxml/jackson/core/json/ByteSourceJsonBootstrapper.java b/src/main/java/com/fasterxml/jackson/core/json/ByteSourceJsonBootstrapper.java index bc509077e6..ce4ce11cd5 100644 --- a/src/main/java/com/fasterxml/jackson/core/json/ByteSourceJsonBootstrapper.java +++ b/src/main/java/com/fasterxml/jackson/core/json/ByteSourceJsonBootstrapper.java @@ -243,7 +243,9 @@ public JsonParser constructParser(int parserFeatures, ObjectCodec codec, ByteQuadsCanonicalizer rootByteSymbols, CharsToNameCanonicalizer rootCharSymbols, int factoryFeatures) throws IOException { + int prevInputPtr = _inputPtr; JsonEncoding enc = detectEncoding(); + int bytesProcessed = _inputPtr - prevInputPtr; if (enc == JsonEncoding.UTF8) { /* and without canonicalization, byte-based approach is not performant; just use std UTF-8 reader @@ -252,7 +254,7 @@ public JsonParser constructParser(int parserFeatures, ObjectCodec codec, if (JsonFactory.Feature.CANONICALIZE_FIELD_NAMES.enabledIn(factoryFeatures)) { ByteQuadsCanonicalizer can = 
rootByteSymbols.makeChild(factoryFeatures); return new UTF8StreamJsonParser(_context, parserFeatures, _in, codec, can, - _inputBuffer, _inputPtr, _inputEnd, _bufferRecyclable); + _inputBuffer, _inputPtr, _inputEnd, bytesProcessed, _bufferRecyclable); } } return new ReaderBasedJsonParser(_context, parserFeatures, constructReader(), codec, diff --git a/src/main/java/com/fasterxml/jackson/core/json/UTF8StreamJsonParser.java b/src/main/java/com/fasterxml/jackson/core/json/UTF8StreamJsonParser.java index cee8b47fe6..554ce79209 100644 --- a/src/main/java/com/fasterxml/jackson/core/json/UTF8StreamJsonParser.java +++ b/src/main/java/com/fasterxml/jackson/core/json/UTF8StreamJsonParser.java @@ -137,10 +137,23 @@ public class UTF8StreamJsonParser /********************************************************** */ + /** + * @deprecated Since 2.10 + */ + @Deprecated public UTF8StreamJsonParser(IOContext ctxt, int features, InputStream in, ObjectCodec codec, ByteQuadsCanonicalizer sym, byte[] inputBuffer, int start, int end, boolean bufferRecyclable) + { + this(ctxt, features, in, codec, sym, + inputBuffer, start, end, 0, bufferRecyclable); + } + + public UTF8StreamJsonParser(IOContext ctxt, int features, InputStream in, + ObjectCodec codec, ByteQuadsCanonicalizer sym, + byte[] inputBuffer, int start, int end, int bytesPreProcessed, + boolean bufferRecyclable) { super(ctxt, features); _inputStream = in; @@ -149,9 +162,9 @@ public UTF8StreamJsonParser(IOContext ctxt, int features, InputStream in, _inputBuffer = inputBuffer; _inputPtr = start; _inputEnd = end; - _currInputRowStart = start; + _currInputRowStart = start - bytesPreProcessed; // If we have offset, need to omit that from byte offset, so: - _currInputProcessed = -start; + _currInputProcessed = -start + bytesPreProcessed; _bufferRecyclable = bufferRecyclable; } diff --git a/src/test/java/com/fasterxml/jackson/core/json/LocationOffsetsTest.java b/src/test/java/com/fasterxml/jackson/core/json/LocationOffsetsTest.java index 
a8e7d25998..3f2fa5f1f7 100644 --- a/src/test/java/com/fasterxml/jackson/core/json/LocationOffsetsTest.java +++ b/src/test/java/com/fasterxml/jackson/core/json/LocationOffsetsTest.java @@ -23,7 +23,7 @@ public void testSimpleInitialOffsets() throws Exception assertEquals(0L, loc.getCharOffset()); assertEquals(1, loc.getLineNr()); assertEquals(1, loc.getColumnNr()); - + loc = p.getCurrentLocation(); assertEquals(-1L, loc.getByteOffset()); assertEquals(1L, loc.getCharOffset()); @@ -42,7 +42,7 @@ public void testSimpleInitialOffsets() throws Exception assertEquals(-1L, loc.getCharOffset()); assertEquals(1, loc.getLineNr()); assertEquals(1, loc.getColumnNr()); - + loc = p.getCurrentLocation(); assertEquals(1L, loc.getByteOffset()); assertEquals(-1L, loc.getCharOffset()); @@ -69,7 +69,7 @@ public void testOffsetWithInputOffset() throws Exception assertEquals(-1L, loc.getCharOffset()); assertEquals(1, loc.getLineNr()); assertEquals(1, loc.getColumnNr()); - + loc = p.getCurrentLocation(); assertEquals(1L, loc.getByteOffset()); assertEquals(-1L, loc.getCharOffset()); @@ -78,4 +78,119 @@ public void testOffsetWithInputOffset() throws Exception p.close(); } + + public void testOffsetWithoutInputOffset() throws Exception + { + JsonLocation loc; + JsonParser p; + // 3 spaces before, 2 after, just for padding + byte[] b = " { } ".getBytes("UTF-8"); + + // and then peel them off + p = JSON_F.createParser(b); + assertToken(JsonToken.START_OBJECT, p.nextToken()); + + loc = p.getTokenLocation(); + assertEquals(3L, loc.getByteOffset()); + assertEquals(-1L, loc.getCharOffset()); + assertEquals(1, loc.getLineNr()); + assertEquals(4, loc.getColumnNr()); + + loc = p.getCurrentLocation(); + assertEquals(4L, loc.getByteOffset()); + assertEquals(-1L, loc.getCharOffset()); + assertEquals(1, loc.getLineNr()); + assertEquals(5, loc.getColumnNr()); + + p.close(); + } + + // for [core#533] + public void testUtf8Bom() throws Exception + { + JsonLocation loc; + JsonParser p; + + byte[] b = 
withUtf8Bom("{ }".getBytes()); + + // and then peel them off + p = JSON_F.createParser(b); + assertToken(JsonToken.START_OBJECT, p.nextToken()); + + loc = p.getTokenLocation(); + assertEquals(3L, loc.getByteOffset()); + assertEquals(-1L, loc.getCharOffset()); + assertEquals(1, loc.getLineNr()); + assertEquals(4, loc.getColumnNr()); + + loc = p.getCurrentLocation(); + assertEquals(4L, loc.getByteOffset()); + assertEquals(-1L, loc.getCharOffset()); + assertEquals(1, loc.getLineNr()); + assertEquals(5, loc.getColumnNr()); + + p.close(); + } + + public void testUtf8BomWithPadding() throws Exception + { + JsonLocation loc; + JsonParser p; + + byte[] b = withUtf8Bom(" { }".getBytes()); + + // and then peel them off + p = JSON_F.createParser(b); + assertToken(JsonToken.START_OBJECT, p.nextToken()); + + loc = p.getTokenLocation(); + assertEquals(6L, loc.getByteOffset()); + assertEquals(-1L, loc.getCharOffset()); + assertEquals(1, loc.getLineNr()); + assertEquals(7, loc.getColumnNr()); + + loc = p.getCurrentLocation(); + assertEquals(7L, loc.getByteOffset()); + assertEquals(-1L, loc.getCharOffset()); + assertEquals(1, loc.getLineNr()); + assertEquals(8, loc.getColumnNr()); + + p.close(); + } + + public void testUtf8BomWithInputOffset() throws Exception + { + JsonLocation loc; + JsonParser p; + + byte[] b = withUtf8Bom(" { }".getBytes()); + + // and then peel them off + p = JSON_F.createParser(b); + assertToken(JsonToken.START_OBJECT, p.nextToken()); + + loc = p.getTokenLocation(); + assertEquals(6L, loc.getByteOffset()); + assertEquals(-1L, loc.getCharOffset()); + assertEquals(1, loc.getLineNr()); + assertEquals(7, loc.getColumnNr()); + + loc = p.getCurrentLocation(); + assertEquals(7L, loc.getByteOffset()); + assertEquals(-1L, loc.getCharOffset()); + assertEquals(1, loc.getLineNr()); + assertEquals(8, loc.getColumnNr()); + + p.close(); + } + + private byte[] withUtf8Bom(byte[] bytes) { + byte[] arr = new byte[bytes.length + 3]; + // write UTF-8 BOM + arr[0] = (byte) 0xEF; 
+ arr[1] = (byte) 0xBB; + arr[2] = (byte) 0xBF; + System.arraycopy(bytes, 0, arr, 3, bytes.length); + return arr; + } } diff --git a/src/test/java/com/fasterxml/jackson/core/read/JsonParserTest.java b/src/test/java/com/fasterxml/jackson/core/read/JsonParserTest.java index 87844a7390..1508aac2de 100644 --- a/src/test/java/com/fasterxml/jackson/core/read/JsonParserTest.java +++ b/src/test/java/com/fasterxml/jackson/core/read/JsonParserTest.java @@ -446,15 +446,9 @@ public void testUtf8BOMHandling() throws Exception JsonParser p = JSON_FACTORY.createParser(input); assertEquals(JsonToken.START_ARRAY, p.nextToken()); - // should also have skipped first 3 bytes of BOM; but do we have offset available? - /* 08-Oct-2013, tatu: Alas, due to [core#111], we have to omit BOM in calculations - * as we do not know what the offset is due to -- may need to revisit, if this - * discrepancy becomes an issue. For now it just means that BOM is considered - * "out of stream" (not part of input). - */ + JsonLocation loc = p.getTokenLocation(); - // so if BOM was consider in-stream (part of input), this should expect 3: - assertEquals(0, loc.getByteOffset()); + assertEquals(3, loc.getByteOffset()); assertEquals(-1, loc.getCharOffset()); assertEquals(JsonToken.VALUE_NUMBER_INT, p.nextToken()); assertEquals(JsonToken.END_ARRAY, p.nextToken());