Skip to content

Commit

Permalink
Backport #533 fix in 2.10 branch
Browse files Browse the repository at this point in the history
  • Loading branch information
cowtowncoder committed May 29, 2019
1 parent a1ab6e3 commit cb33f0f
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 14 deletions.
5 changes: 5 additions & 0 deletions release-notes/CREDITS-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,8 @@ David Nault (dnault@github)
* Reported #531: Non-blocking parser reports incorrect locations when fed with
non-zero offset
(2.10.0)
Fabien Renaud (fabienrenaud@github)
* Reported, contributed fix for #533: UTF-8 BOM not accounted for in
`JsonLocation.getByteOffset()`
(2.10.0)
2 changes: 2 additions & 0 deletions release-notes/VERSION-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ JSON library.
#508: Add new exception type `InputCoercionException` to be used for failed coercions
like overflow for `int`
#527: Add simple module-info for JDK9+, using Moditect
#533: UTF-8 BOM not accounted for in JsonLocation.getByteOffset()
(contributed by Fabien R)

2.9.9 (16-May-2019)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,9 @@ public JsonParser constructParser(int parserFeatures, ObjectCodec codec,
ByteQuadsCanonicalizer rootByteSymbols, CharsToNameCanonicalizer rootCharSymbols,
int factoryFeatures) throws IOException
{
int prevInputPtr = _inputPtr;
JsonEncoding enc = detectEncoding();
int bytesProcessed = _inputPtr - prevInputPtr;

if (enc == JsonEncoding.UTF8) {
/* and without canonicalization, byte-based approach is not performant; just use std UTF-8 reader
Expand All @@ -252,7 +254,7 @@ public JsonParser constructParser(int parserFeatures, ObjectCodec codec,
if (JsonFactory.Feature.CANONICALIZE_FIELD_NAMES.enabledIn(factoryFeatures)) {
ByteQuadsCanonicalizer can = rootByteSymbols.makeChild(factoryFeatures);
return new UTF8StreamJsonParser(_context, parserFeatures, _in, codec, can,
_inputBuffer, _inputPtr, _inputEnd, _bufferRecyclable);
_inputBuffer, _inputPtr, _inputEnd, bytesProcessed, _bufferRecyclable);
}
}
return new ReaderBasedJsonParser(_context, parserFeatures, constructReader(), codec,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,23 @@ public class UTF8StreamJsonParser
/**********************************************************
*/

/**
 * Legacy constructor that does not take a count of bytes consumed before
 * {@code start}. It delegates to the overload that accepts
 * {@code bytesPreProcessed}, passing {@code 0} for that argument.
 *
 * @deprecated Since 2.10; use the overload that also takes
 *    {@code int bytesPreProcessed} instead
 */
@Deprecated
public UTF8StreamJsonParser(IOContext ctxt, int features, InputStream in,
ObjectCodec codec, ByteQuadsCanonicalizer sym,
byte[] inputBuffer, int start, int end,
boolean bufferRecyclable)
{
// 0 == no bytes reported as already processed ahead of 'start'
this(ctxt, features, in, codec, sym,
inputBuffer, start, end, 0, bufferRecyclable);
}

public UTF8StreamJsonParser(IOContext ctxt, int features, InputStream in,
ObjectCodec codec, ByteQuadsCanonicalizer sym,
byte[] inputBuffer, int start, int end, int bytesPreProcessed,
boolean bufferRecyclable)
{
super(ctxt, features);
_inputStream = in;
Expand All @@ -149,9 +162,9 @@ public UTF8StreamJsonParser(IOContext ctxt, int features, InputStream in,
_inputBuffer = inputBuffer;
_inputPtr = start;
_inputEnd = end;
_currInputRowStart = start;
_currInputRowStart = start - bytesPreProcessed;
// If we have offset, need to omit that from byte offset, so:
_currInputProcessed = -start;
_currInputProcessed = -start + bytesPreProcessed;
_bufferRecyclable = bufferRecyclable;
}

Expand Down
121 changes: 118 additions & 3 deletions src/test/java/com/fasterxml/jackson/core/json/LocationOffsetsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public void testSimpleInitialOffsets() throws Exception
assertEquals(0L, loc.getCharOffset());
assertEquals(1, loc.getLineNr());
assertEquals(1, loc.getColumnNr());

loc = p.getCurrentLocation();
assertEquals(-1L, loc.getByteOffset());
assertEquals(1L, loc.getCharOffset());
Expand All @@ -42,7 +42,7 @@ public void testSimpleInitialOffsets() throws Exception
assertEquals(-1L, loc.getCharOffset());
assertEquals(1, loc.getLineNr());
assertEquals(1, loc.getColumnNr());

loc = p.getCurrentLocation();
assertEquals(1L, loc.getByteOffset());
assertEquals(-1L, loc.getCharOffset());
Expand All @@ -69,7 +69,7 @@ public void testOffsetWithInputOffset() throws Exception
assertEquals(-1L, loc.getCharOffset());
assertEquals(1, loc.getLineNr());
assertEquals(1, loc.getColumnNr());

loc = p.getCurrentLocation();
assertEquals(1L, loc.getByteOffset());
assertEquals(-1L, loc.getCharOffset());
Expand All @@ -78,4 +78,119 @@ public void testOffsetWithInputOffset() throws Exception

p.close();
}

/**
 * Verifies token and current locations when the whole byte array is handed
 * to the factory without an explicit offset/length, so that leading padding
 * is part of the input and must be reflected in byte offset and column.
 */
public void testOffsetWithoutInputOffset() throws Exception
{
    JsonLocation loc;
    JsonParser p;
    // 3 spaces before, 2 after, just for padding; literal must really contain
    // them since assertions below expect the '{' at byte offset 3 / column 4
    byte[] b = "   { }  ".getBytes("UTF-8");

    // no offset or length passed: padding is NOT peeled off here
    p = JSON_F.createParser(b);
    assertToken(JsonToken.START_OBJECT, p.nextToken());

    // START_OBJECT begins right after the 3 padding bytes
    loc = p.getTokenLocation();
    assertEquals(3L, loc.getByteOffset());
    assertEquals(-1L, loc.getCharOffset());
    assertEquals(1, loc.getLineNr());
    assertEquals(4, loc.getColumnNr());

    // and parser has consumed exactly one more byte (the '{')
    loc = p.getCurrentLocation();
    assertEquals(4L, loc.getByteOffset());
    assertEquals(-1L, loc.getCharOffset());
    assertEquals(1, loc.getLineNr());
    assertEquals(5, loc.getColumnNr());

    p.close();
}

// for [core#533]
/**
 * Verifies that a leading 3-byte UTF-8 BOM is counted in reported byte
 * offsets and columns: the first token of a BOM-prefixed document is
 * expected at byte offset 3.
 */
public void testUtf8Bom() throws Exception
{
    JsonLocation loc;
    JsonParser p;

    // explicit charset (as in sibling tests) so that the test does not
    // depend on platform default encoding
    byte[] b = withUtf8Bom("{ }".getBytes("UTF-8"));

    p = JSON_F.createParser(b);
    assertToken(JsonToken.START_OBJECT, p.nextToken());

    // '{' comes immediately after the 3 BOM bytes
    loc = p.getTokenLocation();
    assertEquals(3L, loc.getByteOffset());
    assertEquals(-1L, loc.getCharOffset());
    assertEquals(1, loc.getLineNr());
    assertEquals(4, loc.getColumnNr());

    loc = p.getCurrentLocation();
    assertEquals(4L, loc.getByteOffset());
    assertEquals(-1L, loc.getCharOffset());
    assertEquals(1, loc.getLineNr());
    assertEquals(5, loc.getColumnNr());

    p.close();
}

/**
 * Verifies location tracking when a UTF-8 BOM is followed by whitespace
 * padding: both the 3 BOM bytes and the 3 padding bytes are expected to be
 * counted, placing the first token at byte offset 6.
 */
public void testUtf8BomWithPadding() throws Exception
{
    JsonLocation loc;
    JsonParser p;

    // 3 spaces of padding after the BOM; needed for the offsets asserted
    // below (3 BOM bytes + 3 spaces == 6). Explicit charset avoids
    // dependency on platform default encoding.
    byte[] b = withUtf8Bom("   { }".getBytes("UTF-8"));

    p = JSON_F.createParser(b);
    assertToken(JsonToken.START_OBJECT, p.nextToken());

    loc = p.getTokenLocation();
    assertEquals(6L, loc.getByteOffset());
    assertEquals(-1L, loc.getCharOffset());
    assertEquals(1, loc.getLineNr());
    assertEquals(7, loc.getColumnNr());

    loc = p.getCurrentLocation();
    assertEquals(7L, loc.getByteOffset());
    assertEquals(-1L, loc.getCharOffset());
    assertEquals(1, loc.getLineNr());
    assertEquals(8, loc.getColumnNr());

    p.close();
}

/**
 * Verifies location tracking when a BOM-prefixed document is embedded in a
 * larger buffer and parsed via the offset/length factory method. Reported
 * offsets are expected to be relative to the start of the given window
 * (that is, to the BOM), not to the start of the backing array.
 */
public void testUtf8BomWithInputOffset() throws Exception
{
    JsonLocation loc;
    JsonParser p;

    // BOM + 3 spaces of padding + document
    byte[] doc = withUtf8Bom("   { }".getBytes("UTF-8"));

    // Embed within bigger buffer: 2 unused bytes before, 2 after. These are
    // outside the window passed to the factory so contents do not matter.
    final int offset = 2;
    byte[] b = new byte[offset + doc.length + 2];
    System.arraycopy(doc, 0, b, offset, doc.length);

    // and then peel the surrounding bytes off
    p = JSON_F.createParser(b, offset, doc.length);
    assertToken(JsonToken.START_OBJECT, p.nextToken());

    // 3 BOM bytes + 3 spaces precede '{' within the window
    loc = p.getTokenLocation();
    assertEquals(6L, loc.getByteOffset());
    assertEquals(-1L, loc.getCharOffset());
    assertEquals(1, loc.getLineNr());
    assertEquals(7, loc.getColumnNr());

    loc = p.getCurrentLocation();
    assertEquals(7L, loc.getByteOffset());
    assertEquals(-1L, loc.getCharOffset());
    assertEquals(1, loc.getLineNr());
    assertEquals(8, loc.getColumnNr());

    p.close();
}

/**
 * Helper that builds a new array consisting of the 3-byte UTF-8 BOM
 * (0xEF 0xBB 0xBF) followed by a copy of the given content.
 *
 * @param bytes Content to prefix; not modified
 *
 * @return Newly allocated array of length {@code bytes.length + 3}
 */
private byte[] withUtf8Bom(byte[] bytes) {
    final byte[] bom = { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF };
    final byte[] result = new byte[bom.length + bytes.length];
    // marker first, then payload right behind it
    System.arraycopy(bom, 0, result, 0, bom.length);
    System.arraycopy(bytes, 0, result, bom.length, bytes.length);
    return result;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -446,15 +446,9 @@ public void testUtf8BOMHandling() throws Exception

JsonParser p = JSON_FACTORY.createParser(input);
assertEquals(JsonToken.START_ARRAY, p.nextToken());
// should also have skipped first 3 bytes of BOM; but do we have offset available?
/* 08-Oct-2013, tatu: Alas, due to [core#111], we have to omit BOM in calculations
* as we do not know what the offset is due to -- may need to revisit, if this
* discrepancy becomes an issue. For now it just means that BOM is considered
* "out of stream" (not part of input).
*/

JsonLocation loc = p.getTokenLocation();
// so if BOM was consider in-stream (part of input), this should expect 3:
assertEquals(0, loc.getByteOffset());
assertEquals(3, loc.getByteOffset());
assertEquals(-1, loc.getCharOffset());
assertEquals(JsonToken.VALUE_NUMBER_INT, p.nextToken());
assertEquals(JsonToken.END_ARRAY, p.nextToken());
Expand Down

0 comments on commit cb33f0f

Please sign in to comment.