Skip to content

Commit

Permalink
Preserve the mark when buffering
Browse files Browse the repository at this point in the history
So that we can rewind if required.

Fixes #1324
  • Loading branch information
jhy committed Feb 15, 2020
1 parent 528ba55 commit 62c0595
Show file tree
Hide file tree
Showing 7 changed files with 1,048 additions and 11 deletions.
4 changes: 4 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ jsoup changelog
* Bugfix: in a <select> tag, a second <optgroup> would not automatically close an earlier open <optgroup>
<https://github.com/jhy/jsoup/issues/1313>

* Bugfix: in CharacterReader when parsing an input stream, could throw a Mark Invalid exception if the reader was
marked, a bufferUp occurred, and then the reader was rewound.
<https://github.com/jhy/jsoup/issues/1324>

* Removed old methods and classes that were marked deprecated in previous releases.

**** Release 1.12.2 [2020-Feb-08]
Expand Down
19 changes: 13 additions & 6 deletions src/main/java/org/jsoup/parser/CharacterReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,18 @@ public CharacterReader(String input) {

private boolean readFully; // if the underlying stream has been completely read, no value in further buffering
private void bufferUp() {
if (readFully)
if (readFully || bufPos < bufSplitPoint)
return;

final int pos = bufPos;
if (pos < bufSplitPoint)
return;
final int pos;
final int offset;
if (bufMark != -1) {
pos = bufMark;
offset = bufPos - bufMark;
} else {
pos = bufPos;
offset = 0;
}

try {
final long skipped = reader.skip(pos);
Expand All @@ -70,8 +76,9 @@ private void bufferUp() {
Validate.isTrue(skipped == pos); // Previously asserted that there is room in buf to skip, so this will be a WTF
bufLength = read;
readerPos += pos;
bufPos = 0;
bufMark = -1;
bufPos = offset;
if (bufMark != -1)
bufMark = 0;
bufSplitPoint = bufLength > readAheadLimit ? readAheadLimit : bufLength;
}
} catch (IOException e) {
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jsoup/parser/Token.java
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ final static class EndTag extends Tag{

@Override
public String toString() {
return "</" + name() + ">";
return "</" + (tagName != null ? tagName : "(unset)") + ">";
}
}

Expand Down
13 changes: 13 additions & 0 deletions src/test/java/org/jsoup/integration/ParseTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,19 @@ public void testLowercaseUtf8Charset() throws IOException {
assertEquals("UTF-8", doc.outputSettings().charset().name());
}

@Test
public void testXwiki() throws IOException {
    // Regression test for https://github.com/jhy/jsoup/issues/1324
    final File fixture = getFile("/htmltests/xwiki-1324.html");
    final Document parsed = Jsoup.parse(fixture, null, "https://localhost/");

    final String platformVersion = parsed.select("#xwikiplatformversion").text();
    assertEquals("XWiki Jetty HSQLDB 12.1-SNAPSHOT", platformVersion);

    // Parsing used to go wrong at "=userdirectory": the reader reached its buffer-up point while a mark was set,
    // and the mark was then lost. Buffering was updated to preserve the mark, so this link must round-trip intact.
    final String expectedLink = "<a class=\"list-group-item\" data-id=\"userdirectory\" href=\"/xwiki/bin/admin/XWiki/XWikiPreferences?editor=globaladmin&amp;section=userdirectory\" title=\"Customize the user directory live table.\">User Directory</a>";
    final String actualLink = parsed.select("[data-id=userdirectory]").outerHtml();
    assertEquals(expectedLink, actualLink);
}

public static File getFile(String resourceName) {
try {
URL resource = ParseTest.class.getResource(resourceName);
Expand Down
5 changes: 1 addition & 4 deletions src/test/java/org/jsoup/parser/CharacterReaderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
import java.io.BufferedReader;
import java.io.StringReader;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.*;

/**
* Test suite for character reader.
Expand Down Expand Up @@ -338,5 +336,4 @@ public void notEmptyAtBufferSplitPoint() {
assertTrue(r.isEmpty());
}


}
1 change: 1 addition & 0 deletions src/test/resources/htmltests/README
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ Sources
* yahoo-jp.html http://www.yahoo.co.jp/index.html 12-Jan-2010
* baidu-cn-home.html http://www.baidu.com/ 15-Jul-2010
* nyt-article-1.html http://www.nytimes.com/2010/07/26/business/global/26bp.html?hp
* xwiki-1324.html https://github.com/jhy/jsoup/issues/1324 15-Feb-2020
Loading

0 comments on commit 62c0595

Please sign in to comment.