Skip to content

Commit

Permalink
Use MurmurHash3_64 when computing the cacheKey for inline images (bug
Browse files Browse the repository at this point in the history
1799927)

*Please note:* This is a tentative patch, which only fixes the "wrong letter" part of bug 1799927.

It appears that the simple `computeAdler32` function, used when caching inline images, generates hash collisions for some (very short) TypedArrays. In this case that leads to some of the "letters", which are actually inline images, being rendered incorrectly.
To avoid that, we replace it with the `MurmurHash3_64` class, which is already used in other parts of the code-base. The one disadvantage of doing this is that it's slightly slower, which in some cases will lead to a performance regression.[1] However, I believe that we'll have to accept a smaller regression here, since the alternative is much worse (i.e. broken rendering).

One small benefit of these changes is that we can avoid creating lots of `Stream`-instances for already cached inline images.

---
[1] Doing some quick benchmarking in the viewer, using `#pdfBug=Stats`, with the PDF document from issue mozilla#2618 shows at least a 10 percent regression.
  • Loading branch information
Snuffleupagus committed Nov 9, 2022
1 parent f744956 commit 05ea8b2
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 32 deletions.
41 changes: 9 additions & 32 deletions src/core/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,33 +35,12 @@ import { Jbig2Stream } from "./jbig2_stream.js";
import { JpegStream } from "./jpeg_stream.js";
import { JpxStream } from "./jpx_stream.js";
import { LZWStream } from "./lzw_stream.js";
import { MurmurHash3_64 } from "../shared/murmurhash3.js";
import { NullStream } from "./stream.js";
import { PredictorStream } from "./predictor_stream.js";
import { RunLengthStream } from "./run_length_stream.js";

const MAX_LENGTH_TO_CACHE = 1000;
const MAX_ADLER32_LENGTH = 5552;

/**
 * Compute the Adler-32 checksum of a sequence of bytes.
 *
 * Each element is masked with `0xff` before being accumulated. Because the
 * running sums are only reduced modulo 65521 once, after the loop, the input
 * must be shorter than `MAX_ADLER32_LENGTH`; this is asserted in
 * non-production/testing builds.
 *
 * @param {Uint8Array|Array<number>} bytes - The data to checksum.
 * @returns {number} The checksum packed as `(b << 16) | a`. Since it is
 *   produced by bitwise operators, it is a signed 32-bit integer and may be
 *   negative.
 */
function computeAdler32(bytes) {
  const total = bytes.length;
  if (
    typeof PDFJSDev === "undefined" ||
    PDFJSDev.test("!PRODUCTION || TESTING")
  ) {
    assert(
      total < MAX_ADLER32_LENGTH,
      'computeAdler32: Unsupported "bytes" length.'
    );
  }

  let sumA = 1;
  let sumB = 0;
  let pos = 0;
  while (pos < total) {
    // The modulo is deferred until after the loop, which is only valid
    // while `total < 5552`.
    sumA += bytes[pos] & 0xff;
    sumB += sumA;
    pos++;
  }

  const upper = (sumB % 65521) << 16;
  const lower = sumA % 65521;
  return upper | lower;
}

class Parser {
constructor({ lexer, xref, allowStreams = false, recoveryMode = false }) {
Expand Down Expand Up @@ -532,24 +511,21 @@ class Parser {
default:
length = this.findDefaultInlineStreamEnd(stream);
}
let imageStream = stream.makeSubStream(startPos, length, dict);

// Cache all images below the MAX_LENGTH_TO_CACHE threshold by their
// adler32 checksum.
// Cache all images below the MAX_LENGTH_TO_CACHE threshold by their hash.
let cacheKey;
if (length < MAX_LENGTH_TO_CACHE && dictLength < MAX_ADLER32_LENGTH) {
const imageBytes = imageStream.getBytes();
imageStream.reset();

if (length < MAX_LENGTH_TO_CACHE && dictLength > 0) {
const initialStreamPos = stream.pos;
// Set the stream position to the beginning of the dictionary data...
stream.pos = lexer.beginInlineImagePos;
// ... and fetch the bytes of the *entire* dictionary.
const dictBytes = stream.getBytes(dictLength);
// ... and fetch the bytes of the dictionary *and* the inline image.
const inlineBytes = stream.getBytes(dictLength + length);
// Finally, don't forget to reset the stream position.
stream.pos = initialStreamPos;

cacheKey = computeAdler32(imageBytes) + "_" + computeAdler32(dictBytes);
const hash = new MurmurHash3_64();
hash.update(inlineBytes);
cacheKey = hash.hexdigest();

const cacheEntry = this.imageCache[cacheKey];
if (cacheEntry !== undefined) {
Expand All @@ -561,6 +537,7 @@ class Parser {
}
}

let imageStream = stream.makeSubStream(startPos, length, dict);
if (cipherTransform) {
imageStream = cipherTransform.createStream(imageStream, length);
}
Expand Down
1 change: 1 addition & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@
!bug1200096.pdf
!bug1068432.pdf
!issue12295.pdf
!bug1799927.pdf
!bug1146106.pdf
!issue13447.pdf
!bug1245391_reduced.pdf
Expand Down
Binary file added test/pdfs/bug1799927.pdf
Binary file not shown.
6 changes: 6 additions & 0 deletions test/test_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -1782,6 +1782,12 @@
"type": "eq",
"about": "Optional marked content."
},
{ "id": "bug1799927",
"file": "pdfs/bug1799927.pdf",
"md5": "e6ad013c24e58e5b40c3bae50f04c8e8",
"rounds": 1,
"type": "eq"
},
{ "id": "issue11144_reduced",
"file": "pdfs/issue11144_reduced.pdf",
"md5": "09e3e771ebd6867558074e900adb54b9",
Expand Down

0 comments on commit 05ea8b2

Please sign in to comment.