diff --git a/src/core/parser.js b/src/core/parser.js index 088a26c6e99c42..05e93814aaf036 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -35,33 +35,12 @@ import { Jbig2Stream } from "./jbig2_stream.js"; import { JpegStream } from "./jpeg_stream.js"; import { JpxStream } from "./jpx_stream.js"; import { LZWStream } from "./lzw_stream.js"; +import { MurmurHash3_64 } from "../shared/murmurhash3.js"; import { NullStream } from "./stream.js"; import { PredictorStream } from "./predictor_stream.js"; import { RunLengthStream } from "./run_length_stream.js"; const MAX_LENGTH_TO_CACHE = 1000; -const MAX_ADLER32_LENGTH = 5552; - -function computeAdler32(bytes) { - const bytesLength = bytes.length; - if ( - typeof PDFJSDev === "undefined" || - PDFJSDev.test("!PRODUCTION || TESTING") - ) { - assert( - bytesLength < MAX_ADLER32_LENGTH, - 'computeAdler32: Unsupported "bytes" length.' - ); - } - let a = 1, - b = 0; - for (let i = 0; i < bytesLength; ++i) { - // No modulo required in the loop if `bytesLength < 5552`. - a += bytes[i] & 0xff; - b += a; - } - return (b % 65521 << 16) | a % 65521; -} class Parser { constructor({ lexer, xref, allowStreams = false, recoveryMode = false }) { @@ -532,24 +511,21 @@ class Parser { default: length = this.findDefaultInlineStreamEnd(stream); } - let imageStream = stream.makeSubStream(startPos, length, dict); - // Cache all images below the MAX_LENGTH_TO_CACHE threshold by their - // adler32 checksum. + // Cache all images below the MAX_LENGTH_TO_CACHE threshold by their hash. let cacheKey; - if (length < MAX_LENGTH_TO_CACHE && dictLength < MAX_ADLER32_LENGTH) { - const imageBytes = imageStream.getBytes(); - imageStream.reset(); - + if (length < MAX_LENGTH_TO_CACHE && dictLength > 0) { const initialStreamPos = stream.pos; // Set the stream position to the beginning of the dictionary data... stream.pos = lexer.beginInlineImagePos; - // ... and fetch the bytes of the *entire* dictionary. - const dictBytes = stream.getBytes(dictLength); + // ... and fetch the bytes of the dictionary *and* the inline image. + const inlineBytes = stream.getBytes(dictLength + length); // Finally, don't forget to reset the stream position. stream.pos = initialStreamPos; - cacheKey = computeAdler32(imageBytes) + "_" + computeAdler32(dictBytes); + const hash = new MurmurHash3_64(); + hash.update(inlineBytes); + cacheKey = hash.hexdigest(); const cacheEntry = this.imageCache[cacheKey]; if (cacheEntry !== undefined) { @@ -561,6 +537,7 @@ class Parser { } } + let imageStream = stream.makeSubStream(startPos, length, dict); if (cipherTransform) { imageStream = cipherTransform.createStream(imageStream, length); } diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index c0ef6527d3a439..1fbf44a923ea56 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -145,6 +145,7 @@ !bug1200096.pdf !bug1068432.pdf !issue12295.pdf +!bug1799927.pdf !bug1146106.pdf !issue13447.pdf !bug1245391_reduced.pdf diff --git a/test/pdfs/bug1799927.pdf b/test/pdfs/bug1799927.pdf new file mode 100644 index 00000000000000..2605ee54e27383 Binary files /dev/null and b/test/pdfs/bug1799927.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index 9a66ad1cf73deb..8a835a840dbaf3 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -1782,6 +1782,12 @@ "type": "eq", "about": "Optional marked content." }, + { "id": "bug1799927", + "file": "pdfs/bug1799927.pdf", + "md5": "e6ad013c24e58e5b40c3bae50f04c8e8", + "rounds": 1, + "type": "eq" + }, { "id": "issue11144_reduced", "file": "pdfs/issue11144_reduced.pdf", "md5": "09e3e771ebd6867558074e900adb54b9",