Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Take the dictionary, and not just the image data, into account when caching inline images (issue 9398) #9420

Merged
merged 1 commit into from
Feb 13, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 45 additions & 17 deletions src/core/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,22 @@ import { Jbig2Stream } from './jbig2_stream';
import { JpegStream } from './jpeg_stream';
import { JpxStream } from './jpx_stream';

var MAX_LENGTH_TO_CACHE = 1000;
const MAX_LENGTH_TO_CACHE = 1000;
const MAX_ADLER32_LENGTH = 5552;

/**
 * Computes an Adler-32 style checksum over an indexable sequence of bytes.
 *
 * Each element is masked to its low 8 bits before being accumulated. The two
 * running sums are reduced modulo 65521 only once, after the loop; the length
 * guard below is what makes skipping the per-iteration modulo acceptable.
 *
 * @param {Uint8Array|Array<number>} bytes - The data to checksum; must expose
 *   a `length` and numeric indices.
 * @returns {number} The combined checksum. Because it is assembled with
 *   bitwise operators, the value is a *signed* 32-bit integer and may be
 *   negative.
 * @throws {Error} If `bytes.length` is not below `MAX_ADLER32_LENGTH`.
 */
function computeAdler32(bytes) {
  const count = bytes.length;
  if (count >= MAX_ADLER32_LENGTH) {
    throw new Error('computeAdler32: The input is too large.');
  }

  let lowSum = 1;
  let highSum = 0;
  let index = 0;
  while (index < count) {
    // No modulo required in the loop if `count < 5552`.
    lowSum += bytes[index] & 0xFF;
    highSum += lowSum;
    index += 1;
  }

  const highWord = highSum % 65521;
  const lowWord = lowSum % 65521;
  return (highWord << 16) | lowWord;
}

var Parser = (function ParserClosure() {
function Parser(lexer, allowStreams, xref, recoveryMode) {
Expand Down Expand Up @@ -371,7 +386,7 @@ var Parser = (function ParserClosure() {
var stream = lexer.stream;

// Parse dictionary.
var dict = new Dict(this.xref);
let dict = new Dict(this.xref), dictLength;
while (!isCmd(this.buf1, 'ID') && !isEOF(this.buf1)) {
if (!isName(this.buf1)) {
throw new FormatError('Dictionary key must be a name object');
Expand All @@ -383,6 +398,9 @@ var Parser = (function ParserClosure() {
}
dict.set(key, this.getObj(cipherTransform));
}
if (lexer.beginInlineImagePos !== -1) {
dictLength = stream.pos - lexer.beginInlineImagePos;
}

// Extract the name of the first (i.e. the current) image filter.
var filter = dict.get('Filter', 'F'), filterName;
Expand All @@ -396,7 +414,7 @@ var Parser = (function ParserClosure() {
}

// Parse image stream.
var startPos = stream.pos, length, i, ii;
let startPos = stream.pos, length;
if (filterName === 'DCTDecode' || filterName === 'DCT') {
length = this.findDCTDecodeInlineStreamEnd(stream);
} else if (filterName === 'ASCII85Decode' || filterName === 'A85') {
Expand All @@ -410,21 +428,22 @@ var Parser = (function ParserClosure() {

// Cache all images below the MAX_LENGTH_TO_CACHE threshold by their
// adler32 checksum.
var adler32;
if (length < MAX_LENGTH_TO_CACHE) {
let cacheKey;
if (length < MAX_LENGTH_TO_CACHE && dictLength < MAX_ADLER32_LENGTH) {
var imageBytes = imageStream.getBytes();
imageStream.reset();

var a = 1;
var b = 0;
for (i = 0, ii = imageBytes.length; i < ii; ++i) {
// No modulo required in the loop if imageBytes.length < 5552.
a += imageBytes[i] & 0xff;
b += a;
}
adler32 = ((b % 65521) << 16) | (a % 65521);
const initialStreamPos = stream.pos;
// Set the stream position to the beginning of the dictionary data...
stream.pos = lexer.beginInlineImagePos;
// ... and fetch the bytes of the *entire* dictionary.
let dictBytes = stream.getBytes(dictLength);
// Finally, don't forget to reset the stream position.
stream.pos = initialStreamPos;

let cacheEntry = this.imageCache[adler32];
cacheKey = computeAdler32(imageBytes) + '_' + computeAdler32(dictBytes);

let cacheEntry = this.imageCache[cacheKey];
if (cacheEntry !== undefined) {
this.buf2 = Cmd.get('EI');
this.shift();
Expand All @@ -440,9 +459,9 @@ var Parser = (function ParserClosure() {

imageStream = this.filter(imageStream, dict, length);
imageStream.dict = dict;
if (adler32 !== undefined) {
imageStream.cacheKey = 'inline_' + length + '_' + adler32;
this.imageCache[adler32] = imageStream;
if (cacheKey !== undefined) {
imageStream.cacheKey = 'inline_' + length + '_' + cacheKey;
this.imageCache[cacheKey] = imageStream;
}

this.buf2 = Cmd.get('EI');
Expand Down Expand Up @@ -653,6 +672,8 @@ var Lexer = (function LexerClosure() {
// 'fa', 'fal', 'fals'. The prefixes are not needed, if the command has no
// other commands or literals as a prefix. The knownCommands is optional.
this.knownCommands = knownCommands;

this.beginInlineImagePos = -1;
}

// A '1' in this array means the character is white space. A '1' or
Expand Down Expand Up @@ -1047,6 +1068,13 @@ var Lexer = (function LexerClosure() {
if (str === 'null') {
return null;
}

if (str === 'BI') {
// Keep track of the current stream position, since it's needed in order
// to correctly cache inline images; see `Parser.makeInlineImage`.
this.beginInlineImagePos = this.stream.pos;
}

return Cmd.get(str);
},
skipToNextLine: function Lexer_skipToNextLine() {
Expand Down
3 changes: 2 additions & 1 deletion test/test_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -3192,7 +3192,8 @@
"md5": "ad02d4aa374b315bf1766038d002d57a",
"link": false,
"rounds": 1,
"type": "eq"
"type": "eq",
"about": "Also tests issue9398."
},
{ "id": "issue8613",
"file": "pdfs/issue8613.pdf",
Expand Down