Skip to content

Commit db6f539

Browse files
committed
file inference improvements for .tbr and .tgz
When unpacking, only infer brotli compression from the filename if the first 512 bytes are an invalid tar header (or the stream is less than 512 bytes). While Brotli doesn't give us magic header bytes like gzip, we can be reasonably sure that a .tbr file starting with 512 bytes of valid tar data is almost certainly not a brotli compressed archive. And a .tbr file starting with the magic gzip bytes is almost certainly a gzip archive, and not brotli, despite what the filename says. In all cases, if explicit boolean or object values appear in the options for either gzip or brotli, we respect that, and ignore the filename.
1 parent 336fa8f commit db6f539

File tree

3 files changed

+94
-5
lines changed

3 files changed

+94
-5
lines changed

lib/pack.js

+1
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ const Pack = warner(class Pack extends Minipass {
7979

8080
this.portable = !!opt.portable
8181
this.zip = null
82+
8283
if (opt.gzip || opt.brotli) {
8384
if (opt.gzip && opt.brotli) {
8485
throw new TypeError('gzip and brotli are mutually exclusive')

lib/parse.js

+42-3
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,13 @@ module.exports = warner(class Parser extends EE {
100100
// Unlike gzip, brotli doesn't have any magic bytes to identify it
101101
// Users need to explicitly tell us they're extracting a brotli file
102102
// Or we infer from the file extension
103-
this.brotli = opt.brotli || (opt.file && (opt.file.endsWith('.tar.br') || opt.file.endsWith('.tbr')))
103+
const isTBR = (opt.file && (
104+
opt.file.endsWith('.tar.br') || opt.file.endsWith('.tbr')))
105+
// if it's a tbr file it MIGHT be brotli, but we don't know until
106+
// we look at it and verify it's not a valid tar file.
107+
this.brotli = !opt.gzip && opt.brotli !== undefined ? opt.brotli
108+
: isTBR ? undefined
109+
: false
104110

105111
// have to set this so that streams are ok piping into it
106112
this.writable = true
@@ -351,7 +357,9 @@ module.exports = warner(class Parser extends EE {
351357
}
352358

353359
// first write, might be gzipped
354-
if (this[UNZIP] === null && chunk) {
360+
const needSniff = this[UNZIP] === null ||
361+
this.brotli === undefined && this[UNZIP] === false
362+
if (needSniff && chunk) {
355363
if (this[BUFFER]) {
356364
chunk = Buffer.concat([this[BUFFER], chunk])
357365
this[BUFFER] = null
@@ -360,15 +368,45 @@ module.exports = warner(class Parser extends EE {
360368
this[BUFFER] = chunk
361369
return true
362370
}
371+
372+
// look for gzip header
363373
for (let i = 0; this[UNZIP] === null && i < gzipHeader.length; i++) {
364374
if (chunk[i] !== gzipHeader[i]) {
365375
this[UNZIP] = false
366376
}
367377
}
378+
379+
const maybeBrotli = this.brotli === undefined
380+
if (this[UNZIP] === false && maybeBrotli) {
381+
// read the first header to see if it's a valid tar file. If so,
382+
// we can safely assume that it's not actually brotli, despite the
383+
// .tbr or .tar.br file extension.
384+
// if we ended before getting a full chunk, yes, def brotli
385+
if (chunk.length < 512) {
386+
if (this[ENDED]) {
387+
this.brotli = true
388+
} else {
389+
this[BUFFER] = chunk
390+
return true
391+
}
392+
} else {
393+
// if it's tar, it's pretty reliably not brotli, chances of
394+
// that happening are astronomical.
395+
try {
396+
new Header(chunk.slice(0, 512))
397+
this.brotli = false
398+
} catch (_) {
399+
this.brotli = true
400+
}
401+
}
402+
}
403+
368404
if (this[UNZIP] === null || (this[UNZIP] === false && this.brotli)) {
369405
const ended = this[ENDED]
370406
this[ENDED] = false
371-
this[UNZIP] = this.brotli ? new zlib.BrotliDecompress() : new zlib.Unzip()
407+
this[UNZIP] = this[UNZIP] === null
408+
? new zlib.Unzip()
409+
: new zlib.BrotliDecompress()
372410
this[UNZIP].on('data', chunk => this[CONSUMECHUNK](chunk))
373411
this[UNZIP].on('error', er => this.abort(er))
374412
this[UNZIP].on('end', _ => {
@@ -506,6 +544,7 @@ module.exports = warner(class Parser extends EE {
506544
this[UNZIP].end(chunk)
507545
} else {
508546
this[ENDED] = true
547+
if (this.brotli === undefined) chunk = chunk || Buffer.alloc(0)
509548
this.write(chunk)
510549
}
511550
}

test/parse.js

+51-2
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ t.test('fixture tests', t => {
8080
const eventsFile = parsedir + '/' + base + tail
8181
const expect = require(eventsFile)
8282

83-
t.test('one byte at a time', t => {
83+
t.test('uncompressed one byte at a time', t => {
8484
const bs = new ByteStream()
8585
const opt = (maxMeta || filter || strict) ? {
8686
maxMetaEntrySize: maxMeta,
@@ -93,7 +93,7 @@ t.test('fixture tests', t => {
9393
bs.end(tardata)
9494
})
9595

96-
t.test('all at once', t => {
96+
t.test('uncompressed all at once', t => {
9797
const p = new Parse({
9898
maxMetaEntrySize: maxMeta,
9999
filter: filter ? (path, entry) => entry.size % 2 !== 0 : null,
@@ -103,6 +103,31 @@ t.test('fixture tests', t => {
103103
p.end(tardata)
104104
})
105105

106+
t.test('uncompressed one byte at a time, filename .tbr', t => {
107+
const bs = new ByteStream()
108+
const opt = (maxMeta || filter || strict) ? {
109+
maxMetaEntrySize: maxMeta,
110+
filter: filter ? (path, entry) => entry.size % 2 !== 0 : null,
111+
strict: strict,
112+
file: 'example.tbr',
113+
} : null
114+
const bp = new Parse(opt)
115+
trackEvents(t, expect, bp)
116+
bs.pipe(bp)
117+
bs.end(tardata)
118+
})
119+
120+
t.test('uncompressed all at once, filename .tar.br', t => {
121+
const p = new Parse({
122+
maxMetaEntrySize: maxMeta,
123+
filter: filter ? (path, entry) => entry.size % 2 !== 0 : null,
124+
strict: strict,
125+
file: 'example.tar.br',
126+
})
127+
trackEvents(t, expect, p)
128+
p.end(tardata)
129+
})
130+
106131
t.test('gzipped all at once', t => {
107132
const p = new Parse({
108133
maxMetaEntrySize: maxMeta,
@@ -113,6 +138,17 @@ t.test('fixture tests', t => {
113138
p.end(zlib.gzipSync(tardata))
114139
})
115140

141+
t.test('gzipped all at once, filename .tbr', t => {
142+
const p = new Parse({
143+
maxMetaEntrySize: maxMeta,
144+
filter: filter ? (path, entry) => entry.size % 2 !== 0 : null,
145+
strict: strict,
146+
file: 'example.tbr',
147+
})
148+
trackEvents(t, expect, p)
149+
p.end(zlib.gzipSync(tardata))
150+
})
151+
116152
t.test('gzipped byte at a time', t => {
117153
const bs = new ByteStream()
118154
const bp = new Parse({
@@ -171,6 +207,19 @@ t.test('fixture tests', t => {
171207
bs.end(zlib.brotliCompressSync(tardata))
172208
})
173209

210+
t.test('compress with brotli .tbr byte at a time', t => {
211+
const bs = new ByteStream()
212+
const bp = new Parse({
213+
maxMetaEntrySize: maxMeta,
214+
filter: filter ? (path, entry) => entry.size % 2 !== 0 : null,
215+
strict: strict,
216+
file: 'example.tbr',
217+
})
218+
trackEvents(t, expect, bp)
219+
bs.pipe(bp)
220+
bs.end(zlib.brotliCompressSync(tardata))
221+
})
222+
174223
t.test('async chunks', t => {
175224
const p = new Parse({
176225
maxMetaEntrySize: maxMeta,

0 commit comments

Comments
 (0)