diff --git a/README.md b/README.md index b2889173..6dc11bab 100644 --- a/README.md +++ b/README.md @@ -495,12 +495,21 @@ cacache.rm.content(cachePath, 'sha512-SoMeDIGest/IN+BaSE64==').then(() => { }) ``` -#### `> cacache.index.compact(cache, key, matchFn) -> Promise` +#### `> cacache.index.compact(cache, key, matchFn, [opts]) -> Promise` Uses `matchFn`, which must be a synchronous function that accepts two entries and returns a boolean indicating whether or not the two entries match, to deduplicate all entries in the cache for the given `key`. +If `opts.validateEntry` is provided, it will be called with a single index +entry as its only parameter. The function must return a Boolean: if it returns +`true`, the entry is considered valid and will be kept in the index; if it +returns `false`, the entry will be removed from the index. + +If `opts.validateEntry` is not provided, every entry in the index will be +deduplicated and kept until the first `null` integrity is reached; all +entries written before the `null` will be removed. + The deduplicated list of entries is both written to the index, replacing the existing content, and returned in the Promise. 
diff --git a/lib/entry-index.js b/lib/entry-index.js index 28f444b7..20afb60e 100644 --- a/lib/entry-index.js +++ b/lib/entry-index.js @@ -37,15 +37,31 @@ module.exports.compact = compact async function compact (cache, key, matchFn, opts = {}) { const bucket = bucketPath(cache, key) const entries = await bucketEntries(bucket) - // reduceRight because the bottom-most result is the newest + const newEntries = [] + // we loop backwards because the bottom-most result is the newest // since we add new entries with appendFile - const newEntries = entries.reduceRight((acc, newEntry) => { - if (!acc.find((oldEntry) => matchFn(oldEntry, newEntry))) { - acc.push(newEntry) + for (let i = entries.length - 1; i >= 0; --i) { + const entry = entries[i] + // a null integrity could mean either a delete was appended + // or the user has simply stored an index that does not map + // to any content. we determine if the user wants to keep the + // null integrity based on the validateEntry function passed in options. + // if the integrity is null and no validateEntry is provided, we break + // as we consider the null integrity to be a deletion of everything + // that came before it. 
+ if (entry.integrity === null && !opts.validateEntry) { + break } - return acc - }, []) + // if this entry is valid, and it is either the first entry or + // the newEntries array doesn't already include an entry that + // matches this one based on the provided matchFn, then we add + // it to the beginning of our list + if ((!opts.validateEntry || opts.validateEntry(entry) === true) && + (newEntries.length === 0 || !newEntries.find((oldEntry) => matchFn(oldEntry, entry)))) { + newEntries.unshift(entry) + } + } const newIndex = '\n' + newEntries.map((entry) => { const stringified = JSON.stringify(entry) @@ -87,7 +103,12 @@ async function compact (cache, key, matchFn, opts = {}) { // write the file atomically await disposer(setup(), teardown, write) - return newEntries.map((entry) => formatEntry(cache, entry, true)) + // we reverse the list we generated such that the newest + // entries come first in order to make looping through them easier + // the true passed to formatEntry tells it to keep null + // integrity values, if they made it this far it's because + // validateEntry returned true, and as such we should return it + return newEntries.reverse().map((entry) => formatEntry(cache, entry, true)) } module.exports.insert = insert diff --git a/test/entry-index.js b/test/entry-index.js index 9df5d3f2..889d57e4 100644 --- a/test/entry-index.js +++ b/test/entry-index.js @@ -60,21 +60,94 @@ test('compact', async (t) => { index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 1 } }), index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 2 } }), index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 2 } }), - index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 1 } }), - // compact will return entries with a null integrity - index.insert(CACHE, KEY, null, { metadata: { rev: 3 } }) + index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 1 } }) ]) const bucket = index.bucketPath(CACHE, KEY) const entries = await index.bucketEntries(bucket) - t.equal(entries.length, 5, 
'started with 5 entries') + t.equal(entries.length, 4, 'started with 4 entries') const filter = (entryA, entryB) => entryA.metadata.rev === entryB.metadata.rev const compacted = await index.compact(CACHE, KEY, filter) - t.equal(compacted.length, 3, 'should return only three entries') + t.equal(compacted.length, 2, 'should return only two entries') + + const newEntries = await index.bucketEntries(bucket) + t.equal(newEntries.length, 2, 'bucket was deduplicated') +}) + +test('compact: treats null integrity without validateEntry as a delete', async (t) => { + t.teardown(() => { + index.delete.sync(CACHE, KEY) + }) + // this one does not use Promise.all because we want to be certain + // things are written in the right order + await index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 1 } }) + await index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 2 } }) + // this is a delete, revs 1, 2 and 3 will be omitted + await index.insert(CACHE, KEY, null, { metadata: { rev: 3 } }) + await index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 4 } }) + + const bucket = index.bucketPath(CACHE, KEY) + const entries = await index.bucketEntries(bucket) + t.equal(entries.length, 4, 'started with 4 entries') + + const filter = (entryA, entryB) => entryA.metadata.rev === entryB.metadata.rev + const compacted = await index.compact(CACHE, KEY, filter) + t.equal(compacted.length, 1, 'should return only one entry') + t.equal(compacted[0].metadata.rev, 4, 'kept rev 4') + + const newEntries = await index.bucketEntries(bucket) + t.equal(newEntries.length, 1, 'bucket was deduplicated') +}) + +test('compact: leverages validateEntry to skip invalid entries', async (t) => { + t.teardown(() => { + index.delete.sync(CACHE, KEY) + }) + await Promise.all([ + index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 1 } }), + index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 2 } }), + index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 2 } }), + index.insert(CACHE, KEY, INTEGRITY, { 
metadata: { rev: 1 } }) + ]) + + const bucket = index.bucketPath(CACHE, KEY) + const entries = await index.bucketEntries(bucket) + t.equal(entries.length, 4, 'started with 4 entries') + + const matchFn = (entryA, entryB) => entryA.metadata.rev === entryB.metadata.rev + const validateEntry = (entry) => entry.metadata.rev > 1 + const compacted = await index.compact(CACHE, KEY, matchFn, { validateEntry }) + t.equal(compacted.length, 1, 'should return only one entries') + t.equal(compacted[0].metadata.rev, 2, 'kept the rev 2 entry') + + const newEntries = await index.bucketEntries(bucket) + t.equal(newEntries.length, 1, 'bucket was deduplicated') +}) + +test('compact: validateEntry allows for keeping null integrity', async (t) => { + t.teardown(() => { + index.delete.sync(CACHE, KEY) + }) + await Promise.all([ + index.insert(CACHE, KEY, null, { metadata: { rev: 1 } }), + index.insert(CACHE, KEY, null, { metadata: { rev: 2 } }), + index.insert(CACHE, KEY, null, { metadata: { rev: 2 } }), + index.insert(CACHE, KEY, null, { metadata: { rev: 1 } }) + ]) + + const bucket = index.bucketPath(CACHE, KEY) + const entries = await index.bucketEntries(bucket) + t.equal(entries.length, 4, 'started with 4 entries') + + const matchFn = (entryA, entryB) => entryA.metadata.rev === entryB.metadata.rev + const validateEntry = (entry) => entry.metadata.rev > 1 + const compacted = await index.compact(CACHE, KEY, matchFn, { validateEntry }) + t.equal(compacted.length, 1, 'should return only one entry') + t.equal(compacted[0].metadata.rev, 2, 'kept the rev 2 entry') const newEntries = await index.bucketEntries(bucket) - t.equal(newEntries.length, 3, 'bucket was deduplicated') + t.equal(newEntries.length, 1, 'bucket was deduplicated') }) test('compact: ENOENT in chownr does not cause failure', async (t) => {