diff --git a/README.md b/README.md
index b2889173..6dc11bab 100644
--- a/README.md
+++ b/README.md
@@ -495,12 +495,21 @@ cacache.rm.content(cachePath, 'sha512-SoMeDIGest/IN+BaSE64==').then(() => {
})
```
-#### `> cacache.index.compact(cache, key, matchFn) -> Promise`
+#### `> cacache.index.compact(cache, key, matchFn, [opts]) -> Promise`
Uses `matchFn`, which must be a synchronous function that accepts two entries
and returns a boolean indicating whether or not the two entries match, to
deduplicate all entries in the cache for the given `key`.
+If `opts.validateEntry` is provided, it will be called as a function whose
+only parameter is a single index entry. The function must return a Boolean:
+if it returns `true`, the entry is considered valid and will be kept in the index;
+if it returns `false`, the entry will be removed from the index.
+
+If `opts.validateEntry` is not provided, however, every entry in the index will
+be deduplicated and kept until the first `null` integrity is reached, removing
+all entries that were written before the `null`.
+
The deduplicated list of entries is both written to the index, replacing the
existing content, and returned in the Promise.
diff --git a/lib/entry-index.js b/lib/entry-index.js
index 28f444b7..20afb60e 100644
--- a/lib/entry-index.js
+++ b/lib/entry-index.js
@@ -37,15 +37,31 @@ module.exports.compact = compact
async function compact (cache, key, matchFn, opts = {}) {
const bucket = bucketPath(cache, key)
const entries = await bucketEntries(bucket)
- // reduceRight because the bottom-most result is the newest
+ const newEntries = []
+ // we loop backwards because the bottom-most result is the newest
// since we add new entries with appendFile
- const newEntries = entries.reduceRight((acc, newEntry) => {
- if (!acc.find((oldEntry) => matchFn(oldEntry, newEntry))) {
- acc.push(newEntry)
+ for (let i = entries.length - 1; i >= 0; --i) {
+ const entry = entries[i]
+ // a null integrity could mean either a delete was appended
+ // or the user has simply stored an index that does not map
+ // to any content. we determine if the user wants to keep the
+ // null integrity based on the validateEntry function passed in options.
+ // if the integrity is null and no validateEntry is provided, we break
+ // as we consider the null integrity to be a deletion of everything
+ // that came before it.
+ if (entry.integrity === null && !opts.validateEntry) {
+ break
}
- return acc
- }, [])
+ // if this entry is valid, and it is either the first entry or
+ // the newEntries array doesn't already include an entry that
+ // matches this one based on the provided matchFn, then we add
+ // it to the beginning of our list
+ if ((!opts.validateEntry || opts.validateEntry(entry) === true) &&
+ (newEntries.length === 0 || !newEntries.find((oldEntry) => matchFn(oldEntry, entry)))) {
+ newEntries.unshift(entry)
+ }
+ }
const newIndex = '\n' + newEntries.map((entry) => {
const stringified = JSON.stringify(entry)
@@ -87,7 +103,12 @@ async function compact (cache, key, matchFn, opts = {}) {
// write the file atomically
await disposer(setup(), teardown, write)
- return newEntries.map((entry) => formatEntry(cache, entry, true))
+ // we reverse the list we generated such that the newest
+ // entries come first in order to make looping through them easier
+ // the true passed to formatEntry tells it to keep null
+ // integrity values, if they made it this far it's because
+ // validateEntry returned true, and as such we should return it
+ return newEntries.reverse().map((entry) => formatEntry(cache, entry, true))
}
module.exports.insert = insert
diff --git a/test/entry-index.js b/test/entry-index.js
index 9df5d3f2..889d57e4 100644
--- a/test/entry-index.js
+++ b/test/entry-index.js
@@ -60,21 +60,94 @@ test('compact', async (t) => {
index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 1 } }),
index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 2 } }),
index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 2 } }),
- index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 1 } }),
- // compact will return entries with a null integrity
- index.insert(CACHE, KEY, null, { metadata: { rev: 3 } })
+ index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 1 } })
])
const bucket = index.bucketPath(CACHE, KEY)
const entries = await index.bucketEntries(bucket)
- t.equal(entries.length, 5, 'started with 5 entries')
+ t.equal(entries.length, 4, 'started with 4 entries')
const filter = (entryA, entryB) => entryA.metadata.rev === entryB.metadata.rev
const compacted = await index.compact(CACHE, KEY, filter)
- t.equal(compacted.length, 3, 'should return only three entries')
+ t.equal(compacted.length, 2, 'should return only two entries')
+
+ const newEntries = await index.bucketEntries(bucket)
+ t.equal(newEntries.length, 2, 'bucket was deduplicated')
+})
+
+test('compact: treats null integrity without validateEntry as a delete', async (t) => {
+ t.teardown(() => {
+ index.delete.sync(CACHE, KEY)
+ })
+ // this one does not use Promise.all because we want to be certain
+ // things are written in the right order
+ await index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 1 } })
+ await index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 2 } })
+ // this is a delete, revs 1, 2 and 3 will be omitted
+ await index.insert(CACHE, KEY, null, { metadata: { rev: 3 } })
+ await index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 4 } })
+
+ const bucket = index.bucketPath(CACHE, KEY)
+ const entries = await index.bucketEntries(bucket)
+ t.equal(entries.length, 4, 'started with 4 entries')
+
+ const filter = (entryA, entryB) => entryA.metadata.rev === entryB.metadata.rev
+ const compacted = await index.compact(CACHE, KEY, filter)
+ t.equal(compacted.length, 1, 'should return only one entry')
+ t.equal(compacted[0].metadata.rev, 4, 'kept rev 4')
+
+ const newEntries = await index.bucketEntries(bucket)
+ t.equal(newEntries.length, 1, 'bucket was deduplicated')
+})
+
+test('compact: leverages validateEntry to skip invalid entries', async (t) => {
+ t.teardown(() => {
+ index.delete.sync(CACHE, KEY)
+ })
+ await Promise.all([
+ index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 1 } }),
+ index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 2 } }),
+ index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 2 } }),
+ index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 1 } })
+ ])
+
+ const bucket = index.bucketPath(CACHE, KEY)
+ const entries = await index.bucketEntries(bucket)
+ t.equal(entries.length, 4, 'started with 4 entries')
+
+ const matchFn = (entryA, entryB) => entryA.metadata.rev === entryB.metadata.rev
+ const validateEntry = (entry) => entry.metadata.rev > 1
+ const compacted = await index.compact(CACHE, KEY, matchFn, { validateEntry })
+  t.equal(compacted.length, 1, 'should return only one entry')
+ t.equal(compacted[0].metadata.rev, 2, 'kept the rev 2 entry')
+
+ const newEntries = await index.bucketEntries(bucket)
+ t.equal(newEntries.length, 1, 'bucket was deduplicated')
+})
+
+test('compact: validateEntry allows for keeping null integrity', async (t) => {
+ t.teardown(() => {
+ index.delete.sync(CACHE, KEY)
+ })
+ await Promise.all([
+ index.insert(CACHE, KEY, null, { metadata: { rev: 1 } }),
+ index.insert(CACHE, KEY, null, { metadata: { rev: 2 } }),
+ index.insert(CACHE, KEY, null, { metadata: { rev: 2 } }),
+ index.insert(CACHE, KEY, null, { metadata: { rev: 1 } })
+ ])
+
+ const bucket = index.bucketPath(CACHE, KEY)
+ const entries = await index.bucketEntries(bucket)
+ t.equal(entries.length, 4, 'started with 4 entries')
+
+ const matchFn = (entryA, entryB) => entryA.metadata.rev === entryB.metadata.rev
+ const validateEntry = (entry) => entry.metadata.rev > 1
+ const compacted = await index.compact(CACHE, KEY, matchFn, { validateEntry })
+ t.equal(compacted.length, 1, 'should return only one entry')
+ t.equal(compacted[0].metadata.rev, 2, 'kept the rev 2 entry')
const newEntries = await index.bucketEntries(bucket)
- t.equal(newEntries.length, 3, 'bucket was deduplicated')
+ t.equal(newEntries.length, 1, 'bucket was deduplicated')
})
test('compact: ENOENT in chownr does not cause failure', async (t) => {