Skip to content

Commit

Permalink
Merge pull request #364 from dabutvin/page-files
Browse files Browse the repository at this point in the history
paginate the definitions in the mongo store (fixes #254)
  • Loading branch information
dabutvin authored Jan 30, 2019
2 parents 2ffdd02 + b4bf779 commit be97205
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 29 deletions.
23 changes: 21 additions & 2 deletions business/aggregator.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,19 @@
//

const { getLatestVersion, mergeDefinitions } = require('../lib/utils')
const { flattenDeep, set } = require('lodash')
const { flattenDeep, get, set, intersectionBy } = require('lodash')
const logger = require('../providers/logging/logger')

class AggregationService {
constructor(options) {
this.options = options
// we take the configured precedence expected to be highest first
this.workingPrecedence =
options.precedence && flattenDeep(options.precedence.map(group => [...group].reverse()).reverse())
this.logger = logger()
}

process(summarized) {
process(summarized, coordinates) {
const result = {}
const order = this.workingPrecedence || []
const tools = []
Expand All @@ -53,6 +55,7 @@ class AggregationService {
})
if (!tools.length) return null
set(result, 'described.tools', tools.reverse())
this._normalizeFiles(result, summarized, coordinates)
return result
}

Expand All @@ -66,6 +69,22 @@ class AggregationService {
const latest = getLatestVersion(versions)
return latest ? { toolSpec: `${tool}/${latest}`, summary: summarized[tool][latest] } : null
}

/*
* Take the clearlydefined tool as the source of truth for file paths as it is just a recursive dir
* Intersect the summarized file list with the clearlydefined file list by path
*/
_normalizeFiles(result, summarized, coordinates) {
const cdFiles = get(this._findData('clearlydefined', summarized), 'summary.files')
if (!cdFiles) return
const difference = result.files.length - cdFiles.length
if (!difference) return
this.logger.info('difference between summary file count and cd file count', {
count: difference,
coordinates: coordinates.toString()
})
result.files = intersectionBy(result.files, cdFiles, 'path')
}
}

module.exports = options => new AggregationService(options)
2 changes: 1 addition & 1 deletion business/definitionService.js
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ class DefinitionService {
const raw = await this.harvestStore.getAll(coordinates)
coordinates = this._getCasedCoordinates(raw, coordinates)
const summaries = await this.summaryService.summarizeAll(coordinates, raw)
const aggregatedDefinition = (await this.aggregationService.process(summaries)) || {}
const aggregatedDefinition = (await this.aggregationService.process(summaries, coordinates)) || {}
aggregatedDefinition.coordinates = coordinates
this._ensureToolScores(coordinates, aggregatedDefinition)
const definition = await this.curationService.apply(coordinates, curationSpec, aggregatedDefinition)
Expand Down
11 changes: 8 additions & 3 deletions lib/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ const semver = require('semver')
const EntityCoordinates = require('./entityCoordinates')
const ResultCoordinates = require('./resultCoordinates')
const moment = require('moment')
const { set, find, unset, union } = require('lodash')
const { set, unset, union } = require('lodash')
const extend = require('extend')
const SPDX = require('./spdx')

Expand Down Expand Up @@ -96,8 +96,12 @@ function mergeDefinitions(base, proposed) {
function _mergeFiles(base, proposed) {
if (!proposed) return base
if (!base) return proposed
const baseLookup = base.reduce((result, item) => {
result[item.path] = item
return result
}, {})
proposed.forEach(file => {
const entry = find(base, current => current.path === file.path)
const entry = baseLookup[file.path]
if (entry) _mergeFile(entry, file)
else base.push(file)
})
Expand All @@ -119,9 +123,10 @@ function _mergeFile(base, proposed) {
/**
 * Merge the `described` sections of two definitions.
 * `facets` and `hashes` are object-merged; `files` (the file count) takes the
 * larger of the two values; everything else follows the generic merge.
 * Note: this span contained both the pre- and post-change diff lines (a duplicated
 * `const result` declaration); resolved to the updated version.
 * @param {object} base - existing described block (may be falsy)
 * @param {object} proposed - incoming described block (may be falsy)
 * @returns {object} the merged described block
 */
function _mergeDescribed(base, proposed) {
  if (!proposed) return base
  if (!base) return proposed
  const result = _mergeExcept(base, proposed, ['facets', 'hashes', 'files'])
  setIfValue(result, 'facets', _mergeObject(base.facets, proposed.facets))
  setIfValue(result, 'hashes', _mergeObject(base.hashes, proposed.hashes))
  setIfValue(result, 'files', Math.max(base.files || 0, proposed.files || 0))
  return result
}

Expand Down
43 changes: 38 additions & 5 deletions providers/stores/mongo.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const MongoClient = require('mongodb').MongoClient
const promiseRetry = require('promise-retry')
const EntityCoordinates = require('../../lib/entityCoordinates')
const logger = require('../logging/logger')
const { clone, get, range } = require('lodash')

class MongoStore {
constructor(options) {
Expand Down Expand Up @@ -49,18 +50,50 @@ class MongoStore {
* @param {Coordinates} coordinates - The coordinates of the object to get
* @returns The loaded object
*/
get(coordinates) {
return this.collection.findOne({ _id: this._getId(coordinates) }, { projection: { _id: 0 } })
async get(coordinates) {
const cursor = await this.collection.find(
{ '_mongo.partitionKey': this._getId(coordinates) },
{ projection: { _id: 0, _mongo: 0 }, sort: { '_mongo.page': 1 } }
)
let definition
await cursor.forEach(page => {
if (!definition) definition = page
else definition.files = definition.files.concat(page.files)
})
return definition
}

async store(definition) {
const pageSize = 1000
definition._id = this._getId(definition.coordinates)
await this.collection.replaceOne({ _id: definition._id }, definition, { upsert: true })
return null
await this.collection.deleteMany({ '_mongo.partitionKey': definition._id })
const pages = Math.ceil((get(definition, 'files.length') || 1) / pageSize)
const result = await this.collection.insertMany(
range(pages).map(
index => {
if (index === 0) {
const definitionPage = clone(definition)
if (definition.files) definitionPage.files = definition.files.slice(0, pageSize)
return { ...definitionPage, _mongo: { partitionKey: definition._id, page: 1, totalPages: pages } }
}
return {
_id: definition._id + `/${index}`,
_mongo: {
partitionKey: definition._id,
page: index + 1,
totalPages: pages
},
files: definition.files.slice(index * pageSize, index * pageSize + pageSize)
}
},
{ ordered: false }
)
)
return result
}

async delete(coordinates) {
await this.collection.deleteOne({ _id: this._getId(coordinates) })
await this.collection.deleteMany({ _id: new RegExp(`^${this._getId(coordinates)}(\\/.+)?$`) })
return null
}

Expand Down
63 changes: 45 additions & 18 deletions test/providers/store/mongoDefinition.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const Store = require('../../../providers/stores/mongo')
const sinon = require('sinon')
const { expect } = require('chai')
const EntityCoordinates = require('../../../lib/entityCoordinates')
const { range } = require('lodash')

describe('Mongo Definition store', () => {
const data = {
Expand Down Expand Up @@ -50,15 +51,39 @@ describe('Mongo Definition store', () => {
const definition = createDefinition('npm/npmjs/-/foo/1.0')
const store = createStore()
await store.store(definition)
expect(store.collection.replaceOne.callCount).to.eq(1)
expect(store.collection.replaceOne.args[0][0]._id).to.eq('npm/npmjs/-/foo/1.0')
expect(store.collection.deleteMany.callCount).to.eq(1)
expect(store.collection.deleteMany.args[0][0]['_mongo.partitionKey']).to.eq('npm/npmjs/-/foo/1.0')
expect(store.collection.insertMany.callCount).to.eq(1)
expect(store.collection.insertMany.args[0][0][0]._id).to.eq('npm/npmjs/-/foo/1.0')
expect(store.collection.insertMany.args[0][0][0]._mongo.page).to.eq(1)
expect(store.collection.insertMany.args[0][0][0]._mongo.totalPages).to.eq(1)
})

it('stores a paged definition', async () => {
  // 1500 files should produce two pages: 1000 files on page 1, 500 on page 2
  const definition = createDefinition('npm/npmjs/-/foo/1.0')
  definition.files = range(0, 1500).map(index => ({ path: `/path/to/${index}.txt` }))
  const store = createStore()
  await store.store(definition)
  expect(store.collection.deleteMany.callCount).to.eq(1)
  expect(store.collection.deleteMany.args[0][0]['_mongo.partitionKey']).to.eq('npm/npmjs/-/foo/1.0')
  expect(store.collection.insertMany.callCount).to.eq(1)
  const [firstPage, secondPage] = store.collection.insertMany.args[0][0]
  expect(firstPage._id).to.eq('npm/npmjs/-/foo/1.0')
  expect(firstPage.files.length).to.eq(1000)
  expect(firstPage._mongo.page).to.eq(1)
  expect(firstPage._mongo.totalPages).to.eq(2)
  expect(secondPage._id).to.eq('npm/npmjs/-/foo/1.0/1')
  expect(secondPage.files.length).to.eq(500)
  expect(secondPage._mongo.page).to.eq(2)
  expect(secondPage._mongo.totalPages).to.eq(2)
})

// Verifies delete sweeps the base document and every file page via one
// regex-addressed deleteMany. (The stale pre-change `deleteOne` assertions
// that appeared in this diff span have been removed.)
it('deletes a definition', async () => {
  const store = createStore()
  await store.delete(EntityCoordinates.fromString('npm/npmjs/-/foo/1.0'))
  expect(store.collection.deleteMany.callCount).to.eq(1)
  expect(store.collection.deleteMany.args[0][0]._id).to.deep.eq(/^npm\/npmjs\/-\/foo\/1.0(\/.+)?$/)
})

it('gets a definition', async () => {
Expand All @@ -75,32 +100,34 @@ describe('Mongo Definition store', () => {

// Build a minimal definition fixture for the store tests. The flat
// '_mongo.partitionKey' key mirrors how the find stub filters documents.
// (The stale pre-change return line that appeared in this diff span has
// been removed.)
function createDefinition(coordinates) {
  coordinates = EntityCoordinates.fromString(coordinates)
  return { coordinates, _id: coordinates.toString(), '_mongo.partitionKey': coordinates.toString() }
}

function createStore(data) {
const collectionStub = {
find: sinon.stub().callsFake(async filter => {
const regex = filter._id
if (regex.toString().includes('error')) throw new Error('test error')
const partitionKey = filter['_mongo.partitionKey']
if (regex && regex.toString().includes('error')) throw new Error('test error')
if (partitionKey && partitionKey.includes('error')) throw new Error('test error')
// return an object that mimics a Mongo cursor (i.e., has toArray)
return {
toArray: () => {
const result = Object.keys(data)
.map(key => (regex.exec(key) ? data[key] : null))
.filter(e => e)
return result
const result = partitionKey
? Object.keys(data).map(key => (key.indexOf(partitionKey) > -1 ? data[key] : null))
: Object.keys(data).map(key => (regex.exec(key) ? data[key] : null))
return result.filter(e => e)
},
forEach: cb => {
Object.keys(data).forEach(key => {
if (regex && regex.exec(key)) cb(data[key])
if (partitionKey && key.indexOf(partitionKey) > -1) cb(data[key])
})
}
}
}),
findOne: sinon.stub().callsFake(async filter => {
const name = filter._id
if (name.includes('error')) throw new Error('test error')
if (data[name]) return data[name]
throw new Error('not found')
}),
replaceOne: sinon.stub(),
deleteOne: sinon.stub()
insertMany: sinon.stub(),
deleteMany: sinon.stub()
}
const store = Store({})
store.collection = collectionStub
Expand Down

0 comments on commit be97205

Please sign in to comment.