From 3fea16e8d7f1d87b398b28ca57d1f14d7e800f61 Mon Sep 17 00:00:00 2001
From: Qing Tomlinson
Date: Fri, 22 Jul 2022 15:26:11 -0700
Subject: [PATCH] Reduce the number of fetches harvesting one component (#475)

* Cache in-progress fetch promises and fetched results

Cache in-progress fetch promises and fetched results for maven
Add a unit test for gitCloner
Cache fetch results from gitCloner
Add a unit test for pypiFetch
Cache fetch results from pypiFetch
Minor refactoring
Cache fetch results from npmjsFetch
Add unit tests for rubyGems
Cache fetch results from rubyGemsFetch
Cache fetch results from packagistFetch
Cache fetch results from cratesioFetch
Cache fetch results from debianFetch
Cache fetch results from goFetch
Deep clone cached result on copy
Cache fetch results from nugetFetch
Add unit tests for podFetch
Cache results from podFetch
Delay fetchResult construction until the end of the fetch

Delay fetchResult construction and transfer the cleanup of the download
directory to the end of the fetch. This ensures that when an error
occurs, the cleanup of the download directory is still tracked in the
request.

Minor refactoring
Minor refactoring
Remove todo to avoid merge conflict
Adapt tests after merge

* Add ScopedQueueSets

ScopedQueueSets contains local and global scoped queue sets. The local
scoped queue set holds tasks to be performed on the fetched result
(package) that is currently being processed and cached locally on the
crawler instance. This avoids refetching and increases the cache hit
rate. The global scoped queue set holds the queues shared among crawler
instances. The local queue set is popped prior to the global one. This
ensures that the cache is utilized before it expires.

* Publish requests on local queues to global upon crawler shutdown

Fix and add tests
Allow graceful shutdown

* Minor refactoring and more tests

* Update Dockerfile to relay the shutdown signal

* Add config for the dispatcher.fetched cache

After the ScopedQueueSets is introduced, the tool tasks on the same
fetched result (in the local scoped queue set) are processed
consecutively. Therefore, the cache TTL for the fetched result can now
be reduced.

* Address review comments

* Remove the --init option in docker run

In my previous changes:
- the Node.js application runs as PID 1 in the Docker container, and
- the application can handle termination signals.

Therefore, the --init option is no longer necessary and has been removed
from the docker run command.
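
Note for reviewers: below are two minimal sketches of the mechanisms
described above, under simplified, illustrative names (FetchCoordinator,
doFetch, ScopedQueues are not the actual identifiers; the TTL cache is
assumed to match the get/set interface of lib/memoryCache.js). These are
distillations, not the exact code in providers/fetch/dispatcher.js or
ghcrawler/providers/queuing/scopedQueueSets.js.

// Two-level fetch caching: completed fetch results live in a TTL cache,
// while in-flight fetches are deduplicated by caching the pending
// promise itself, so concurrent harvests of the same component share a
// single download.
class FetchCoordinator {
  constructor(resultCache) {
    this.fetched = resultCache // TTL cache of completed fetch results
    this.inProgress = {}       // cacheKey -> pending fetch promise
  }

  async fetch(cacheKey, doFetch) {
    // Fast path: a previously fetched result is still cached.
    const cached = this.fetched.get(cacheKey)
    if (cached) return cached
    // Dedupe: reuse the in-flight promise if one exists; otherwise start
    // the fetch and drop the entry once it settles, so a failed fetch can
    // be retried later. (The real dispatcher also passes an onExpire
    // callback to the cache that runs the FetchResult cleanup.)
    if (!this.inProgress[cacheKey]) {
      this.inProgress[cacheKey] = doFetch()
        .then(result => {
          this.fetched.set(cacheKey, result)
          return result
        })
        .finally(() => delete this.inProgress[cacheKey])
    }
    return this.inProgress[cacheKey]
  }
}

Popping local before global keeps the tool tasks for a freshly fetched
component on the same crawler instance while its cache entry is still
warm:

class ScopedQueues {
  constructor(localQueues, globalQueues) {
    this.local = localQueues
    this.global = globalQueues
  }

  // Local-scope queues drain before global ones, so tool tasks queued
  // locally against a cached fetch result run before the entry expires.
  async pop() {
    const request = await this.local.pop()
    if (request) {
      // If this request is later retried, requeue it on the global queues.
      request._retryQueue = request._originQueue.getName()
      return request
    }
    return this.global.pop()
  }
}
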
--- DevDockerfile | 2 +- Dockerfile | 2 +- README.md | 2 +- config/cdConfig.js | 4 +- dev-scripts/debug.bat | 2 +- ghcrawler/bin/www.js | 27 ++ ghcrawler/crawlerFactory.js | 9 +- ghcrawler/lib/crawler.js | 8 +- ghcrawler/lib/crawlerService.js | 19 +- ghcrawler/lib/request.js | 17 +- .../providers/queuing/attenuatedQueue.js | 16 +- ghcrawler/providers/queuing/queueSet.js | 21 - .../providers/queuing/scopedQueueSets.js | 100 ++++ lib/fetchResult.js | 55 +++ lib/memoryCache.js | 26 ++ providers/fetch/cratesioFetch.js | 11 +- providers/fetch/debianFetch.js | 9 +- providers/fetch/dispatcher.js | 35 +- providers/fetch/gitCloner.js | 13 +- providers/fetch/goFetch.js | 8 +- providers/fetch/mavenBasedFetch.js | 13 +- providers/fetch/npmjsFetch.js | 9 +- providers/fetch/nugetFetch.js | 11 +- providers/fetch/packagistFetch.js | 10 +- providers/fetch/podFetch.js | 14 +- providers/fetch/pypiFetch.js | 11 +- providers/fetch/rubyGemsFetch.js | 11 +- providers/process/abstractProcessor.js | 9 +- providers/process/composerExtract.js | 5 +- providers/process/crateExtract.js | 5 +- providers/process/debExtract.js | 5 +- providers/process/gemExtract.js | 5 +- providers/process/goExtract.js | 4 +- providers/process/mavenExtract.js | 5 +- providers/process/npmExtract.js | 5 +- providers/process/nugetExtract.js | 4 +- providers/process/podExtract.js | 5 +- providers/process/pypiExtract.js | 5 +- providers/process/source.js | 7 +- test/fixtures/packagist/registryData.json | 51 ++ test/fixtures/pod/registryData.json | 32 ++ test/fixtures/pod/versions.json | 49 ++ test/fixtures/pypi/registryData.json | 186 ++++++++ test/fixtures/ruby/small-0.5.1.gem | Bin 0 -> 7680 bytes test/unit/ghcrawler/queueSetTests.js | 74 +-- test/unit/ghcrawler/requestTests.js | 47 +- test/unit/ghcrawler/scopedQueueSetsTests.js | 434 ++++++++++++++++++ test/unit/lib/fetchResultTests.js | 117 +++++ .../providers/fetch/cratesioFetchTests.js | 17 +- test/unit/providers/fetch/debianFetchTests.js | 16 + test/unit/providers/fetch/dispatcherTests.js | 390 +++++++++++++++- test/unit/providers/fetch/gitClonerTests.js | 35 ++ test/unit/providers/fetch/goFetchTests.js | 9 +- .../providers/fetch/gradlePluginFetchTests.js | 34 +- .../providers/fetch/mavencentralFetchTests.js | 12 +- .../providers/fetch/mavengoogleFetchTests.js | 12 +- test/unit/providers/fetch/npmjsFetchTests.js | 1 + test/unit/providers/fetch/nugetFetchTests.js | 2 + .../providers/fetch/packagistFetchTests.js | 34 +- test/unit/providers/fetch/podFetchTests.js | 49 ++ test/unit/providers/fetch/pypiFetchTests.js | 31 ++ .../providers/fetch/rubyGemsFetchTests.js | 44 ++ .../process/abstractProcessorTests.js | 43 ++ test/unit/providers/process/sourceTests.js | 35 ++ 64 files changed, 2005 insertions(+), 278 deletions(-) create mode 100644 ghcrawler/providers/queuing/scopedQueueSets.js create mode 100644 lib/fetchResult.js create mode 100644 lib/memoryCache.js create mode 100644 test/fixtures/packagist/registryData.json create mode 100644 test/fixtures/pod/registryData.json create mode 100644 test/fixtures/pod/versions.json create mode 100644 test/fixtures/pypi/registryData.json create mode 100644 test/fixtures/ruby/small-0.5.1.gem create mode 100644 test/unit/ghcrawler/scopedQueueSetsTests.js create mode 100644 test/unit/lib/fetchResultTests.js create mode 100644 test/unit/providers/fetch/podFetchTests.js create mode 100644 test/unit/providers/fetch/rubyGemsFetchTests.js create mode 100644 test/unit/providers/process/sourceTests.js diff --git a/DevDockerfile b/DevDockerfile index 
3e115e70..de1c602a 100644 --- a/DevDockerfile +++ b/DevDockerfile @@ -50,4 +50,4 @@ ENV NODE_ENV "localhost" ENV PORT 5000 EXPOSE 5000 -ENTRYPOINT ["npm", "start"] +ENTRYPOINT ["node", "index.js"] diff --git a/Dockerfile b/Dockerfile index 9e58c6a0..ee0a653c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -86,4 +86,4 @@ COPY . "${APPDIR}" ENV PORT 5000 EXPOSE 5000 -ENTRYPOINT ["npm", "start"] +ENTRYPOINT ["node", "index.js"] diff --git a/README.md b/README.md index a38d5395..1c8fea58 100644 --- a/README.md +++ b/README.md @@ -139,7 +139,7 @@ See `local.env.list`, `dev.env.list` and `prod.env.list` tempate files. With a debugger: -`docker run --rm -d --env-file ../dev.env.list -p 9229:9229 -p 5000:5000 --entrypoint npm cdcrawler:latest run local` +`docker run --rm -d --env-file ../dev.env.list -p 9229:9229 -p 5000:5000 --entrypoint node cdcrawler:latest --inspect-brk=0.0.0.0:9229 index.js` At this point you can attach VS Code with the built in debugging profile (see .vscode/launch.json) diff --git a/config/cdConfig.js b/config/cdConfig.js index c31d6946..3fe2e699 100644 --- a/config/cdConfig.js +++ b/config/cdConfig.js @@ -29,7 +29,9 @@ module.exports = { }, fetch: { dispatcher: 'cdDispatch', - cdDispatch: {}, + cdDispatch: { + fetched: { defaultTtlSeconds: 60 * 60 * 8 } + }, cocoapods: { githubToken }, cratesio: {}, debian: { cdFileLocation: cd_file.location }, diff --git a/dev-scripts/debug.bat b/dev-scripts/debug.bat index 781249ae..11cb38dc 100644 --- a/dev-scripts/debug.bat +++ b/dev-scripts/debug.bat @@ -1,3 +1,3 @@ CALL docker kill cdcrawler CALL mkdir C:\temp\crawler-data -CALL docker run --rm --name cdcrawler --env-file %~dp0\..\..\env.list -p 5000:5000 -p 9229:9229 -v C:\temp\crawler-data:/tmp/cd --entrypoint npm cdcrawler:latest run local \ No newline at end of file +CALL docker run --rm --name cdcrawler --env-file %~dp0\..\..\env.list -p 5000:5000 -p 9229:9229 -v C:\temp\crawler-data:/tmp/cd --entrypoint node cdcrawler:latest --inspect-brk=0.0.0.0:9229 index.js \ No newline at end of file diff --git a/ghcrawler/bin/www.js b/ghcrawler/bin/www.js index 421fc6c3..f3018445 100644 --- a/ghcrawler/bin/www.js +++ b/ghcrawler/bin/www.js @@ -35,6 +35,11 @@ function run(service, logger) { server.on('error', onError) server.on('listening', onListening) + server.on('close', onClose) + + process.on('SIGTERM', onShutdown) + process.on('SIGINT', onShutdown) + process.on('SIGHUP', onShutdown) /** * Normalize a port into a number, string, or false. @@ -90,6 +95,28 @@ function run(service, logger) { var bind = typeof addr === 'string' ? 'pipe ' + addr : 'port ' + addr.port console.log(`Crawler service listening on ${bind}`) } + + /** + * Event listener for HTTP server 'close' event. 
+ */ + function onClose() { + service.stop() + .then(() => { + console.log('Server closed.') + process.exit(0) + }, error => { + console.error(`Closing server: ${error}`) + process.exit(1) + }) + } + + /** + * Event listener for terminal signals + */ + function onShutdown(signal) { + console.log(`Received ${signal}`) + server.close() + } } module.exports = run diff --git a/ghcrawler/crawlerFactory.js b/ghcrawler/crawlerFactory.js index c3098c47..4c15319c 100644 --- a/ghcrawler/crawlerFactory.js +++ b/ghcrawler/crawlerFactory.js @@ -4,6 +4,7 @@ const Crawler = require('./lib/crawler') const CrawlerService = require('./lib/crawlerService') const QueueSet = require('./providers/queuing/queueSet') +const ScopedQueueSets = require('./providers/queuing/scopedQueueSets') const RefreshingConfig = require('@microsoft/refreshing-config') let logger = null @@ -58,7 +59,7 @@ class CrawlerFactory { } = {} ) { logger.info('creating crawler') - queues = queues || CrawlerFactory.createQueues(options.queue) + queues = queues || CrawlerFactory.createScopedQueueSets(options.queue) store = store || CrawlerFactory.createStore(options.store) deadletters = deadletters || CrawlerFactory.createDeadLetterStore(options.deadletter) locker = locker || CrawlerFactory.createLocker(options.lock) @@ -216,6 +217,12 @@ class CrawlerFactory { const later = manager.createQueueChain('later', options) return new QueueSet([immediate, soon, normal, later], options) } + + static createScopedQueueSets(queueOptions) { + const globalQueues = CrawlerFactory.createQueues(queueOptions) + const localQueues = CrawlerFactory.createQueues(queueOptions, 'memory') + return new ScopedQueueSets(globalQueues, localQueues) + } } module.exports = CrawlerFactory diff --git a/ghcrawler/lib/crawler.js b/ghcrawler/lib/crawler.js index 658896b0..7086c1c8 100644 --- a/ghcrawler/lib/crawler.js +++ b/ghcrawler/lib/crawler.js @@ -647,8 +647,8 @@ class Crawler { return deadDocument } - queue(requests, name = null) { - return this.queues.push(this._preFilter(requests), name || 'normal') + queue(requests, name = null, scope = null) { + return this.queues.push(this._preFilter(requests), name || 'normal', scope) } _preFilter(requests) { @@ -672,6 +672,10 @@ class Crawler { } return false } + + done() { + return this.queues.publish() + } } module.exports = Crawler diff --git a/ghcrawler/lib/crawlerService.js b/ghcrawler/lib/crawlerService.js index 0ffad1fa..fec2719d 100644 --- a/ghcrawler/lib/crawlerService.js +++ b/ghcrawler/lib/crawlerService.js @@ -36,10 +36,10 @@ class CrawlerService { console.log(`Done loop ${loop.options.name}`) } - async ensureLoops() { + async ensureLoops(targetCount = this.options.crawler.count) { this.loops = this.loops.filter(loop => loop.running()) const running = this.status() - const delta = this.options.crawler.count - running + const delta = targetCount - running if (delta < 0) { for (let i = 0; i < Math.abs(delta); i++) { const loop = this.loops.shift() @@ -61,7 +61,8 @@ class CrawlerService { } stop() { - return this.ensureLoops() + return this.ensureLoops(0) + .then(() => this.crawler.done()) } queues() { @@ -72,24 +73,24 @@ class CrawlerService { return this.crawler.queue(requests, name) } - async flushQueue(name) { - const queue = this.crawler.queues.getQueue(name) + async flushQueue(name, scope = null) { + const queue = this.crawler.queues.getQueue(name, scope) if (!queue) { return null } return queue.flush() } - getQueueInfo(name) { - const queue = this.crawler.queues.getQueue(name) + getQueueInfo(name, scope = null) { + 
const queue = this.crawler.queues.getQueue(name, scope) if (!queue) { return Promise.reject(`No queue found: ${name}`) } return queue.getInfo() } - async getRequests(name, count, remove = false) { - const queue = this.crawler.queues.getQueue(name) + async getRequests(name, count, remove = false, scope = null) { + const queue = this.crawler.queues.getQueue(name, scope) if (!queue) { return null } diff --git a/ghcrawler/lib/request.js b/ghcrawler/lib/request.js index 78c9f8bc..241143b0 100644 --- a/ghcrawler/lib/request.js +++ b/ghcrawler/lib/request.js @@ -126,6 +126,15 @@ class Request { return this } + removeCleanup(cleanups) { + if (!cleanups || !this.cleanups) { + return this + } + const toRemove = Array.isArray(cleanups) ? cleanups : [cleanups] + this.cleanups = this.cleanups.filter(item => !toRemove.includes(item)) + return this + } + addMeta(data) { this.meta = Object.assign({}, this.meta, data) return this @@ -181,13 +190,13 @@ class Request { return this.policy.getNextPolicy(name) } - queueRequests(requests, name = null) { + queueRequests(requests, name = null, scope = null) { requests = Array.isArray(requests) ? requests : [requests] const toQueue = requests.filter(request => !this.hasSeen(request)) - this.track(this.crawler.queue(toQueue, name)) + this.track(this.crawler.queue(toQueue, name, scope)) } - queue(type, url, policy, context = null, pruneRelation = true) { + queue(type, url, policy, context = null, pruneRelation = true, scope = null) { if (!policy) { return } @@ -199,7 +208,7 @@ class Request { if (pruneRelation) { delete newRequest.context.relation } - this.queueRequests(newRequest, _.get(this._originQueue, 'queue.name')) + this.queueRequests(newRequest, _.get(this._originQueue, 'queue.name'), scope) } markDead(outcome, message) { diff --git a/ghcrawler/providers/queuing/attenuatedQueue.js b/ghcrawler/providers/queuing/attenuatedQueue.js index e5de639a..4d6b85fd 100644 --- a/ghcrawler/providers/queuing/attenuatedQueue.js +++ b/ghcrawler/providers/queuing/attenuatedQueue.js @@ -12,6 +12,15 @@ class AttenuatedQueue extends NestedQueue { this.logger = options.logger } + done(request) { + return super.done(request) + .then(() => { + const key = this._getCacheKey(request) + const deleted = memoryCache.del(key) + if (deleted) this.logger.verbose(`Deleted ${key}`) + }) + } + push(requests) { const self = this requests = Array.isArray(requests) ? requests : [requests] @@ -27,7 +36,7 @@ class AttenuatedQueue extends NestedQueue { _pushOne(request) { // Include the attempt count in the key. This allows for one concurrent requeue const attemptCount = request.attemptCount || 0 - const key = `t:${attemptCount}:${request.toUniqueString()}` + const key = this._getCacheKey(request) let entry = memoryCache.get(key) if (entry) { // We've seen this request recently. The push is either in progress (and may fail) or is already done. @@ -54,6 +63,11 @@ class AttenuatedQueue extends NestedQueue { return entry.promise } + _getCacheKey(request) { + const attemptCount = request.attemptCount || 0 + return `t:${attemptCount}:${request.toUniqueString()}` + } + _log(message) { return this.logger ? this.logger.silly(message) : null } diff --git a/ghcrawler/providers/queuing/queueSet.js b/ghcrawler/providers/queuing/queueSet.js index 63f849d0..e6815d7f 100644 --- a/ghcrawler/providers/queuing/queueSet.js +++ b/ghcrawler/providers/queuing/queueSet.js @@ -38,11 +38,6 @@ class QueueSet { return this.getQueue(name).push(requests) } - repush(original, newRequest) { - const queue = original._retryQueue ? 
this.getQueue(original._retryQueue) : original._originQueue - return queue.push(newRequest) - } - subscribe() { return Promise.all( this.queues.map(queue => { @@ -77,22 +72,6 @@ class QueueSet { return result } - done(request) { - const acked = request.acked - request.acked = true - return !acked && request._originQueue ? request._originQueue.done(request) : Promise.resolve() - } - - defer(request) { - return request._originQueue ? request._originQueue.defer(request) : Promise.resolve() - } - - abandon(request) { - const acked = request.acked - request.acked = true - return !acked && request._originQueue ? request._originQueue.abandon(request) : Promise.resolve() - } - getQueue(name) { const result = this.queueTable[name] if (!result) { diff --git a/ghcrawler/providers/queuing/scopedQueueSets.js b/ghcrawler/providers/queuing/scopedQueueSets.js new file mode 100644 index 00000000..66125990 --- /dev/null +++ b/ghcrawler/providers/queuing/scopedQueueSets.js @@ -0,0 +1,100 @@ +// (c) Copyright 2022, SAP SE and ClearlyDefined contributors. Licensed under the MIT license. +// SPDX-License-Identifier: MIT + +class ScopedQueueSets { + constructor(globalQueues, localQueues) { + this._scopedQueues = { + local: localQueues, + global: globalQueues + } + } + + _getQueuesInScope(scope) { + return this._scopedQueues[scope || 'global'] + } + + addQueue(queue, location = 'beginning', scope = null) { + this._getQueuesInScope(scope)?.addQueue(queue, location) + } + + push(requests, name, scope) { + return this.getQueue(name, scope).push(requests) + } + + async repush(original, newRequest) { + //Always retry on the global queue + const queue = original._retryQueue ? this.getQueue(original._retryQueue, 'global') : original._originQueue + if (queue !== original._originQueue) await original._originQueue.done(original) + return queue.push(newRequest) + } + + subscribe() { + return Promise.all( + Object.values(this._scopedQueues).map(queues => { + return queues.subscribe() + }) + ) + } + + unsubscribe() { + return Promise.all( + Object.values(this._scopedQueues).map(queues => { + return queues.unsubscribe() + }) + ) + } + + pop() { + return this._scopedQueues.local.pop() + .then(request => { + if (request) { + //mark to retry on the global queues + request._retryQueue = request._originQueue.getName() + return request + } + return this._scopedQueues.global.pop() + }) + } + + done(request) { + const acked = request.acked + request.acked = true + return !acked && request._originQueue ? request._originQueue.done(request) : Promise.resolve() + } + + defer(request) { + return request._originQueue ? request._originQueue.defer(request) : Promise.resolve() + } + + abandon(request) { + const acked = request.acked + request.acked = true + return !acked && request._originQueue ? 
request._originQueue.abandon(request) : Promise.resolve() + } + + getQueue(name, scope = null) { + return this._getQueuesInScope(scope)?.getQueue(name) + } + + publish() { + const publishToGlobal = async localQueue => { + const localRequests = [] + const info = await localQueue.getInfo() + for (let count = info.count; count > 0; count--) { + localRequests.push( + localQueue.pop() + .then(request => request && localQueue.done(request) + .then(() => this.push(request, localQueue.getName(), 'global')))) + } + return Promise.all(localRequests) + } + + return Promise.allSettled(this._scopedQueues.local.queues.map(publishToGlobal)) + .then(results => { + const found = results.find(result => result.status === 'rejected') + if (found) throw new Error(found.reason) + }) + } +} + +module.exports = ScopedQueueSets diff --git a/lib/fetchResult.js b/lib/fetchResult.js new file mode 100644 index 00000000..b001fbd0 --- /dev/null +++ b/lib/fetchResult.js @@ -0,0 +1,55 @@ +// (c) Copyright 2022, SAP SE and ClearlyDefined contributors. Licensed under the MIT license. +// SPDX-License-Identifier: MIT + +const { cloneDeep } = require('lodash') + +class FetchResult { + + constructor(url) { + this.contentOrigin = 'origin' + if (url) this.url = url + //make sure these are not enumerable + Object.defineProperty(this, '_cleanups', { value: [] }) + Object.defineProperty(this, '_meta', { value: {} }) + } + + trackCleanup(cleanups) { + if (!cleanups) return this + const cleanupCallbacks = Array.isArray(cleanups) ? cleanups : [cleanups] + this._cleanups.push(...cleanupCallbacks) + return this + } + + adoptCleanup(needCleanup, fromRequest) { + if (!needCleanup) return this + const cleanups = (Array.isArray(needCleanup) ? needCleanup : [needCleanup]) + .map(toCleanup => toCleanup.removeCallback) + .filter(item => item) + //transfer the clean up from request to fetchResult + this.trackCleanup(cleanups) + fromRequest?.removeCleanup(cleanups) + return this + } + + cleanup(errorHandler) { + this._cleanups.forEach(cleanup => { + try { + cleanup() + } catch (error) { + errorHandler?.(error) + } + }) + } + + addMeta(data) { + Object.assign(this._meta, data) + return this + } + + copyTo(request) { + Object.assign(request, cloneDeep(this)) + if (Object.keys(this._meta).length) request.addMeta?.(cloneDeep(this._meta)) + } +} + +module.exports = FetchResult \ No newline at end of file diff --git a/lib/memoryCache.js b/lib/memoryCache.js new file mode 100644 index 00000000..c0f5759d --- /dev/null +++ b/lib/memoryCache.js @@ -0,0 +1,26 @@ +// (c) Copyright 2022, SAP SE and ClearlyDefined contributors. Licensed under the MIT license. 
+// SPDX-License-Identifier: MIT + +const Cache = require('memory-cache').Cache + +class MemoryCache { + constructor(options) { + this.cache = new Cache() + this.defaultTtlSeconds = options.defaultTtlSeconds + } + + get(item) { + return this.cache.get(item) + } + + set(item, value, onExpire, ttlSeconds = null) { + const expiration = 1000 * (ttlSeconds || this.defaultTtlSeconds) + this.cache.put(item, value, expiration, onExpire) + } + + delete(item) { + this.cache.del(item) + } +} + +module.exports = options => new MemoryCache(options || { defaultTtlSeconds: 60 * 60 }) \ No newline at end of file diff --git a/providers/fetch/cratesioFetch.js b/providers/fetch/cratesioFetch.js index 1f1e4818..e0a5e9bf 100644 --- a/providers/fetch/cratesioFetch.js +++ b/providers/fetch/cratesioFetch.js @@ -6,6 +6,7 @@ const AbstractFetch = require('./abstractFetch') const request = require('request-promise-native') const fs = require('fs') const path = require('path') +const FetchResult = require('../../lib/fetchResult') class CratesioFetch extends AbstractFetch { canHandle(request) { @@ -26,19 +27,21 @@ class CratesioFetch extends AbstractFetch { await this._getPackage(zip, version) const crateDir = path.join(dir.name, 'crate') await this.decompress(zip, crateDir) + + const fetchResult = new FetchResult(request.url) const location = path.join(crateDir, `${version.crate}-${version.num}`) - request.document = { + fetchResult.document = { registryData: version, releaseDate: version.created_at, location, hashes: await this.computeHashes(zip), manifest: registryData.manifest } - request.contentOrigin = 'origin' if (version.crate) { - request.casedSpec = clone(spec) - request.casedSpec.name = version.crate + fetchResult.casedSpec = clone(spec) + fetchResult.casedSpec.name = version.crate } + request.fetchResult = fetchResult.adoptCleanup(dir, request) return request } diff --git a/providers/fetch/debianFetch.js b/providers/fetch/debianFetch.js index 32065931..60496a25 100644 --- a/providers/fetch/debianFetch.js +++ b/providers/fetch/debianFetch.js @@ -14,6 +14,7 @@ const { promisify } = require('util') const requestPromise = require('request-promise-native') const tmp = require('tmp') const unixArchive = require('ar-async') +const FetchResult = require('../../lib/fetchResult') const exec = promisify(require('child_process').exec) const exists = promisify(fs.exists) @@ -59,9 +60,11 @@ class DebianFetch extends AbstractFetch { const { dir, releaseDate, hashes } = await this._getPackage(request, binary, source, patches) const copyrightUrl = this._getCopyrightUrl(registryData) const declaredLicenses = await this._getDeclaredLicenses(copyrightUrl) - request.document = this._createDocument({ dir, registryData, releaseDate, copyrightUrl, declaredLicenses, hashes }) - request.contentOrigin = 'origin' - request.casedSpec = clone(spec) + + const fetchResult = new FetchResult(request.url) + fetchResult.document = this._createDocument({ dir, registryData, releaseDate, copyrightUrl, declaredLicenses, hashes }) + fetchResult.casedSpec = clone(spec) + request.fetchResult = fetchResult.adoptCleanup(dir, request) return request } diff --git a/providers/fetch/dispatcher.js b/providers/fetch/dispatcher.js index 242c2a49..925a95b6 100644 --- a/providers/fetch/dispatcher.js +++ b/providers/fetch/dispatcher.js @@ -2,14 +2,17 @@ // SPDX-License-Identifier: MIT const AbstractFetch = require('./abstractFetch') +const MemoryCache = require('../../lib/memoryCache') class FetchDispatcher extends AbstractFetch { - constructor(options, store, 
fetchers, processors, filter) { + constructor(options, store, fetchers, processors, filter, fetchResultCache, inProgressFetchCache) { super(options) this.store = store this.fetchers = fetchers this.processors = processors this.filter = filter + this.fetched = fetchResultCache || MemoryCache(options.fetched) + this.inProgressFetches = inProgressFetchCache || {} } canHandle() { @@ -54,15 +57,39 @@ class FetchDispatcher extends AbstractFetch { // get the right real fetcher for this request and dispatch const handler = this._getHandler(request, this.fetchers) if (!handler) throw new Error(`No fetcher found for ${request.toString()}`) - await handler.handle(request) + + await this._fetchResult(request, handler) return request } + async _fetchResult(request, handler) { + const cacheKey = this.toSpec(request).toUrlPath() + const fetchResult = this.fetched.get(cacheKey) || await this._fetchPromise(handler, request, cacheKey) + fetchResult?.copyTo(request) + } + + async _fetchPromise(handler, request, cacheKey) { + return this.inProgressFetches[cacheKey] || + (this.inProgressFetches[cacheKey] = this._fetch(handler, request, cacheKey) + .finally(() => delete this.inProgressFetches[cacheKey])) + } + + async _fetch(handler, request, cacheKey) { + this.logger.debug(`---Start Fetch: ${cacheKey} at ${new Date().toISOString()}`) + await handler.handle(request) + const fetchResult = request.fetchResult + delete request.fetchResult + if (fetchResult) this.fetched.set(cacheKey, fetchResult, (key, value) => + value.cleanup(error => this.logger.info(`Cleanup Problem cleaning up after ${key} ${error.message}`))) + this.logger.debug(`---End Fetch: ${cacheKey} at ${new Date().toISOString()}`) + return fetchResult + } + // get all the handler that apply to this request from the given list of handlers _getHandler(request, list) { return list.filter(element => element.canHandle(request))[0] } } -module.exports = (options, store, fetchers, processors, filter) => - new FetchDispatcher(options, store, fetchers, processors, filter) +module.exports = (options, store, fetchers, processors, filter, fetchResultCache, inProgressFetchCache) => + new FetchDispatcher(options, store, fetchers, processors, filter, fetchResultCache, inProgressFetchCache) diff --git a/providers/fetch/gitCloner.js b/providers/fetch/gitCloner.js index c7142a06..00b80615 100644 --- a/providers/fetch/gitCloner.js +++ b/providers/fetch/gitCloner.js @@ -5,6 +5,7 @@ const AbstractFetch = require('./abstractFetch') const { exec } = require('child_process') const { clone } = require('lodash') const rimraf = require('rimraf') +const FetchResult = require('../../lib/fetchResult') const providerDictionary = { gitlab: 'https://gitlab.com', @@ -31,13 +32,15 @@ class GitCloner extends AbstractFetch { request.url = spec.toUrl() const releaseDate = await this._getDate(dir.name, spec.name) await this._deleteGitDatabase(dir.name, spec.name) - request.contentOrigin = 'origin' - request.document = this._createDocument(dir.name + '/' + spec.name, repoSize, releaseDate, options.version) + + const fetchResult = new FetchResult(request.url).addMeta({ gitSize: repoSize }) + fetchResult.document = this._createDocument(dir.name + '/' + spec.name, repoSize, releaseDate, options.version) if (spec.provider === 'github') { - request.casedSpec = clone(spec) - request.casedSpec.namespace = spec.namespace.toLowerCase() - request.casedSpec.name = spec.name.toLowerCase() + fetchResult.casedSpec = clone(spec) + fetchResult.casedSpec.namespace = spec.namespace.toLowerCase() + 
fetchResult.casedSpec.name = spec.name.toLowerCase() } + request.fetchResult = fetchResult.adoptCleanup(dir, request) return request } diff --git a/providers/fetch/goFetch.js b/providers/fetch/goFetch.js index 413a834e..fd9e9fb2 100644 --- a/providers/fetch/goFetch.js +++ b/providers/fetch/goFetch.js @@ -7,6 +7,7 @@ const axios = require('axios') const { default: axiosRetry, exponentialDelay, isNetworkOrIdempotentRequestError } = require('axios-retry') const { parse: htmlParser } = require('node-html-parser') const { parse: spdxParser } = require('@clearlydefined/spdx') +const FetchResult = require('../../lib/fetchResult') class GoFetch extends AbstractFetch { constructor(options) { @@ -59,9 +60,10 @@ class GoFetch extends AbstractFetch { } } - request.document = this._createDocument(dir, releaseDate, hashes, registryData) - request.contentOrigin = 'origin' - request.casedSpec = clone(spec) + const fetchResult = new FetchResult(request.url) + fetchResult.document = this._createDocument(dir, releaseDate, hashes, registryData) + fetchResult.casedSpec = clone(spec) + request.fetchResult = fetchResult.adoptCleanup(dir, request) return request } diff --git a/providers/fetch/mavenBasedFetch.js b/providers/fetch/mavenBasedFetch.js index c8098cc2..98d9b6c6 100644 --- a/providers/fetch/mavenBasedFetch.js +++ b/providers/fetch/mavenBasedFetch.js @@ -14,6 +14,7 @@ const path = require('path') const parseString = promisify(require('xml2js').parseString) const EntitySpec = require('../../lib/entitySpec') const { extractDate } = require('../../lib/utils') +const FetchResult = require('../../lib/fetchResult') const extensionMap = { sourcesJar: '-sources.jar', @@ -54,13 +55,15 @@ class MavenBasedFetch extends AbstractFetch { await this.decompress(artifact.name, dir.name) const hashes = await this.computeHashes(artifact.name) const releaseDate = await this._getReleaseDate(dir.name, spec) - request.document = this._createDocument(dir, releaseDate, hashes, poms, summary) - request.contentOrigin = 'origin' + + const fetchResult = new FetchResult(request.url) + fetchResult.document = this._createDocument(dir, releaseDate, hashes, poms, summary) if (get(summary, 'groupId[0]') || get(summary, 'artifactId[0]')) { - request.casedSpec = clone(spec) - request.casedSpec.namespace = get(summary, 'groupId[0]') || spec.namespace - request.casedSpec.name = get(summary, 'artifactId[0]') || spec.name + fetchResult.casedSpec = clone(spec) + fetchResult.casedSpec.namespace = get(summary, 'groupId[0]') || spec.namespace + fetchResult.casedSpec.name = get(summary, 'artifactId[0]') || spec.name } + request.fetchResult = fetchResult.adoptCleanup(dir, request) return request } diff --git a/providers/fetch/npmjsFetch.js b/providers/fetch/npmjsFetch.js index 3322c387..8eb7861b 100644 --- a/providers/fetch/npmjsFetch.js +++ b/providers/fetch/npmjsFetch.js @@ -6,6 +6,7 @@ const nodeRequest = require('request') const requestPromise = require('request-promise-native') const fs = require('fs') const { clone, get } = require('lodash') +const FetchResult = require('../../lib/fetchResult') const providerMap = { npmjs: 'https://registry.npmjs.com' @@ -30,10 +31,12 @@ class NpmFetch extends AbstractFetch { const dir = this.createTempDir(request) await this.decompress(file.name, dir.name) const hashes = await this.computeHashes(file.name) - request.document = this._createDocument(dir, registryData, hashes) - request.contentOrigin = 'origin' + + const fetchResult = new FetchResult(request.url) + fetchResult.document = this._createDocument(dir, 
registryData, hashes) const casedSpec = this._getCasedSpec(spec, registryData) - if (casedSpec) request.casedSpec = casedSpec + if (casedSpec) fetchResult.casedSpec = casedSpec + request.fetchResult = fetchResult.adoptCleanup(dir, request) return request } diff --git a/providers/fetch/nugetFetch.js b/providers/fetch/nugetFetch.js index 2d4ffd23..6dbbd1de 100644 --- a/providers/fetch/nugetFetch.js +++ b/providers/fetch/nugetFetch.js @@ -8,6 +8,7 @@ const mkdirp = require('mkdirp') const path = require('path') const { promisify } = require('util') const requestRetry = require('requestretry').defaults({ maxAttempts: 3, fullResponse: true }) +const FetchResult = require('../../lib/fetchResult') const providerMap = { nuget: 'https://api.nuget.org' @@ -42,7 +43,9 @@ class NuGetFetch extends AbstractFetch { await this._getPackage(zip, registryData.packageContent) const location = path.join(dir.name, 'nupkg') await this.decompress(zip, location) - request.document = { + + const fetchResult = new FetchResult(request.url) + fetchResult.document = { registryData, location, metadataLocation, @@ -52,11 +55,11 @@ class NuGetFetch extends AbstractFetch { if (manifest.licenseUrl) { await this._downloadLicense({ dirName: location, licenseUrl: manifest.licenseUrl }) } - request.contentOrigin = 'origin' if (get(manifest, 'id')) { - request.casedSpec = clone(spec) - request.casedSpec.name = manifest.id + fetchResult.casedSpec = clone(spec) + fetchResult.casedSpec.name = manifest.id } + request.fetchResult = fetchResult.adoptCleanup(dir, request) return request } diff --git a/providers/fetch/packagistFetch.js b/providers/fetch/packagistFetch.js index 672ab41a..153f2af4 100644 --- a/providers/fetch/packagistFetch.js +++ b/providers/fetch/packagistFetch.js @@ -8,6 +8,8 @@ const { get } = require('lodash') const nodeRequest = require('request') const { promisify } = require('util') const readdir = promisify(fs.readdir) +const FetchResult = require('../../lib/fetchResult') + const providerMap = { packagist: 'https://repo.packagist.org/' } @@ -28,9 +30,11 @@ class PackagistFetch extends AbstractFetch { const dir = this.createTempDir(request) await this.decompress(file.name, dir.name) const hashes = await this.computeHashes(file.name) - request.document = this._createDocument(dir, registryData, hashes) - request.document.dirRoot = await this._getDirRoot(dir.name) - request.contentOrigin = 'origin' + + const fetchResult = new FetchResult(request.url) + fetchResult.document = this._createDocument(dir, registryData, hashes) + fetchResult.document.dirRoot = await this._getDirRoot(dir.name) + request.fetchResult = fetchResult.adoptCleanup(dir, request) return request } diff --git a/providers/fetch/podFetch.js b/providers/fetch/podFetch.js index a441f643..f30890ba 100644 --- a/providers/fetch/podFetch.js +++ b/providers/fetch/podFetch.js @@ -9,6 +9,7 @@ const path = require('path') const crypto = require('crypto') const { exec } = require('child_process') const requestRetry = require('requestretry').defaults({ maxAttempts: 3, fullResponse: true }) +const FetchResult = require('../../lib/fetchResult') const services = { trunk: 'https://trunk.cocoapods.org/api/v1', @@ -38,19 +39,18 @@ class PodFetch extends AbstractFetch { const location = await this._getPackage(dir, registryData) if (!location) return request.markSkip('Missing ') - request.url = spec.toUrl() - request.contentOrigin = 'origin' - request.document = { + const fetchResult = new FetchResult(spec.toUrl()) + fetchResult.document = { location: location, 
registryData: registryData, releaseDate: version.created_at } if (registryData.name) { - request.casedSpec = clone(spec) - request.casedSpec.name = registryData.name + fetchResult.casedSpec = clone(spec) + fetchResult.casedSpec.name = registryData.name } - + request.fetchResult = fetchResult.adoptCleanup(dir, request) return request } @@ -155,7 +155,7 @@ class PodFetch extends AbstractFetch { .createHash('md5') .update(spec.name) .digest('hex') - prefixes = prefixLengths.map(function(length) { + prefixes = prefixLengths.map(function (length) { const prefix = hashedName.slice(0, length) hashedName = hashedName.substring(length) return prefix diff --git a/providers/fetch/pypiFetch.js b/providers/fetch/pypiFetch.js index 01ab0842..ebc14dfc 100644 --- a/providers/fetch/pypiFetch.js +++ b/providers/fetch/pypiFetch.js @@ -7,6 +7,7 @@ const nodeRequest = require('request') const fs = require('fs') const spdxCorrect = require('spdx-correct') const { findLastKey, get, find, clone } = require('lodash') +const FetchResult = require('../../lib/fetchResult') const providerMap = { pypi: 'https://pypi.python.org' @@ -32,12 +33,14 @@ class PyPiFetch extends AbstractFetch { const dir = this.createTempDir(request) await this.decompress(file.name, dir.name) const hashes = await this.computeHashes(file.name) - request.document = await this._createDocument(dir, spec, registryData, hashes) - request.contentOrigin = 'origin' + + const fetchResult = new FetchResult(request.url) + fetchResult.document = await this._createDocument(dir, spec, registryData, hashes) if (get(registryData, 'info.name')) { - request.casedSpec = clone(spec) - request.casedSpec.name = registryData.info.name + fetchResult.casedSpec = clone(spec) + fetchResult.casedSpec.name = registryData.info.name } + request.fetchResult = fetchResult.adoptCleanup(dir, request) return request } diff --git a/providers/fetch/rubyGemsFetch.js b/providers/fetch/rubyGemsFetch.js index 84d8975a..393dc833 100644 --- a/providers/fetch/rubyGemsFetch.js +++ b/providers/fetch/rubyGemsFetch.js @@ -8,6 +8,7 @@ const fs = require('fs') const zlib = require('zlib') const path = require('path') const { clone, get } = require('lodash') +const FetchResult = require('../../lib/fetchResult') const providerMap = { rubyGems: 'https://rubygems.org' @@ -33,12 +34,14 @@ class RubyGemsFetch extends AbstractFetch { await this.decompress(file.name, dir.name) await this._extractFiles(dir.name) const hashes = await this.computeHashes(file.name) - request.document = await this._createDocument(dir, registryData, hashes) - request.contentOrigin = 'origin' + + const fetchResult = new FetchResult(request.url) + fetchResult.document = await this._createDocument(dir, registryData, hashes) if (get(registryData, 'name')) { - request.casedSpec = clone(spec) - request.casedSpec.name = registryData.name + fetchResult.casedSpec = clone(spec) + fetchResult.casedSpec.name = registryData.name } + request.fetchResult = fetchResult.adoptCleanup(dir, request) return request } diff --git a/providers/process/abstractProcessor.js b/providers/process/abstractProcessor.js index 59526858..e371561e 100644 --- a/providers/process/abstractProcessor.js +++ b/providers/process/abstractProcessor.js @@ -219,13 +219,18 @@ class AbstractProcessor extends BaseHandler { request.queue(name, spec.toUrl(), request.getNextPolicy(name)) } - linkAndQueueTool(request, name, tool = name) { + linkAndQueueTool(request, name, tool = name, scope = null) { const spec = clone(this.toSpec(request)) const url = spec.toUrl() spec.tool = 
tool const urn = spec.toUrn() request.linkCollection(name, urn) - request.queue(tool, url, request.getNextPolicy(name)) + request.queue(tool, url, request.getNextPolicy(name), undefined, undefined, scope) + } + + addLocalToolTasks(request, ...tools) { + const toolList = tools.length ? tools : ['licensee', 'scancode', 'reuse'/*, 'fossology'*/] + toolList.forEach(tool => this.linkAndQueueTool(request, tool, undefined, 'local')) } } diff --git a/providers/process/composerExtract.js b/providers/process/composerExtract.js index a3cb733b..46efc7ab 100644 --- a/providers/process/composerExtract.js +++ b/providers/process/composerExtract.js @@ -37,10 +37,7 @@ class ComposerExtract extends AbstractClearlyDefinedProcessor { if (manifest) this.attachFiles(request.document, [manifestLocation], location) else this.logger.info('PHP package without composer.json', { url: request.url }) } - this.linkAndQueueTool(request, 'licensee') - // this.linkAndQueueTool(request, 'fossology') - this.linkAndQueueTool(request, 'scancode') - this.linkAndQueueTool(request, 'reuse') + this.addLocalToolTasks(request) if (request.document.sourceInfo) { const sourceSpec = SourceSpec.fromObject(request.document.sourceInfo) this.linkAndQueue(request, 'source', sourceSpec.toEntitySpec()) diff --git a/providers/process/crateExtract.js b/providers/process/crateExtract.js index 408f663e..2e6a3c97 100644 --- a/providers/process/crateExtract.js +++ b/providers/process/crateExtract.js @@ -26,10 +26,7 @@ class CrateExtract extends AbstractClearlyDefinedProcessor { await super.handle(request) await this._createDocument(request, request.document.manifest, request.document.registryData) } - this.linkAndQueueTool(request, 'licensee') - // this.linkAndQueueTool(request, 'fossology') - this.linkAndQueueTool(request, 'scancode') - this.linkAndQueueTool(request, 'reuse') + this.addLocalToolTasks(request) if (request.document.sourceInfo) { const sourceSpec = SourceSpec.fromObject(request.document.sourceInfo) this.linkAndQueue(request, 'source', sourceSpec.toEntitySpec()) diff --git a/providers/process/debExtract.js b/providers/process/debExtract.js index 161ac335..49799414 100644 --- a/providers/process/debExtract.js +++ b/providers/process/debExtract.js @@ -29,10 +29,7 @@ class DebExtract extends AbstractClearlyDefinedProcessor { const spec = this.toSpec(request) this._createDocument(request, spec, request.document.registryData) } - this.linkAndQueueTool(request, 'licensee') - // this.linkAndQueueTool(request, 'fossology') - this.linkAndQueueTool(request, 'scancode') - this.linkAndQueueTool(request, 'reuse') + this.addLocalToolTasks(request) if (request.document.sourceInfo) { const sourceSpec = SourceSpec.fromObject(request.document.sourceInfo) this.linkAndQueue(request, 'source', sourceSpec.toEntitySpec()) diff --git a/providers/process/gemExtract.js b/providers/process/gemExtract.js index 86cad74e..328f8009 100644 --- a/providers/process/gemExtract.js +++ b/providers/process/gemExtract.js @@ -26,10 +26,7 @@ class GemExtract extends AbstractClearlyDefinedProcessor { await super.handle(request) await this._createDocument(request, request.document.registryData) } - this.linkAndQueueTool(request, 'licensee') - // this.linkAndQueueTool(request, 'fossology') - this.linkAndQueueTool(request, 'scancode') - this.linkAndQueueTool(request, 'reuse') + this.addLocalToolTasks(request) if (request.document.sourceInfo) { const sourceSpec = SourceSpec.fromObject(request.document.sourceInfo) this.linkAndQueue(request, 'source', sourceSpec.toEntitySpec()) 
diff --git a/providers/process/goExtract.js b/providers/process/goExtract.js index d89d82cc..1cc04964 100644 --- a/providers/process/goExtract.js +++ b/providers/process/goExtract.js @@ -23,9 +23,7 @@ class GoExtract extends AbstractClearlyDefinedProcessor { const spec = this.toSpec(request) this._createDocument(request, spec) } - this.linkAndQueueTool(request, 'licensee') - this.linkAndQueueTool(request, 'scancode') - this.linkAndQueueTool(request, 'reuse') + this.addLocalToolTasks(request) } _createDocument(request) { diff --git a/providers/process/mavenExtract.js b/providers/process/mavenExtract.js index 7d338555..0b24777d 100644 --- a/providers/process/mavenExtract.js +++ b/providers/process/mavenExtract.js @@ -31,10 +31,7 @@ class MavenExtract extends AbstractClearlyDefinedProcessor { const manifest = { summary: request.document.summary, poms: request.document.poms } await this._createDocument(request, spec, manifest, request.document.releaseDate) } - this.linkAndQueueTool(request, 'licensee') - // this.linkAndQueueTool(request, 'fossology') - this.linkAndQueueTool(request, 'scancode') - this.linkAndQueueTool(request, 'reuse') + this.addLocalToolTasks(request) if (request.document.sourceInfo) { const sourceSpec = SourceSpec.fromObject(request.document.sourceInfo) this.linkAndQueue(request, 'source', sourceSpec.toEntitySpec()) diff --git a/providers/process/npmExtract.js b/providers/process/npmExtract.js index 55e1a729..a3e99f92 100644 --- a/providers/process/npmExtract.js +++ b/providers/process/npmExtract.js @@ -36,10 +36,7 @@ class NpmExtract extends AbstractClearlyDefinedProcessor { if (manifest) this.attachFiles(request.document, [manifestLocation], location) else this.logger.info('NPM without package.json', { url: request.url }) } - this.linkAndQueueTool(request, 'licensee') - // this.linkAndQueueTool(request, 'fossology') - this.linkAndQueueTool(request, 'scancode') - this.linkAndQueueTool(request, 'reuse') + this.addLocalToolTasks(request) if (request.document.sourceInfo) { const sourceSpec = SourceSpec.fromObject(request.document.sourceInfo) this.linkAndQueue(request, 'source', sourceSpec.toEntitySpec()) diff --git a/providers/process/nugetExtract.js b/providers/process/nugetExtract.js index 71164072..76958734 100644 --- a/providers/process/nugetExtract.js +++ b/providers/process/nugetExtract.js @@ -34,9 +34,7 @@ class NuGetExtract extends AbstractClearlyDefinedProcessor { const manifest = await this._getManifest(metadataLocation.manifest) await this._createDocument(request, manifest, request.document.registryData) } - this.linkAndQueueTool(request, 'licensee') - this.linkAndQueueTool(request, 'scancode') - this.linkAndQueueTool(request, 'reuse') + this.addLocalToolTasks(request) if (request.document.sourceInfo) { const sourceSpec = SourceSpec.fromObject(request.document.sourceInfo) this.linkAndQueue(request, 'source', sourceSpec.toEntitySpec()) diff --git a/providers/process/podExtract.js b/providers/process/podExtract.js index e3be08fd..ff3e73a5 100644 --- a/providers/process/podExtract.js +++ b/providers/process/podExtract.js @@ -27,10 +27,7 @@ class PodExtract extends AbstractClearlyDefinedProcessor { await super.handle(request, location) await this._createDocument(request, request.document.registryData, request.document.releaseDate) } - this.linkAndQueueTool(request, 'licensee') - this.linkAndQueueTool(request, 'scancode') - this.linkAndQueueTool(request, 'reuse') - // this.linkAndQueueTool(request, 'fossology') + this.addLocalToolTasks(request) if 
(request.document.sourceInfo) { const sourceSpec = SourceSpec.fromObject(request.document.sourceInfo) this.linkAndQueue(request, 'source', sourceSpec.toEntitySpec()) diff --git a/providers/process/pypiExtract.js b/providers/process/pypiExtract.js index ce297229..278de711 100644 --- a/providers/process/pypiExtract.js +++ b/providers/process/pypiExtract.js @@ -27,10 +27,7 @@ class PyPiExtract extends AbstractClearlyDefinedProcessor { const spec = this.toSpec(request) await this._createDocument(request, spec, request.document.registryData) } - this.linkAndQueueTool(request, 'licensee') - // this.linkAndQueueTool(request, 'fossology') - this.linkAndQueueTool(request, 'scancode') - this.linkAndQueueTool(request, 'reuse') + this.addLocalToolTasks(request) if (request.document.sourceInfo) { const sourceSpec = SourceSpec.fromObject(request.document.sourceInfo) this.linkAndQueue(request, 'source', sourceSpec.toEntitySpec()) diff --git a/providers/process/source.js b/providers/process/source.js index a0562318..32bb1ce7 100644 --- a/providers/process/source.js +++ b/providers/process/source.js @@ -16,11 +16,8 @@ class SourceProcessor extends AbstractProcessor { handle(request) { super.handle(request) - this.linkAndQueueTool(request, 'clearlydefined') - this.linkAndQueueTool(request, 'licensee') - // this.linkAndQueueTool(request, 'fossology') - this.linkAndQueueTool(request, 'scancode') - this.linkAndQueueTool(request, 'reuse') + this.addLocalToolTasks(request, 'clearlydefined') + this.addLocalToolTasks(request) request.markNoSave() return request } diff --git a/test/fixtures/packagist/registryData.json b/test/fixtures/packagist/registryData.json new file mode 100644 index 00000000..e5bf0e5b --- /dev/null +++ b/test/fixtures/packagist/registryData.json @@ -0,0 +1,51 @@ +{ + "manifest": { + "name": "symfony/polyfill-mbstring", + "description": "Symfony polyfill for the Mbstring extension", + "keywords": [ + "mbstring", + "compatibility", + "portable", + "polyfill", + "shim" + ], + "homepage": "https://symfony.com", + "version": "v1.11.0", + "version_normalized": "1.11.0.0", + "license": [ + "MIT" + ], + "authors": [ + [], + [] + ], + "source": { + "type": "git", + "url": "https://github.com/symfony/polyfill-mbstring.git", + "reference": "fe5e94c604826c35a32fa832f35bd036b6799609" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/polyfill-mbstring/zipball/fe5e94c604826c35a32fa832f35bd036b6799609", + "reference": "fe5e94c604826c35a32fa832f35bd036b6799609", + "shasum": "" + }, + "type": "library", + "time": "2019-02-06T07:57:58+00:00", + "autoload": { + "psr-4": [], + "files": [] + }, + "extra": { + "branch-alias": [] + }, + "require": { + "php": ">=5.3.3" + }, + "suggest": { + "ext-mbstring": "For best performance" + }, + "uid": 2850406 + }, + "releaseDate": "2019-02-06T07:57:58+00:00" +} \ No newline at end of file diff --git a/test/fixtures/pod/registryData.json b/test/fixtures/pod/registryData.json new file mode 100644 index 00000000..69704063 --- /dev/null +++ b/test/fixtures/pod/registryData.json @@ -0,0 +1,32 @@ +{ + "name": "SwiftLCS", + "version": "1.0", + "summary": "SwiftLCS is a Swift implementation of longest common subsequence (LCS) algorithm.", + "homepage": "https://github.com/Frugghi/SwiftLCS", + "license": "MIT", + "authors": {}, + "source": { + "git": "https://github.com/Frugghi/SwiftLCS.git", + "tag": "1.0" + }, + "requires_arc": true, + "default_subspecs": "Foundation", + "platforms": { + "ios": "8.0", + "osx": "10.9", + "watchos": "2.0" + }, + 
"subspecs": [ + { + "name": "Core", + "source_files": "SwiftLCS/SwiftLCS.swift" + }, + { + "name": "Foundation", + "source_files": "SwiftLCS/SwiftLCS+Foundation.swift", + "dependencies": { + "SwiftLCS/Core": [] + } + } + ] +} \ No newline at end of file diff --git a/test/fixtures/pod/versions.json b/test/fixtures/pod/versions.json new file mode 100644 index 00000000..9413d5fa --- /dev/null +++ b/test/fixtures/pod/versions.json @@ -0,0 +1,49 @@ +{ + "versions": [ + { + "name": "1.0", + "created_at": "2015-10-19 01:36:36 UTC" + }, + { + "name": "1.0.1", + "created_at": "2016-04-26 02:49:03 UTC" + }, + { + "name": "1.1.0", + "created_at": "2016-09-18 09:01:03 UTC" + }, + { + "name": "1.1.1", + "created_at": "2017-03-24 01:51:40 UTC" + }, + { + "name": "1.2.0", + "created_at": "2017-09-17 12:51:14 UTC" + }, + { + "name": "1.3.0", + "created_at": "2017-12-01 00:08:01 UTC" + }, + { + "name": "1.3.1", + "created_at": "2018-02-16 00:59:41 UTC" + }, + { + "name": "1.3.2", + "created_at": "2018-02-27 03:33:16 UTC" + }, + { + "name": "1.3.3", + "created_at": "2018-06-15 21:50:04 UTC" + }, + { + "name": "1.3.4", + "created_at": "2019-04-10 00:22:10 UTC" + } + ], + "owners": [ + { + "created_at": "2014-05-22 00:58:35 UTC" + } + ] +} \ No newline at end of file diff --git a/test/fixtures/pypi/registryData.json b/test/fixtures/pypi/registryData.json new file mode 100644 index 00000000..347371ce --- /dev/null +++ b/test/fixtures/pypi/registryData.json @@ -0,0 +1,186 @@ +{ + "info": { + "classifiers": [ + "Development Status :: 5 - Production/Stable", + "License :: OSI Approved :: Python Software Foundation License", + "Programming Language :: Python :: 2.4", + "Programming Language :: Python :: 2.5", + "Programming Language :: Python :: 2.6", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.0", + "Programming Language :: Python :: 3.1", + "Programming Language :: Python :: 3.2", + "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Topic :: Security :: Cryptography" + ], + "description": "\nThe ssl.match_hostname() function from Python 3.7\n=================================================\n\nThe Secure Sockets Layer is only actually *secure*\nif you check the hostname in the certificate returned\nby the server to which you are connecting,\nand verify that it matches to hostname\nthat you are trying to reach.\n\nBut the matching logic, defined in `RFC2818`_,\ncan be a bit tricky to implement on your own.\nSo the ``ssl`` package in the Standard Library of Python 3.2\nand greater now includes a ``match_hostname()`` function\nfor performing this check instead of requiring every application\nto implement the check separately.\n\nThis backport brings ``match_hostname()`` to users\nof earlier versions of Python.\nSimply make this distribution a dependency of your package,\nand then use it like this::\n\n from backports.ssl_match_hostname import match_hostname, CertificateError\n [...]\n sslsock = ssl.wrap_socket(sock, ssl_version=ssl.PROTOCOL_SSLv23,\n cert_reqs=ssl.CERT_REQUIRED, ca_certs=...)\n try:\n match_hostname(sslsock.getpeercert(), hostname)\n except CertificateError, ce:\n ...\n\nBrandon Craig Rhodes is merely the packager of this distribution;\nthe actual code inside comes from Python 3.7 with small changes for\nportability.\n\n\nRequirements\n------------\n\n* If 
you need to use this on Python versions earlier than 2.6 you will need to\n install the `ssl module`_. From Python 2.6 upwards ``ssl`` is included in\n the Python Standard Library so you do not need to install it separately.\n\n.. _`ssl module`:: https://pypi.python.org/pypi/ssl\n\nHistory\n-------\n\n* This function was introduced in python-3.2\n* It was updated for python-3.4a1 for a CVE \n (backports-ssl_match_hostname-3.4.0.1)\n* It was updated from RFC2818 to RFC 6125 compliance in order to fix another\n security flaw for python-3.3.3 and python-3.4a5\n (backports-ssl_match_hostname-3.4.0.2)\n* It was updated in python-3.5 to handle IPAddresses in ServerAltName fields\n (something that backports.ssl_match_hostname will do if you also install the\n ipaddress library from pypi).\n* It was updated in python-3.7 to handle IPAddresses without the ipaddress library and dropped\n support for partial wildcards\n\n.. _`ipaddress module`:: https://pypi.python.org/pypi/ipaddress\n\n.. _RFC2818: http://tools.ietf.org/html/rfc2818.html\n", + "description_content_type": "", + "docs_url": null, + "download_url": "", + "downloads": { + "last_day": -1, + "last_month": -1, + "last_week": -1 + }, + "home_page": "http://bitbucket.org/brandon/backports.ssl_match_hostname", + "keywords": "", + "license": "Python Software Foundation License", + "maintainer": "", + "maintainer_email": "", + "name": "backports.ssl_match_hostname", + "package_url": "https://pypi.org/project/backports.ssl_match_hostname/", + "platform": "", + "project_url": "https://pypi.org/project/backports.ssl_match_hostname/", + "project_urls": { + "Homepage": "http://bitbucket.org/brandon/backports.ssl_match_hostname" + }, + "release_url": "https://pypi.org/project/backports.ssl_match_hostname/3.7.0.1/", + "requires_dist": null, + "requires_python": "", + "summary": "The ssl.match_hostname() function from Python 3.5", + "version": "3.7.0.1", + "yanked": false, + "yanked_reason": null + }, + "last_serial": 4689314, + "releases": { + "3.2a3": [ + { + "comment_text": "", + "digests": { + "md5": "42adbd3c15d78eb6b7b7c654ec5c717c", + "sha256": "ef78d0532f11c4403288a6a4a7e80da2f8924e6b0d662349bb86c09c6fea8b31" + }, + "downloads": -1, + "filename": "backports.ssl_match_hostname-3.2a3.tar.gz", + "has_sig": false, + "md5_digest": "42adbd3c15d78eb6b7b7c654ec5c717c", + "packagetype": "sdist", + "python_version": "source", + "requires_python": null, + "size": 2448, + "upload_time": "2010-10-16T00:40:14", + "upload_time_iso_8601": "2010-10-16T00:40:14.229416Z", + "url": "https://files.pythonhosted.org/packages/f3/d8/77d5967f6648ef4d75acc8be8aa73020c0a5272d28eec1183607df4efcc2/backports.ssl_match_hostname-3.2a3.tar.gz", + "yanked": false, + "yanked_reason": null + } + ], + "3.4.0.1": [ + { + "comment_text": "", + "digests": { + "md5": "a7402a991cce1e00c30df0142d511458", + "sha256": "8ae5c577c12a39cc403444db7769458a784382a5f8ce07190297387df2255c41" + }, + "downloads": -1, + "filename": "backports.ssl_match_hostname-3.4.0.1.tar.gz", + "has_sig": false, + "md5_digest": "a7402a991cce1e00c30df0142d511458", + "packagetype": "sdist", + "python_version": "source", + "requires_python": null, + "size": 9422, + "upload_time": "2013-09-02T19:11:04", + "upload_time_iso_8601": "2013-09-02T19:11:04.276102Z", + "url": "https://files.pythonhosted.org/packages/b0/5f/b718c15078d9c873bc3b0630a925f572535623cb0fcdc2ba8565f0d825df/backports.ssl_match_hostname-3.4.0.1.tar.gz", + "yanked": false, + "yanked_reason": null + } + ], + "3.4.0.2": [ + { + "comment_text": "", + 
"digests": { + "md5": "788214f20214c64631f0859dc79f23c6", + "sha256": "07410e7fb09aab7bdaf5e618de66c3dac84e2e3d628352814dc4c37de321d6ae" + }, + "downloads": -1, + "filename": "backports.ssl_match_hostname-3.4.0.2.tar.gz", + "has_sig": true, + "md5_digest": "788214f20214c64631f0859dc79f23c6", + "packagetype": "sdist", + "python_version": "source", + "requires_python": null, + "size": 5151, + "upload_time": "2013-10-27T19:24:08", + "upload_time_iso_8601": "2013-10-27T19:24:08.110026Z", + "url": "https://files.pythonhosted.org/packages/3a/15/f9e48bfd2b971ade10ad0c03babab057791c260b05322cbd3f47e27be108/backports.ssl_match_hostname-3.4.0.2.tar.gz", + "yanked": false, + "yanked_reason": null + } + ], + "3.5.0.1": [ + { + "comment_text": "", + "digests": { + "md5": "c03fc5e2c7b3da46b81acf5cbacfe1e6", + "sha256": "502ad98707319f4a51fa2ca1c677bd659008d27ded9f6380c79e8932e38dcdf2" + }, + "downloads": -1, + "filename": "backports.ssl_match_hostname-3.5.0.1.tar.gz", + "has_sig": true, + "md5_digest": "c03fc5e2c7b3da46b81acf5cbacfe1e6", + "packagetype": "sdist", + "python_version": "source", + "requires_python": null, + "size": 5605, + "upload_time": "2015-12-19T22:33:23", + "upload_time_iso_8601": "2015-12-19T22:33:23.257001Z", + "url": "https://files.pythonhosted.org/packages/76/21/2dc61178a2038a5cb35d14b61467c6ac632791ed05131dda72c20e7b9e23/backports.ssl_match_hostname-3.5.0.1.tar.gz", + "yanked": false, + "yanked_reason": null + } + ], + "3.7.0.1": [ + { + "comment_text": "", + "digests": { + "md5": "32d2f593af01a046bec3d2f5181a420a", + "sha256": "bb82e60f9fbf4c080eabd957c39f0641f0fc247d9a16e31e26d594d8f42b9fd2" + }, + "downloads": -1, + "filename": "backports.ssl_match_hostname-3.7.0.1.tar.gz", + "has_sig": true, + "md5_digest": "32d2f593af01a046bec3d2f5181a420a", + "packagetype": "sdist", + "python_version": "source", + "requires_python": null, + "size": 5722, + "upload_time": "2019-01-12T22:25:58", + "upload_time_iso_8601": "2019-01-12T22:25:58.410787Z", + "url": "https://files.pythonhosted.org/packages/ff/2b/8265224812912bc5b7a607c44bf7b027554e1b9775e9ee0de8032e3de4b2/backports.ssl_match_hostname-3.7.0.1.tar.gz", + "yanked": false, + "yanked_reason": null + } + ] + }, + "urls": [ + { + "comment_text": "", + "digests": { + "md5": "32d2f593af01a046bec3d2f5181a420a", + "sha256": "bb82e60f9fbf4c080eabd957c39f0641f0fc247d9a16e31e26d594d8f42b9fd2" + }, + "downloads": -1, + "filename": "backports.ssl_match_hostname-3.7.0.1.tar.gz", + "has_sig": true, + "md5_digest": "32d2f593af01a046bec3d2f5181a420a", + "packagetype": "sdist", + "python_version": "source", + "requires_python": null, + "size": 5722, + "upload_time": "2019-01-12T22:25:58", + "upload_time_iso_8601": "2019-01-12T22:25:58.410787Z", + "url": "https://files.pythonhosted.org/packages/ff/2b/8265224812912bc5b7a607c44bf7b027554e1b9775e9ee0de8032e3de4b2/backports.ssl_match_hostname-3.7.0.1.tar.gz", + "yanked": false, + "yanked_reason": null + } + ], + "vulnerabilities": [] +} \ No newline at end of file diff --git a/test/fixtures/ruby/small-0.5.1.gem b/test/fixtures/ruby/small-0.5.1.gem new file mode 100644 index 0000000000000000000000000000000000000000..4dab78857009651f83cba57f6b1efb99e250f962 GIT binary patch literal 7680 zcmeHLWl$W-md1la&;$)Zg1ZJCCO8C#A-KCcgFC@Vu;3P4gUbZ>1b5fqI{3hB?rv@E zt-Dok|GZcGe%w=Cb-MdhpFZd7Q>VHQzq9{+y{DDAx!s=)f0_SZ_Ww-nPs#n|_`mCt zIvO2e?PrTJ0`ifFttVMa(#;{XT4j&{4Q(+nNKx?o7C)o+Xg&eit4@f@@GBzXBJYZw z_@>hq?52{rbnUiQn9y=p&&@@#T!SOIP;EaMQ0(WjI-&h7z_(2!Xu4u=_y|g z2SVTe!FHMjZj|g;qMqhE$3uZ1(6PPTj(iHaaNBueux{p7{G>0|LLt=uW`oMJnqBms 
zIeKQ3c~r5F_;uD9Uz5f17l-+A9+9<{iLiN1k9Dxn|bt7To#=ZdeH zY1O!6RK-KprV5FR8?7%&-`GVjxbC+3o@CgqEAx-ugffYA=_`TnKjS=JEqaJ3u?$a^ zJ*mO5)8QH|U!T%3u)6Hb$C}>}rquY61~zCgxrp%N%t@U{X|ZOLuhxO4>#0B5u``65 zD;K|ZBxZC}6kU2A9_1}byypweVet~or`;vL--bqRBeH?x$!=>fq>YLqa#@;j#wyx z*0_#De`E{mq{POqdE@h5b@W2Az4u5GjHZ+C7qtR(E}3uT^BJCbN+VwVt@b45$c>lW zBLRAAmHiL-15$}Xz>;%JY|@xGP~eM?DLB>-K=87>VTkyIDNQS{)y$u3Ek^tz_90Xv zW2U!XD^kKJt6}3sxx~C|g;%EpNIhG-HGwHMENgU3>}6y@JlI?S7va4>8P9%+(Fq$w0uq?FpE3a^DpP`A$}6nG(xZQHcOa z0=Mo(0r2_)N(!APGHD-5A*5#hp`Ti#?nMZ}PeEm%T)I5hkL)}W`;5YYU}1DK5jvwF zF=4=kLu8uVrazUCKK#s11Z*xG=f@^2<|E%X5$y27Bq$(~TZ7=DAfHplyhIB0?&X2U z<4`~dSOKbM^E1jGs9dwRbfLZcT>YNJMPu0El^8G)#%m9}Ms`9Za5W*Dl|kKPZ)%^v z(eRM8{AB3tXx%di7UMeJm)YqwK<;-j7o?7s(z1|p89Xxc>d(!ot4UCr0FoHPR9qRY z#MVP;v&h-qrUeH#!lOc7AzoIt6PKRZY}P#HBp&18!l?}fGmjH7AMz4J8AAe$vwzrm z@U$S}n-Z>U2oi6*uIshyAr7PeRKc~?=*O3neSt6s`Bk@^l=1aPmHj~G4nFQmknDCf zteiMnlw7c}lcB0(6I-;1{FYwK)OEn1mS0gbr};~aJZD9_U>%}n(4EvMYR&`&;iB3U zyU{pnT9?1fAq*FVCGC|(C$)t;l#z^ADRH;gq?W!mSCX?SnUaCL1luG3K9^;R?cqZ_ zPGHB{L@pmX@<_6D%cNODh}IT8%rv1|Blp301g4^+3AMRIy+uj0p-iu&l9*j-HUoxg z9Efk!gA_IR%aZ|prrIf|;_v#NS%YTqY?5VxX&r~j_2C=`dvjggmL|h&q{X}ov3Xaa zBZ?SVL#3(S3O|3g6(CeRcHN#(mPJXLERB)h>3ZBI7gGI9ggK3kD} zlcsmpLB{bG)mgE|wN3o*W4>I+3A1dpzL=J@)N_gGiPVuW!}Tvji3@eb_MpjiBEf&- z&CgSTqYI3QR6XnELHP;(g81H~1EA=LVW$T^(JzPqG$F<HB zVZ<@wi5iUH5KC1_NLp~Vb7J;S;1ku_B3>eUai#Gax#UZcW4YMHbo{r0;Tie0m_#+A$N^xuB{dYg$Hb zhU+))-4kPN7pO{@H9OHjGL>Z?Uok&hd879U4Y|e0fL4q0S>K2iwOzae!A?g4H`2nV z*m242h$a>At0 zDOCEcEUIe7C5zeoS-qc!bp9e~?qeWS_~iQo=N?3qvUcieLGD>#weX@&)8l;wtrVar zZftSol0euxr)kY2vJ6iT4SV9V$EmU~3X-?|t*6MrHJEIEyLpWq=ap{o2V7~I{fCnp zb(3R&XC#_8&mJ`XIhD+nYeu@+{oKpam!YJ1kFDhXZ`h(Tl2EQVu=T+uL7Y zZTOm))Rwqj?$f^5I^=v!#OiB*K!HK|Nk81Q&}r^H+2&j*sVatZha>ORV_Zz4tE54~Tqa#EWSd8ntd?R9KXy`eg;ObE6ow{Xi$moYnTswzK`%4<9~*D8sacN@jS@4I3KCf0xnpBJL@6S6brg z`V7lU-Y_iccK%R-@NH%W!x{JTTr8}y*B6=~=M_PEM1Ya%yJ!X&pQ3Sd1+l@~q#UkB z69VUkc@`~GnX{!N)E%1*nLAY$J9F}7`~0z6p9Am`^z?K7x#QHrWYZbKO(onW8G7gV z*yjNM<(x!ueqo_pd^m0vM0Y%cV*2yuKN zeh*ms`6hXfo6w3sYfT0@Duoavn0`M*SeDBrxNy9iMe(_Ta_}To0T6w7+e{G8G08Tnrm27ASc$5SWFM zRdos6yFkMF6BYe)8-GJ=P zE~EOO8_&Q8GxXPErMNaBhu0vVmnMe$wx;)7Il4PYQ;!ya<0%q6jy>82M=4&{jpqF< z9rqx^_o^4Yi~@vMCq56L1Y|UZh&Up$?NChJ$_?u_=9zdPlpfAH^eK5XvLb8@WzVft zN4BJfv{YrOO(85n4Xvd!x>dPTSfnZsg?2?!$E$kCM2@|+ue!;I8m2L3gnUGDB;AnG zg$5!aZ$o#*MhpP5g$CcB{0oMNAzz? z9#bY8@<8iE?kyFd#BiPp8ypt$UrL5;08=%ZxN!{QVAKm}ZcW^A;y}5x-Jf(LZ9HBp zHu_#&dFE*|rgje1Kxpp{p6thFyXT<+`FGS~9N$Nt@G7owxgW$ELRMh#7feUm^DyuS z-fbL?Ec%$E+X#Yo8pHAWVmq%x(~uw(rvPNaXtWHdOiI;DbSz9+p62s{65lMKk=MCJKHCZ_MAx9g`3%dT3tbeKwu}{D!Mp;Tv`}M1Ow&|T1T&A)d z`Ze@%6OK;dh`UNQ4)D17kxpjZlnQaBCSzYty#f8cqqloTBY6uP=)n{_sUmO$S&k&v zfga3cojSYkcc3cKqPm|(u6n;o_U`<&QE$;CO1|3Y0eL7o4-wizJrX8LrTmQgjo?lN z(=M3iMRrDI?`XaNpb|U8KIYW%H3eE|U{m#p@1PPPN9?Fek{W*^Rf&g;XaOSm^ym6+ zP^>viM{G!IG(Oj>6vXIfPSK*vs}|>rfGCI8D!Dgx@3ir|9SzM5)Furf%Re8Cr3a$P zBCt36FG7x=Ofx|`qN+I+>`fm;Hh`wr_dM86GPE=KT5n69zW06I2_M^2wZf-rDtv~N z$8@xp_oxMRArtqB&go1=ydrs;aD6Fom!@=hKBNrbq^rI_&sppu844ZyRL z=@_BTS5z!Hp3=pSgqn01IsSmMY8u79U8%1plv9UMC4%!c45>72$&~SqvL!|Ph@MY<2 zhU22H3rql7jDP};#Li*c$re!<{?Q*Efxa?8lS8hoR_W^&hkj9FB~T zH8hoop!M%c^8ACi?9JVb&Hms3|KIpOPA*O!p8v)F3Gn~T|NVn!`=^=yhyNSZP;i{) z#D_d+Hy4ZU)ao*8CQ1;2G-W%DS)38_2Avs(RK$Zt0V@GxbPLav%Cj@{nX(-r-d->v zxun5pPK}3A?e73}XRJoX6QA+VmjAB+I?hocMw^%gXS|Wr{KMxEh_NWW+|qWbeg2)~h(y znhLUP98pCAsbn>G@t@(LI4rLYVV7Yy%$bMC{A}N(JZ==ofjFq^Ufz%VieGZ{)HQfn zQD>c*j`{nCr6P*Na#Ht=d6|PU>*ERmP-a{b)U7o^c;E|C;8l~DV^@{LQm}K9bw-S{ z_{9b-+X3!H-h<3^qNj0Y1R*hR?)!h}8LM3G;qn~#6%y#vU4JGq$>ff-PV#@7lI}S! 
zv2YppHQnWy7L-~LI!yYmB_oQ2q82yHQ?Edk0ZDh`XscI^0oImK?&+v)7qhECMxCR+ zq6zU?R>R_llr`?!O^FDlRU3{F;`qzc^i8GA?YpF9JlfFUdAjt6AR&!(Lq9k15uqIJgg`CWX@Ruzl-XRUdDKDMw^WC XPph!M9sP~K-w6DTz~2b`J0kEO)ZyLy literal 0 HcmV?d00001 diff --git a/test/unit/ghcrawler/queueSetTests.js b/test/unit/ghcrawler/queueSetTests.js index 43cc4935..aa8c91a7 100644 --- a/test/unit/ghcrawler/queueSetTests.js +++ b/test/unit/ghcrawler/queueSetTests.js @@ -82,87 +82,17 @@ describe('QueueSet pushing', () => { expect(() => queues.push(request, 'foo')).to.throw(Error) }) - - it('should repush into the same queue', async () => { - const priority = createBaseQueue('priority', { - pop: async () => new Request('test', 'http://test'), - push: () => null - }) - const queues = createBaseQueues([priority]) - sinon.spy(priority, 'push') - - const request = await queues.pop() - await queues.repush(request, request) - expect(request._originQueue === priority).to.be.true - expect(priority.push.callCount).to.be.equal(1) - expect(priority.push.getCall(0).args[0].type).to.be.equal('test') - }) }) describe('QueueSet originQueue management', () => { - it('should call done and mark acked on done', async () => { - const priority = createBaseQueue('priority', { - pop: async () => new Request('test', 'http://test'), - done: async () => null - }) - const queues = createBaseQueues([priority]) - sinon.spy(priority, 'done') - - const request = await queues.pop() - await queues.done(request) - expect(request.acked).to.be.true - expect(priority.done.callCount).to.be.equal(1) - expect(priority.done.getCall(0).args[0].type).to.be.equal('test') - }) - - it('should call done and mark acked on abandon', async () => { - const priority = createBaseQueue('priority', { - pop: async () => new Request('test', 'http://test'), - abandon: async () => null - }) - const queues = createBaseQueues([priority]) - sinon.spy(priority, 'abandon') - - const request = await queues.pop() - await queues.abandon(request) - expect(request.acked).to.be.true - expect(priority.abandon.callCount).to.be.equal(1) - expect(priority.abandon.getCall(0).args[0].type).to.be.equal('test') - }) - - it('should not abandon twice', async () => { - const priority = createBaseQueue('priority', { - pop: async () => new Request('test', 'http://test'), - abandon: async () => null - }) - const queues = createBaseQueues([priority]) - sinon.spy(priority, 'abandon') - - const request = await queues.pop() - await queues.abandon(request) - await queues.abandon(request) - expect(request.acked).to.be.true - expect(priority.abandon.callCount).to.be.equal(1) - expect(priority.abandon.getCall(0).args[0].type).to.be.equal('test') - }) - - it('should not done after abandon ', async () => { + it('should set originQueue on pop', async () => { const priority = createBaseQueue('priority', { pop: async () => new Request('test', 'http://test'), - abandon: async () => null, - done: async () => null }) const queues = createBaseQueues([priority]) - sinon.spy(priority, 'abandon') - sinon.spy(priority, 'done') const request = await queues.pop() - await queues.abandon(request) - await queues.done(request) - expect(request.acked).to.be.true - expect(priority.done.callCount).to.be.equal(0) - expect(priority.abandon.callCount).to.be.equal(1) - expect(priority.abandon.getCall(0).args[0].type).to.be.equal('test') + expect(request._originQueue).to.be.equal(priority) }) }) diff --git a/test/unit/ghcrawler/requestTests.js b/test/unit/ghcrawler/requestTests.js index 122c7d35..0b8290f6 100644 --- a/test/unit/ghcrawler/requestTests.js +++ 
b/test/unit/ghcrawler/requestTests.js
@@ -6,7 +6,7 @@ const expect = require('chai').expect
const Request = require('../../../ghcrawler/lib/request.js')

describe('Request context/qualifier', () => {
- it('will not queueRoot if none transitivity', () => {})
+ it('will not queueRoot if no transitivity', () => { })
})

describe('Request link management', () => {
@@ -103,3 +103,48 @@ describe('Request marking', () => {
expect(request.message).to.be.undefined
})
})
+
+describe('clean up management', () => {
+ let request
+ beforeEach(() => {
+ request = new Request('test', 'http://test')
+ })
+ it('will track single cleanup', () => {
+ request.trackCleanup('foo')
+ expect(request.getTrackedCleanups().length).to.be.equal(1)
+ expect(request.cleanups[0]).to.be.equal('foo')
+ })
+
+ it('will remove single cleanup', () => {
+ request.trackCleanup('foo')
+ request.removeCleanup('foo')
+ expect(request.getTrackedCleanups().length).to.be.equal(0)
+ })
+
+ it('will track multiple cleanups', () => {
+ request.trackCleanup(['foo', 'bar'])
+ expect(request.getTrackedCleanups().length).to.be.equal(2)
+ expect(request.getTrackedCleanups()).to.be.deep.equal(['foo', 'bar'])
+
+ request.trackCleanup(['x', 'y'])
+ expect(request.getTrackedCleanups().length).to.be.equal(4)
+ expect(request.getTrackedCleanups()).to.be.deep.equal(['foo', 'bar', 'x', 'y'])
+ })
+
+ it('will remove multiple cleanups', () => {
+ request.trackCleanup(['foo', 'bar', 'x'])
+ expect(request.getTrackedCleanups().length).to.be.equal(3)
+
+ request.removeCleanup('bar')
+ expect(request.getTrackedCleanups().length).to.be.equal(2)
+ expect(request.getTrackedCleanups()).to.be.deep.equal(['foo', 'x'])
+
+ request.trackCleanup('y')
+ expect(request.getTrackedCleanups().length).to.be.equal(3)
+ expect(request.getTrackedCleanups()).to.be.deep.equal(['foo', 'x', 'y'])
+
+ request.removeCleanup(['bar', 'y', 'x'])
+ expect(request.getTrackedCleanups().length).to.be.equal(1)
+ expect(request.getTrackedCleanups()).to.be.deep.equal(['foo'])
+ })
+})
diff --git a/test/unit/ghcrawler/scopedQueueSetsTests.js b/test/unit/ghcrawler/scopedQueueSetsTests.js
new file mode 100644
index 00000000..4146a691
--- /dev/null
+++ b/test/unit/ghcrawler/scopedQueueSetsTests.js
@@ -0,0 +1,434 @@
+// (c) Copyright 2022, SAP SE and ClearlyDefined contributors. Licensed under the MIT license.
+// SPDX-License-Identifier: MIT
+
+const { expect, should } = require('chai')
+const sinon = require('sinon')
+const Request = require('../../../ghcrawler/lib/request.js')
+const ScopedQueueSets = require('../../../ghcrawler/providers/queuing/scopedQueueSets.js')
+const AttenuatedQueue = require('../../../ghcrawler/providers/queuing/attenuatedQueue')
+const InMemoryCrawlQueue = require('../../../ghcrawler/providers/queuing/inmemorycrawlqueue')
+const QueueSet = require('../../../ghcrawler/providers/queuing/queueSet.js')
+
+describe('scopedQueueSets', () => {
+
+ describe('subscription management', () => {
+ let scopedQueues
+ let globalQueues
+ let localQueues
+
+ function createQueues() {
+ return {
+ subscribe: sinon.stub(),
+ unsubscribe: sinon.stub()
+ }
+ }
+
+ beforeEach(() => {
+ globalQueues = createQueues()
+ localQueues = createQueues()
+ scopedQueues = new ScopedQueueSets(globalQueues, localQueues)
+ })
+
+ it('should subscribe all', async () => {
+ await scopedQueues.subscribe()
+ expect(globalQueues.subscribe.calledOnce)
+ expect(localQueues.subscribe.calledOnce)
+ })
+
+ it('should unsubscribe all', async () => {
+ await scopedQueues.unsubscribe()
+ expect(globalQueues.unsubscribe.calledOnce)
+ expect(localQueues.unsubscribe.calledOnce)
+ })
+ })
+
+ describe('originQueue management', () => {
+ let scopedQueues
+
+ beforeEach(() => {
+ scopedQueues = new ScopedQueueSets({}, {})
+ })
+
+ it('should call done and mark acked on done', async () => {
+ const queue = { done: sinon.stub() }
+ const request = poppedRequest(queue)
+
+ await scopedQueues.done(request)
+
+ expect(request.acked).to.be.true
+ expect(queue.done.callCount).to.be.equal(1)
+ expect(queue.done.getCall(0).args[0].type).to.be.equal('test')
+ })
+
+ it('should call done and mark acked on abandon', async () => {
+ const queue = { abandon: sinon.stub() }
+ const request = poppedRequest(queue)
+
+ await scopedQueues.abandon(request)
+
+ expect(request.acked).to.be.true
+ expect(queue.abandon.callCount).to.be.equal(1)
+ expect(queue.abandon.getCall(0).args[0].type).to.be.equal('test')
+ })
+
+ it('should not abandon twice', async () => {
+ const queue = { abandon: sinon.stub() }
+ const request = poppedRequest(queue)
+
+ await scopedQueues.abandon(request)
+ await scopedQueues.abandon(request)
+
+ expect(request.acked).to.be.true
+ expect(queue.abandon.callCount).to.be.equal(1)
+ expect(queue.abandon.getCall(0).args[0].type).to.be.equal('test')
+ })
+
+ it('should not call done after abandon', async () => {
+ const queue = { abandon: sinon.stub(), done: sinon.stub() }
+ const request = poppedRequest(queue)
+
+ await scopedQueues.abandon(request)
+ await scopedQueues.done(request)
+
+ expect(request.acked).to.be.true
+ expect(queue.done.callCount).to.be.equal(0)
+ expect(queue.abandon.callCount).to.be.equal(1)
+ expect(queue.abandon.getCall(0).args[0].type).to.be.equal('test')
+ })
+ })
+
+ describe('getQueue', () => {
+ let scopedQueues
+ let globalQueues
+ let localQueues
+
+ beforeEach(() => {
+ globalQueues = { getQueue: sinon.stub() }
+ localQueues = { getQueue: sinon.stub() }
+ scopedQueues = new ScopedQueueSets(globalQueues, localQueues)
+ })
+
+ it('get global queue', async () => {
+ scopedQueues.getQueue('test', 'global')
+ expect(globalQueues.getQueue.calledOnce)
+ expect(globalQueues.getQueue.getCall(0).args[0]).to.be.equal('test')
+ expect(localQueues.getQueue.callCount).to.be.equal(0)
+ })
+
+ it('get local queue', async () => {
+ scopedQueues.getQueue('test', 'local')
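+ // the 'local' scope must resolve from the local queue set and leave the global set untouched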
+ expect(localQueues.getQueue.calledOnce)
+ expect(localQueues.getQueue.getCall(0).args[0]).to.be.equal('test')
+ expect(globalQueues.getQueue.callCount).to.be.equal(0)
+ })
+ })
+
+ describe('pop', () => {
+ let scopedQueues
+ let globalQueues
+ let localQueues
+
+ function mockPopReturn(fromQueue) {
+ const queue = {
+ getName: sinon.stub().returns(fromQueue)
+ }
+ return poppedRequest(queue)
+ }
+
+ beforeEach(() => {
+ globalQueues = { pop: sinon.stub().resolves(mockPopReturn('global')) }
+ localQueues = { pop: sinon.stub() }
+ scopedQueues = new ScopedQueueSets(globalQueues, localQueues)
+ })
+
+ it('pop local, set retry queue', async () => {
+ localQueues.pop.resolves(mockPopReturn('local'))
+
+ const popped = await scopedQueues.pop()
+
+ expect(popped._retryQueue).to.be.equal('local')
+ expect(localQueues.pop.calledOnce)
+ expect(globalQueues.pop.callCount).to.be.equal(0)
+ })
+
+ it('pop global', async () => {
+ localQueues.pop.resolves(undefined)
+
+ const popped = await scopedQueues.pop()
+
+ expect(popped._originQueue.getName()).to.be.equal('global')
+ expect(popped._retryQueue).to.be.undefined
+ expect(localQueues.pop.calledOnce)
+ expect(globalQueues.pop.calledOnce)
+ })
+ })
+
+ describe('repush', () => {
+ let scopedQueues
+ let globalQueues, globalQueue, localQueues, localQueue
+
+ beforeEach(() => {
+ globalQueue = mockQueue('normal')
+ globalQueues = {
+ getQueue: sinon.stub().returns(globalQueue),
+ pop: sinon.stub().resolves(poppedRequest(globalQueue))
+ }
+ localQueue = mockQueue('normal')
+ localQueues = {
+ pop: sinon.stub()
+ }
+ scopedQueues = new ScopedQueueSets(globalQueues, localQueues)
+ })
+
+ it('should repush local request to global scope', async () => {
+ localQueues.pop.resolves(poppedRequest(localQueue))
+
+ const request = await scopedQueues.pop()
+ await scopedQueues.repush(request, request)
+
+ expect(localQueue.done.calledOnce).to.be.true
+ expect(globalQueue.push.calledOnce).to.be.true
+ expect(globalQueue.push.getCall(0).args[0].type).to.be.equal('test')
+ })
+
+ it('should repush global request into the same queue', async () => {
+ localQueues.pop.resolves()
+
+ const request = await scopedQueues.pop()
+ await scopedQueues.repush(request, request)
+
+ expect(localQueue.done.callCount).to.be.equal(0)
+ expect(request._originQueue === globalQueue).to.be.true
+ expect(globalQueue.push.calledOnce).to.be.true
+ expect(globalQueue.push.getCall(0).args[0].type).to.be.equal('test')
+ })
+ })
+
+ describe('publish', () => {
+ let scopedQueues
+ let globalQueues, globalQueue, localQueues, localQueue
+
+ beforeEach(() => {
+ globalQueue = mockQueue('normal')
+ globalQueues = {
+ getQueue: () => globalQueue
+ }
+ localQueue = mockQueue('normal')
+ localQueue.pop.resolves(poppedRequest(localQueue))
+ localQueues = {
+ queues: [localQueue]
+ }
+
+ scopedQueues = new ScopedQueueSets(globalQueues, localQueues)
+ })
+
+ it('skip update empty local queues', async () => {
+ localQueue.getInfo = sinon.stub().resolves({ count: 0 })
+
+ await scopedQueues.publish()
+
+ expect(localQueue.pop.callCount).to.be.equal(0)
+ expect(globalQueue.push.callCount).to.be.equal(0)
+ })
+
+ it('success', async () => {
+ localQueue.getInfo = sinon.stub().resolves({ count: 1 })
+
+ await scopedQueues.publish()
+
+ expect(localQueue.pop.calledOnce).to.be.true
+ expect(localQueue.done.calledOnce).to.be.true
+ expect(globalQueue.push.calledOnce).to.be.true
+ expect(globalQueue.push.getCall(0).args[0].type).to.be.equal('test')
+ })
+
+ it('success with 2 items', async () => {
+ localQueue.getInfo
= sinon.stub().resolves({ count: 2 }) + + await scopedQueues.publish() + + expect(localQueue.pop.callCount).to.be.equal(2) + expect(localQueue.done.callCount).to.be.equal(2) + expect(globalQueue.push.callCount).to.be.equal(2) + expect(globalQueue.push.getCall(0).args[0].type).to.be.equal('test') + expect(globalQueue.push.getCall(1).args[0].type).to.be.equal('test') + }) + + it('partial success', async () => { + const failQueue = mockQueue('failing') + failQueue.getInfo = sinon.stub().rejects('failed') + localQueue.getInfo = sinon.stub().resolves({ count: 1 }) + localQueues.queues = [failQueue, localQueue] + + try { + await scopedQueues.publish() + should.fail() + } catch (error) { + expect(error.message).to.be.equal('failed') + } + + //The remaining queue is still processed. + expect(localQueue.done.calledOnce).to.be.true + expect(globalQueue.push.calledOnce).to.be.true + expect(globalQueue.push.getCall(0).args[0].type).to.be.equal('test') + }) + }) +}) + +describe('integration test with AttenuatedQueue and InMemoryCrawlQueue', () => { + const queueName = 'queue' + let scopedQueues + + beforeEach(() => { + scopedQueues = createScopedQueueSets(queueName) + }) + + afterEach(async () => { + await cleanup(scopedQueues, queueName) + }) + + it('add to global by default and pop', async () => { + const mockRequest = new Request('test', 'http://test') + await scopedQueues.push(mockRequest, queueName) + let queueInfo = await getQueueInfos(scopedQueues, queueName) + expect(queueInfo.global.count).to.be.equal(1) + expect(queueInfo.local.count).to.be.equal(0) + + const popped = await scopedQueues.pop() + expect(popped.type).to.be.equal('test') + queueInfo = await getQueueInfos(scopedQueues, queueName) + expect(queueInfo.global.count).to.be.equal(0) + expect(queueInfo.local.count).to.be.equal(0) + + //ensure request is removed from the cache in the AttenuatedQueue + await scopedQueues.done(popped) + }) + + it('add to local and pop', async () => { + const mockRequest = new Request('test', 'http://test') + await scopedQueues.push(mockRequest, queueName, 'local') + let queueInfo = await getQueueInfos(scopedQueues, queueName) + expect(queueInfo.global.count).to.be.equal(0) + expect(queueInfo.local.count).to.be.equal(1) + + const popped = await scopedQueues.pop() + expect(popped.type).to.be.equal('test') + queueInfo = await getQueueInfos(scopedQueues, queueName) + expect(queueInfo.global.count).to.be.equal(0) + expect(queueInfo.local.count).to.be.equal(0) + + //ensure request is removed from the cache in the AttenuatedQueue + await scopedQueues.done(popped) + }) + + it('add to global, local and pop', async () => { + const mockRequestGlobal = new Request('testGlobal', 'http://test') + const mockRequestLocal = new Request('testLocal', 'http://test') + await scopedQueues.push(mockRequestGlobal, queueName) + await scopedQueues.push(mockRequestLocal, queueName, 'local') + let queueInfo = await getQueueInfos(scopedQueues, queueName) + expect(queueInfo.global.count).to.be.equal(1) + expect(queueInfo.local.count).to.be.equal(1) + + const popped = await scopedQueues.pop() + expect(popped.type).to.be.equal('testLocal') + queueInfo = await getQueueInfos(scopedQueues, queueName) + expect(queueInfo.global.count).to.be.equal(1) + expect(queueInfo.local.count).to.be.equal(0) + + //ensure request is removed from the cache in the AttenuatedQueue + await scopedQueues.done(popped) + }) + + it('local repushed to global', async () => { + const mockRequest = new Request('test', 'http://test') + await 
scopedQueues.push(mockRequest, queueName, 'local') + let queueInfo = await getQueueInfos(scopedQueues, queueName) + expect(queueInfo.global.count).to.be.equal(0) + expect(queueInfo.local.count).to.be.equal(1) + + const popped = await scopedQueues.pop() + await scopedQueues.repush(popped, popped.createRequeuable()) + queueInfo = await getQueueInfos(scopedQueues, queueName) + expect(queueInfo.global.count).to.be.equal(1) + expect(queueInfo.local.count).to.be.equal(0) + }) + + it('publish local to global', async () => { + const mockRequest = new Request('test', 'http://test') + await scopedQueues.push(mockRequest, queueName, 'local') + let queueInfo = await getQueueInfos(scopedQueues, queueName) + expect(queueInfo.global.count).to.be.equal(0) + expect(queueInfo.local.count).to.be.equal(1) + + await scopedQueues.publish() + queueInfo = await getQueueInfos(scopedQueues, queueName) + expect(queueInfo.global.count).to.be.equal(1) + expect(queueInfo.local.count).to.be.equal(0) + }) + + it('publish two local requests to global', async () => { + const globalRequest = new Request('testGlobal', 'http://test') + const localRequest1 = new Request('testLocal-1', 'http://test') + const localRequest2 = new Request('testLocal-2', 'http://test') + await scopedQueues.push(globalRequest, queueName) + await scopedQueues.push(localRequest1, queueName, 'local') + await scopedQueues.push(localRequest2, queueName, 'local') + let queueInfo = await getQueueInfos(scopedQueues, queueName) + expect(queueInfo.global.count).to.be.equal(1) + expect(queueInfo.local.count).to.be.equal(2) + + await scopedQueues.publish() + queueInfo = await getQueueInfos(scopedQueues, queueName) + expect(queueInfo.global.count).to.be.equal(3) + expect(queueInfo.local.count).to.be.equal(0) + }) +}) + +function createScopedQueueSets(queueName) { + const options = { + _config: { + on: sinon.stub() + }, + logger: { + verbose: sinon.stub() + } + } + const global = new AttenuatedQueue(new InMemoryCrawlQueue(queueName, options), options) + const local = new AttenuatedQueue(new InMemoryCrawlQueue(queueName, options), options) + return new ScopedQueueSets( + new QueueSet([global], options), + new QueueSet([local], options)) +} + +async function getQueueInfos(scopedQueues, queueName) { + let globalInfo = await scopedQueues.getQueue(queueName).getInfo() + let localQueueInfo = await scopedQueues.getQueue(queueName, 'local').getInfo() + return { global: globalInfo, local: localQueueInfo } +} + +async function cleanup(scopedQueues, queueName) { + //remove request from the cache inside the AttenuatedQueue + let queueInfo = await getQueueInfos(scopedQueues, queueName) + let count = queueInfo.global.count + queueInfo.local.count + while (count) { + const popped = await scopedQueues.pop() + await scopedQueues.done(popped) + count-- + } +} + +function poppedRequest(fromQueue) { + const request = new Request('test', 'http://test') + request._originQueue = fromQueue + return request +} + +function mockQueue(fromQueue) { + return { + getName: sinon.stub().returns(fromQueue), + push: sinon.stub().resolves(), + done: sinon.stub().resolves(), + pop: sinon.stub() + } +} \ No newline at end of file diff --git a/test/unit/lib/fetchResultTests.js b/test/unit/lib/fetchResultTests.js new file mode 100644 index 00000000..cdb6a503 --- /dev/null +++ b/test/unit/lib/fetchResultTests.js @@ -0,0 +1,117 @@ +// (c) Copyright 2022, SAP SE and ClearlyDefined contributors. Licensed under the MIT license. 
+// SPDX-License-Identifier: MIT + +const expect = require('chai').expect +const sinon = require('sinon') +const Request = require('../../../ghcrawler/lib/request.js') +const FetchResult = require('../../../lib/fetchResult') + +describe('fetchResult', () => { + let fetchResult + const errorHandler = sinon.stub() + + beforeEach(() => { + fetchResult = new FetchResult() + }) + + describe('clean up management', () => { + it('clean up callback success', () => { + const cleanup = sinon.stub() + fetchResult.trackCleanup(cleanup) + + fetchResult.cleanup(errorHandler) + expect(cleanup.calledOnce).to.be.true + expect(errorHandler.called).to.be.false + }) + + it('clean up success with callback array', () => { + const cleanup1 = sinon.stub() + const cleanup2 = sinon.stub() + fetchResult.trackCleanup([cleanup1, cleanup2]) + + fetchResult.cleanup(errorHandler) + expect(cleanup1.calledOnce).to.be.true + expect(cleanup2.calledOnce).to.be.true + expect(errorHandler.called).to.be.false + }) + + it('failure during cleanup should be logged', () => { + const cleanup = sinon.stub().throws({ message: 'error message' }) + fetchResult.trackCleanup([cleanup, cleanup]) + fetchResult.cleanup(errorHandler) + expect(cleanup.calledTwice).to.be.true + expect(errorHandler.calledTwice).to.be.true + expect(errorHandler.calledWith({ message: 'error message' })).to.be.true + }) + + it('adopt a cleanup', () => { + const dir = { removeCallback: sinon.stub() } + const request = new Request('test', 'http://test').trackCleanup(dir.removeCallback) + expect(request.getTrackedCleanups().length).to.be.equal(1) + + fetchResult.adoptCleanup(dir, request) + expect(request.getTrackedCleanups().length).to.be.equal(0) + + fetchResult.cleanup() + expect(dir.removeCallback.calledOnce).to.be.true + }) + + it('adopt multiple cleanups', () => { + const dir1 = { removeCallback: sinon.stub() } + const dir2 = { removeCallback: sinon.stub() } + const request = new Request('test', 'http://test').trackCleanup([ + dir1.removeCallback, + dir2.removeCallback, + { removeCallback: sinon.stub() }]) + expect(request.getTrackedCleanups().length).to.be.equal(3) + + fetchResult.adoptCleanup([dir1, dir2], request) + expect(request.getTrackedCleanups().length).to.be.equal(1) + + fetchResult.cleanup() + expect(dir1.removeCallback.calledOnce).to.be.true + expect(dir2.removeCallback.calledOnce).to.be.true + }) + }) + + describe('copyTo', () => { + it('verify copyTo', () => { + const result = {} + fetchResult.copyTo(result) + expect(result).to.be.deep.equal({ contentOrigin: 'origin' }) + }) + + it('verify deepCopy', () => { + fetchResult.document = { test: true } + const result1 = {} + fetchResult.copyTo(result1) + expect(result1.document).to.be.deep.equal({ test: true }) + result1.document.result1Flag = true + expect(result1.document).to.be.deep.equal({ test: true, result1Flag: true }) + + const result2 = {} + fetchResult.copyTo(result2) + expect(result2.document).to.be.deep.equal({ test: true }) + }) + + it('verify copyTo with url', () => { + const result = {} + fetchResult = new FetchResult('http://localhost') + fetchResult.copyTo(result) + expect(result).to.be.deep.equal({ contentOrigin: 'origin', url: 'http://localhost' }) + }) + + it('avoid copy for empty meta', () => { + const result = { addMeta: sinon.stub() } + fetchResult.copyTo(result) + expect(result.addMeta.called).to.be.false + }) + + it('verify addMeta', () => { + fetchResult.addMeta({ gitSize: 532 }) + const result = { addMeta: sinon.stub() } + fetchResult.copyTo(result) + 
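// meta recorded via addMeta on the fetch result should be replayed onto the copy target
+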
expect(result.addMeta.calledWith({ gitSize: 532 })).to.be.true + }) + }) +}) \ No newline at end of file diff --git a/test/unit/providers/fetch/cratesioFetchTests.js b/test/unit/providers/fetch/cratesioFetchTests.js index 7a8fbc1b..7bea8ea1 100644 --- a/test/unit/providers/fetch/cratesioFetchTests.js +++ b/test/unit/providers/fetch/cratesioFetchTests.js @@ -23,7 +23,7 @@ function pickFile(url) { return 'bitflags.json' } -describe('', () => { +describe('crateFetch workflow', () => { beforeEach(() => { const requestPromiseStub = options => { if (options && options.url) { @@ -50,6 +50,7 @@ describe('', () => { it('succeeds in download, decompress and hash', async () => { const handler = setup() const request = await handler.handle(new Request('test', 'cd:/crate/cratesio/-/bitflags/1.0.4')) + request.fetchResult.copyTo(request) expect(request.document.hashes.sha1).to.be.equal(hashes['bitflags-1.0.4.crate']['sha1']) expect(request.document.hashes.sha256).to.be.equal(hashes['bitflags-1.0.4.crate']['sha256']) expect(request.document.releaseDate).to.equal('2018-08-21T19:55:12.284583+00:00') @@ -64,11 +65,13 @@ describe('', () => { version: { num: '1.0.4', dl_path: 'error' } } } + const request = new Request('test', 'cd:/crate/cratesio/-/bitflags/1.0.4') try { - await handler.handle(new Request('test', 'cd:/crate/cratesio/-/bitflags/1.0.4')) + await handler.handle(request) expect(false).to.be.true } catch (error) { expect(error.message).to.be.equal('yikes') + expect(request.getTrackedCleanups().length).to.be.greaterThan(0) } }) @@ -79,11 +82,13 @@ describe('', () => { version: { num: '1.0.4', dl_path: 'missing' } } } + const request = new Request('test', 'cd:/crate/cratesio/-/bitflags/1.0.4') try { - await handler.handle(new Request('test', 'cd:/crate/cratesio/-/bitflags/1.0.4')) + await handler.handle(request) expect(false).to.be.true } catch (error) { expect(error.statusCode).to.be.equal(404) + expect(request.getTrackedCleanups().length).to.be.greaterThan(0) } }) @@ -136,7 +141,8 @@ describe('crateFetch', () => { return { manifest: {}, version: { num: '0.5.0', crate: 'name' } } } }) - const request = await crateFetch.handle({ url: 'cd:/crate/cratesio/-/name/0.1.0' }) + const request = await crateFetch.handle(new Request('crate', 'cd:/crate/cratesio/-/name/0.1.0')) + request.fetchResult.copyTo(request) expect(request.url).to.eq('cd:/crate/cratesio/-/name/0.5.0') }) @@ -146,7 +152,8 @@ describe('crateFetch', () => { return { manifest: {}, version: { num: '0.1.0', crate: 'name' } } } }) - const request = await crateFetch.handle({ url: 'cd:/crate/cratesio/-/naME/0.1.0' }) + const request = await crateFetch.handle(new Request('crate', 'cd:/crate/cratesio/-/naME/0.1.0')) + request.fetchResult.copyTo(request) expect(request.casedSpec.name).to.eq('name') }) }) diff --git a/test/unit/providers/fetch/debianFetchTests.js b/test/unit/providers/fetch/debianFetchTests.js index 0bc661ab..bb0b4592 100644 --- a/test/unit/providers/fetch/debianFetchTests.js +++ b/test/unit/providers/fetch/debianFetchTests.js @@ -125,6 +125,7 @@ describe('Debian fetching', () => { return ['MIT', 'BSD-3-clause'] } const request = await handler.handle(new Request('test', 'cd:/deb/debian/-/0ad/0.0.17-1_armhf')) + request.fetchResult.copyTo(request) expect(request.document.hashes.sha1).to.be.equal(hashes['0ad_0.0.17-1_armhf.deb']['sha1']) expect(request.document.hashes.sha256).to.be.equal(hashes['0ad_0.0.17-1_armhf.deb']['sha256']) expect(request.document.releaseDate.getFullYear()).to.be.equal(2014) @@ -133,6 +134,21 @@ 
describe('Debian fetching', () => { ) expect(request.document.declaredLicenses).to.deep.equal(['MIT', 'BSD-3-clause']) }) + + it('failed to get declared license', async () => { + const handler = DebianFetch(debianFetchOptions) + handler._download = async (downloadUrl, destination) => { + fs.copyFileSync('test/fixtures/debian/0ad_0.0.17-1_armhf.deb', destination) + } + handler._getDeclaredLicenses = sinon.stub().rejects('failed') + const request = new Request('test', 'cd:/deb/debian/-/0ad/0.0.17-1_armhf') + try { + await handler.handle(request) + expect(false).to.be.true + } catch (error) { + expect(request.getTrackedCleanups().length).to.be.equal(2) + } + }) }) function spec(type, provider, name, revision) { diff --git a/test/unit/providers/fetch/dispatcherTests.js b/test/unit/providers/fetch/dispatcherTests.js index e0a3fec0..a45819b5 100644 --- a/test/unit/providers/fetch/dispatcherTests.js +++ b/test/unit/providers/fetch/dispatcherTests.js @@ -3,12 +3,23 @@ const chai = require('chai') const spies = require('chai-spies') +const sinon = require('sinon') +const fs = require('fs') +const PassThrough = require('stream').PassThrough +const proxyquire = require('proxyquire') + const Request = require('../../../../ghcrawler').request +const { promisify } = require('util') chai.use(spies) const expect = chai.expect const FetchDispatcher = require('../../../../providers/fetch/dispatcher') +const MavenFetch = require('../../../../providers/fetch/mavencentralFetch') +const GitCloner = require('../../../../providers/fetch/gitCloner') +const PypiFetch = require('../../../../providers/fetch/pypiFetch') +const RubyGemsFetch = require('../../../../providers/fetch/rubyGemsFetch') +const PackagistFetch = require('../../../../providers/fetch/packagistFetch') describe('fetchDispatcher', () => { it('should handle any request', () => { @@ -20,7 +31,7 @@ describe('fetchDispatcher', () => { const processorsStub = [{ canHandle: () => true, shouldFetch: () => false }] const fetchDispatcher = FetchDispatcher({}, {}, {}, processorsStub) const request = {} - chai.spy.on(request, 'markNoSave', () => {}) + chai.spy.on(request, 'markNoSave', () => { }) await fetchDispatcher.handle(request) expect(request.markNoSave).to.have.been.called.once }) @@ -35,3 +46,380 @@ describe('fetchDispatcher', () => { expect(result.shouldSkip()).to.be.true }) }) + +describe('fetchDispatcher cache fetch result', () => { + + let resultCache + let inProgressPromiseCache + + beforeEach(() => { + resultCache = {} + inProgressPromiseCache = {} + }) + + afterEach(() => { + Object.values(resultCache).forEach(fetched => fetched.cleanup()) + }) + + function setupDispatcher(fetcher) { + const storeStub = { get: () => null } + const processorsStub = [{ canHandle: () => true, shouldFetch: () => true, getUrnFor: () => 'documentkey' }] + const filterStub = { shouldFetchMissing: () => true, shouldFetch: () => true } + const options = { logger: { info: sinon.stub(), debug: sinon.stub() } } + return FetchDispatcher(options, storeStub, [fetcher], processorsStub, filterStub, mockResultCache(resultCache), inProgressPromiseCache) + } + + function mockResultCache(cache) { + return { + get: key => cache[key], + set: (key, value) => cache[key] = value, + } + } + + async function verifyFetchAndCache(fetchDispatcher, url) { + const fetched = await fetchDispatcher.handle(new Request('test', url)) + verifyFetchSuccess() + + fetchDispatcher._fetchPromise = sinon.stub().rejects('should not be called') + const resultFromCache = await fetchDispatcher.handle(new 
Request('test', url)) + verifyFetchResult(fetched, resultFromCache) + } + + function verifyFetchSuccess() { + expect(Object.keys(resultCache).length).to.be.equal(1) + expect(Object.keys(inProgressPromiseCache).length).to.be.equal(0) + } + + function verifyFetchFailure() { + expect(Object.keys(resultCache).length).to.be.equal(0) + expect(Object.keys(inProgressPromiseCache).length).to.be.equal(0) + } + + function verifyFetchResult(fetched, resultFromCache) { + // eslint-disable-next-line no-unused-vars + const { cleanups, ...expected } = fetched + expect(resultFromCache).to.be.deep.equal(expected) + } + + describe('cache maven fetch result', () => { + function setupMavenFetch() { + const fileSupplier = url => { + let fileName + if (url.includes('solrsearch')) fileName = 'swt-3.3.0-v3346.json' + if (url.endsWith('.pom')) fileName = 'swt-3.3.0-v3346.pom' + if (url.endsWith('-sources.jar')) fileName = 'swt-3.3.0-v3346.jar' + if (url.endsWith('.jar')) fileName = 'swt-3.3.0-v3346.jar' + return `/maven/${fileName}` + } + return MavenFetch({ + logger: { log: sinon.stub() }, + requestPromise: createRequestPromiseStub(fileSupplier), + requestStream: createGetStub(fileSupplier) + }) + } + + let fetchDispatcher + + beforeEach(() => { + fetchDispatcher = setupDispatcher(setupMavenFetch()) + }) + + it('cached result same as fetched', async () => { + await verifyFetchAndCache(fetchDispatcher, 'cd:/maven/mavencentral/org.eclipse/swt/3.3.0-v3344') + }) + + it('no cache for missing maven fetch', async () => { + const fetched = await fetchDispatcher.handle(new Request('test', 'cd:/maven/mavencentral/org.eclipse/swt')) + expect(fetched.processControl).to.be.equal('skip') + verifyFetchFailure() + }) + + it('no cache for failed maven fetch', async () => { + try { + await fetchDispatcher.handle(new Request('test', 'cd:/maven/mavencentral/org.eclipse/error/3.3.0-v3344')) + expect(false).to.be.true + } catch (error) { + expect(error.message).to.be.equal('yikes') + verifyFetchFailure() + } + }) + }) + + describe('cache GitCloner fetch result', () => { + let fetchDispatcher + + beforeEach(() => { + const gitCloner = GitCloner({ logger: { log: sinon.stub() } }) + gitCloner._cloneRepo = sinon.stub().resolves(532) + gitCloner._getRevision = sinon.stub().resolves('deef80a18aa929943e5dab1dba7276c231c84519') + gitCloner._getDate = sinon.stub().resolves(new Date('2021-04-08T13:27:49.000Z')) + fetchDispatcher = setupDispatcher(gitCloner) + }) + + it('cached result same as fetched', async () => { + await verifyFetchAndCache(fetchDispatcher, 'cd:git/github/palantir/refreshable/2.0.0') + }) + }) + + describe('cache PypiFetch result', () => { + let pypiFetch + + beforeEach(() => { + pypiFetch = PypiFetch({ logger: { log: sinon.stub() } }) + pypiFetch._getPackage = sinon.stub().callsFake(async (spec, registryData, destination) => { + await getPacakgeStub('test/fixtures/maven/swt-3.3.0-v3346.jar', destination) + return true + }) + }) + + it('cached result same as fetched', async () => { + pypiFetch._getRegistryData = sinon.stub().resolves(JSON.parse(fs.readFileSync('test/fixtures/pypi/registryData.json'))) + const fetchDispatcher = setupDispatcher(pypiFetch) + await verifyFetchAndCache(fetchDispatcher, 'cd:/pypi/pypi/-/backports.ssl-match-hostname/3.7.0.1') + }) + + it('no cache for missing package', async () => { + pypiFetch._getRegistryData = sinon.stub().resolves(null) + const fetchDispatcher = setupDispatcher(pypiFetch) + + const fetched = await fetchDispatcher.handle(new Request('licensee', 'cd:/pypi/pypi/-/test/revision')) + 
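// a missing package is marked to skip and must not populate either cache
+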
expect(fetched.processControl).to.be.equal('skip') + verifyFetchFailure() + }) + }) + + describe('cache NpmFetch result', () => { + + const npmRegistryRequestStub = () => { + const version = '0.3.0' + return { + manifest: { version }, + versions: { [version]: { test: true } }, + time: { [version]: '42' } + } + } + + let fetchDispatcher + + beforeEach(() => { + const NpmFetch = proxyquire('../../../../providers/fetch/npmjsFetch', { + 'request-promise-native': npmRegistryRequestStub + }) + const npmFetch = NpmFetch({ logger: { log: sinon.stub() } }) + npmFetch._getPackage = sinon.stub().callsFake(async (spec, destination) => + await getPacakgeStub('test/fixtures/npm/redie-0.3.0.tgz', destination)) + + fetchDispatcher = setupDispatcher(npmFetch) + }) + + it('cached result same as fetched', async () => { + await verifyFetchAndCache(fetchDispatcher, 'cd:/npm/npmjs/-/redie/0.3.0') + }) + }) + + describe('cache RubyGemsFetch result', () => { + let fetchDispatcher + + beforeEach(() => { + const rubyGemsFetch = RubyGemsFetch({ logger: { log: sinon.stub() } }) + rubyGemsFetch._getRegistryData = sinon.stub().resolves({ + name: 'small', + version: '0.5.1', + gem_uri: 'https://rubygems.org/gems/small-0.5.1.gem', + }) + rubyGemsFetch._getPackage = sinon.stub().callsFake(async (spec, destination) => + await getPacakgeStub('test/fixtures/ruby/small-0.5.1.gem', destination)) + + fetchDispatcher = setupDispatcher(rubyGemsFetch) + }) + + it('cached result same as fetched', async () => { + await verifyFetchAndCache(fetchDispatcher, 'cd:/gem/rubygems/-/small/0.5.1') + }) + }) + + describe('cache PackagistFetch result', () => { + let fetchDispatcher + + beforeEach(() => { + const packagistFetch = PackagistFetch({ logger: { log: sinon.stub() } }) + packagistFetch._getRegistryData = sinon.stub().resolves( + JSON.parse(fs.readFileSync('test/fixtures/packagist/registryData.json'))) + packagistFetch._getPackage = sinon.stub().callsFake(async (spec, registryData, destination) => + await getPacakgeStub('test/fixtures/composer/symfony-polyfill-mbstring-v1.11.0-0-gfe5e94c.zip', destination)) + + fetchDispatcher = setupDispatcher(packagistFetch) + }) + + it('cached result same as fetched', async () => { + await verifyFetchAndCache(fetchDispatcher, 'cd:/composer/packagist/symfony/polyfill-mbstring/1.11.0') + }) + }) + + describe('cache CrateioFetch result', () => { + const requestPromiseStub = options => { + const body = fs.readFileSync('test/fixtures/crates/bitflags.json') + if (options && options.json) return JSON.parse(body) + const response = new PassThrough() + response.write(fs.readFileSync('test/fixtures/crates/bitflags-1.0.4.crate')) + response.statusCode = 200 + response.end() + return response + } + + let fetchDispatcher + + beforeEach(() => { + const CrateioFetch = proxyquire('../../../../providers/fetch/cratesioFetch', { + 'request-promise-native': requestPromiseStub + }) + const packagistFetch = CrateioFetch({ logger: { log: sinon.stub() } }) + fetchDispatcher = setupDispatcher(packagistFetch) + }) + + it('cached result same as fetched', async () => { + await verifyFetchAndCache(fetchDispatcher, 'cd:/crate/cratesio/-/bitflags/1.0.4') + }) + }) + + describe('cache DebianFetch result', () => { + const memCacheStub = { get: () => true } + let fetchDispatcher + + beforeEach(() => { + const DebianFetch = proxyquire('../../../../providers/fetch/debianFetch', { + 'memory-cache': memCacheStub + }) + const fetch = DebianFetch({ logger: { info: sinon.stub() }, cdFileLocation: 'test/fixtures/debian/fragment' }) + 
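// stub the network-bound helpers so the fetch runs entirely from local fixtures
+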
fetch._download = async (downloadUrl, destination) => + getPacakgeStub('test/fixtures/debian/0ad_0.0.17-1_armhf.deb', destination) + fetch._getDeclaredLicenses = async () => { + return ['MIT', 'BSD-3-clause'] + } + fetchDispatcher = setupDispatcher(fetch) + }) + + it('cached result same as fetched', async () => { + await verifyFetchAndCache(fetchDispatcher, 'cd:/deb/debian/-/0ad/0.0.17-1_armhf') + }) + }) + + describe('cache GoFetch result', () => { + function fileSupplier(url) { + const fileName = url.endsWith('.info') ? 'v1.3.0.info' : 'v1.3.0.zip' + return `/go/${fileName}` + } + + let fetchDispatcher + + beforeEach(() => { + const GoFetch = proxyquire('../../../../providers/fetch/goFetch', { + request: { get: createGetStub(fileSupplier) }, + 'request-promise-native': createRequestPromiseStub(fileSupplier) + }) + const fetch = GoFetch({ logger: { info: sinon.stub() } }) + fetchDispatcher = setupDispatcher(fetch) + }) + + it('cached result same as fetched', async () => { + await verifyFetchAndCache(fetchDispatcher, 'cd:/go/golang/rsc.io/quote/v1.3.0') + }) + }) + + describe('cache NugetFetch result', () => { + const fileSupplier = (url) => { + let fileName = null + if (url.includes('catalog')) fileName = 'xunit.core.2.4.1.catalog.json' + if (url.endsWith('index.json')) fileName = 'xunit.core.index.json' + if (url.endsWith('.json')) fileName = 'xunit.core.2.4.1.json' + if (url.endsWith('.nuspec')) fileName = 'xunit.core.2.4.1.nuspec' + if (url.endsWith('.nupkg')) fileName = 'xunit.core.2.4.1.nupkg' + if (url.endsWith('license.txt')) fileName = 'license.txt' + return `nuget/${fileName}` + } + + const requestPromiseStub = (url, options) => { + const body = fs.readFileSync(`test/fixtures/${fileSupplier(url)}`) + if (options?.json) return { body: JSON.parse(body), statusCode: 200 } + const response = new PassThrough() + response.body = body + response.write(body) + response.statusCode = 200 + response.end() + return response + } + + let fetchDispatcher + + beforeEach(() => { + const NugetFetch = proxyquire('../../../../providers/fetch/nugetFetch', { + requestretry: { + defaults: () => { + return { get: requestPromiseStub } + } + } + }) + const fetch = NugetFetch({ logger: { info: sinon.stub() } }) + fetchDispatcher = setupDispatcher(fetch) + }) + it('cached result same as fetched', async () => { + await verifyFetchAndCache(fetchDispatcher, 'cd:/nuget/nuget/-/xunit.core/2.4.1') + }) + }) + + describe('cache PodFetch result', () => { + let fetchDispatcher + + beforeEach(() => { + const PodFetch = proxyquire('../../../../providers/fetch/podFetch', { + requestretry: { + defaults: () => { + return { get: sinon.stub().resolves({ body: loadJson('pod/versions.json'), statusCode: 200 }) } + } + }, + 'request-promise-native': sinon.stub().resolves(loadJson('pod/registryData.json')) + }) + const fetch = PodFetch({ logger: { info: sinon.stub() } }) + fetchDispatcher = setupDispatcher(fetch) + }) + + it('cached result same as fetched', async () => { + await verifyFetchAndCache(fetchDispatcher, 'cd:/pod/cocoapods/-/SwiftLCS/1.0') + }) + }) +}) + +const createRequestPromiseStub = fileSupplier => { + return options => { + if (options.url) { + if (options.url.includes('error')) throw new Error('yikes') + if (options.url.includes('code')) throw { statusCode: 500, message: 'Code' } + if (options.url.includes('missing')) throw { statusCode: 404 } + } + const content = fs.readFileSync(`test/fixtures/${fileSupplier(options.url)}`) + return options.json ? 
JSON.parse(content) : content + } +} + +const createGetStub = fileSupplier => { + return (url, callback) => { + const response = new PassThrough() + const file = `test/fixtures/${fileSupplier(url)}` + if (file) { + response.write(fs.readFileSync(file)) + callback(null, { statusCode: 200 }) + } else { + callback(new Error(url.includes('error') ? 'Error' : 'Code')) + } + response.end() + return response + } +} + +const getPacakgeStub = async (file, destination) => { + await promisify(fs.copyFile)(file, destination) +} + +const loadJson = fileName => { + return JSON.parse(fs.readFileSync(`test/fixtures/${fileName}`)) +} \ No newline at end of file diff --git a/test/unit/providers/fetch/gitClonerTests.js b/test/unit/providers/fetch/gitClonerTests.js index 5d712d51..0c963525 100644 --- a/test/unit/providers/fetch/gitClonerTests.js +++ b/test/unit/providers/fetch/gitClonerTests.js @@ -1,5 +1,7 @@ const expect = require('chai').expect +const sinon = require('sinon') const gitCloner = require('../../../../providers/fetch/gitCloner') +const Request = require('../../../../ghcrawler').request const gitlab_stub = 'https://gitlab.com/' const github_stub = 'https://github.com/' @@ -15,6 +17,39 @@ describe('building git urls', () => { }) }) +describe('fetch result', () => { + let gitClient + beforeEach(() => { + gitClient = gitCloner({ logger: { log: sinon.stub() } }) + gitClient._cloneRepo = sinon.stub().resolves(532) + gitClient._getRevision = sinon.stub().resolves('deef80a18aa929943e5dab1dba7276c231c84519') + gitClient._getDate = sinon.stub().resolves(new Date('2021-04-08T13:27:49.000Z')) + }) + + it('fetch success', async () => { + const request = await gitClient.handle(new Request('licensee', 'cd:git/github/palantir/refreshable/2.0.0')) + request.fetchResult.copyTo(request) + expect(request.url).to.be.equal('cd:/git/github/palantir/refreshable/deef80a18aa929943e5dab1dba7276c231c84519') + expect(request.meta.gitSize).to.be.equal(532) + expect(request.contentOrigin).to.be.equal('origin') + expect(request.casedSpec.toUrl()).to.be.equal('cd:/git/github/palantir/refreshable/deef80a18aa929943e5dab1dba7276c231c84519') + expect(request.document.size).to.be.equal(532) + expect(request.document.releaseDate.toISOString()).to.be.equal('2021-04-08T13:27:49.000Z') + expect(request.getTrackedCleanups().length).to.be.equal(0) + }) + + it('fetch failed', async () => { + gitClient._getDate = sinon.stub().rejects('failed') + const request = new Request('licensee', 'cd:git/github/palantir/refreshable/2.0.0') + try { + await gitClient.handle(request) + } catch (error) { + expect(request.fetchResult).to.be.undefined + expect(request.getTrackedCleanups().length).to.be.equal(1) + } + }) +}) + function spec(type, provider, namespace, name, revision) { return { type, provider: provider, namespace, name, revision } } diff --git a/test/unit/providers/fetch/goFetchTests.js b/test/unit/providers/fetch/goFetchTests.js index f59cdce0..da8a3e59 100644 --- a/test/unit/providers/fetch/goFetchTests.js +++ b/test/unit/providers/fetch/goFetchTests.js @@ -111,6 +111,7 @@ describe('Go Proxy fetching', () => { it('succeeds in download, decompress, hash, and get registry licenses', async () => { const handler = Fetch({ logger: { log: sinon.stub(), info: sinon.stub() }, http: successHttpStub }) const request = await handler.handle(new Request('test', 'cd:/go/golang/rsc.io/quote/v1.3.0')) + request.fetchResult.copyTo(request) expect(request.document.hashes.sha1).to.be.equal(hashes['v1.3.0.zip']['sha1']) 
expect(request.document.hashes.sha256).to.be.equal(hashes['v1.3.0.zip']['sha256']) expect(request.document.releaseDate).to.equal('2018-02-14T00:54:53Z') @@ -126,6 +127,7 @@ describe('Go Proxy fetching', () => { const handler = Fetch({ logger: { log: sinon.stub(), info: sinon.stub() }, http: successHttpStub }) const request = await handler.handle(new Request('test', 'cd:/go/golang/rsc.io/quote')) + request.fetchResult.copyTo(request) expect(request.casedSpec.revision).to.equal('v1.5.3-pre1') }) @@ -138,6 +140,7 @@ describe('Go Proxy fetching', () => { expect(request.processControl).to.equal('skip') expect(request.document).to.be.undefined expect(request.casedSpec).to.be.undefined + expect(request.fetchResult).to.be.undefined }) it('marks the request for skipping when no revision is found', async () => { @@ -149,6 +152,7 @@ describe('Go Proxy fetching', () => { expect(request.processControl).to.equal('skip') expect(request.document).to.be.undefined expect(request.casedSpec).to.be.undefined + expect(request.fetchResult).to.be.undefined }) it('marks the request for skipping when no artifact is found', async () => { @@ -161,6 +165,7 @@ describe('Go Proxy fetching', () => { expect(request.outcome).to.eq('Missing ') expect(request.document).to.be.undefined expect(request.casedSpec).to.be.undefined + expect(request.fetchResult).to.be.undefined }) it('marks the request for requeuing when pkg.go.dev return 429', async () => { @@ -216,7 +221,7 @@ describe('Go Proxy fetching', () => { } }) const request = await handler.handle(new Request('test', 'cd:/go/golang/rsc.io/quote/v1.3.0')) - expect(request.document.registryData?.licenses).to.be.undefined + expect(request.fetchResult.document.registryData?.licenses).to.be.undefined }) it('should not pass invalid license if html changed', async () => { @@ -242,7 +247,7 @@ describe('Go Proxy fetching', () => { } }) const request = await handler.handle(new Request('test', 'cd:/go/golang/rsc.io/quote/v1.3.0')) - expect(request.document.registryData?.licenses).to.be.undefined + expect(request.fetchResult.document.registryData?.licenses).to.be.undefined expect(info.called) }) }) diff --git a/test/unit/providers/fetch/gradlePluginFetchTests.js b/test/unit/providers/fetch/gradlePluginFetchTests.js index 1916f6d9..11fd0983 100644 --- a/test/unit/providers/fetch/gradlePluginFetchTests.js +++ b/test/unit/providers/fetch/gradlePluginFetchTests.js @@ -66,13 +66,13 @@ describe('Gradle plugin fetch', () => { return fs.readFileSync(`test/fixtures/maven/${pickArtifact(url)}`) } - function verifySuccess(request) { - expect(request.document.hashes.sha1).to.be.equal(hashes['swt-3.3.0-v3346.jar']['sha1']) - expect(request.document.hashes.sha256).to.be.equal(hashes['swt-3.3.0-v3346.jar']['sha256']) + function verifySuccess({ document, casedSpec }) { + expect(document.hashes.sha1).to.be.equal(hashes['swt-3.3.0-v3346.jar']['sha1']) + expect(document.hashes.sha256).to.be.equal(hashes['swt-3.3.0-v3346.jar']['sha256']) //date from manifest.mf modification date - expect(request.document.releaseDate.startsWith('2007-06-25')).to.be.true - expect(request.casedSpec.name).to.equal('swt') - expect(request.casedSpec.namespace).to.equal('org.eclipse') + expect(document.releaseDate.startsWith('2007-06-25')).to.be.true + expect(casedSpec.name).to.equal('swt') + expect(casedSpec.namespace).to.equal('org.eclipse') } let handler @@ -113,25 +113,25 @@ describe('Gradle plugin fetch', () => { it('test success with maven spec with version', async () => { const request = await handler.handle(new 
Request('test', 'cd:/maven/gradleplugin/org.eclipse/swt/3.3.0-v3344'))
-    verifySuccess(request)
-    expect(request.casedSpec.revision).to.equal('3.3.0-v3344')
-    expect(request.document.location).to.be.a('string')
-    expect(request.document.poms.length).to.equal(1)
+    verifySuccess(request.fetchResult)
+    expect(request.fetchResult.casedSpec.revision).to.equal('3.3.0-v3344')
+    expect(request.fetchResult.document.location).to.be.a('string')
+    expect(request.fetchResult.document.poms.length).to.equal(1)
   })
 
   it('test success with maven spec without version', async () => {
     const request = await handler.handle(new Request('test', 'cd:/maven/gradleplugin/org.eclipse/swt'))
-    verifySuccess(request)
-    expect(request.casedSpec.revision).to.equal('4.5.10')
-    expect(request.url).to.equal('cd:/maven/gradleplugin/org.eclipse/swt/4.5.10')
+    verifySuccess(request.fetchResult)
+    expect(request.fetchResult.casedSpec.revision).to.equal('4.5.10')
+    expect(request.fetchResult.url).to.equal('cd:/maven/gradleplugin/org.eclipse/swt/4.5.10')
   })
 
   it('test success with sourcearchive', async () => {
     const request = await handler.handle(new Request('test', 'cd:/sourcearchive/gradleplugin/org.eclipse/swt/3.3.0-v3344'))
-    verifySuccess(request)
-    expect(request.casedSpec.revision).to.equal('3.3.0-v3344')
-    expect(request.document.location).to.be.a('string')
-    expect(request.document.poms.length).to.equal(1)
+    verifySuccess(request.fetchResult)
+    expect(request.fetchResult.casedSpec.revision).to.equal('3.3.0-v3344')
+    expect(request.fetchResult.document.location).to.be.a('string')
+    expect(request.fetchResult.document.poms.length).to.equal(1)
   })
 
   it('handle no maven meta data found', async () => {
diff --git a/test/unit/providers/fetch/mavencentralFetchTests.js b/test/unit/providers/fetch/mavencentralFetchTests.js
index 4cbcd340..bd027fa3 100644
--- a/test/unit/providers/fetch/mavencentralFetchTests.js
+++ b/test/unit/providers/fetch/mavencentralFetchTests.js
@@ -104,13 +104,13 @@ describe('MavenCentral fetching', () => {
 
   it('succeeds in download, decompress and hash', async () => {
     const request = await handler.handle(new Request('test', 'cd:/maven/mavencentral/org.eclipse/swt/3.3.0-v3344'))
-    expect(request.document.hashes.sha1).to.be.equal(hashes['swt-3.3.0-v3346.jar']['sha1'])
-    expect(request.document.hashes.sha256).to.be.equal(hashes['swt-3.3.0-v3346.jar']['sha256'])
+    expect(request.fetchResult.document.hashes.sha1).to.be.equal(hashes['swt-3.3.0-v3346.jar']['sha1'])
+    expect(request.fetchResult.document.hashes.sha256).to.be.equal(hashes['swt-3.3.0-v3346.jar']['sha256'])
     //from query maven central
-    expect(request.document.releaseDate).to.equal('2007-11-27T07:15:10.000Z')
-    expect(request.casedSpec.name).to.equal('swt')
-    expect(request.casedSpec.namespace).to.equal('org.eclipse')
-    expect(request.document.location).to.be.a('string')
+    expect(request.fetchResult.document.releaseDate).to.equal('2007-11-27T07:15:10.000Z')
+    expect(request.fetchResult.casedSpec.name).to.equal('swt')
+    expect(request.fetchResult.casedSpec.namespace).to.equal('org.eclipse')
+    expect(request.fetchResult.document.location).to.be.a('string')
   })
 
   it('handles download error', async () => {
diff --git a/test/unit/providers/fetch/mavengoogleFetchTests.js b/test/unit/providers/fetch/mavengoogleFetchTests.js
index a5a35873..99294db1 100644
--- a/test/unit/providers/fetch/mavengoogleFetchTests.js
+++ b/test/unit/providers/fetch/mavengoogleFetchTests.js
@@ -102,13 +102,13 @@ describe('MavenGoogle fetching', () => {
 
   it('succeeds in download, decompress and hash', async () => {
     const request = await handler.handle(new Request('test', 'cd:/maven/mavengoogle/org.eclipse/swt/3.3.0-v3344'))
-    expect(request.document.hashes.sha1).to.be.equal(hashes['swt-3.3.0-v3346.jar']['sha1'])
-    expect(request.document.hashes.sha256).to.be.equal(hashes['swt-3.3.0-v3346.jar']['sha256'])
+    expect(request.fetchResult.document.hashes.sha1).to.be.equal(hashes['swt-3.3.0-v3346.jar']['sha1'])
+    expect(request.fetchResult.document.hashes.sha256).to.be.equal(hashes['swt-3.3.0-v3346.jar']['sha256'])
     //date from manifest
-    expect(request.document.releaseDate.startsWith('2007-06-25')).to.be.true
-    expect(request.casedSpec.name).to.equal('swt')
-    expect(request.casedSpec.namespace).to.equal('org.eclipse')
-    expect(request.document.location).to.be.a('string')
+    expect(request.fetchResult.document.releaseDate.startsWith('2007-06-25')).to.be.true
+    expect(request.fetchResult.casedSpec.name).to.equal('swt')
+    expect(request.fetchResult.casedSpec.namespace).to.equal('org.eclipse')
+    expect(request.fetchResult.document.location).to.be.a('string')
   })
 
   it('handles download error', async () => {
diff --git a/test/unit/providers/fetch/npmjsFetchTests.js b/test/unit/providers/fetch/npmjsFetchTests.js
index 41b03f42..ddc6653a 100644
--- a/test/unit/providers/fetch/npmjsFetchTests.js
+++ b/test/unit/providers/fetch/npmjsFetchTests.js
@@ -85,6 +85,7 @@ describe('', () => {
   it('succeeds in download, decompress and hash', async () => {
     const handler = setup(createRegistryData('0.3.0'))
     const request = await handler.handle(new Request('test', 'cd:/npm/npmjs/-/redie/0.3.0'))
+    request.fetchResult.copyTo(request)
     expect(request.document.hashes.sha1).to.be.equal(hashes['redie-0.3.0.tgz']['sha1'])
     expect(request.document.hashes.sha256).to.be.equal(hashes['redie-0.3.0.tgz']['sha256'])
     expect(request.document.releaseDate).to.equal('42')
diff --git a/test/unit/providers/fetch/nugetFetchTests.js b/test/unit/providers/fetch/nugetFetchTests.js
index 19ce1c62..feab0dd5 100644
--- a/test/unit/providers/fetch/nugetFetchTests.js
+++ b/test/unit/providers/fetch/nugetFetchTests.js
@@ -78,6 +78,7 @@ describe('', () => {
   it('succeeds in download, decompress and hash', async () => {
     const handler = setup()
     const request = await handler.handle(new Request('test', 'cd:/nuget/nuget/-/xunit.core/2.4.1'))
+    request.fetchResult.copyTo(request)
     expect(request.document.hashes.sha1).to.be.equal(hashes['xunit.core.2.4.1.nupkg']['sha1'])
     expect(request.document.hashes.sha256).to.be.equal(hashes['xunit.core.2.4.1.nupkg']['sha256'])
     expect(request.document.releaseDate).to.equal('2018-10-29T04:18:45.803Z')
@@ -88,6 +89,7 @@ describe('', () => {
   it('succeeds for latest version for download, decompress and hash', async () => {
     const handler = setup()
     const request = await handler.handle(new Request('test', 'cd:/nuget/nuget/-/xunit.core'))
+    request.fetchResult.copyTo(request)
     expect(request.document.hashes.sha1).to.be.equal(hashes['xunit.core.2.4.1.nupkg']['sha1'])
     expect(request.document.hashes.sha256).to.be.equal(hashes['xunit.core.2.4.1.nupkg']['sha256'])
     expect(request.document.releaseDate).to.equal('2018-10-29T04:18:45.803Z')
diff --git a/test/unit/providers/fetch/packagistFetchTests.js b/test/unit/providers/fetch/packagistFetchTests.js
index ea796ad3..43d944f2 100644
--- a/test/unit/providers/fetch/packagistFetchTests.js
+++ b/test/unit/providers/fetch/packagistFetchTests.js
@@ -59,10 +59,12 @@ describe('packagistFetch', () => {
     const handler = setup(createRegistryData())
     handler._getRegistryData = () => createRegistryData()
     const request = await handler.handle(new Request('test', 'cd:/composer/packagist/symfony/polyfill-mbstring/1.11.0'))
+    request.fetchResult.copyTo(request)
     expect(request.document.hashes.sha1).to.be.equal(hashes['symfony-polyfill-mbstring-v1.11.0-0-gfe5e94c.zip']['sha1'])
     expect(request.document.hashes.sha256).to.be.equal(
       hashes['symfony-polyfill-mbstring-v1.11.0-0-gfe5e94c.zip']['sha256']
     )
+    expect(request.document.dirRoot).to.be.equal('symfony-polyfill-mbstring-fe5e94c')
     expect(request.document.releaseDate).to.equal('2019-02-06T07:57:58+00:00')
   })
 
@@ -99,37 +101,7 @@ describe('packagistFetch', () => {
 })
 
 function createRegistryData() {
-  return {
-    manifest: {
-      name: 'symfony/polyfill-mbstring',
-      description: 'Symfony polyfill for the Mbstring extension',
-      keywords: ['mbstring', 'compatibility', 'portable', 'polyfill', 'shim'],
-      homepage: 'https://symfony.com',
-      version: 'v1.11.0',
-      version_normalized: '1.11.0.0',
-      license: ['MIT'],
-      authors: [[Object], [Object]],
-      source: {
-        type: 'git',
-        url: 'https://github.com/symfony/polyfill-mbstring.git',
-        reference: 'fe5e94c604826c35a32fa832f35bd036b6799609'
-      },
-      dist: {
-        type: 'zip',
-        url: 'https://api.github.com/repos/symfony/polyfill-mbstring/zipball/fe5e94c604826c35a32fa832f35bd036b6799609',
-        reference: 'fe5e94c604826c35a32fa832f35bd036b6799609',
-        shasum: ''
-      },
-      type: 'library',
-      time: '2019-02-06T07:57:58+00:00',
-      autoload: { 'psr-4': [Object], files: [Array] },
-      extra: { 'branch-alias': [Object] },
-      require: { php: '>=5.3.3' },
-      suggest: { 'ext-mbstring': 'For best performance' },
-      uid: 2850406
-    },
-    releaseDate: '2019-02-06T07:57:58+00:00'
-  }
+  return JSON.parse(fs.readFileSync('test/fixtures/packagist/registryData.json'))
 }
 
 function setup(registryData) {
diff --git a/test/unit/providers/fetch/podFetchTests.js b/test/unit/providers/fetch/podFetchTests.js
new file mode 100644
index 00000000..bbb3b7ca
--- /dev/null
+++ b/test/unit/providers/fetch/podFetchTests.js
@@ -0,0 +1,49 @@
+const expect = require('chai').expect
+const sinon = require('sinon')
+const fs = require('fs')
+const proxyquire = require('proxyquire')
+
+const Request = require('../../../../ghcrawler/lib/request.js')
+
+describe('podFetch', () => {
+
+  const loadJson = fileName => {
+    return JSON.parse(fs.readFileSync(`test/fixtures/pod/${fileName}`))
+  }
+
+  const PodFetch = proxyquire('../../../../providers/fetch/podFetch', {
+    requestretry: {
+      defaults: () => {
+        return {
+          get: sinon.stub().resolves({ body: loadJson('versions.json'), statusCode: 200 })
+        }
+      }
+    },
+    'request-promise-native': sinon.stub().resolves(loadJson('registryData.json'))
+  })
+
+  let fetch
+
+  beforeEach(() => {
+    fetch = PodFetch({ logger: { info: sinon.stub() } })
+    fetch._getPackage = sinon.stub().resolves('/tmp/cd-pYKk9q/SwiftLCS-1.0')
+  })
+
+  it('spec with version', async () => {
+    const result = await fetch.handle(new Request('test', 'cd:/pod/cocoapods/-/SwiftLCS/1.0'))
+    result.fetchResult.copyTo(result)
+    expect(result.url).to.be.equal('cd:/pod/cocoapods/-/SwiftLCS/1.0')
+    expect(result.document.location).to.be.a('string')
+    expect(result.document.registryData.name).to.be.equal('SwiftLCS')
+    expect(result.document.releaseDate).to.be.equal('2015-10-19 01:36:36 UTC')
+    expect(result.casedSpec.toUrl()).to.be.equal('cd:/pod/cocoapods/-/SwiftLCS/1.0')
+  })
+
+  it('spec without version', async () => {
+    const result = await fetch.handle(new Request('test', 'cd:/pod/cocoapods/-/SwiftLCS'))
+    result.fetchResult.copyTo(result)
+    expect(result.url).to.be.equal('cd:/pod/cocoapods/-/SwiftLCS/1.3.4')
+    expect(result.document.releaseDate).to.be.equal('2019-04-10 00:22:10 UTC')
+    expect(result.casedSpec.toUrl()).to.be.equal('cd:/pod/cocoapods/-/SwiftLCS/1.3.4')
+  })
+})
\ No newline at end of file
diff --git a/test/unit/providers/fetch/pypiFetchTests.js b/test/unit/providers/fetch/pypiFetchTests.js
index 12982b9e..aa5cbdb4 100644
--- a/test/unit/providers/fetch/pypiFetchTests.js
+++ b/test/unit/providers/fetch/pypiFetchTests.js
@@ -2,7 +2,10 @@
 // SPDX-License-Identifier: MIT
 
 const expect = require('chai').expect
+const fs = require('fs')
 const sinon = require('sinon')
+const PassThrough = require('stream').PassThrough
+const nodeRequest = require('request')
 const PypiFetch = require('../../../../providers/fetch/pypiFetch')
 const requestRetryWithDefaults = require('../../../../providers/fetch/requestRetryWithDefaults')
 const Request = require('../../../../ghcrawler/lib/request.js')
@@ -15,6 +18,7 @@ describe('pypiFetch handle function', () => {
 
   beforeEach(function () {
     requestGetStub = sandbox.stub(requestRetryWithDefaults, 'get')
+    sandbox.stub(nodeRequest, 'get').callsFake(getCompressedFile)
     fetch = PypiFetch(pypiFetchOptions)
   })
 
@@ -31,6 +35,24 @@ describe('pypiFetch handle function', () => {
     expect(result.outcome).to.be.equal('Missing ')
   })
 
+  it('fetch success', async () => {
+    const registryData = JSON.parse(fs.readFileSync('test/fixtures/pypi/registryData.json'))
+    requestGetStub.resolves({ body: registryData, statusCode: 200 })
+
+    const result = await fetch.handle(new Request('pypi', 'cd:/pypi/pypi/-/backports.ssl-match-hostname/3.7.0.1'))
+    result.fetchResult.copyTo(result)
+    expect(result.url).to.be.equal('cd:/pypi/pypi/-/backports.ssl-match-hostname/3.7.0.1')
+    expect(result.contentOrigin).to.be.equal('origin')
+    expect(result.casedSpec.toUrl()).to.be.equal('cd:/pypi/pypi/-/backports.ssl_match_hostname/3.7.0.1')
+    expect(result.document.location).to.be.a('string')
+    expect(result.document.registryData).to.be.deep.equal(registryData)
+    expect(result.document.releaseDate).to.be.equal('2019-01-12T22:25:58')
+    expect(result.document.hashes).to.be.deep.equal({
+      sha1: 'd886a6db6b7195911516896feebe3a5d1dddfd46',
+      sha256: '18a3a53a27df164d4db56d0f7f5da2edd25995418d5538f40eb4018347fe1354'
+    })
+  })
+
   it('returns missing when failed to find download url', async () => {
     // release information in the registry data is empty
     requestGetStub.returns({
@@ -44,3 +66,12 @@ describe('pypiFetch handle function', () => {
     expect(result.outcome).to.be.equal('Missing ')
   })
 })
+
+const getCompressedFile = (url, callback) => {
+  const response = new PassThrough()
+  const file = 'test/fixtures/maven/swt-3.3.0-v3346.jar'
+  response.write(fs.readFileSync(file))
+  callback(null, { statusCode: 200 })
+  response.end()
+  return response
+}
diff --git a/test/unit/providers/fetch/rubyGemsFetchTests.js b/test/unit/providers/fetch/rubyGemsFetchTests.js
new file mode 100644
index 00000000..86f6e9f1
--- /dev/null
+++ b/test/unit/providers/fetch/rubyGemsFetchTests.js
@@ -0,0 +1,44 @@
+const expect = require('chai').expect
+const fs = require('fs')
+const sinon = require('sinon')
+const { promisify } = require('util')
+const RubyGemsFetch = require('../../../../providers/fetch/rubyGemsFetch')
+const Request = require('../../../../ghcrawler/lib/request.js')
+
+describe('rubyGemsFetch', () => {
+  let fetch
+  beforeEach(() => {
+    fetch = RubyGemsFetch({ logger: { info: sinon.stub() } })
+    fetch._getRegistryData = sinon.stub().resolves({
+      name: 'small',
+      version: '0.5.1',
+      gem_uri: 'https://rubygems.org/gems/small-0.5.1.gem',
+    })
+    fetch._getPackage = sinon.stub().callsFake((spec, destination) =>
+      getPackageStub('test/fixtures/ruby/small-0.5.1.gem', destination))
+  })
+
+  function verifyFetch(result) {
+    expect(result.url).to.be.equal('cd:/ruby/rubygems/-/small/0.5.1')
+    expect(result.casedSpec.toUrl()).to.be.equal('cd:/ruby/rubygems/-/small/0.5.1')
+    expect(result.document.hashes).to.be.deep.equal({
+      sha1: 'f343d34992fffa1e4abbb1a2bfae45fcf49123ba',
+      sha256: '2b5e4ba4e915e897d6fe9392c1cd1f5a21f8e7963679fb23f0a1953124772da0'
+    })
+    expect(result.document.releaseDate).to.be.equal('2012-05-21')
+  }
+
+  it('fetch spec with version', async () => {
+    const result = await fetch.handle(new Request('test', 'cd:/ruby/rubygems/-/small/0.5.1'))
+    verifyFetch(result.fetchResult)
+  })
+
+  it('fetch spec without version', async () => {
+    const result = await fetch.handle(new Request('test', 'cd:/ruby/rubygems/-/small'))
+    verifyFetch(result.fetchResult)
+  })
+})
+
+const getPackageStub = async (file, destination) => {
+  await promisify(fs.copyFile)(file, destination)
+}
\ No newline at end of file
diff --git a/test/unit/providers/process/abstractProcessorTests.js b/test/unit/providers/process/abstractProcessorTests.js
index 9006eacd..f6f51cf8 100644
--- a/test/unit/providers/process/abstractProcessorTests.js
+++ b/test/unit/providers/process/abstractProcessorTests.js
@@ -147,6 +147,49 @@ describe('AbstractProcessor attach files', () => {
   })
 })
 
+describe('link and queue local tasks', () => {
+  let processor
+
+  beforeEach(() => {
+    processor = new AbstractProcessor({})
+    processor.linkAndQueueTool = sinon.stub()
+  })
+
+  it('link and queue one local task', () => {
+    const request = new Request('npm', 'cd:/npm/npmjs/-/redie/0.3.0')
+    processor.addLocalToolTasks(request, 'clearlydefined')
+    expect(processor.linkAndQueueTool.calledOnce).to.be.true
+    expect(processor.linkAndQueueTool.args[0][0].type).to.be.equal('npm')
+    expect(processor.linkAndQueueTool.args[0][1]).to.be.equal('clearlydefined')
+    expect(processor.linkAndQueueTool.args[0][3]).to.be.equal('local')
+  })
+
+  it('link and queue two local tasks', () => {
+    const request = new Request('npm', 'cd:/npm/npmjs/-/redie/0.3.0')
+    processor.addLocalToolTasks(request, 'clearlydefined', 'licensee')
+
+    expect(processor.linkAndQueueTool.callCount).to.be.equal(2)
+    expect(processor.linkAndQueueTool.args[0][0].type).to.be.equal('npm')
+    expect(processor.linkAndQueueTool.args[0][1]).to.be.equal('clearlydefined')
+    expect(processor.linkAndQueueTool.args[0][3]).to.be.equal('local')
+
+    expect(processor.linkAndQueueTool.args[1][0].type).to.be.equal('npm')
+    expect(processor.linkAndQueueTool.args[1][1]).to.be.equal('licensee')
+    expect(processor.linkAndQueueTool.args[1][3]).to.be.equal('local')
+  })
+
+  it('link and queue default local tasks', () => {
+    const request = new Request('npm', 'cd:/npm/npmjs/-/redie/0.3.0')
+    processor.addLocalToolTasks(request)
+    expect(processor.linkAndQueueTool.callCount).to.be.equal(3)
+    expect(processor.linkAndQueueTool.args.map(call => call[1])).to.have.members([
+      'licensee',
+      'scancode',
+      'reuse'
+    ])
+  })
+})
+
 describe('AbstractProcessor get interesting files', () => {
   it('filters out uninteresting files', async () => {
     const processor = new AbstractProcessor({})
diff --git a/test/unit/providers/process/sourceTests.js b/test/unit/providers/process/sourceTests.js
new file mode 100644
index 00000000..2d243f6e
--- /dev/null
+++ b/test/unit/providers/process/sourceTests.js
@@ -0,0 +1,35 @@
+const expect = require('chai').expect
+const sinon = require('sinon')
+const Request = require('../../../../ghcrawler').request
+const SourceProcessor = require('../../../../providers/process/source').processor
+
+describe('Source processing', () => {
+  let processor
+
+  beforeEach(() => {
+    processor = SourceProcessor({})
+  })
+
+  it('process source package correctly', async () => {
+    processor.linkAndQueueTool = sinon.stub()
+
+    const request = mockRequest('cd:/sourcearchive/mavengoogle/android.arch.lifecycle/common/1.0.1')
+    processor.handle(request)
+
+    expect(processor.linkAndQueueTool.callCount).to.be.equal(4)
+    expect(processor.linkAndQueueTool.args.map(call => call[1])).to.have.members([
+      'clearlydefined',
+      'licensee',
+      'scancode',
+      'reuse'
+    ])
+  })
+})
+
+function mockRequest(url) {
+  const request = new Request('source', url)
+  request.document = {
+    _metadata: { links: {} }
+  }
+  return request
+}
\ No newline at end of file