From bbc21d8fc8a9a241a26d7d494d374b8c4e8ec196 Mon Sep 17 00:00:00 2001 From: jorgee Date: Thu, 24 Apr 2025 14:56:16 +0200 Subject: [PATCH 01/19] add fromLineage channel factory Signed-off-by: jorgee --- .../src/main/groovy/nextflow/Channel.groovy | 22 +++ .../nextflow/extension/LinChannelEx.groovy | 9 ++ .../nextflow/lineage/LinChanneExImpl.groovy | 65 ++++++++ .../src/main/nextflow/lineage/LinUtils.groovy | 2 +- .../src/resources/META-INF/extensions.idx | 1 + .../lineage/LinChanneExImplTest.groovy | 144 ++++++++++++++++++ 6 files changed, 242 insertions(+), 1 deletion(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy create mode 100644 modules/nf-lineage/src/main/nextflow/lineage/LinChanneExImpl.groovy create mode 100644 modules/nf-lineage/src/test/nextflow/lineage/LinChanneExImplTest.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/Channel.groovy b/modules/nextflow/src/main/groovy/nextflow/Channel.groovy index ee1bb55f43..529ee623bb 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Channel.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Channel.groovy @@ -16,6 +16,9 @@ package nextflow +import nextflow.extension.LinChannelEx +import nextflow.plugin.Plugins + import static nextflow.util.CheckHelper.* import java.nio.file.FileSystem @@ -657,4 +660,23 @@ class Channel { fromPath0Future = future.exceptionally(Channel.&handlerException) } + static DataflowWriteChannel fromLineage(String uri) { + final result = CH.create() + if( NF.isDsl2() ) { + session.addIgniter { fromLineage0(result, uri) } + } + else { + fromLineage0(result, uri ) + } + return result + } + + private static void fromLineage0(DataflowWriteChannel channel, String uri) { + final operation = Plugins.getExtension(LinChannelEx) + if( !operation ) + throw new IllegalStateException("Unable to load lineage extensions.") + def future = CompletableFuture.runAsync( { operation.queryLineage(session, channel, new URI(uri)) } as Runnable) + 
future.exceptionally(this.&handlerException) + } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy new file mode 100644 index 0000000000..2f96dfa969 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy @@ -0,0 +1,9 @@ +package nextflow.extension + +import groovyx.gpars.dataflow.DataflowWriteChannel +import nextflow.Session + +interface LinChannelEx { + void queryLineage(Session session, DataflowWriteChannel channel, URI uri) + +} \ No newline at end of file diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinChanneExImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinChanneExImpl.groovy new file mode 100644 index 0000000000..0f3a6fb944 --- /dev/null +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinChanneExImpl.groovy @@ -0,0 +1,65 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.lineage + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import groovyx.gpars.dataflow.DataflowWriteChannel +import nextflow.Channel +import nextflow.Session +import nextflow.extension.LinChannelEx + +/** + * Lineage channel extensions + * + * @author Jorge Ejarque + */ +@CompileStatic +@Slf4j +class LinChanneExImpl implements LinChannelEx{ + + void queryLineage(Session session, DataflowWriteChannel channel, URI uri) { + final store = getStore(session) + emitResults(channel, LinUtils.query(store, uri)) + channel.bind(Channel.STOP) + } + + protected LinStore getStore(Session session){ + final store = LinStoreFactory.getOrCreate(session) + if( !store ) { + throw new Exception("Lineage store not found - Check Nextflow configuration") + } + return store + } + + private static void emitResults(DataflowWriteChannel channel, Collection results){ + if( !results ) { + return + } + // Remove nested collections of a single element + if( results.size() == 1 ) { + final entry = results[0] + if( entry instanceof Collection ) { + emitResults(channel, entry) + } else { + channel.bind(LinUtils.encodeSearchOutputs(entry)) + } + } else + results.forEach { channel.bind(LinUtils.encodeSearchOutputs(it)) } + } + +} diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy index dfb5a4e634..b62d67abaf 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy @@ -316,7 +316,7 @@ class LinUtils { * @param output Output to encode * @return Output encoded as a JSON string */ - static String encodeSearchOutputs(Object output, boolean prettyPrint) { + static String encodeSearchOutputs(Object output, boolean prettyPrint = false) { if (output instanceof LinSerializable) { return new LinEncoder().withPrettyPrint(prettyPrint).encode(output) } else { diff --git 
a/modules/nf-lineage/src/resources/META-INF/extensions.idx b/modules/nf-lineage/src/resources/META-INF/extensions.idx index 53c350a1be..d4809ad1ad 100644 --- a/modules/nf-lineage/src/resources/META-INF/extensions.idx +++ b/modules/nf-lineage/src/resources/META-INF/extensions.idx @@ -17,3 +17,4 @@ nextflow.lineage.DefaultLinStoreFactory nextflow.lineage.LinObserverFactory nextflow.lineage.cli.LinCommandImpl +nextflow.lineage.LinChanneExImpl \ No newline at end of file diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinChanneExImplTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinChanneExImplTest.groovy new file mode 100644 index 0000000000..5a67a08ea5 --- /dev/null +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinChanneExImplTest.groovy @@ -0,0 +1,144 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.lineage + +import nextflow.Channel +import nextflow.extension.CH +import nextflow.lineage.model.Annotation +import nextflow.lineage.model.FileOutput + +import java.nio.file.Path +import java.time.Instant +import java.time.OffsetDateTime + +import nextflow.Session +import nextflow.lineage.config.LineageConfig +import nextflow.lineage.model.Checksum +import nextflow.lineage.model.DataPath +import nextflow.lineage.model.Parameter +import nextflow.lineage.model.Workflow +import nextflow.lineage.model.WorkflowOutput +import nextflow.lineage.model.WorkflowRun + +import spock.lang.Specification +import spock.lang.TempDir + +import java.time.ZoneOffset + +/** + * Lineage channel extensions tests + * + * @author Jorge Ejarque + */ +class LinChanneExImplTest extends Specification { + + @TempDir + Path tempDir + + Path storeLocation + Map configMap + + def setup() { + storeLocation = tempDir.resolve("store") + configMap = [linage: [enabled: true, store: [location: storeLocation.toString()]]] + } + + def 'should get metadata fragment'() { + + given: + def uniqueId = UUID.randomUUID() + def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) + def workflow = new Workflow([mainScript], "https://nextflow.io/nf-test/", "123456") + def key = "testKey" + def params = [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")] + def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", params) + def outputs = [new Parameter("String", "output", "name")] + def wfOutputs = new WorkflowOutput(OffsetDateTime.now(), "lid://testKey", outputs) + def lidStore = new DefaultLinStore() + def session = Mock(Session) { + getConfig() >> configMap + } + lidStore.open(LineageConfig.create(session)) + lidStore.save(key, value1) + lidStore.save("$key#output", wfOutputs) + def channelLinExt = Spy(new LinChanneExImpl()) + + when: + def results = CH.create() + 
channelLinExt.queryLineage(session, results, new URI('lid://testKey#params')) + then: + channelLinExt.getStore(session) >> lidStore + and: + results.val == LinUtils.encodeSearchOutputs(params[0]) + results.val == LinUtils.encodeSearchOutputs(params[1]) + results.val == Channel.STOP + + when: + results = CH.create() + channelLinExt.queryLineage(session, results, new URI('lid://testKey#output')) + then: + channelLinExt.getStore(session) >> lidStore + and: + results.val == LinUtils.encodeSearchOutputs(outputs[0]) + results.val == Channel.STOP + } + + def 'should return global query results' () { + given: + def uniqueId = UUID.randomUUID() + def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(1234567), ZoneOffset.UTC) + def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) + def workflow = new Workflow([mainScript],"https://nextflow.io/nf-test/", "123456" ) + def key = "testKey" + def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [ new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")] ) + def key2 = "testKey2" + def value2 = new FileOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key1","value1"), new Annotation("key2","value2")]) + def key3 = "testKey3" + def value3 = new FileOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key2","value2"), new Annotation("key3","value3")]) + def key4 = "testKey4" + def value4 = new FileOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key4","value4"), new Annotation("key3","value3")]) + def lidStore = new DefaultLinStore() + def session = Mock(Session) { + getConfig() >> configMap + } + lidStore.open(LineageConfig.create(session)) + lidStore.save(key, value1) + 
lidStore.save(key2, value2) + lidStore.save(key3, value3) + lidStore.save(key4, value4) + def channelLinExt = Spy(new LinChanneExImpl()) + when: + def results = CH.create() + channelLinExt.queryLineage(session, results, new URI("cid:///?type=FileOutput&annotations.key=key2&annotations.value=value2")) + then: + channelLinExt.getStore(session) >> lidStore + and: + results.val == LinUtils.encodeSearchOutputs(value2) + results.val == LinUtils.encodeSearchOutputs(value3) + results.val == Channel.STOP + + when: + results = CH.create() + channelLinExt.queryLineage(session, results, new URI("cid:///?type=FileOutput&annotations.key=key2&annotations.value=value2#path")) + then: + channelLinExt.getStore(session) >> lidStore + and: + results.val == '"/path/tp/file1"' + results.val == '"/path/tp/file2"' + results.val == Channel.STOP + } +} From fbdbeef23bde4ae2ed3c7bf628411cb4a2410087 Mon Sep 17 00:00:00 2001 From: jorgee Date: Thu, 24 Apr 2025 16:19:11 +0200 Subject: [PATCH 02/19] include query factory Signed-off-by: jorgee --- .../src/main/groovy/nextflow/Channel.groovy | 21 ++++++++++++++++++- .../nextflow/extension/LinChannelEx.groovy | 3 ++- .../nextflow/lineage/LinChanneExImpl.groovy | 18 +++++++++++++++- .../lineage/LinChanneExImplTest.groovy | 8 +++---- 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/Channel.groovy b/modules/nextflow/src/main/groovy/nextflow/Channel.groovy index 529ee623bb..78e8c6e8f5 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Channel.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Channel.groovy @@ -675,7 +675,26 @@ class Channel { final operation = Plugins.getExtension(LinChannelEx) if( !operation ) throw new IllegalStateException("Unable to load lineage extensions.") - def future = CompletableFuture.runAsync( { operation.queryLineage(session, channel, new URI(uri)) } as Runnable) + def future = CompletableFuture.runAsync( { operation.viewLineage(session, channel, new 
URI(uri)) } as Runnable) + future.exceptionally(this.&handlerException) + } + + static DataflowWriteChannel fromLineageQuery(String queryString) { + final result = CH.create() + if( NF.isDsl2() ) { + session.addIgniter { fromLineageQuery0(result, queryString) } + } + else { + fromLineageQuery0(result, queryString ) + } + return result + } + + private static void fromLineageQuery0(DataflowWriteChannel channel, String query) { + final operation = Plugins.getExtension(LinChannelEx) + if( !operation ) + throw new IllegalStateException("Unable to load lineage extensions.") + def future = CompletableFuture.runAsync( { operation.queryLineage(session, channel, query) } as Runnable) future.exceptionally(this.&handlerException) } diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy index 2f96dfa969..482ef26263 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy @@ -4,6 +4,7 @@ import groovyx.gpars.dataflow.DataflowWriteChannel import nextflow.Session interface LinChannelEx { - void queryLineage(Session session, DataflowWriteChannel channel, URI uri) + void viewLineage(Session session, DataflowWriteChannel channel, URI uri) + void queryLineage(Session session, DataflowWriteChannel channel, String query) } \ No newline at end of file diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinChanneExImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinChanneExImpl.groovy index 0f3a6fb944..7a4365fd71 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinChanneExImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinChanneExImpl.groovy @@ -22,6 +22,9 @@ import groovyx.gpars.dataflow.DataflowWriteChannel import nextflow.Channel import nextflow.Session import nextflow.extension.LinChannelEx +import nextflow.lineage.fs.LinPath +import 
nextflow.lineage.fs.LinPathFactory +import nextflow.lineage.serde.LinSerializable /** * Lineage channel extensions @@ -32,12 +35,19 @@ import nextflow.extension.LinChannelEx @Slf4j class LinChanneExImpl implements LinChannelEx{ - void queryLineage(Session session, DataflowWriteChannel channel, URI uri) { + void viewLineage(Session session, DataflowWriteChannel channel, URI uri) { final store = getStore(session) emitResults(channel, LinUtils.query(store, uri)) channel.bind(Channel.STOP) } + void queryLineage(Session session, DataflowWriteChannel channel, String query) { + final store = getStore(session) + emitSearchResults(channel, store.search(query)) + channel.bind(Channel.STOP) + } + + protected LinStore getStore(Session session){ final store = LinStoreFactory.getOrCreate(session) if( !store ) { @@ -62,4 +72,10 @@ class LinChanneExImpl implements LinChannelEx{ results.forEach { channel.bind(LinUtils.encodeSearchOutputs(it)) } } + private void emitSearchResults(DataflowWriteChannel channel, Map results) { + if( !results ) { + return + } + results.keySet().forEach { channel.bind(LinPathFactory.create(LinPath.LID_PROT + it)) } + } } diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinChanneExImplTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinChanneExImplTest.groovy index 5a67a08ea5..75b527b4e1 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/LinChanneExImplTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinChanneExImplTest.groovy @@ -79,7 +79,7 @@ class LinChanneExImplTest extends Specification { when: def results = CH.create() - channelLinExt.queryLineage(session, results, new URI('lid://testKey#params')) + channelLinExt.viewLineage(session, results, new URI('lid://testKey#params')) then: channelLinExt.getStore(session) >> lidStore and: @@ -89,7 +89,7 @@ class LinChanneExImplTest extends Specification { when: results = CH.create() - channelLinExt.queryLineage(session, results, new URI('lid://testKey#output')) + 
channelLinExt.viewLineage(session, results, new URI('lid://testKey#output')) then: channelLinExt.getStore(session) >> lidStore and: @@ -123,7 +123,7 @@ class LinChanneExImplTest extends Specification { def channelLinExt = Spy(new LinChanneExImpl()) when: def results = CH.create() - channelLinExt.queryLineage(session, results, new URI("cid:///?type=FileOutput&annotations.key=key2&annotations.value=value2")) + channelLinExt.viewLineage(session, results, new URI("cid:///?type=FileOutput&annotations.key=key2&annotations.value=value2")) then: channelLinExt.getStore(session) >> lidStore and: @@ -133,7 +133,7 @@ class LinChanneExImplTest extends Specification { when: results = CH.create() - channelLinExt.queryLineage(session, results, new URI("cid:///?type=FileOutput&annotations.key=key2&annotations.value=value2#path")) + channelLinExt.viewLineage(session, results, new URI("cid:///?type=FileOutput&annotations.key=key2&annotations.value=value2#path")) then: channelLinExt.getStore(session) >> lidStore and: From 3aa4f0f3830fcbd2fdb70d7452ea5cc1a1f87982 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 25 Apr 2025 21:11:52 +0200 Subject: [PATCH 03/19] add published files in output, support queries in fromPath Signed-off-by: jorgee --- .../src/main/groovy/nextflow/Session.groovy | 2 +- .../groovy/nextflow/file/PathVisitor.groovy | 23 ++- .../groovy/nextflow/file/QueryablePath.groovy | 13 ++ .../main/nextflow/lineage/LinObserver.groovy | 11 +- .../main/nextflow/lineage/fs/LinPath.groovy | 153 +++++++++++------- 5 files changed, 143 insertions(+), 59 deletions(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/file/QueryablePath.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index a486123362..acc1c4fb7d 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -381,7 +381,7 @@ class Session implements 
ISession { this.dag = new DAG() // -- init output dir - this.outputDir = FileHelper.toCanonicalPath(config.outputDir ?: 'results') + this.outputDir = FileHelper.toCanonicalPath(config.outputDir ?: config.navigate('params.outdir') ?: 'results') // -- init work dir this.workDir = FileHelper.toCanonicalPath(config.workDir ?: 'work') diff --git a/modules/nextflow/src/main/groovy/nextflow/file/PathVisitor.groovy b/modules/nextflow/src/main/groovy/nextflow/file/PathVisitor.groovy index 0a484bff9f..68e27785f8 100644 --- a/modules/nextflow/src/main/groovy/nextflow/file/PathVisitor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/file/PathVisitor.groovy @@ -66,7 +66,7 @@ class PathVisitor { applyRegexPattern0(filePattern) else if( filePattern != null ) - applyGlobPattern0(filePattern as Path) + applyPathPattern0(filePattern as Path) else throw new IllegalArgumentException("Missing file pattern argument") @@ -103,6 +103,27 @@ class PathVisitor { target.bind(STOP) } + private void applyPathPattern0(Path filePattern) { + if( isQuery(filePattern) ) + applyQueryablePath0(filePattern as QueryablePath) + else + applyGlobPattern0(filePattern) + } + + private static boolean isQuery(Path filePattern) { + log.debug("Checking if query: $filePattern.class ") + return filePattern instanceof QueryablePath && (filePattern as QueryablePath).hasQuery() + } + + private boolean applyQueryablePath0(QueryablePath path) { + final paths = path.resolveQuery() + if( !paths ) + throw new FileNotFoundException("No files found for ${path}") + + paths.forEach { emit0(it) } + close0() + } + private void applyGlobPattern0(Path filePattern) { final glob = opts?.containsKey('glob') ? 
opts.glob as boolean : true diff --git a/modules/nextflow/src/main/groovy/nextflow/file/QueryablePath.groovy b/modules/nextflow/src/main/groovy/nextflow/file/QueryablePath.groovy new file mode 100644 index 0000000000..084c43d3b5 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/file/QueryablePath.groovy @@ -0,0 +1,13 @@ +package nextflow.file + +import java.nio.file.Path + +/** + * Interface to indicate a Path could contain a query that is resolved to several real paths. + * + * @author Jorge Ejarque + */ +interface QueryablePath { + boolean hasQuery(); + List resolveQuery(); +} \ No newline at end of file diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy index 081a5bba7a..53ee984b2a 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy @@ -87,6 +87,7 @@ class LinObserver implements TraceObserver { private Session session private WorkflowOutput workflowOutput private Map outputsStoreDirLid = new HashMap(10) + private Set publishedFiles = new HashSet() private PathNormalizer normalizer LinObserver(Session session, LinStore store){ @@ -124,6 +125,10 @@ class LinObserver implements TraceObserver { @Override void onFlowComplete(){ if (this.workflowOutput){ + //Add publishedFiles + for (String path: publishedFiles){ + workflowOutput.output.add(new Parameter(Path.simpleName, null, path)) + } workflowOutput.createdAt = OffsetDateTime.now() final key = executionHash + '#output' this.store.save(key, workflowOutput) @@ -360,6 +365,7 @@ class LinObserver implements TraceObserver { LinUtils.toDate(attrs?.lastModifiedTime()), convertAnnotations(annotations)) store.save(key, value) + publishedFiles.add(asUriString(key)) } catch (Throwable e) { log.warn("Unexpected error storing published file '${destination.toUriString()}' for workflow '${executionHash}'", e) } @@ -411,8 +417,9 @@ 
class LinObserver implements TraceObserver { private Object convertPathsToLidReferences(Object value){ if( value instanceof Path ) { try { - final key = getWorkflowOutputKey(value) - return asUriString(key) + final key = asUriString(getWorkflowOutputKey(value)) + publishedFiles.remove(key) + return key } catch (Throwable e){ //Workflow output key not found return value diff --git a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy index 8d0559f55a..c94607f0eb 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy @@ -20,6 +20,7 @@ import groovy.transform.CompileStatic import groovy.util.logging.Slf4j import nextflow.file.FileHelper import nextflow.file.LogicalDataPath +import nextflow.file.QueryablePath import nextflow.lineage.model.Checksum import nextflow.lineage.model.FileOutput import nextflow.lineage.serde.LinSerializable @@ -45,13 +46,13 @@ import java.time.OffsetDateTime */ @Slf4j @CompileStatic -class LinPath implements Path, LogicalDataPath { +class LinPath implements Path, LogicalDataPath, QueryablePath { - static public final List SUPPORTED_CHECKSUM_ALGORITHMS=["nextflow"] + static public final List SUPPORTED_CHECKSUM_ALGORITHMS = ["nextflow"] static public final String SEPARATOR = '/' public static final String LID_PROT = "${SCHEME}://" - static private final String[] EMPTY = new String[] {} + static private final String[] EMPTY = new String[]{} private LinFileSystem fileSystem @@ -72,12 +73,19 @@ class LinPath implements Path, LogicalDataPath { throw new IllegalArgumentException("Invalid LID URI - scheme is different for $SCHEME") } this.fileSystem = fs + setFieldsFormURI(uri) + //Check if query and fragment are with filePath + if (query == null && fragment == null){ + setFieldsFormURI(new URI(toUriString())) + } + } + private void setFieldsFormURI(URI uri){ this.query = uri.query 
this.fragment = uri.fragment - this.filePath = resolve0( fs, norm0("${uri.authority?:''}${uri.path}") ) + this.filePath = resolve0(fileSystem, norm0("${uri.authority?:''}${uri.path}") ) } - protected LinPath(String query, String fragment, String filepath, LinFileSystem fs){ + protected LinPath(String query, String fragment, String filepath, LinFileSystem fs) { this.fileSystem = fs this.query = query this.fragment = fragment @@ -100,9 +108,9 @@ class LinPath implements Path, LogicalDataPath { return path && path.startsWith(LID_PROT) } - private static String buildPath(String first, String[] more){ + private static String buildPath(String first, String[] more) { first = norm0(first) - if (more){ + if( more ) { final morePath = norm0(more).join(SEPARATOR) return first.isEmpty() ? morePath : first + SEPARATOR + morePath } @@ -117,25 +125,25 @@ class LinPath implements Path, LogicalDataPath { } protected static void validateChecksum(Checksum checksum, Path hashedPath) { - if( !checksum) + if( !checksum ) return - if( ! isAlgorithmSupported(checksum.algorithm) ) { + if( !isAlgorithmSupported(checksum.algorithm) ) { log.warn("Checksum of '$hashedPath' can't be validated. Algorithm '${checksum.algorithm}' is not supported") return } final hash = checksum.mode - ? CacheHelper.hasher(hashedPath, CacheHelper.HashMode.of(checksum.mode.toString().toLowerCase())).hash().toString() - : CacheHelper.hasher(hashedPath).hash().toString() - if (hash != checksum.value) + ? 
CacheHelper.hasher(hashedPath, CacheHelper.HashMode.of(checksum.mode.toString().toLowerCase())).hash().toString() + : CacheHelper.hasher(hashedPath).hash().toString() + if( hash != checksum.value ) log.warn("Checksum of '$hashedPath' does not match with the one stored in the metadata") } - protected static isAlgorithmSupported( String algorithm ){ + protected static isAlgorithmSupported(String algorithm) { return algorithm && algorithm in SUPPORTED_CHECKSUM_ALGORITHMS } @TestOnly - protected String getFilePath(){ this.filePath } + protected String getFilePath() { this.filePath } /** * Finds the target path of a LinPath. @@ -149,7 +157,7 @@ class LinPath implements Path, LogicalDataPath { * IllegalArgumentException if the filepath, filesystem or its LinStore are null. * FileNotFoundException if the filePath or children are not found in the LinStore. */ - protected static Path findTarget(LinFileSystem fs, String filePath, boolean resultsAsPath, String[] children=[]) throws Exception { + protected static Path findTarget(LinFileSystem fs, String filePath, boolean resultsAsPath, String[] children = []) throws Exception { if( !fs ) throw new IllegalArgumentException("Cannot get target path for a relative lineage path") if( filePath.isEmpty() || filePath == SEPARATOR ) @@ -158,11 +166,11 @@ class LinPath implements Path, LogicalDataPath { if( !store ) throw new Exception("Lineage store not found - Check Nextflow configuration") final object = store.load(filePath) - if ( object ){ + if( object ) { if( object instanceof FileOutput ) { return getTargetPathFromOutput(object, children) } - if( resultsAsPath ){ + if( resultsAsPath ) { return getMetadataAsTargetPath(object, fs, filePath, children) } } else { @@ -180,11 +188,11 @@ class LinPath implements Path, LogicalDataPath { throw new FileNotFoundException("Target path '$filePath' does not exist") } - protected static Path getMetadataAsTargetPath(LinSerializable results, LinFileSystem fs, String filePath, String[] children){ 
+ protected static Path getMetadataAsTargetPath(LinSerializable results, LinFileSystem fs, String filePath, String[] children) { if( !results ) { throw new FileNotFoundException("Target path '$filePath' does not exist") } - if (children && children.size() > 0) { + if( children && children.size() > 0 ) { return getSubObjectAsPath(fs, filePath, results, children) } else { return generateLinMetadataPath(fs, filePath, results, children) @@ -209,13 +217,12 @@ class LinPath implements Path, LogicalDataPath { throw new FileNotFoundException("Target path '$key#output' does not exist") } return generateLinMetadataPath(fs, key, outputs, children) - } - else { + } else { return generateLinMetadataPath(fs, key, object, children) } } - private static LinMetadataPath generateLinMetadataPath(LinFileSystem fs, String key, Object object, String[] children){ + private static LinMetadataPath generateLinMetadataPath(LinFileSystem fs, String key, Object object, String[] children) { def creationTime = toFileTime(navigate(object, 'createdAt') as OffsetDateTime ?: OffsetDateTime.now()) final output = children ? 
navigate(object, children.join('.')) : object if( !output ) { @@ -229,19 +236,19 @@ class LinPath implements Path, LogicalDataPath { // return the real path stored in the metadata validateDataOutput(lidObject) def realPath = FileHelper.toCanonicalPath(lidObject.path as String) - if (children && children.size() > 0) + if( children && children.size() > 0 ) realPath = realPath.resolve(children.join(SEPARATOR)) - if (!realPath.exists()) + if( !realPath.exists() ) throw new FileNotFoundException("Target path '$realPath' does not exist") return realPath } - private static boolean isEmptyBase(LinFileSystem fs, String base){ + private static boolean isEmptyBase(LinFileSystem fs, String base) { return !base || base == SEPARATOR || (fs && base == "..") } private static String resolve0(LinFileSystem fs, String base, String[] more) { - if( isEmptyBase(fs,base) ) { + if( isEmptyBase(fs, base) ) { return resolveEmptyPathCase(fs, more as List) } if( base.contains(SEPARATOR) ) { @@ -253,8 +260,8 @@ class LinPath implements Path, LogicalDataPath { return more ? result.resolve(more.join(SEPARATOR)).toString() : result.toString() } - private static String resolveEmptyPathCase(LinFileSystem fs, List more ){ - switch(more.size()) { + private static String resolveEmptyPathCase(LinFileSystem fs, List more) { + switch( more.size() ) { case 0: return "/" case 1: @@ -265,7 +272,7 @@ class LinPath implements Path, LogicalDataPath { } static private String norm0(String path) { - if( !path || path==SEPARATOR) + if( !path || path == SEPARATOR ) return "" //Remove repeated elements path = Path.of(path.trim()).normalize().toString() @@ -273,12 +280,12 @@ class LinPath implements Path, LogicalDataPath { if( path.startsWith(SEPARATOR) ) path = path.substring(1) if( path.endsWith(SEPARATOR) ) - path = path.substring(0,path.size()-1) + path = path.substring(0, path.size() - 1) return path } - + static private String[] norm0(String... 
path) { - for( int i=0; i1 ) - return subpath(0,c-1) - if( c==1 ) - return new LinPath(fileSystem,SEPARATOR) + if( c > 1 ) + return subpath(0, c - 1) + if( c == 1 ) + return new LinPath(fileSystem, SEPARATOR) return null } @@ -322,21 +329,21 @@ class LinPath implements Path, LogicalDataPath { @Override Path getName(int index) { - if( index<0 ) + if( index < 0 ) throw new IllegalArgumentException("Path name index cannot be less than zero - offending value: $index") final path = Path.of(filePath) - if (index == path.nameCount - 1){ - return new LinPath( fragment, query, path.getName(index).toString(), null) + if( index == path.nameCount - 1 ) { + return new LinPath(fragment, query, path.getName(index).toString(), null) } - return new LinPath(index==0 ? fileSystem : null, path.getName(index).toString()) + return new LinPath(index == 0 ? fileSystem : null, path.getName(index).toString()) } @Override Path subpath(int beginIndex, int endIndex) { - if( beginIndex<0 ) + if( beginIndex < 0 ) throw new IllegalArgumentException("subpath begin index cannot be less than zero - offending value: $beginIndex") final path = Path.of(filePath) - return new LinPath(beginIndex==0 ? fileSystem : null, path.subpath(beginIndex, endIndex).toString()) + return new LinPath(beginIndex == 0 ? 
fileSystem : null, path.subpath(beginIndex, endIndex).toString()) } @Override @@ -369,7 +376,7 @@ class LinPath implements Path, LogicalDataPath { if( LinPath.class != other.class ) throw new ProviderMismatchException() - final that = (LinPath)other + final that = (LinPath) other if( that.fileSystem && this.fileSystem != that.fileSystem ) return other @@ -388,7 +395,7 @@ class LinPath implements Path, LogicalDataPath { final scheme = FileHelper.getUrlProtocol(path) if( !scheme ) { // consider the path as a lid relative path - return resolve(new LinPath(null,path)) + return resolve(new LinPath(null, path)) } if( scheme != SCHEME ) { throw new ProviderMismatchException() @@ -413,12 +420,12 @@ class LinPath implements Path, LogicalDataPath { // Compare 'filePath' as relative paths path = Path.of(filePath).relativize(Path.of(lidOther.filePath)) } - return new LinPath(lidOther.query, lidOther.fragment, path.getNameCount()>0 ? path.toString() : SEPARATOR, null) + return new LinPath(lidOther.query, lidOther.fragment, path.getNameCount() > 0 ? path.toString() : SEPARATOR, null) } @Override URI toUri() { - return asUri("${SCHEME}://${filePath}${query ? '?' + query: ''}${fragment ? '#'+ fragment : ''}") + return asUri("${SCHEME}://${filePath}${query ? '?' + query : ''}${fragment ? '#' + fragment : ''}") } String toUriString() { @@ -455,7 +462,7 @@ class LinPath implements Path, LogicalDataPath { * @return Path associated to a DataOutput or LinMetadataFile with the metadata object for other types. 
* @throws FileNotFoundException if the metadata associated to the LinPath does not exist */ - protected Path getTargetOrMetadataPath(){ + protected Path getTargetOrMetadataPath() { return findTarget(fileSystem, filePath, true, parseChildrenFromFragment(fragment)) } @@ -479,7 +486,7 @@ class LinPath implements Path, LogicalDataPath { if( LinPath.class != other.class ) { return false } - final that = (LinPath)other + final that = (LinPath) other return this.fileSystem == that.fileSystem && this.filePath.equals(that.filePath) } @@ -488,24 +495,60 @@ class LinPath implements Path, LogicalDataPath { */ @Override int hashCode() { - return Objects.hash(fileSystem,filePath) + return Objects.hash(fileSystem, filePath) } static URI asUri(String path) { - if (!path) + if( !path ) throw new IllegalArgumentException("Missing 'path' argument") - if (!path.startsWith(LID_PROT)) + if( !path.startsWith(LID_PROT) ) throw new IllegalArgumentException("Invalid LID file system path URI - it must start with '${LID_PROT}' prefix - offendinf value: $path") - if (path.startsWith(LID_PROT + SEPARATOR) && path.length() > 7) + if( path.startsWith(LID_PROT + SEPARATOR) && path.length() > 7 ) throw new IllegalArgumentException("Invalid LID file system path URI - make sure the schema prefix does not container more than two slash characters - offending value: $path") - if (path == LID_PROT) //Empty path case + if( path == LID_PROT ) //Empty path case return new URI("lid:///") return new URI(path) } @Override String toString() { - return "$filePath${query ? '?' + query: ''}${fragment ? '#'+ fragment : ''}".toString() + return "$filePath${query ? '?' + query : ''}${fragment ? 
'#' + fragment : ''}".toString() + } + + @Override + boolean hasQuery() { + //Lin path is a query when is root (no filepath or /) and has the query field + return (filePath.isEmpty() || filePath == SEPARATOR) && query && fileSystem + } + + @Override + List resolveQuery() { + final store = fileSystem.getStore() + if( !store ) + throw new Exception("Lineage store not found - Check Nextflow configuration") + final results = store.search(query) + return parseResults(results) + } + + private List parseResults(Map results) { + if( !results ) + throw new FileNotFoundException("No files found for ${this.toUriString()}") + final List parsedResults = [] + for( def res : results ) { + parsedResults << parseResult(res.key, res.value) + } + return parsedResults + } + + private Path parseResult(String key, LinSerializable object) { + if( fragment ) + return getSubObjectAsPath(fileSystem, key, object, parseChildrenFromFragment(fragment)) + + if( object instanceof FileOutput ) { + return new LinPath(fileSystem, key) + } else { + return generateLinMetadataPath(fileSystem, key, object, null) + } } } From cdd9e89cd831c0cbc229e40aebaf4abd937098c5 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 25 Apr 2025 21:24:11 +0200 Subject: [PATCH 04/19] rename fromLinageQuery to queryLineage Signed-off-by: jorgee --- .../nextflow/src/main/groovy/nextflow/Channel.groovy | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/Channel.groovy b/modules/nextflow/src/main/groovy/nextflow/Channel.groovy index 78e8c6e8f5..e1436ac0ef 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Channel.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Channel.groovy @@ -679,18 +679,18 @@ class Channel { future.exceptionally(this.&handlerException) } - static DataflowWriteChannel fromLineageQuery(String queryString) { + static DataflowWriteChannel queryLineage(String queryString) { final result = CH.create() if( NF.isDsl2() ) { - 
session.addIgniter { fromLineageQuery0(result, queryString) } + session.addIgniter { queryLineage0(result, queryString) } } else { - fromLineageQuery0(result, queryString ) + queryLineage0(result, queryString ) } return result } - private static void fromLineageQuery0(DataflowWriteChannel channel, String query) { + private static void queryLineage0(DataflowWriteChannel channel, String query) { final operation = Plugins.getExtension(LinChannelEx) if( !operation ) throw new IllegalStateException("Unable to load lineage extensions.") @@ -698,4 +698,4 @@ class Channel { future.exceptionally(this.&handlerException) } -} +} \ No newline at end of file From 62a2cfadfa02fedfffbe57b41e8c23c60b9255fb Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 29 Apr 2025 11:46:40 +0200 Subject: [PATCH 05/19] lineage API refactor and remove other implementations Signed-off-by: jorgee --- .../src/main/groovy/nextflow/Channel.groovy | 29 +++--------- .../src/main/groovy/nextflow/Session.groovy | 2 +- .../nextflow/extension/LinChannelEx.groovy | 22 +++++++++- .../nextflow/extension/OperatorImpl.groovy | 18 ++++++++ .../groovy/nextflow/file/PathVisitor.groovy | 23 +--------- .../groovy/nextflow/file/QueryablePath.groovy | 13 ------ .../nextflow/lineage/DefaultLinStore.groovy | 10 +---- ...eExImpl.groovy => LinChannelExImpl.groovy} | 36 +++++---------- .../main/nextflow/lineage/LinObserver.groovy | 11 +---- .../src/main/nextflow/lineage/LinStore.groovy | 6 +-- .../src/main/nextflow/lineage/LinUtils.groovy | 2 +- .../lineage/cli/LinCommandImpl.groovy | 6 ++- .../main/nextflow/lineage/fs/LinPath.groovy | 39 +--------------- .../src/resources/META-INF/extensions.idx | 2 +- .../lineage/DefaultLinStoreTest.groovy | 2 +- ...est.groovy => LinChannelExImplTest.groovy} | 44 ++++++++----------- .../lineage/cli/LinCommandImplTest.groovy | 10 ++++- 17 files changed, 98 insertions(+), 177 deletions(-) delete mode 100644 modules/nextflow/src/main/groovy/nextflow/file/QueryablePath.groovy rename 
modules/nf-lineage/src/main/nextflow/lineage/{LinChanneExImpl.groovy => LinChannelExImpl.groovy} (63%) rename modules/nf-lineage/src/test/nextflow/lineage/{LinChanneExImplTest.groovy => LinChannelExImplTest.groovy} (79%) diff --git a/modules/nextflow/src/main/groovy/nextflow/Channel.groovy b/modules/nextflow/src/main/groovy/nextflow/Channel.groovy index e1436ac0ef..c930a9694a 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Channel.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Channel.groovy @@ -660,41 +660,22 @@ class Channel { fromPath0Future = future.exceptionally(Channel.&handlerException) } - static DataflowWriteChannel fromLineage(String uri) { + static DataflowWriteChannel queryLineage(Map params) { final result = CH.create() if( NF.isDsl2() ) { - session.addIgniter { fromLineage0(result, uri) } + session.addIgniter { queryLineage0(result, params) } } else { - fromLineage0(result, uri ) + queryLineage0(result, params ) } return result } - private static void fromLineage0(DataflowWriteChannel channel, String uri) { + private static void queryLineage0(DataflowWriteChannel channel, Map params) { final operation = Plugins.getExtension(LinChannelEx) if( !operation ) throw new IllegalStateException("Unable to load lineage extensions.") - def future = CompletableFuture.runAsync( { operation.viewLineage(session, channel, new URI(uri)) } as Runnable) - future.exceptionally(this.&handlerException) - } - - static DataflowWriteChannel queryLineage(String queryString) { - final result = CH.create() - if( NF.isDsl2() ) { - session.addIgniter { queryLineage0(result, queryString) } - } - else { - queryLineage0(result, queryString ) - } - return result - } - - private static void queryLineage0(DataflowWriteChannel channel, String query) { - final operation = Plugins.getExtension(LinChannelEx) - if( !operation ) - throw new IllegalStateException("Unable to load lineage extensions.") - def future = CompletableFuture.runAsync( { operation.queryLineage(session, 
channel, query) } as Runnable) + def future = CompletableFuture.runAsync( { operation.queryLineage(session, channel, params) } as Runnable) future.exceptionally(this.&handlerException) } diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index acc1c4fb7d..a486123362 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -381,7 +381,7 @@ class Session implements ISession { this.dag = new DAG() // -- init output dir - this.outputDir = FileHelper.toCanonicalPath(config.outputDir ?: config.navigate('params.outdir') ?: 'results') + this.outputDir = FileHelper.toCanonicalPath(config.outputDir ?: 'results') // -- init work dir this.workDir = FileHelper.toCanonicalPath(config.workDir ?: 'work') diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy index 482ef26263..7b0978ec8e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy @@ -3,8 +3,26 @@ package nextflow.extension import groovyx.gpars.dataflow.DataflowWriteChannel import nextflow.Session +/** + * Interface to implement the Lineage channel factories and functions. 
+ * @author Jorge Ejarque params) } \ No newline at end of file diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy index 3614de19db..7ab23ecd34 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy @@ -36,6 +36,7 @@ import nextflow.Channel import nextflow.Global import nextflow.NF import nextflow.Session +import nextflow.plugin.Plugins import nextflow.script.ChannelOut import nextflow.script.TokenBranchDef import nextflow.script.TokenMultiMapDef @@ -1246,4 +1247,21 @@ class OperatorImpl { .getOutput() } + /** + * Transform the items emitted by a channel by applying a function to each of them + * + * @param channel + * @return + */ + DataflowWriteChannel lineage(final DataflowReadChannel source) { + assert source != null + final operation = Plugins.getExtension(LinChannelEx) + if( !operation ) + throw new IllegalStateException("Unable to load lineage extensions.") + final closure = { operation.viewLineage(session, it) } + return new MapOp(source, closure).apply() + } + + + } diff --git a/modules/nextflow/src/main/groovy/nextflow/file/PathVisitor.groovy b/modules/nextflow/src/main/groovy/nextflow/file/PathVisitor.groovy index 68e27785f8..0a484bff9f 100644 --- a/modules/nextflow/src/main/groovy/nextflow/file/PathVisitor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/file/PathVisitor.groovy @@ -66,7 +66,7 @@ class PathVisitor { applyRegexPattern0(filePattern) else if( filePattern != null ) - applyPathPattern0(filePattern as Path) + applyGlobPattern0(filePattern as Path) else throw new IllegalArgumentException("Missing file pattern argument") @@ -103,27 +103,6 @@ class PathVisitor { target.bind(STOP) } - private void applyPathPattern0(Path filePattern) { - if( isQuery(filePattern) ) - applyQueryablePath0(filePattern as QueryablePath) - else - 
applyGlobPattern0(filePattern) - } - - private static boolean isQuery(Path filePattern) { - log.debug("Checking if query: $filePattern.class ") - return filePattern instanceof QueryablePath && (filePattern as QueryablePath).hasQuery() - } - - private boolean applyQueryablePath0(QueryablePath path) { - final paths = path.resolveQuery() - if( !paths ) - throw new FileNotFoundException("No files found for ${path}") - - paths.forEach { emit0(it) } - close0() - } - private void applyGlobPattern0(Path filePattern) { final glob = opts?.containsKey('glob') ? opts.glob as boolean : true diff --git a/modules/nextflow/src/main/groovy/nextflow/file/QueryablePath.groovy b/modules/nextflow/src/main/groovy/nextflow/file/QueryablePath.groovy deleted file mode 100644 index 084c43d3b5..0000000000 --- a/modules/nextflow/src/main/groovy/nextflow/file/QueryablePath.groovy +++ /dev/null @@ -1,13 +0,0 @@ -package nextflow.file - -import java.nio.file.Path - -/** - * Interface to indicate a Path could contain a query that is resolved to several real paths. 
- * - * @author Jorge Ejarque - */ -interface QueryablePath { - boolean hasQuery(); - List resolveQuery(); -} \ No newline at end of file diff --git a/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy index fb64bbe2c7..92802061c2 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy @@ -103,15 +103,7 @@ class DefaultLinStore implements LinStore { void close() throws IOException { } @Override - Map search(String queryString) { - def params = null - if (queryString) { - params = LinUtils.parseQuery(queryString) - } - return searchAllFiles(params) - } - - private Map searchAllFiles(Map params) { + Map search(Map params) { final results = new HashMap() Files.walkFileTree(metaLocation, new FileVisitor() { diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinChanneExImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinChannelExImpl.groovy similarity index 63% rename from modules/nf-lineage/src/main/nextflow/lineage/LinChanneExImpl.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/LinChannelExImpl.groovy index 7a4365fd71..508ef6ce88 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinChanneExImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinChannelExImpl.groovy @@ -33,22 +33,24 @@ import nextflow.lineage.serde.LinSerializable */ @CompileStatic @Slf4j -class LinChanneExImpl implements LinChannelEx{ +class LinChannelExImpl implements LinChannelEx { - void viewLineage(Session session, DataflowWriteChannel channel, URI uri) { + Object viewLineage(Session session, String lid) { final store = getStore(session) - emitResults(channel, LinUtils.query(store, uri)) - channel.bind(Channel.STOP) + final results = LinUtils.query(store, new URI(lid)) + if( !results ) { + throw new FileNotFoundException("No entry found for $lid") + } + return 
LinUtils.encodeSearchOutputs(results.size() == 1 ? results[0] : results) } - void queryLineage(Session session, DataflowWriteChannel channel, String query) { + void queryLineage(Session session, DataflowWriteChannel channel, Map params) { final store = getStore(session) - emitSearchResults(channel, store.search(query)) + emitSearchResults(channel, store.search(params)) channel.bind(Channel.STOP) } - - protected LinStore getStore(Session session){ + protected LinStore getStore(Session session) { final store = LinStoreFactory.getOrCreate(session) if( !store ) { throw new Exception("Lineage store not found - Check Nextflow configuration") @@ -56,26 +58,10 @@ class LinChanneExImpl implements LinChannelEx{ return store } - private static void emitResults(DataflowWriteChannel channel, Collection results){ - if( !results ) { - return - } - // Remove nested collections of a single element - if( results.size() == 1 ) { - final entry = results[0] - if( entry instanceof Collection ) { - emitResults(channel, entry) - } else { - channel.bind(LinUtils.encodeSearchOutputs(entry)) - } - } else - results.forEach { channel.bind(LinUtils.encodeSearchOutputs(it)) } - } - private void emitSearchResults(DataflowWriteChannel channel, Map results) { if( !results ) { return } - results.keySet().forEach { channel.bind(LinPathFactory.create(LinPath.LID_PROT + it)) } + results.keySet().forEach { channel.bind(LinPath.LID_PROT + it) } } } diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy index 53ee984b2a..081a5bba7a 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy @@ -87,7 +87,6 @@ class LinObserver implements TraceObserver { private Session session private WorkflowOutput workflowOutput private Map outputsStoreDirLid = new HashMap(10) - private Set publishedFiles = new HashSet() private PathNormalizer 
normalizer LinObserver(Session session, LinStore store){ @@ -125,10 +124,6 @@ class LinObserver implements TraceObserver { @Override void onFlowComplete(){ if (this.workflowOutput){ - //Add publishedFiles - for (String path: publishedFiles){ - workflowOutput.output.add(new Parameter(Path.simpleName, null, path)) - } workflowOutput.createdAt = OffsetDateTime.now() final key = executionHash + '#output' this.store.save(key, workflowOutput) @@ -365,7 +360,6 @@ class LinObserver implements TraceObserver { LinUtils.toDate(attrs?.lastModifiedTime()), convertAnnotations(annotations)) store.save(key, value) - publishedFiles.add(asUriString(key)) } catch (Throwable e) { log.warn("Unexpected error storing published file '${destination.toUriString()}' for workflow '${executionHash}'", e) } @@ -417,9 +411,8 @@ class LinObserver implements TraceObserver { private Object convertPathsToLidReferences(Object value){ if( value instanceof Path ) { try { - final key = asUriString(getWorkflowOutputKey(value)) - publishedFiles.remove(key) - return key + final key = getWorkflowOutputKey(value) + return asUriString(key) } catch (Throwable e){ //Workflow output key not found return value diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy index 3f826b7a0a..ba31b12df9 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy @@ -55,9 +55,9 @@ interface LinStore extends Closeable { /** * Search for lineage entries. - * @queryString Json-path like query string. (Only simple and nested field operators are supported(No array, wildcards,etc.) 
- * @return Key-lineage entry pairs fulfilling the queryString + * @param params Map of query params + * @return Key-lineage entry pairs fulfilling the query params */ - Map search(String queryString) + Map search(Map params) } diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy index b62d67abaf..e426a67436 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy @@ -62,7 +62,7 @@ class LinUtils { } private static Collection globalSearch(LinStore store, URI uri) { - final results = store.search(uri.query).values() + final results = store.search(parseQuery(uri.query)).values() if (results && uri.fragment) { // If fragment is defined get the property of the object indicated by the fragment return filterResults(results, uri.fragment) diff --git a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy index 43290e46ff..ad4fa7477e 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy @@ -17,6 +17,7 @@ package nextflow.lineage.cli import static nextflow.lineage.fs.LinPath.* +import static nextflow.lineage.LinUtils.* import java.nio.charset.StandardCharsets import java.nio.file.Path @@ -319,7 +320,10 @@ class LinCommandImpl implements CmdLineage.LinCommand { return } try { - println LinUtils.encodeSearchOutputs(store.search(args[0]).keySet().collect {asUriString(it)}, true) + final params = args.collectEntries { + it.split('=').collect { URLDecoder.decode(it, 'UTF-8') } + } as Map + println LinUtils.encodeSearchOutputs( store.search(params).keySet().collect { asUriString(it) }, true ) } catch (Throwable e){ println "Error searching for ${args[0]}. 
${e.message}" } diff --git a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy index c94607f0eb..fccaf88ab8 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy @@ -20,7 +20,6 @@ import groovy.transform.CompileStatic import groovy.util.logging.Slf4j import nextflow.file.FileHelper import nextflow.file.LogicalDataPath -import nextflow.file.QueryablePath import nextflow.lineage.model.Checksum import nextflow.lineage.model.FileOutput import nextflow.lineage.serde.LinSerializable @@ -46,7 +45,7 @@ import java.time.OffsetDateTime */ @Slf4j @CompileStatic -class LinPath implements Path, LogicalDataPath, QueryablePath { +class LinPath implements Path, LogicalDataPath { static public final List SUPPORTED_CHECKSUM_ALGORITHMS = ["nextflow"] static public final String SEPARATOR = '/' @@ -515,41 +514,5 @@ class LinPath implements Path, LogicalDataPath, QueryablePath { return "$filePath${query ? '?' + query : ''}${fragment ? 
'#' + fragment : ''}".toString() } - @Override - boolean hasQuery() { - //Lin path is a query when is root (no filepath or /) and has the query field - return (filePath.isEmpty() || filePath == SEPARATOR) && query && fileSystem - } - - @Override - List resolveQuery() { - final store = fileSystem.getStore() - if( !store ) - throw new Exception("Lineage store not found - Check Nextflow configuration") - final results = store.search(query) - return parseResults(results) - } - - private List parseResults(Map results) { - if( !results ) - throw new FileNotFoundException("No files found for ${this.toUriString()}") - final List parsedResults = [] - for( def res : results ) { - parsedResults << parseResult(res.key, res.value) - } - return parsedResults - } - - private Path parseResult(String key, LinSerializable object) { - if( fragment ) - return getSubObjectAsPath(fileSystem, key, object, parseChildrenFromFragment(fragment)) - - if( object instanceof FileOutput ) { - return new LinPath(fileSystem, key) - } else { - return generateLinMetadataPath(fileSystem, key, object, null) - } - } - } diff --git a/modules/nf-lineage/src/resources/META-INF/extensions.idx b/modules/nf-lineage/src/resources/META-INF/extensions.idx index d4809ad1ad..85b08c4461 100644 --- a/modules/nf-lineage/src/resources/META-INF/extensions.idx +++ b/modules/nf-lineage/src/resources/META-INF/extensions.idx @@ -17,4 +17,4 @@ nextflow.lineage.DefaultLinStoreFactory nextflow.lineage.LinObserverFactory nextflow.lineage.cli.LinCommandImpl -nextflow.lineage.LinChanneExImpl \ No newline at end of file +nextflow.lineage.LinChannelExImpl \ No newline at end of file diff --git a/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreTest.groovy index db135923d4..16c7f02606 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreTest.groovy @@ 
-127,7 +127,7 @@ class DefaultLinStoreTest extends Specification { lidStore.save(key4, value4) when: - def results = lidStore.search("type=FileOutput&annotations.key=key2&annotations.value=value2") + def results = lidStore.search("type":"FileOutput", "annotations.key":"key2", "annotations.value":"value2") then: results.size() == 2 results.keySet().containsAll([key2,key3]) diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinChanneExImplTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinChannelExImplTest.groovy similarity index 79% rename from modules/nf-lineage/src/test/nextflow/lineage/LinChanneExImplTest.groovy rename to modules/nf-lineage/src/test/nextflow/lineage/LinChannelExImplTest.groovy index 75b527b4e1..29a01007c6 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/LinChanneExImplTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinChannelExImplTest.groovy @@ -39,12 +39,14 @@ import spock.lang.TempDir import java.time.ZoneOffset +import static nextflow.lineage.fs.LinPath.* + /** * Lineage channel extensions tests * * @author Jorge Ejarque */ -class LinChanneExImplTest extends Specification { +class LinChannelExImplTest extends Specification { @TempDir Path tempDir @@ -75,26 +77,28 @@ class LinChanneExImplTest extends Specification { lidStore.open(LineageConfig.create(session)) lidStore.save(key, value1) lidStore.save("$key#output", wfOutputs) - def channelLinExt = Spy(new LinChanneExImpl()) + def channelLinExt = Spy(new LinChannelExImpl()) when: - def results = CH.create() - channelLinExt.viewLineage(session, results, new URI('lid://testKey#params')) + def results = channelLinExt.viewLineage(session, 'lid://testKey') then: channelLinExt.getStore(session) >> lidStore and: - results.val == LinUtils.encodeSearchOutputs(params[0]) - results.val == LinUtils.encodeSearchOutputs(params[1]) - results.val == Channel.STOP + results == LinUtils.encodeSearchOutputs(value1) when: - results = CH.create() - 
channelLinExt.viewLineage(session, results, new URI('lid://testKey#output')) + results = channelLinExt.viewLineage(session, 'lid://testKey#params') then: channelLinExt.getStore(session) >> lidStore and: - results.val == LinUtils.encodeSearchOutputs(outputs[0]) - results.val == Channel.STOP + results == LinUtils.encodeSearchOutputs(params) + + when: + results = channelLinExt.viewLineage(session, 'lid://testKey#output') + then: + channelLinExt.getStore(session) >> lidStore + and: + results == LinUtils.encodeSearchOutputs(outputs) } def 'should return global query results' () { @@ -120,25 +124,15 @@ class LinChanneExImplTest extends Specification { lidStore.save(key2, value2) lidStore.save(key3, value3) lidStore.save(key4, value4) - def channelLinExt = Spy(new LinChanneExImpl()) + def channelLinExt = Spy(new LinChannelExImpl()) when: def results = CH.create() - channelLinExt.viewLineage(session, results, new URI("cid:///?type=FileOutput&annotations.key=key2&annotations.value=value2")) - then: - channelLinExt.getStore(session) >> lidStore - and: - results.val == LinUtils.encodeSearchOutputs(value2) - results.val == LinUtils.encodeSearchOutputs(value3) - results.val == Channel.STOP - - when: - results = CH.create() - channelLinExt.viewLineage(session, results, new URI("cid:///?type=FileOutput&annotations.key=key2&annotations.value=value2#path")) + channelLinExt.queryLineage(session, results, [ "type":"FileOutput", "annotations.key":"key2", "annotations.value":"value2" ]) then: channelLinExt.getStore(session) >> lidStore and: - results.val == '"/path/tp/file1"' - results.val == '"/path/tp/file2"' + results.val == asUriString(key2) + results.val == asUriString(key3) results.val == Channel.STOP } } diff --git a/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy index ce17b89177..42c5276ff1 100644 --- 
a/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy @@ -22,6 +22,7 @@ import nextflow.dag.MermaidHtmlRenderer import nextflow.lineage.LinHistoryRecord import nextflow.lineage.LinStoreFactory import nextflow.lineage.DefaultLinHistoryLog +import nextflow.lineage.model.Annotation import nextflow.lineage.model.Checksum import nextflow.lineage.model.FileOutput import nextflow.lineage.model.DataPath @@ -439,18 +440,23 @@ class LinCommandImplTest extends Specification{ Files.createDirectories(lidFile.parent) def lidFile2 = storeLocation.resolve(".meta/123987/file2.bam/.data.json") Files.createDirectories(lidFile2.parent) + def lidFile3 = storeLocation.resolve(".meta/123987/file3.bam/.data.json") + Files.createDirectories(lidFile3.parent) def encoder = new LinEncoder().withPrettyPrint(true) def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, null) + "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, [new Annotation("experiment", "test")]) def entry2 = new FileOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), + "lid://123987/file2.bam", "lid://123987/", null, 1235, time, time, [new Annotation("experiment", "test")]) + def entry3 = new FileOutput("path/to/file3",new Checksum("42472qet","nextflow","standard"), "lid://123987/file2.bam", "lid://123987/", null, 1235, time, time, null) def expectedOutput1 = '[\n "lid://123987/file.bam",\n "lid://123987/file2.bam"\n]' def expectedOutput2 = '[\n "lid://123987/file2.bam",\n "lid://123987/file.bam"\n]' lidFile.text = encoder.encode(entry) lidFile2.text = encoder.encode(entry2) + lidFile3.text = encoder.encode(entry3) when: - new LinCommandImpl().find(configMap, ["type=FileOutput"]) + new 
LinCommandImpl().find(configMap, ["type=FileOutput", "annotations.value=test"]) def stdout = capture .toString() .readLines()// remove the log part From 3e377283748279c618df84a92979a80309a17bfd Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 29 Apr 2025 11:54:45 +0200 Subject: [PATCH 06/19] Correct lineage function comment Signed-off-by: jorgee --- .../src/main/groovy/nextflow/extension/OperatorImpl.groovy | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy index 7ab23ecd34..bd95568e18 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy @@ -1248,9 +1248,9 @@ class OperatorImpl { } /** - * Transform the items emitted by a channel by applying a function to each of them + * Transform the Lineage ID items emitted in the source channel by its lineage metadata description * - * @param channel + * @param channel Source channel with emitted lineage IDs * @return */ DataflowWriteChannel lineage(final DataflowReadChannel source) { From 6abae3c98e2e46c9feb11ae4441f705d70d3dc5b Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 29 Apr 2025 16:12:06 +0200 Subject: [PATCH 07/19] Convert lineage from operator to function and add documentation Signed-off-by: jorgee --- docs/reference/channel.md | 34 +++++++++++++++++++ docs/reference/cli.md | 4 +-- docs/reference/stdlib.md | 5 +++ .../src/main/groovy/nextflow/Nextflow.groovy | 9 +++++ .../nextflow/extension/LinChannelEx.groovy | 2 +- .../nextflow/extension/OperatorImpl.groovy | 18 ---------- .../nextflow/lineage/LinChannelExImpl.groovy | 6 +--- .../lineage/LinChannelExImplTest.groovy | 12 ++----- 8 files changed, 55 insertions(+), 35 deletions(-) diff --git a/docs/reference/channel.md b/docs/reference/channel.md index 5d3b54c95f..d76ff7df1e 100644 --- 
a/docs/reference/channel.md +++ b/docs/reference/channel.md @@ -405,6 +405,40 @@ Y See also: [channel.fromList](#fromlist) factory method. +(channel-query-lineage)= + +## queryLineage + +:::{versionadded} 25.04.0 +::: + +:::{warning} *Experimental: may change in a future release.* +::: + +The `channel.queryLineage` method allows you to create a channel that emits the IDs of the lineage metadata objects matching with a set of key-value parameters passed as arguments of the method. + +The following snippet shows how to create a channel (`ch`) using this method. It searches for `FileOutputs` annotated with the value 'test'. +The result is a set of Lineage IDs (lid) that can be consumed by processes as `path` or inspected with the `lineage` function. + +```nextflow + process foo { + input: + path('output_file') + + // ... + } + + workflow { + ch = channel + .queryLineage('type': 'FileOutput', 'annotations.value': 'test') + + foo(ch) + + ch.map { lid -> lineage(lid) } + } + +``` + (channel-topic)= ## topic diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 67cc43d104..0f8dd965fa 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -732,10 +732,10 @@ View a metadata description fragment. A fragment can be a property of a metadata $ nextflow lineage view ``` -Find a specific metadata description that matches a URL-like query string. The query string consists of `key=value` statements separated by `&`, where keys are defined similarly to the `fragments` used in the `view` command. +Find a specific metadata description that matches to a set of key-value parameters. Keys are defined similarly to the `fragments` used in the `view` command. ```console -$ nextflow lineage find "" +$ nextflow lineage find = = ... ``` Display a git-style diff between two metadata descriptions. 
diff --git a/docs/reference/stdlib.md b/docs/reference/stdlib.md index d00e1091d2..99b7b19c3d 100644 --- a/docs/reference/stdlib.md +++ b/docs/reference/stdlib.md @@ -235,6 +235,11 @@ The following functions are available in Nextflow scripts: `groupKey( key, size: int ) -> GroupKey` : Create a grouping key to use with the {ref}`operator-grouptuple` operator. +`lineage( lid ) -> LinSerializable` +: :::{versionadded} 25.04.0 +: ::: +: Get the Lineage metadata object + `multiMapCriteria( criteria: Closure ) -> Closure` : Create a multi-map criteria to use with the {ref}`operator-multiMap` operator. diff --git a/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy b/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy index aee6368755..52dd250ba8 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy @@ -29,10 +29,12 @@ import nextflow.ast.OpXformImpl import nextflow.exception.StopSplitIterationException import nextflow.exception.WorkflowScriptErrorException import nextflow.extension.GroupKey +import nextflow.extension.LinChannelEx import nextflow.extension.OperatorImpl import nextflow.file.FileHelper import nextflow.file.FilePatternSplitter import nextflow.mail.Mailer +import nextflow.plugin.Plugins import nextflow.script.TokenBranchDef import nextflow.script.TokenMultiMapDef import nextflow.splitter.FastaSplitter @@ -422,4 +424,11 @@ class Nextflow { */ static Closure multiMapCriteria(Closure closure) { closure } + static Object lineage( String lid ) { + final operation = Plugins.getExtension(LinChannelEx) + if( !operation ) + throw new IllegalStateException("Unable to load lineage extensions.") + return operation.viewLineage(session, lid) + } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy index 7b0978ec8e..195ac870c4 100644 --- 
a/modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy @@ -13,7 +13,7 @@ interface LinChannelEx { * * @param session Nextflow Session * @param lid Lineage Id to view - * @return Lineage metadata content + * @return Lineage metadata object */ Object viewLineage(Session session, String lid) diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy index bd95568e18..e13b79d017 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy @@ -1246,22 +1246,4 @@ class OperatorImpl { .apply() .getOutput() } - - /** - * Transform the Lineage ID items emitted in the source channel by its lineage metadata description - * - * @param channel Source channel with emitted lineage IDs - * @return - */ - DataflowWriteChannel lineage(final DataflowReadChannel source) { - assert source != null - final operation = Plugins.getExtension(LinChannelEx) - if( !operation ) - throw new IllegalStateException("Unable to load lineage extensions.") - final closure = { operation.viewLineage(session, it) } - return new MapOp(source, closure).apply() - } - - - } diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinChannelExImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinChannelExImpl.groovy index 508ef6ce88..ae41c5d979 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinChannelExImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinChannelExImpl.groovy @@ -37,11 +37,7 @@ class LinChannelExImpl implements LinChannelEx { Object viewLineage(Session session, String lid) { final store = getStore(session) - final results = LinUtils.query(store, new URI(lid)) - if( !results ) { - throw new FileNotFoundException("No entry found for $lid") - } - return 
LinUtils.encodeSearchOutputs(results.size() == 1 ? results[0] : results) + return store.load(LinPathFactory.create(lid).toString()) } void queryLineage(Session session, DataflowWriteChannel channel, Map params) { diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinChannelExImplTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinChannelExImplTest.groovy index 29a01007c6..acaef23fbf 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/LinChannelExImplTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinChannelExImplTest.groovy @@ -59,7 +59,7 @@ class LinChannelExImplTest extends Specification { configMap = [linage: [enabled: true, store: [location: storeLocation.toString()]]] } - def 'should get metadata fragment'() { + def 'should get metadata'() { given: def uniqueId = UUID.randomUUID() @@ -84,21 +84,15 @@ class LinChannelExImplTest extends Specification { then: channelLinExt.getStore(session) >> lidStore and: - results == LinUtils.encodeSearchOutputs(value1) + results == value1 - when: - results = channelLinExt.viewLineage(session, 'lid://testKey#params') - then: - channelLinExt.getStore(session) >> lidStore - and: - results == LinUtils.encodeSearchOutputs(params) when: results = channelLinExt.viewLineage(session, 'lid://testKey#output') then: channelLinExt.getStore(session) >> lidStore and: - results == LinUtils.encodeSearchOutputs(outputs) + results == wfOutputs } def 'should return global query results' () { From 584c4eebd4172ca6dc136406982a8d35d722f9c0 Mon Sep 17 00:00:00 2001 From: jorgee Date: Wed, 30 Apr 2025 12:53:41 +0200 Subject: [PATCH 08/19] remove que query in view and fromPath Signed-off-by: jorgee --- .../groovy/nextflow/cli/CmdLineageTest.groovy | 39 +------ .../nextflow/lineage/LinChannelExImpl.groovy | 9 +- .../src/main/nextflow/lineage/LinUtils.groovy | 109 +++++------------- .../lineage/cli/LinCommandImpl.groovy | 12 +- .../main/nextflow/lineage/fs/LinPath.groovy | 12 +- 
.../lineage/LinChannelExImplTest.groovy | 10 +- .../test/nextflow/lineage/LinUtilsTest.groovy | 61 ++-------- .../lineage/cli/LinCommandImplTest.groovy | 33 +----- .../nextflow/lineage/fs/LinPathTest.groovy | 37 ++++-- 9 files changed, 101 insertions(+), 221 deletions(-) diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy index 4db9155246..49cb5ef6e4 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy @@ -280,10 +280,10 @@ class CmdLineageTest extends Specification { def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "lid://123987/file.bam", "lid://12345", "lid://123987/", 1234, time, time, null) def jsonSer = encoder.encode(entry) - def expectedOutput = jsonSer + def expectedOutput = '[\n "lid://12345"\n]' lidFile.text = jsonSer when: - def lidCmd = new CmdLineage(launcher: launcher, args: ["view", "lid:///?type=FileOutput"]) + def lidCmd = new CmdLineage(launcher: launcher, args: ["find", "type=FileOutput"]) lidCmd.run() def stdout = capture .toString() @@ -300,39 +300,4 @@ class CmdLineageTest extends Specification { folder?.deleteDir() } - def 'should show query results'(){ - given: - def folder = Files.createTempDirectory('test').toAbsolutePath() - def configFile = folder.resolve('nextflow.config') - configFile.text = "lineage.enabled = true\nlineage.store.location = '$folder'".toString() - def lidFile = folder.resolve("12345/.data.json") - Files.createDirectories(lidFile.parent) - def launcher = Mock(Launcher){ - getOptions() >> new CliOptions(config: [configFile.toString()]) - } - def encoder = new LinEncoder().withPrettyPrint(true) - def time = OffsetDateTime.now() - def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "lid://123987/file.bam", "lid://12345", "lid://123987/", 1234, 
time, time, null) - def jsonSer = encoder.encode(entry) - def expectedOutput = jsonSer - lidFile.text = jsonSer - when: - def lidCmd = new CmdLineage(launcher: launcher, args: ["view", "lid:///?type=FileOutput"]) - lidCmd.run() - def stdout = capture - .toString() - .readLines()// remove the log part - .findResults { line -> !line.contains('DEBUG') ? line : null } - .findResults { line -> !line.contains('INFO') ? line : null } - .findResults { line -> !line.contains('plugin') ? line : null } - - then: - stdout.size() == expectedOutput.readLines().size() - stdout.join('\n') == expectedOutput - - cleanup: - folder?.deleteDir() - } - } diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinChannelExImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinChannelExImpl.groovy index ae41c5d979..abe34eed06 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinChannelExImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinChannelExImpl.groovy @@ -22,10 +22,10 @@ import groovyx.gpars.dataflow.DataflowWriteChannel import nextflow.Channel import nextflow.Session import nextflow.extension.LinChannelEx -import nextflow.lineage.fs.LinPath -import nextflow.lineage.fs.LinPathFactory import nextflow.lineage.serde.LinSerializable +import static nextflow.lineage.fs.LinPath.* + /** * Lineage channel extensions * @@ -37,10 +37,11 @@ class LinChannelExImpl implements LinChannelEx { Object viewLineage(Session session, String lid) { final store = getStore(session) - return store.load(LinPathFactory.create(lid).toString()) + return LinUtils.getMetadataObject(store, new URI(lid)) } void queryLineage(Session session, DataflowWriteChannel channel, Map params) { + new LinPropertyValidator().validateQueryParams(params) final store = getStore(session) emitSearchResults(channel, store.search(params)) channel.bind(Channel.STOP) @@ -58,6 +59,6 @@ class LinChannelExImpl implements LinChannelEx { if( !results ) { return } - results.keySet().forEach { 
channel.bind(LinPath.LID_PROT + it) } + results.keySet().forEach { channel.bind(asUriString(it)) } } } diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy index e426a67436..e16738fffa 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy @@ -16,13 +16,15 @@ package nextflow.lineage +import static nextflow.lineage.fs.LinFileSystemProvider.* +import static nextflow.lineage.fs.LinPath.* + import java.nio.file.attribute.FileTime import java.time.OffsetDateTime import java.time.ZoneId import groovy.transform.CompileStatic import groovy.util.logging.Slf4j -import nextflow.lineage.fs.LinPath import nextflow.lineage.model.TaskRun import nextflow.lineage.model.WorkflowRun import nextflow.lineage.serde.LinEncoder @@ -40,45 +42,39 @@ class LinUtils { private static final String[] EMPTY_ARRAY = new String[] {} /** - * Query a lineage store. + * Get a metadata lineage object or fragment from the Lineage store. * - * @param store lineage store to query. - * @param uri Query to perform in a URI-like format. - * Format 'lid://[?QueryString][#fragment]' where: - * - Key: Element where the query will be applied. '/' indicates query will be applied in all the elements of the lineage store. - * - QueryString: all param-value pairs that the lineage element should fulfill in a URI's query string format. + * @param store Lineage store. + * @param uri Object or fragment to retrieve in URI-like format. + * Format 'lid://[#fragment]' where: + * - Key: Metadata Element key * - Fragment: Element fragment to retrieve. - * @return Collection of object fulfilling the query + * @return Lineage metadata object or fragment. */ - static Collection query(LinStore store, URI uri) { - String key = uri.authority ? 
uri.authority + uri.path : uri.path - if (key == LinPath.SEPARATOR) { - return globalSearch(store, uri) - } else { - final parameters = uri.query ? parseQuery(uri.query) : null - final children = parseChildrenFromFragment(uri.fragment) - return searchPath(store, key, parameters, children ) + static Object getMetadataObject(LinStore store, URI uri) { + if( uri.scheme != SCHEME ) { + throw new IllegalArgumentException("Invalid LID URI - scheme is different for $SCHEME") } - } - - private static Collection globalSearch(LinStore store, URI uri) { - final results = store.search(parseQuery(uri.query)).values() - if (results && uri.fragment) { - // If fragment is defined get the property of the object indicated by the fragment - return filterResults(results, uri.fragment) + final key = uri.authority ? uri.authority + uri.path : uri.path + if( key == SEPARATOR ) { + throw new IllegalArgumentException("Cannot get object from the root LID URI") } - return results + if ( uri.query ) + log.warn("Query string is not supported the Linage URI ($uri). 
It will be ignored.") + + final children = parseChildrenFromFragment(uri.fragment) + return getMetadataObject0(store, key, children ) } - private static List filterResults(Collection results, String fragment) { - final filteredResults = [] - results.forEach { - final output = navigate(it, fragment) - if (output) { - filteredResults.add(output) - } + private static Object getMetadataObject0(LinStore store, String key, String[] children = []) { + final object = store.load(key) + if (!object) { + throw new FileNotFoundException("Lineage object $key not found") } - return filteredResults + if (children && children.size() > 0) { + return getSubObject(store, key, object, children) + } + return object } /** @@ -95,38 +91,6 @@ class LinUtils { return children as String[] } - /** - * Search for objects inside a description - * - * @param store lineage store - * @param key lineage key where to perform the search - * @param params Parameter-value pairs to be evaluated in the key - * @param children Sub-objects to evaluate and retrieve - * @return List of object - */ - protected static List searchPath(LinStore store, String key, Map params, String[] children = []) { - final object = store.load(key) - if (!object) { - throw new FileNotFoundException("Lineage object $key not found") - } - final results = new LinkedList() - if (children && children.size() > 0) { - treatSubObject(store, key, object, children, params, results) - } else { - treatObject(object, params, results) - } - - return results - } - - private static void treatSubObject(LinStore store, String key, LinSerializable object, String[] children, Map params, LinkedList results) { - final output = getSubObject(store, key, object, children) - if (!output) { - throw new FileNotFoundException("Lineage object $key#${children.join('.')} not found") - } - treatObject(output, params, results) - } - /** * Get a metadata sub-object. 
* @@ -179,23 +143,6 @@ class LinUtils { } } - /** - * Parses a query string and store them in parameter-value Map. - * - * @param queryString URI-like query string. (e.g. param1=value1¶m2=value2). - * @return Map containing the parameter-value pairs of the query string. - */ - static Map parseQuery(String queryString) { - if( !queryString ) { - return [:] - } - final params = queryString.split('&').collectEntries { - it.split('=').collect { URLDecoder.decode(it, 'UTF-8') } - } as Map - new LinPropertyValidator().validateQueryParams(params) - return params - } - /** * Check if an object fulfill the parameter-value * diff --git a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy index ad4fa7477e..e73d3d9784 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy @@ -17,7 +17,6 @@ package nextflow.lineage.cli import static nextflow.lineage.fs.LinPath.* -import static nextflow.lineage.LinUtils.* import java.nio.charset.StandardCharsets import java.nio.file.Path @@ -29,6 +28,7 @@ import nextflow.cli.CmdLineage import nextflow.config.ConfigMap import nextflow.dag.MermaidHtmlRenderer import nextflow.lineage.LinHistoryRecord +import nextflow.lineage.LinPropertyValidator import nextflow.lineage.LinStore import nextflow.lineage.LinStoreFactory import nextflow.lineage.LinUtils @@ -99,13 +99,12 @@ class LinCommandImpl implements CmdLineage.LinCommand { return } try { - def entries = LinUtils.query(store, new URI(args[0])) - if( !entries ) { - println "No entries found for ${args[0]}" + def entry = LinUtils.getMetadataObject(store, new URI(args[0])) + if( !entry ) { + println "No entry found for ${args[0]}" return } - entries = entries.size() == 1 ? 
entries[0] : entries - println LinUtils.encodeSearchOutputs(entries, true) + println LinUtils.encodeSearchOutputs(entry, true) } catch (Throwable e) { println "Error loading ${args[0]} - ${e.message}" } @@ -323,6 +322,7 @@ class LinCommandImpl implements CmdLineage.LinCommand { final params = args.collectEntries { it.split('=').collect { URLDecoder.decode(it, 'UTF-8') } } as Map + new LinPropertyValidator().validateQueryParams(params) println LinUtils.encodeSearchOutputs( store.search(params).keySet().collect { asUriString(it) }, true ) } catch (Throwable e){ println "Error searching for ${args[0]}. ${e.message}" diff --git a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy index 2c2201e727..4ad0a18618 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy @@ -20,6 +20,7 @@ import groovy.transform.CompileStatic import groovy.util.logging.Slf4j import nextflow.file.FileHelper import nextflow.file.LogicalDataPath +import nextflow.lineage.LinPropertyValidator import nextflow.lineage.model.Checksum import nextflow.lineage.model.FileOutput import nextflow.lineage.serde.LinSerializable @@ -74,10 +75,17 @@ class LinPath implements Path, LogicalDataPath { this.fileSystem = fs setFieldsFormURI(uri) //Check if query and fragment are with filePath - if (query == null && fragment == null){ + if( query == null && fragment == null ) { setFieldsFormURI(new URI(toUriString())) } + //warn if query is specified + if( query ) + log.warn("Query string is not supported the Linage URI ($uri). 
It will be ignored.") + // Validate fragment + if( fragment ) + new LinPropertyValidator().validate(fragment.tokenize('.')) } + private void setFieldsFormURI(URI uri){ this.query = uri.query this.fragment = uri.fragment @@ -503,7 +511,7 @@ class LinPath implements Path, LogicalDataPath { if( !path.startsWith(LID_PROT) ) throw new IllegalArgumentException("Invalid LID file system path URI - it must start with '${LID_PROT}' prefix - offendinf value: $path") if( path.startsWith(LID_PROT + SEPARATOR) && path.length() > 7 ) - throw new IllegalArgumentException("Invalid LID file system path URI - make sure the schema prefix does not container more than two slash characters - offending value: $path") + throw new IllegalArgumentException("Invalid LID file system path URI - make sure the schema prefix does not container more than two slash characters or a query in the root '/' - offending value: $path") if( path == LID_PROT ) //Empty path case return new URI("lid:///") return new URI(path) diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinChannelExImplTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinChannelExImplTest.groovy index acaef23fbf..7d9d9013a2 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/LinChannelExImplTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinChannelExImplTest.groovy @@ -86,13 +86,19 @@ class LinChannelExImplTest extends Specification { and: results == value1 - when: results = channelLinExt.viewLineage(session, 'lid://testKey#output') then: channelLinExt.getStore(session) >> lidStore and: - results == wfOutputs + results == outputs + + when: + results = channelLinExt.viewLineage(session, 'lid://testKey#params') + then: + channelLinExt.getStore(session) >> lidStore + and: + results == params } def 'should return global query results' () { diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy index 
47d1247c4a..0b92059d05 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy @@ -67,7 +67,7 @@ class LinUtilsTest extends Specification{ } - def 'should query'() { + def 'should get metadata object'() { given: def uniqueId = UUID.randomUUID() def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) @@ -81,32 +81,30 @@ class LinUtilsTest extends Specification{ lidStore.save("$key#output", outputs1) when: - List params = LinUtils.query(lidStore, new URI('lid://testKey#params')) + def params = LinUtils.getMetadataObject(lidStore, new URI('lid://testKey#params')) then: - params.size() == 1 - params[0] instanceof List - (params[0] as List).size() == 2 + params instanceof List + (params as List).size() == 2 when: - List outputs = LinUtils.query(lidStore, new URI('lid://testKey#output')) + def outputs = LinUtils.getMetadataObject(lidStore, new URI('lid://testKey#output')) then: - outputs.size() == 1 - outputs[0] instanceof List - def param = (outputs[0] as List)[0] as Parameter + outputs instanceof List + def param = (outputs as List)[0] as Parameter param.name == "output" when: - LinUtils.query(lidStore, new URI('lid://testKey#no-exist')) + LinUtils.getMetadataObject(lidStore, new URI('lid://testKey#no-exist')) then: thrown(IllegalArgumentException) when: - LinUtils.query(lidStore, new URI('lid://testKey#outputs.no-exist')) + LinUtils.getMetadataObject(lidStore, new URI('lid://testKey#outputs.no-exist')) then: thrown(IllegalArgumentException) when: - LinUtils.query(lidStore, new URI('lid://no-exist#something')) + LinUtils.getMetadataObject(lidStore, new URI('lid://no-exist#something')) then: thrown(IllegalArgumentException) } @@ -123,17 +121,6 @@ class LinUtilsTest extends Specification{ "" | [] } - def "should parse a query string as Map"() { - expect: - LinUtils.parseQuery(QUERY_STRING) == EXPECTED - - where: - QUERY_STRING | 
EXPECTED - "type=value1&taskRun=value2" | ["type": "value1", "taskRun": "value2"] - "type=val with space" | ["type": "val with space"] - "" | [:] - null | [:] - } def "should check params in an object"() { given: @@ -154,17 +141,6 @@ class LinUtilsTest extends Specification{ } - def 'should parse query' (){ - expect: - LinUtils.parseQuery(PARAMS) == EXPECTED - where: - PARAMS | EXPECTED - "type=value" | ["type": "value"] - "workflow.repository=subvalue" | ["workflow.repository": "subvalue"] - "" | [:] - null | [:] - } - def "should navigate in object params"() { given: def obj = [ @@ -205,23 +181,6 @@ class LinUtilsTest extends Specification{ [["nested": ["subfield": "match"]], ["nested": ["subfield": "other"]]] | ["nested.subfield": "match"] | [["nested": ["subfield": "match"]]] } - def "Should search path"() { - given: - def uniqueId = UUID.randomUUID() - def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) - def workflow = new Workflow([mainScript], "https://nextflow.io/nf-test/", "123456") - def key = "testKey" - def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) - def lidStore = new DefaultLinStore() - lidStore.open(config) - lidStore.save(key, value1) - when: - def result = LinUtils.searchPath(lidStore, key, ["name":"param1"], ["params"] as String[]) - - then: - result == [new Parameter("String", "param1", "value1")] - } - def 'should navigate' (){ def uniqueId = UUID.randomUUID() def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) diff --git a/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy index 9917224b65..29b84fb2e9 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy +++ 
b/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy @@ -279,7 +279,7 @@ class LinCommandImplTest extends Specification{ outputHtml.text == expectedOutput } - def 'should show query results'(){ + def 'should show an error if trying to do a query'(){ given: def lidFile = storeLocation.resolve("12345/.data.json") Files.createDirectories(lidFile.parent) @@ -288,7 +288,7 @@ class LinCommandImplTest extends Specification{ def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, null) def jsonSer = encoder.encode(entry) - def expectedOutput = jsonSer + def expectedOutput = "Error loading lid:///?type=FileOutput - Cannot get object from the root LID URI" lidFile.text = jsonSer when: new LinCommandImpl().describe(configMap, ["lid:///?type=FileOutput"]) @@ -304,35 +304,6 @@ class LinCommandImplTest extends Specification{ stdout.join('\n') == expectedOutput } - def 'should show query with fragment'(){ - given: - def lidFile = storeLocation.resolve("12345/.data.json") - Files.createDirectories(lidFile.parent) - def lidFile2 = storeLocation.resolve("67890/.data.json") - Files.createDirectories(lidFile2.parent) - def encoder = new LinEncoder().withPrettyPrint(true) - def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) - def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, null) - def entry2 = new FileOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), - "lid://123987/file2.bam", "lid://123987/", null, 1235, time, time, null) - def expectedOutput1 = '[\n "path/to/file",\n "path/to/file2"\n]' - def expectedOutput2 = '[\n "path/to/file2",\n "path/to/file"\n]' - lidFile.text = encoder.encode(entry) - lidFile2.text = encoder.encode(entry2) - when: - new LinCommandImpl().describe(configMap, 
["lid:///?type=FileOutput#path"]) - def stdout = capture - .toString() - .readLines()// remove the log part - .findResults { line -> !line.contains('DEBUG') ? line : null } - .findResults { line -> !line.contains('INFO') ? line : null } - .findResults { line -> !line.contains('plugin') ? line : null } - - then: - stdout.join('\n') == expectedOutput1 || stdout.join('\n') == expectedOutput2 - } - def 'should diff'(){ given: def lidFile = storeLocation.resolve("12345/.data.json") diff --git a/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy index 8766f0cd9b..f0256f7c68 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy @@ -72,13 +72,36 @@ class LinPathTest extends Specification { path.query == QUERY where: - URI_STRING | PATH | QUERY | FRAGMENT - "lid://1234/hola" | "1234/hola" | null | null - "lid://1234/hola#frag.sub" | "1234/hola" | null | "frag.sub" - "lid://1234/#frag.sub" | "1234" | null | "frag.sub" - "lid://1234/?q=a&b=c" | "1234" | "q=a&b=c" | null - "lid://1234/?q=a&b=c#frag.sub" | "1234" | "q=a&b=c" | "frag.sub" - "lid:///" | "/" | null | null + URI_STRING | PATH | QUERY | FRAGMENT + "lid://1234/hola" | "1234/hola" | null | null + "lid://1234/hola#workflow.repository" | "1234/hola" | null | "workflow.repository" + "lid://1234/#workflow.repository" | "1234" | null | "workflow.repository" + "lid://1234/?q=a&b=c" | "1234" | "q=a&b=c" | null + "lid://1234/?q=a&b=c#workflow.repository" | "1234" | "q=a&b=c" | "workflow.repository" + "lid:///" | "/" | null | null + } + + def 'should throw exception if fragment contains an unknown property'() { + when: + new LinPath(fs, new URI ("lid://1234/hola#no-exist")) + then: + thrown(IllegalArgumentException) + + } + + def 'should warn if query is specified'() { + when: + new LinPath(fs, new URI("lid://1234/hola?query")) + def stdout = 
capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == 1 + stdout[0].endsWith("Query string is not supported the Linage URI (lid://1234/hola?query). It will be ignored.") } def 'should create correct lid Path' () { From 4974beea9df2352e56ffa4d9fa31bd473bd737d2 Mon Sep 17 00:00:00 2001 From: jorgee Date: Wed, 30 Apr 2025 13:01:37 +0200 Subject: [PATCH 09/19] improve error message [ci fast] Signed-off-by: jorgee --- modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy index 4ad0a18618..4fb6be57e5 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy @@ -168,7 +168,7 @@ class LinPath implements Path, LogicalDataPath { if( !fs ) throw new IllegalArgumentException("Cannot get target path for a relative lineage path") if( filePath.isEmpty() || filePath == SEPARATOR ) - throw new IllegalArgumentException("Cannot get target path for an empty lineage path") + throw new IllegalArgumentException("Cannot get target path for an empty lineage path (lid:///)") final store = fs.getStore() if( !store ) throw new Exception("Lineage store not found - Check Nextflow configuration") From 117ef30c1e5ce773c29c4192aba1df89451410dd Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 30 Apr 2025 08:41:04 -0500 Subject: [PATCH 10/19] Update docs Signed-off-by: Ben Sherman --- docs/reference/channel.md | 25 ++++++------------------- docs/reference/cli.md | 4 ++-- 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/docs/reference/channel.md b/docs/reference/channel.md 
index d76ff7df1e..6bd56f3631 100644 --- a/docs/reference/channel.md +++ b/docs/reference/channel.md @@ -415,28 +415,15 @@ See also: [channel.fromList](#fromlist) factory method. :::{warning} *Experimental: may change in a future release.* ::: -The `channel.queryLineage` method allows you to create a channel that emits the IDs of the lineage metadata objects matching with a set of key-value parameters passed as arguments of the method. +The `channel.queryLineage` factory creates a channel that emits the IDs of the lineage records that match the given key-value pairs. -The following snippet shows how to create a channel (`ch`) using this method. It searches for `FileOutputs` annotated with the value 'test'. -The result is a set of Lineage IDs (lid) that can be consumed by processes as `path` or inspected with the `lineage` function. +The following snippet shows how to create a channel (`ch`) using this method. It searches for `FileOutputs` annotated with the value 'test'. +The result is a set of Lineage IDs (lid) that can be consumed by processes as `path` or inspected with the `lineage` function. ```nextflow - process foo { - input: - path('output_file') - - // ... - } - - workflow { - ch = channel - .queryLineage('type': 'FileOutput', 'annotations.value': 'test') - - foo(ch) - - ch.map { lid -> lineage(lid) } - } - +channel + .queryLineage(type: 'FileOutput', 'annotations.value': 'test') + .map { lid -> lineage(lid) } ``` (channel-topic)= diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 0f8dd965fa..39fa2c1a9d 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -732,7 +732,7 @@ View a metadata description fragment. A fragment can be a property of a metadata $ nextflow lineage view ``` -Find a specific metadata description that matches to a set of key-value parameters. Keys are defined similarly to the `fragments` used in the `view` command. +Find all metadata descriptions that match a set of key-value pairs. 
Keys are defined similarly to fragments as used in the `view` command. ```console $ nextflow lineage find = = ... @@ -744,7 +744,7 @@ Display a git-style diff between two metadata descriptions. $ nextflow lineage diff ``` -Render the lineage graph for a workflow or task output in an HTML file. (default file path: `./lineage.html`). +Render the lineage graph for a workflow or task output as an HTML file. (default file path: `./lineage.html`). ```console $ nextflow lineage render [html-file-path] From 321c02a4ed11ebf16935e642c374dad6ac4cbf88 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 30 Apr 2025 08:53:04 -0500 Subject: [PATCH 11/19] cleanup Signed-off-by: Ben Sherman --- .../src/main/groovy/nextflow/Channel.groovy | 11 +++-- .../src/main/groovy/nextflow/Nextflow.groovy | 6 +-- .../nextflow/extension/LinChannelEx.groovy | 28 ------------ .../nextflow/extension/LinExtension.groovy | 45 +++++++++++++++++++ .../nextflow/extension/OperatorImpl.groovy | 2 +- ...lExImpl.groovy => LinExtensionImpl.groovy} | 8 ++-- .../src/resources/META-INF/extensions.idx | 2 +- ...est.groovy => LinExtensionImplTest.groovy} | 35 +++++++-------- 8 files changed, 76 insertions(+), 61 deletions(-) delete mode 100644 modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy rename modules/nf-lineage/src/main/nextflow/lineage/{LinChannelExImpl.groovy => LinExtensionImpl.groovy} (92%) rename modules/nf-lineage/src/test/nextflow/lineage/{LinChannelExImplTest.groovy => LinExtensionImplTest.groovy} (86%) diff --git a/modules/nextflow/src/main/groovy/nextflow/Channel.groovy b/modules/nextflow/src/main/groovy/nextflow/Channel.groovy index c930a9694a..e3de4a62ef 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Channel.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Channel.groovy @@ -16,9 +16,6 @@ package nextflow -import nextflow.extension.LinChannelEx -import 
nextflow.plugin.Plugins - import static nextflow.util.CheckHelper.* import java.nio.file.FileSystem @@ -43,6 +40,7 @@ import nextflow.datasource.SraExplorer import nextflow.exception.AbortOperationException import nextflow.extension.CH import nextflow.extension.GroupTupleOp +import nextflow.extension.LinExtension import nextflow.extension.MapOp import nextflow.file.DirListener import nextflow.file.DirWatcher @@ -50,6 +48,7 @@ import nextflow.file.DirWatcherV2 import nextflow.file.FileHelper import nextflow.file.FilePatternSplitter import nextflow.file.PathVisitor +import nextflow.plugin.Plugins import nextflow.plugin.extension.PluginExtensionProvider import nextflow.util.Duration import nextflow.util.TestOnly @@ -672,11 +671,11 @@ class Channel { } private static void queryLineage0(DataflowWriteChannel channel, Map params) { - final operation = Plugins.getExtension(LinChannelEx) + final operation = Plugins.getExtension(LinExtension) if( !operation ) throw new IllegalStateException("Unable to load lineage extensions.") - def future = CompletableFuture.runAsync( { operation.queryLineage(session, channel, params) } as Runnable) + final future = CompletableFuture.runAsync(() -> operation.queryLineage(session, channel, params)) future.exceptionally(this.&handlerException) } -} \ No newline at end of file +} diff --git a/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy b/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy index 52dd250ba8..6a76e55d08 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy @@ -29,7 +29,7 @@ import nextflow.ast.OpXformImpl import nextflow.exception.StopSplitIterationException import nextflow.exception.WorkflowScriptErrorException import nextflow.extension.GroupKey -import nextflow.extension.LinChannelEx +import nextflow.extension.LinExtension import nextflow.extension.OperatorImpl import nextflow.file.FileHelper import 
nextflow.file.FilePatternSplitter @@ -425,10 +425,10 @@ class Nextflow { static Closure multiMapCriteria(Closure closure) { closure } static Object lineage( String lid ) { - final operation = Plugins.getExtension(LinChannelEx) + final operation = Plugins.getExtension(LinExtension) if( !operation ) throw new IllegalStateException("Unable to load lineage extensions.") - return operation.viewLineage(session, lid) + return operation.lineage(session, lid) } } diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy deleted file mode 100644 index 195ac870c4..0000000000 --- a/modules/nextflow/src/main/groovy/nextflow/extension/LinChannelEx.groovy +++ /dev/null @@ -1,28 +0,0 @@ -package nextflow.extension - -import groovyx.gpars.dataflow.DataflowWriteChannel -import nextflow.Session - -/** - * Interface to implement the Lineage channel factories and functions. - * @author Jorge Ejarque params) -} \ No newline at end of file diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy new file mode 100644 index 0000000000..7d7e6c7004 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy @@ -0,0 +1,45 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.extension + +import groovyx.gpars.dataflow.DataflowWriteChannel +import nextflow.Session + +/** + * Interface for nf-lineage extensions. + * + * @author Jorge Ejarque params) +} diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy index e13b79d017..3614de19db 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy @@ -36,7 +36,6 @@ import nextflow.Channel import nextflow.Global import nextflow.NF import nextflow.Session -import nextflow.plugin.Plugins import nextflow.script.ChannelOut import nextflow.script.TokenBranchDef import nextflow.script.TokenMultiMapDef @@ -1246,4 +1245,5 @@ class OperatorImpl { .apply() .getOutput() } + } diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinChannelExImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy similarity index 92% rename from modules/nf-lineage/src/main/nextflow/lineage/LinChannelExImpl.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy index abe34eed06..4d9518247e 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinChannelExImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy @@ -21,7 +21,7 @@ import groovy.util.logging.Slf4j import groovyx.gpars.dataflow.DataflowWriteChannel import nextflow.Channel import nextflow.Session -import nextflow.extension.LinChannelEx +import nextflow.extension.LinExtension import nextflow.lineage.serde.LinSerializable import static nextflow.lineage.fs.LinPath.* @@ -33,13 +33,15 @@ import static nextflow.lineage.fs.LinPath.* */ @CompileStatic @Slf4j -class LinChannelExImpl implements LinChannelEx { +class LinExtensionImpl implements LinExtension { - Object viewLineage(Session session, String lid) { + @Override + Object lineage(Session 
session, String lid) { final store = getStore(session) return LinUtils.getMetadataObject(store, new URI(lid)) } + @Override void queryLineage(Session session, DataflowWriteChannel channel, Map params) { new LinPropertyValidator().validateQueryParams(params) final store = getStore(session) diff --git a/modules/nf-lineage/src/resources/META-INF/extensions.idx b/modules/nf-lineage/src/resources/META-INF/extensions.idx index 85b08c4461..5b327e222d 100644 --- a/modules/nf-lineage/src/resources/META-INF/extensions.idx +++ b/modules/nf-lineage/src/resources/META-INF/extensions.idx @@ -15,6 +15,6 @@ # nextflow.lineage.DefaultLinStoreFactory +nextflow.lineage.LinExtensionImpl nextflow.lineage.LinObserverFactory nextflow.lineage.cli.LinCommandImpl -nextflow.lineage.LinChannelExImpl \ No newline at end of file diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinChannelExImplTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinExtensionImplTest.groovy similarity index 86% rename from modules/nf-lineage/src/test/nextflow/lineage/LinChannelExImplTest.groovy rename to modules/nf-lineage/src/test/nextflow/lineage/LinExtensionImplTest.groovy index 7d9d9013a2..7210c51c36 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/LinChannelExImplTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinExtensionImplTest.groovy @@ -16,29 +16,26 @@ package nextflow.lineage -import nextflow.Channel -import nextflow.extension.CH -import nextflow.lineage.model.Annotation -import nextflow.lineage.model.FileOutput - import java.nio.file.Path import java.time.Instant import java.time.OffsetDateTime +import java.time.ZoneOffset +import nextflow.Channel import nextflow.Session +import nextflow.extension.CH import nextflow.lineage.config.LineageConfig +import nextflow.lineage.model.Annotation import nextflow.lineage.model.Checksum import nextflow.lineage.model.DataPath +import nextflow.lineage.model.FileOutput import nextflow.lineage.model.Parameter import 
nextflow.lineage.model.Workflow import nextflow.lineage.model.WorkflowOutput import nextflow.lineage.model.WorkflowRun - import spock.lang.Specification import spock.lang.TempDir -import java.time.ZoneOffset - import static nextflow.lineage.fs.LinPath.* /** @@ -46,7 +43,7 @@ import static nextflow.lineage.fs.LinPath.* * * @author Jorge Ejarque */ -class LinChannelExImplTest extends Specification { +class LinExtensionImplTest extends Specification { @TempDir Path tempDir @@ -77,26 +74,26 @@ class LinChannelExImplTest extends Specification { lidStore.open(LineageConfig.create(session)) lidStore.save(key, value1) lidStore.save("$key#output", wfOutputs) - def channelLinExt = Spy(new LinChannelExImpl()) + def linExt = Spy(new LinExtensionImpl()) when: - def results = channelLinExt.viewLineage(session, 'lid://testKey') + def results = linExt.lineage(session, 'lid://testKey') then: - channelLinExt.getStore(session) >> lidStore + linExt.getStore(session) >> lidStore and: results == value1 when: - results = channelLinExt.viewLineage(session, 'lid://testKey#output') + results = linExt.lineage(session, 'lid://testKey#output') then: - channelLinExt.getStore(session) >> lidStore + linExt.getStore(session) >> lidStore and: results == outputs when: - results = channelLinExt.viewLineage(session, 'lid://testKey#params') + results = linExt.lineage(session, 'lid://testKey#params') then: - channelLinExt.getStore(session) >> lidStore + linExt.getStore(session) >> lidStore and: results == params } @@ -124,12 +121,12 @@ class LinChannelExImplTest extends Specification { lidStore.save(key2, value2) lidStore.save(key3, value3) lidStore.save(key4, value4) - def channelLinExt = Spy(new LinChannelExImpl()) + def linExt = Spy(new LinExtensionImpl()) when: def results = CH.create() - channelLinExt.queryLineage(session, results, [ "type":"FileOutput", "annotations.key":"key2", "annotations.value":"value2" ]) + linExt.queryLineage(session, results, [ "type":"FileOutput", "annotations.key":"key2", 
"annotations.value":"value2" ]) then: - channelLinExt.getStore(session) >> lidStore + linExt.getStore(session) >> lidStore and: results.val == asUriString(key2) results.val == asUriString(key3) From 224d92d1a060d731d0fec4f0b6128f1f20f43302 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 2 May 2025 15:53:52 +0200 Subject: [PATCH 12/19] change queryLineage to return file outputs Signed-off-by: jorgee --- docs/reference/stdlib-namespaces.md | 5 -- .../src/main/groovy/nextflow/Channel.groovy | 6 +- .../src/main/groovy/nextflow/Nextflow.groovy | 7 -- .../nextflow/extension/LinExtension.groovy | 16 ++-- .../nextflow/script/types/ChannelFactory.java | 2 + .../nextflow/lineage/DefaultLinStore.groovy | 2 +- .../nextflow/lineage/LinExtensionImpl.groovy | 29 +++++--- .../lineage/LinPropertyValidator.groovy | 2 +- .../src/main/nextflow/lineage/LinStore.groovy | 2 +- .../src/main/nextflow/lineage/LinUtils.groovy | 21 ++++-- .../lineage/cli/LinCommandImpl.groovy | 18 ++++- .../lineage/DefaultLinStoreTest.groovy | 2 +- .../lineage/LinExtensionImplTest.groovy | 73 +++++++------------ .../test/nextflow/lineage/LinUtilsTest.groovy | 26 +++---- 14 files changed, 100 insertions(+), 111 deletions(-) diff --git a/docs/reference/stdlib-namespaces.md b/docs/reference/stdlib-namespaces.md index 8c80c70b13..5ea35ad826 100644 --- a/docs/reference/stdlib-namespaces.md +++ b/docs/reference/stdlib-namespaces.md @@ -87,11 +87,6 @@ The global namespace contains globally available constants and functions. `groupKey( key, size: int ) -> GroupKey` : Create a grouping key to use with the {ref}`operator-grouptuple` operator. -`lineage( lid ) -> ?` -: :::{versionadded} 25.04.0 -: ::: -: Get the metadata record for a lineage ID. - `multiMapCriteria( criteria: Closure ) -> Closure` : Create a multi-map criteria to use with the {ref}`operator-multiMap` operator. 
diff --git a/modules/nextflow/src/main/groovy/nextflow/Channel.groovy b/modules/nextflow/src/main/groovy/nextflow/Channel.groovy index e3de4a62ef..92185050cd 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Channel.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Channel.groovy @@ -659,7 +659,8 @@ class Channel { fromPath0Future = future.exceptionally(Channel.&handlerException) } - static DataflowWriteChannel queryLineage(Map params) { + static DataflowWriteChannel queryLineage(Map params) { + checkParams('queryLineage', params, LinExtension.PARAMS) final result = CH.create() if( NF.isDsl2() ) { session.addIgniter { queryLineage0(result, params) } @@ -670,12 +671,11 @@ class Channel { return result } - private static void queryLineage0(DataflowWriteChannel channel, Map params) { + private static void queryLineage0(DataflowWriteChannel channel, Map params) { final operation = Plugins.getExtension(LinExtension) if( !operation ) throw new IllegalStateException("Unable to load lineage extensions.") final future = CompletableFuture.runAsync(() -> operation.queryLineage(session, channel, params)) future.exceptionally(this.&handlerException) } - } diff --git a/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy b/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy index 6a76e55d08..8604c97b3e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy @@ -424,11 +424,4 @@ class Nextflow { */ static Closure multiMapCriteria(Closure closure) { closure } - static Object lineage( String lid ) { - final operation = Plugins.getExtension(LinExtension) - if( !operation ) - throw new IllegalStateException("Unable to load lineage extensions.") - return operation.lineage(session, lid) - } - } diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy index 7d7e6c7004..7dc05f64e4 100644 
--- a/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy @@ -25,21 +25,15 @@ import nextflow.Session * @author Jorge Ejarque params) + abstract void queryLineage(Session session, DataflowWriteChannel channel, Map params) } diff --git a/modules/nf-lang/src/main/java/nextflow/script/types/ChannelFactory.java b/modules/nf-lang/src/main/java/nextflow/script/types/ChannelFactory.java index 826d1fcb39..b037352a28 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/types/ChannelFactory.java +++ b/modules/nf-lang/src/main/java/nextflow/script/types/ChannelFactory.java @@ -39,6 +39,8 @@ public interface ChannelFactory { Channel of(E... values); + Channel queryLineage(Map params); + Channel topic(String name); Channel value(E value); diff --git a/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy index 85673d6843..6b1f0dd6fe 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy @@ -93,7 +93,7 @@ class DefaultLinStore implements LinStore { void close() throws IOException { } @Override - Map search(Map params) { + Map search(Map> params) { final results = new HashMap() Files.walkFileTree(location, new FileVisitor() { diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy index 4d9518247e..c7d0a25c97 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy @@ -22,6 +22,9 @@ import groovyx.gpars.dataflow.DataflowWriteChannel import nextflow.Channel import nextflow.Session import nextflow.extension.LinExtension +import nextflow.lineage.fs.LinPathFactory +import nextflow.lineage.model.Annotation 
+import nextflow.lineage.model.FileOutput import nextflow.lineage.serde.LinSerializable import static nextflow.lineage.fs.LinPath.* @@ -36,17 +39,25 @@ import static nextflow.lineage.fs.LinPath.* class LinExtensionImpl implements LinExtension { @Override - Object lineage(Session session, String lid) { + void queryLineage(Session session, DataflowWriteChannel channel, Map opts) { + final queryParams = buildQueryParams(opts) + log.trace("Querying lineage with params: $queryParams") + new LinPropertyValidator().validateQueryParams(queryParams) final store = getStore(session) - return LinUtils.getMetadataObject(store, new URI(lid)) + emitSearchResults(channel, store.search(queryParams)) + channel.bind(Channel.STOP) } - @Override - void queryLineage(Session session, DataflowWriteChannel channel, Map params) { - new LinPropertyValidator().validateQueryParams(params) - final store = getStore(session) - emitSearchResults(channel, store.search(params)) - channel.bind(Channel.STOP) + private static Map> buildQueryParams(Map opts){ + final queryParams = [type: [FileOutput.class.simpleName] ] + if( opts.workflowRun ) + queryParams['workflowRun'] = [opts.workflowRun as String] + if( opts.taskRun ) + queryParams['taskRun'] = [opts.taskRun as String] + if( opts.annotations ) { + queryParams['annotations'] = (opts.annotations as Map).collect { String key, String value -> new Annotation(key, value).toString() } + } + return queryParams } protected LinStore getStore(Session session) { @@ -61,6 +72,6 @@ class LinExtensionImpl implements LinExtension { if( !results ) { return } - results.keySet().forEach { channel.bind(asUriString(it)) } + results.keySet().forEach { channel.bind( LinPathFactory.create( asUriString(it) ) ) } } } diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy index a89f7fc692..ec116ea044 100644 --- 
a/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy @@ -71,7 +71,7 @@ class LinPropertyValidator { } } - void validateQueryParams(Map params) { + void validateQueryParams(Map> params) { for( String key : params.keySet() ) { validate(key.tokenize('.')) } diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy index ba31b12df9..05586443b5 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy @@ -58,6 +58,6 @@ interface LinStore extends Closeable { * @params Map of query params * @return Key-lineage entry pairs fulfilling the query params */ - Map search(Map params) + Map search(Map> params) } diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy index e16738fffa..c5a6c0e1f6 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy @@ -131,7 +131,7 @@ class LinUtils { * @param params parameter-value pairs to evaluate in each object * @param results results collection to include the matching objects */ - protected static void treatObject(def object, Map params, List results) { + protected static void treatObject(def object, Map> params, List results) { if (params) { if (object instanceof Collection) { (object as Collection).forEach { treatObject(it, params, results) } @@ -150,7 +150,7 @@ class LinUtils { * @param params parameter-value pairs to evaluate * @return true if all object parameters exist and matches with the value, otherwise false. 
*/ - static boolean checkParams(Object object, Map params) { + static boolean checkParams(Object object, Map> params) { for( final entry : params.entrySet() ) { final value = navigate(object, entry.key) if( !checkParam(value, entry.value) ) { @@ -160,17 +160,22 @@ class LinUtils { return true } - private static boolean checkParam(Object value, Object expected) { + private static boolean checkParam(Object value, List expected) { if( !value ) return false + + // If value collection, convert to String and check all expected values are in the value. if( value instanceof Collection ) { - for( final v : value as Collection ) { - if( v.toString() == expected.toString() ) - return true - } + final colValue = value as Collection + return colValue.collect { it.toString() }.containsAll(expected) + } + + //Single object can't be compared with collection with one of more elements + if( expected.size() > 1 ) { return false } - return value.toString() == expected.toString() + + return value.toString() == expected[0] } /** diff --git a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy index e73d3d9784..abb4658277 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy @@ -319,13 +319,25 @@ class LinCommandImpl implements CmdLineage.LinCommand { return } try { - final params = args.collectEntries { - it.split('=').collect { URLDecoder.decode(it, 'UTF-8') } - } as Map + final params = parseFindArgs(args) new LinPropertyValidator().validateQueryParams(params) println LinUtils.encodeSearchOutputs( store.search(params).keySet().collect { asUriString(it) }, true ) } catch (Throwable e){ println "Error searching for ${args[0]}. 
${e.message}" } } + + private Map> parseFindArgs(List args){ + Map> params = [:].withDefault { [] } + + args.collectEntries { pair -> + def idx = pair.indexOf('=') + if( idx < 0 ) + throw new IllegalArgumentException("Parameter $pair doesn't contain '=' separator") + final key = URLDecoder.decode(pair[0..> configMap - } - lidStore.open(LineageConfig.create(session)) - lidStore.save(key, value1) - lidStore.save("$key#output", wfOutputs) - def linExt = Spy(new LinExtensionImpl()) - - when: - def results = linExt.lineage(session, 'lid://testKey') - then: - linExt.getStore(session) >> lidStore - and: - results == value1 - - when: - results = linExt.lineage(session, 'lid://testKey#output') - then: - linExt.getStore(session) >> lidStore - and: - results == outputs - - when: - results = linExt.lineage(session, 'lid://testKey#params') - then: - linExt.getStore(session) >> lidStore - and: - results == params - } - def 'should return global query results' () { given: def uniqueId = UUID.randomUUID() @@ -107,11 +65,11 @@ class LinExtensionImplTest extends Specification { def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [ new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")] ) def key2 = "testKey2" - def value2 = new FileOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key1","value1"), new Annotation("key2","value2")]) + def value2 = new FileOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", "taskid", 1234, time, time, [new Annotation("key1","value1"), new Annotation("key2","value2")]) def key3 = "testKey3" def value3 = new FileOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key2","value2"), new Annotation("key3","value3")]) def key4 = "testKey4" - def value4 = new 
FileOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key4","value4"), new Annotation("key3","value3")]) + def value4 = new FileOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", "taskid", 1234, time, time, [new Annotation("key4","value4"), new Annotation("key3","value3")]) def lidStore = new DefaultLinStore() def session = Mock(Session) { getConfig() >> configMap @@ -124,12 +82,31 @@ class LinExtensionImplTest extends Specification { def linExt = Spy(new LinExtensionImpl()) when: def results = CH.create() - linExt.queryLineage(session, results, [ "type":"FileOutput", "annotations.key":"key2", "annotations.value":"value2" ]) + linExt.queryLineage(session, results, [annotations: [key2:"value2", key3:"value3"]]) then: linExt.getStore(session) >> lidStore and: - results.val == asUriString(key2) - results.val == asUriString(key3) + results.val == LinPathFactory.create( asUriString(key3) ) results.val == Channel.STOP + + when: + results = CH.create() + linExt.queryLineage(session, results, [taskRun: "taskid", annotations: [key4:"value4"]]) + then: + linExt.getStore(session) >> lidStore + and: + results.val == LinPathFactory.create( asUriString(key4) ) + results.val == Channel.STOP + + when: + results = CH.create() + linExt.queryLineage(session, results, [workflowRun: "testkey", taskRun: "taskid", annotations: [key2:"value2"]]) + then: + linExt.getStore(session) >> lidStore + and: + results.val == LinPathFactory.create( asUriString(key2) ) + results.val == Channel.STOP + + } } diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy index 0b92059d05..56f143d0ce 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy @@ -131,13 +131,13 @@ class LinUtilsTest extends 
Specification{ where: PARAMS | EXPECTED - ["type": "value"] | true - ["type": "wrong"] | false - ["workflow.repository": "subvalue"] | true - ["workflow.repository": "wrong"] | false - ["output.path": "wrong"] | false - ["output.path": "/to/file"] | true - ["output.path": "file2"] | true + ["type": ["value"]] | true + ["type": ["wrong"]] | false + ["workflow.repository": ["subvalue"]] | true + ["workflow.repository": ["wrong"]] | false + ["output.path": ["wrong"]] | false + ["output.path": ["/to/file"]] | true + ["output.path": ["file2"]] | true } @@ -172,13 +172,13 @@ class LinUtilsTest extends Specification{ where: OBJECT | PARAMS | EXPECTED - ["field": "value"] | ["field": "value"] | [["field": "value"]] - ["field": "wrong"] | ["field": "value"] | [] - [["field": "value"], ["field": "x"]] | ["field": "value"] | [["field": "value"]] + ["field": "value"] | ["field": ["value"]] | [["field": "value"]] + ["field": "wrong"] | ["field": ["value"]] | [] + [["field": "value"], ["field": "x"]] | ["field": ["value"]] | [["field": "value"]] "string" | [:] | ["string"] - ["nested": ["subfield": "match"]] | ["nested.subfield": "match"] | [["nested": ["subfield": "match"]]] - ["nested": ["subfield": "nomatch"]] | ["nested.subfield": "match"] | [] - [["nested": ["subfield": "match"]], ["nested": ["subfield": "other"]]] | ["nested.subfield": "match"] | [["nested": ["subfield": "match"]]] + ["nested": ["subfield": "match"]] | ["nested.subfield": ["match"]] | [["nested": ["subfield": "match"]]] + ["nested": ["subfield": "nomatch"]] | ["nested.subfield": ["match"]] | [] + [["nested": ["subfield": "match"]], ["nested": ["subfield": "other"]]] | ["nested.subfield": ["match"]] | [["nested": ["subfield": "match"]]] } def 'should navigate' (){ From 9136cccb88b972fe014b29ee07b0b2c152f357e6 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 2 May 2025 16:01:24 +0200 Subject: [PATCH 13/19] update docs Signed-off-by: jorgee --- docs/reference/channel.md | 8 +++----- 1 file changed, 3 
insertions(+), 5 deletions(-) diff --git a/docs/reference/channel.md b/docs/reference/channel.md index 6bd56f3631..556fdaeeec 100644 --- a/docs/reference/channel.md +++ b/docs/reference/channel.md @@ -415,15 +415,13 @@ See also: [channel.fromList](#fromlist) factory method. :::{warning} *Experimental: may change in a future release.* ::: -The `channel.queryLineage` factory creates a channel that emits the IDs of the lineage records that match the given key-value pairs. +The `channel.queryLineage` factory creates a channel that emits the files generated by workflows or tasks or matching with a set of given key-value annotations. -The following snippet shows how to create a channel (`ch`) using this method. It searches for `FileOutputs` annotated with the value 'test'. -The result is a set of Lineage IDs (lid) that can be consumed by processes as `path` or inspected with the `lineage` function. +The following snippet shows how to create a channel (`ch`) using this method. It searches for files published by a specific workflow that includes the annotation `sample:'beta'`. 
```nextflow channel - .queryLineage(type: 'FileOutput', 'annotations.value': 'test') - .map { lid -> lineage(lid) } + .queryLineage(workflowRun: 'lid://0d1d1622ced3e4edc690bec768919b45', annotations: [sample:'beta']) ``` (channel-topic)= From 689cdccc60ac195df6b867aa654d7c074808330b Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 2 May 2025 16:46:24 +0200 Subject: [PATCH 14/19] fixes from merge Signed-off-by: jorgee --- docs/reference/channel.md | 4 ++-- .../groovy/nextflow/extension/LinExtension.groovy | 2 +- .../main/nextflow/lineage/LinExtensionImpl.groovy | 7 +++---- .../main/nextflow/lineage/cli/LinCommandImpl.groovy | 2 +- .../nextflow/lineage/LinExtensionImplTest.groovy | 13 ++++++------- .../nextflow/lineage/cli/LinCommandImplTest.groovy | 9 ++++----- 6 files changed, 17 insertions(+), 20 deletions(-) diff --git a/docs/reference/channel.md b/docs/reference/channel.md index 556fdaeeec..9b511ca87d 100644 --- a/docs/reference/channel.md +++ b/docs/reference/channel.md @@ -417,11 +417,11 @@ See also: [channel.fromList](#fromlist) factory method. The `channel.queryLineage` factory creates a channel that emits the files generated by workflows or tasks or matching with a set of given key-value annotations. -The following snippet shows how to create a channel (`ch`) using this method. It searches for files published by a specific workflow that includes the annotation `sample:'beta'`. +The following snippet shows how to create a channel (`ch`) using this method. It searches for files published by a specific workflow annotated with label `'beta'`. 
```nextflow channel - .queryLineage(workflowRun: 'lid://0d1d1622ced3e4edc690bec768919b45', annotations: [sample:'beta']) + .queryLineage(workflowRun: 'lid://0d1d1622ced3e4edc690bec768919b45', labels: ['beta']) ``` (channel-topic)= diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy index 7dc05f64e4..682b9db0ff 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy @@ -26,7 +26,7 @@ import nextflow.Session */ interface LinExtension { - Map PARAMS = [workflowRun: [String,GString], taskRun: [String,GString], annotations: Map] + Map PARAMS = [workflowRun: [String,GString], taskRun: [String,GString], labels: List] /** * Query Lineage metadata to get files produced by tasks, workflows or annotations. diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy index c7d0a25c97..f447d15850 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy @@ -23,7 +23,6 @@ import nextflow.Channel import nextflow.Session import nextflow.extension.LinExtension import nextflow.lineage.fs.LinPathFactory -import nextflow.lineage.model.Annotation import nextflow.lineage.model.FileOutput import nextflow.lineage.serde.LinSerializable @@ -42,7 +41,7 @@ class LinExtensionImpl implements LinExtension { void queryLineage(Session session, DataflowWriteChannel channel, Map opts) { final queryParams = buildQueryParams(opts) log.trace("Querying lineage with params: $queryParams") - new LinPropertyValidator().validateQueryParams(queryParams) + new LinPropertyValidator().validateQueryParams(queryParams.keySet()) final store = getStore(session) emitSearchResults(channel, store.search(queryParams)) 
channel.bind(Channel.STOP) @@ -54,8 +53,8 @@ class LinExtensionImpl implements LinExtension { queryParams['workflowRun'] = [opts.workflowRun as String] if( opts.taskRun ) queryParams['taskRun'] = [opts.taskRun as String] - if( opts.annotations ) { - queryParams['annotations'] = (opts.annotations as Map).collect { String key, String value -> new Annotation(key, value).toString() } + if( opts.labels ) { + queryParams['labels'] = opts.labels as List } return queryParams } diff --git a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy index abb4658277..7a971c78e5 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy @@ -320,7 +320,7 @@ class LinCommandImpl implements CmdLineage.LinCommand { } try { final params = parseFindArgs(args) - new LinPropertyValidator().validateQueryParams(params) + new LinPropertyValidator().validateQueryParams(params.keySet()) println LinUtils.encodeSearchOutputs( store.search(params).keySet().collect { asUriString(it) }, true ) } catch (Throwable e){ println "Error searching for ${args[0]}. 
${e.message}" diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinExtensionImplTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinExtensionImplTest.groovy index 4a5dd4cebb..155fa4f329 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/LinExtensionImplTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinExtensionImplTest.groovy @@ -26,7 +26,6 @@ import nextflow.Session import nextflow.extension.CH import nextflow.lineage.config.LineageConfig import nextflow.lineage.fs.LinPathFactory -import nextflow.lineage.model.Annotation import nextflow.lineage.model.Checksum import nextflow.lineage.model.DataPath import nextflow.lineage.model.FileOutput @@ -65,11 +64,11 @@ class LinExtensionImplTest extends Specification { def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [ new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")] ) def key2 = "testKey2" - def value2 = new FileOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", "taskid", 1234, time, time, [new Annotation("key1","value1"), new Annotation("key2","value2")]) + def value2 = new FileOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", "taskid", 1234, time, time, ["value1","value2"]) def key3 = "testKey3" - def value3 = new FileOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key2","value2"), new Annotation("key3","value3")]) + def value3 = new FileOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, ["value2", "value3"]) def key4 = "testKey4" - def value4 = new FileOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", "taskid", 1234, time, time, [new Annotation("key4","value4"), new Annotation("key3","value3")]) + def value4 = new 
FileOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", "taskid", 1234, time, time, ["value4","value3"]) def lidStore = new DefaultLinStore() def session = Mock(Session) { getConfig() >> configMap @@ -82,7 +81,7 @@ class LinExtensionImplTest extends Specification { def linExt = Spy(new LinExtensionImpl()) when: def results = CH.create() - linExt.queryLineage(session, results, [annotations: [key2:"value2", key3:"value3"]]) + linExt.queryLineage(session, results, [labels: ["value2", "value3"]]) then: linExt.getStore(session) >> lidStore and: @@ -91,7 +90,7 @@ class LinExtensionImplTest extends Specification { when: results = CH.create() - linExt.queryLineage(session, results, [taskRun: "taskid", annotations: [key4:"value4"]]) + linExt.queryLineage(session, results, [taskRun: "taskid", labels: ["value4"]]) then: linExt.getStore(session) >> lidStore and: @@ -100,7 +99,7 @@ class LinExtensionImplTest extends Specification { when: results = CH.create() - linExt.queryLineage(session, results, [workflowRun: "testkey", taskRun: "taskid", annotations: [key2:"value2"]]) + linExt.queryLineage(session, results, [workflowRun: "testkey", taskRun: "taskid", labels: ["value2"]]) then: linExt.getStore(session) >> lidStore and: diff --git a/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy index 0951e90edf..e182598afb 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy @@ -22,7 +22,6 @@ import nextflow.dag.MermaidHtmlRenderer import nextflow.lineage.LinHistoryRecord import nextflow.lineage.LinStoreFactory import nextflow.lineage.DefaultLinHistoryLog -import nextflow.lineage.model.Annotation import nextflow.lineage.model.Checksum import nextflow.lineage.model.FileOutput import nextflow.lineage.model.DataPath @@ -386,7 +385,7 @@ 
class LinCommandImplTest extends Specification{ when: def config = new ConfigMap() new LinCommandImpl().log(config) - new LinCommandImpl().describe(config, ["lid:///?type=FileOutput"]) + new LinCommandImpl().describe(config, ["lid:///12345"]) new LinCommandImpl().render(config, ["lid://12345", "output.html"]) new LinCommandImpl().diff(config, ["lid://89012", "lid://12345"]) @@ -416,9 +415,9 @@ class LinCommandImplTest extends Specification{ def encoder = new LinEncoder().withPrettyPrint(true) def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, [new Annotation("experiment", "test")]) + "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, ["experiment=test"]) def entry2 = new FileOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), - "lid://123987/file2.bam", "lid://123987/", null, 1235, time, time, [new Annotation("experiment", "test")]) + "lid://123987/file2.bam", "lid://123987/", null, 1235, time, time, ["experiment=test"]) def entry3 = new FileOutput("path/to/file3",new Checksum("42472qet","nextflow","standard"), "lid://123987/file2.bam", "lid://123987/", null, 1235, time, time, null) def expectedOutput1 = '[\n "lid://123987/file.bam",\n "lid://123987/file2.bam"\n]' @@ -427,7 +426,7 @@ class LinCommandImplTest extends Specification{ lidFile2.text = encoder.encode(entry2) lidFile3.text = encoder.encode(entry3) when: - new LinCommandImpl().find(configMap, ["type=FileOutput", "annotations.value=test"]) + new LinCommandImpl().find(configMap, ["type=FileOutput", "labels=experiment=test"]) def stdout = capture .toString() .readLines()// remove the log part From 9d6b77af3901e51dfbb117e1ac8b27b3a7478f77 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 2 May 2025 17:44:05 +0200 Subject: [PATCH 15/19] fix LinPath getFileName bug Signed-off-by: jorgee --- 
.../main/nextflow/lineage/fs/LinPath.groovy | 4 ++-- .../nextflow/lineage/fs/LinPathTest.groovy | 22 +++++++++++-------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy index 4fb6be57e5..4f9bb622a3 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy @@ -316,7 +316,7 @@ class LinPath implements Path, LogicalDataPath { @Override Path getFileName() { final result = Path.of(filePath).getFileName()?.toString() - return result ? new LinPath(fragment, query, result, null) : null + return result ? new LinPath(query, fragment, result, null) : null } @Override @@ -340,7 +340,7 @@ class LinPath implements Path, LogicalDataPath { throw new IllegalArgumentException("Path name index cannot be less than zero - offending value: $index") final path = Path.of(filePath) if( index == path.nameCount - 1 ) { - return new LinPath(fragment, query, path.getName(index).toString(), null) + return new LinPath( query, fragment, path.getName(index).toString(), null) } return new LinPath(index == 0 ? 
fileSystem : null, path.getName(index).toString()) } diff --git a/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy index f0256f7c68..f26755aa6c 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy @@ -268,10 +268,13 @@ class LinPathTest extends Specification { } def 'should get file name' () { - when: - def lid1 = new LinPath(fs, '1234567890/this/file.bam') - then: - lid1.getFileName() == new LinPath(null, 'file.bam') + expect: + new LinPath(fs, PATH).getFileName() == EXPECTED + where: + PATH | EXPECTED + '1234567890/this/file.bam' | new LinPath(null, 'file.bam') + '12345/hola?query#output' | new LinPath("query", "output", "hola", null) + } def 'should get file parent' () { @@ -303,11 +306,12 @@ class LinPathTest extends Specification { expect: new LinPath(fs, PATH).getName(INDEX) == EXPECTED where: - PATH | INDEX | EXPECTED - '123' | 0 | new LinPath(fs, '123') - '123/a' | 1 | new LinPath(null, 'a') - '123/a/' | 1 | new LinPath(null, 'a') - '123/a/b' | 2 | new LinPath(null, 'b') + PATH | INDEX | EXPECTED + '123' | 0 | new LinPath(fs, '123') + '123/a' | 1 | new LinPath(null, 'a') + '123/a/' | 1 | new LinPath(null, 'a') + '123/a/b' | 2 | new LinPath(null, 'b') + '123/a?q#output' | 1 | new LinPath(null, 'a?q#output') } @Unroll From e77a9c0fe4c9f7ecb6dfcf6e5c61d59f0e565eec Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 2 May 2025 14:45:56 -0500 Subject: [PATCH 16/19] cleanup Signed-off-by: Ben Sherman --- docs/reference/channel.md | 20 +++- docs/reference/cli.md | 14 ++- .../src/main/groovy/nextflow/Channel.groovy | 6 +- .../src/main/groovy/nextflow/Nextflow.groovy | 2 - .../nextflow/extension/LinExtension.groovy | 6 +- .../groovy/nextflow/cli/CmdLineageTest.groovy | 12 +- .../nextflow/script/types/ChannelFactory.java | 2 +- .../nextflow/lineage/LinExtensionImpl.groovy | 3 +- 
.../src/main/nextflow/lineage/LinStore.groovy | 2 +- .../src/main/nextflow/lineage/LinUtils.groovy | 104 +++++++++--------- .../lineage/cli/LinCommandImpl.groovy | 2 +- .../main/nextflow/lineage/fs/LinPath.groovy | 13 +-- .../test/nextflow/lineage/LinUtilsTest.groovy | 2 +- .../lineage/cli/LinCommandImplTest.groovy | 4 +- .../nextflow/lineage/fs/LinPathTest.groovy | 2 +- 15 files changed, 104 insertions(+), 90 deletions(-) diff --git a/docs/reference/channel.md b/docs/reference/channel.md index 9b511ca87d..838d8315e0 100644 --- a/docs/reference/channel.md +++ b/docs/reference/channel.md @@ -415,15 +415,27 @@ See also: [channel.fromList](#fromlist) factory method. :::{warning} *Experimental: may change in a future release.* ::: -The `channel.queryLineage` factory creates a channel that emits the files generated by workflows or tasks or matching with a set of given key-value annotations. - -The following snippet shows how to create a channel (`ch`) using this method. It searches for files published by a specific workflow annotated with label `'beta'`. +The `channel.queryLineage` factory creates a channel that emits files from the lineage store that match the given key-value params: ```nextflow channel - .queryLineage(workflowRun: 'lid://0d1d1622ced3e4edc690bec768919b45', labels: ['beta']) + .queryLineage(workflowRun: 'lid://0d1d1622ced3e4edc690bec768919b45', labels: ['alpha', 'beta']) + .view() ``` +The above snippet emits files published by the given workflow run that are labeled as `alpha` and `beta`. + +Available options: + +`labels` +: List of labels associated with the desired files. + +`taskRun` +: LID of the task run that produced the desired files. + +`workflowRun` +: LID of the workflow run that produced the desired files. 
+ (channel-topic)= ## topic diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 39fa2c1a9d..5ecf473683 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -720,25 +720,29 @@ TIMESTAMP RUN NAME SESSION ID 2025-04-22 14:45:43 backstabbing_heyrovsky 21bc4fad-e8b8-447d-9410-388f926a711f lid://c914d714877cc5c882c55a5428b510b1 ``` -View a metadata description. +View a metadata record. ```console $ nextflow lineage view ``` -View a metadata description fragment. A fragment can be a property of a metadata description (e.g., `output` or `params`) or a set of nested properties separated by a `.` (e.g., `workflow.repository`). +The output of a workflow run can be shown by appending `#output` to the workflow run LID: ```console -$ nextflow lineage view +$ nextflow lineage view ``` -Find all metadata descriptions that matche a set of key-value pairs. Keys are defined similarly to fragments as used in the `view` command. +:::{tip} +You can use the [jq](https://jqlang.org/) command-line tool to apply further queries and transformations on the resulting metadata record. +::: + +Find all metadata records that match a set of key-value pairs: ```console $ nextflow lineage find = = ... ``` -Display a git-style diff between two metadata descriptions. +Display a git-style diff between two metadata records. 
```console $ nextflow lineage diff diff --git a/modules/nextflow/src/main/groovy/nextflow/Channel.groovy b/modules/nextflow/src/main/groovy/nextflow/Channel.groovy index 92185050cd..b9171ad485 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Channel.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Channel.groovy @@ -672,10 +672,10 @@ class Channel { } private static void queryLineage0(DataflowWriteChannel channel, Map params) { - final operation = Plugins.getExtension(LinExtension) - if( !operation ) + final linExt = Plugins.getExtension(LinExtension) + if( !linExt ) throw new IllegalStateException("Unable to load lineage extensions.") - final future = CompletableFuture.runAsync(() -> operation.queryLineage(session, channel, params)) + final future = CompletableFuture.runAsync(() -> linExt.queryLineage(session, channel, params)) future.exceptionally(this.&handlerException) } } diff --git a/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy b/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy index 8604c97b3e..aee6368755 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy @@ -29,12 +29,10 @@ import nextflow.ast.OpXformImpl import nextflow.exception.StopSplitIterationException import nextflow.exception.WorkflowScriptErrorException import nextflow.extension.GroupKey -import nextflow.extension.LinExtension import nextflow.extension.OperatorImpl import nextflow.file.FileHelper import nextflow.file.FilePatternSplitter import nextflow.mail.Mailer -import nextflow.plugin.Plugins import nextflow.script.TokenBranchDef import nextflow.script.TokenMultiMapDef import nextflow.splitter.FastaSplitter diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy index 682b9db0ff..5c868b918e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy +++ 
b/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy @@ -26,7 +26,11 @@ import nextflow.Session */ interface LinExtension { - Map PARAMS = [workflowRun: [String,GString], taskRun: [String,GString], labels: List] + static final Map PARAMS = [ + labels: List, + taskRun: [String,GString], + workflowRun: [String,GString], + ] /** * Query Lineage metadata to get files produced by tasks, workflows or annotations. diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy index 49cb5ef6e4..da7787cb59 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy @@ -179,7 +179,7 @@ class CmdLineageTest extends Specification { then: stdout.size() == 1 - stdout[0] == "Error loading lid://12345 - Lineage object 12345 not found" + stdout[0] == "Error loading lid://12345 - Lineage record 12345 not found" cleanup: folder?.deleteDir() @@ -286,11 +286,11 @@ class CmdLineageTest extends Specification { def lidCmd = new CmdLineage(launcher: launcher, args: ["find", "type=FileOutput"]) lidCmd.run() def stdout = capture - .toString() - .readLines()// remove the log part - .findResults { line -> !line.contains('DEBUG') ? line : null } - .findResults { line -> !line.contains('INFO') ? line : null } - .findResults { line -> !line.contains('plugin') ? line : null } + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } then: stdout.size() == expectedOutput.readLines().size() diff --git a/modules/nf-lang/src/main/java/nextflow/script/types/ChannelFactory.java b/modules/nf-lang/src/main/java/nextflow/script/types/ChannelFactory.java index b037352a28..6bd81d8626 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/types/ChannelFactory.java +++ b/modules/nf-lang/src/main/java/nextflow/script/types/ChannelFactory.java @@ -39,7 +39,7 @@ public interface ChannelFactory { Channel of(E... values); - Channel queryLineage(Map params); + Channel queryLineage(Map opts); Channel topic(String name); diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy index f447d15850..44f2142ecd 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy @@ -53,9 +53,8 @@ class LinExtensionImpl implements LinExtension { queryParams['workflowRun'] = [opts.workflowRun as String] if( opts.taskRun ) queryParams['taskRun'] = [opts.taskRun as String] - if( opts.labels ) { + if( opts.labels ) queryParams['labels'] = opts.labels as List - } return queryParams } diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy index 05586443b5..2dc6974f6e 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy @@ -55,7 +55,7 @@ interface LinStore extends Closeable { /** * Search for lineage entries. 
- * @params Map of query params + * @param params Map of query params * @return Key-lineage entry pairs fulfilling the query params */ Map search(Map> params) diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy index c319edd606..d1129745b5 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy @@ -42,39 +42,37 @@ class LinUtils { private static final String[] EMPTY_ARRAY = new String[] {} /** - * Get a metadata lineage object or fragment from the Lineage store. + * Get a lineage record or fragment from the Lineage store. * * @param store Lineage store. * @param uri Object or fragment to retrieve in URI-like format. * Format 'lid://[#fragment]' where: * - Key: Metadata Element key * - Fragment: Element fragment to retrieve. - * @return Lineage metadata object or fragment. + * @return Lineage record or fragment. */ static Object getMetadataObject(LinStore store, URI uri) { - if( uri.scheme != SCHEME ) { + if( uri.scheme != SCHEME ) throw new IllegalArgumentException("Invalid LID URI - scheme is different for $SCHEME") - } final key = uri.authority ? uri.authority + uri.path : uri.path - if( key == SEPARATOR ) { - throw new IllegalArgumentException("Cannot get object from the root LID URI") - } + if( key == SEPARATOR ) + throw new IllegalArgumentException("Cannot get record from the root LID URI") if ( uri.query ) - log.warn("Query string is not supported the Linage URI ($uri). 
It will be ignored.") + log.warn("Query string is not supported for Lineage URI: `$uri` -- it will be ignored") final children = parseChildrenFromFragment(uri.fragment) return getMetadataObject0(store, key, children ) } private static Object getMetadataObject0(LinStore store, String key, String[] children = []) { - final object = store.load(key) - if (!object) { - throw new FileNotFoundException("Lineage object $key not found") + final record = store.load(key) + if (!record) { + throw new FileNotFoundException("Lineage record $key not found") } if (children && children.size() > 0) { - return getSubObject(store, key, object, children) + return getSubObject(store, key, record, children) } - return object + return record } /** @@ -92,67 +90,67 @@ class LinUtils { } /** - * Get a metadata sub-object. + * Get a lineage sub-record. * - * If the requested sub-object is the workflow or task outputs, retrieves the outputs from the outputs description. + * If the requested sub-record is the workflow or task outputs, retrieves the outputs from the outputs description. * - * @param store Store to retrieve lineage metadata objects. - * @param key Parent metadata key. - * @param object Parent object. - * @param children Array of string in indicating the properties to navigate to get the sub-object. - * @return Sub-object or null in it does not exist. + * @param store Store to retrieve lineage records. + * @param key Parent key. + * @param record Parent record. + * @param children Array of string in indicating the properties to navigate to get the sub-record. + * @return Sub-record or null in it does not exist. 
*/ - static Object getSubObject(LinStore store, String key, LinSerializable object, String[] children) { - if( isSearchingOutputs(object, children) ) { + static Object getSubObject(LinStore store, String key, LinSerializable record, String[] children) { + if( isSearchingOutputs(record, children) ) { // When asking for a Workflow or task output retrieve the outputs description final outputs = store.load("${key}#output") if (!outputs) return null return navigate(outputs, children.join('.')) } - return navigate(object, children.join('.')) + return navigate(record, children.join('.')) } /** * Check if the Lid pseudo path or query is for Task or Workflow outputs. * - * @param object Parent Lid metadata object - * @param children Array of string in indicating the properties to navigate to get the sub-object. + * @param record Parent lineage record + * @param children Array of strings indicating the properties to navigate to get the sub-record. * @return return 'true' if the parent is a Task/Workflow run and the first element in children is 'outputs'. Otherwise 'false' */ - static boolean isSearchingOutputs(LinSerializable object, String[] children) { - return (object instanceof WorkflowRun || object instanceof TaskRun) && children && children[0] == 'output' + static boolean isSearchingOutputs(LinSerializable record, String[] children) { + return (record instanceof WorkflowRun || record instanceof TaskRun) && children && children[0] == 'output' } /** - * Evaluates object or the objects in a collection matches a set of parameter-value pairs. It includes in the results collection in case of match. + * Evaluates whether a record, or each record in a collection, matches a set of parameter-value pairs. Matching records are added to the results collection. 
* - * @param object Object or collection of objects to evaluate - * @param params parameter-value pairs to evaluate in each object - * @param results results collection to include the matching objects + * @param record Object or collection of records to evaluate + * @param params parameter-value pairs to evaluate in each record + * @param results results collection to include the matching records */ - protected static void treatObject(def object, Map> params, List results) { + protected static void treatObject(def record, Map> params, List results) { if (params) { - if (object instanceof Collection) { - (object as Collection).forEach { treatObject(it, params, results) } - } else if (checkParams(object, params)) { - results.add(object) + if (record instanceof Collection) { + (record as Collection).forEach { treatObject(it, params, results) } + } else if (checkParams(record, params)) { + results.add(record) } } else { - results.add(object) + results.add(record) } } /** - * Check if an object fulfill the parameter-value + * Check if a record fulfills the parameter-value pairs * - * @param object Object to evaluate + * @param record Object to evaluate * @param params parameter-value pairs to evaluate - * @return true if all object parameters exist and matches with the value, otherwise false. + * @return true if all record parameters exist and match the value, otherwise false. 
*/ - static boolean checkParams(Object object, Map> params) { + static boolean checkParams(Object record, Map> params) { for( final entry : params.entrySet() ) { - final value = navigate(object, entry.key) + final value = navigate(record, entry.key) if( !checkParam(value, entry.value) ) { return false } @@ -170,7 +168,7 @@ class LinUtils { return colValue.collect { it.toString() }.containsAll(expected) } - //Single object can't be compared with collection with one of more elements + //Single record can't be compared with a collection of more than one element if( expected.size() > 1 ) { return false } @@ -179,16 +177,16 @@ class LinUtils { } /** - * Retrieves the sub-object or value indicated by a path. + * Retrieves the sub-record or value indicated by a path. * * @param obj Object to navigate * @param path Elements path separated by '.' e.g. field.subfield - * @return sub-object / value + * @return sub-record / value */ static Object navigate(Object obj, String path) { if (!obj) return null - // type has been replaced by class when evaluating LidSerializable objects + // type has been replaced by class when evaluating LinSerializable records if (obj instanceof LinSerializable && path == 'type') return obj.getClass()?.simpleName try { @@ -197,7 +195,7 @@ class LinUtils { } } catch (Throwable e) { - log.debug("Error navigating to $path in object", e) + log.debug("Error navigating to $path in record", e) return null } } @@ -221,8 +219,8 @@ class LinUtils { private static Object navigateCollection(Collection collection, String key) { final results = [] - for (Object object : collection) { - final res = getSubPath(object, key) + for (Object record : collection) { + final res = getSubPath(record, key) if (res) results.add(res) } @@ -230,7 +228,7 @@ class LinUtils { log.trace("No property found for $key") return null } - // Return a single object if only ine results is found. + // Return a single record if only one result is found. return results.size() == 1 ? 
results[0] : results } @@ -261,9 +259,9 @@ class LinUtils { /** * Helper function to unify the encoding of outputs when querying and navigating the lineage pseudoFS. - * Outputs can include LinSerializable objects, collections or parts of these objects. - * LinSerializable objects can be encoded with the LinEncoder, but collections or parts of - * these objects require to extend the GsonEncoder. + * Outputs can include LinSerializable records, collections or parts of these records. + * LinSerializable records can be encoded with the LinEncoder, but collections or parts of + * these records require to extend the GsonEncoder. * * @param output Output to encode * @return Output encoded as a JSON string diff --git a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy index 7a971c78e5..c059e1566c 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy @@ -331,7 +331,7 @@ class LinCommandImpl implements CmdLineage.LinCommand { Map> params = [:].withDefault { [] } args.collectEntries { pair -> - def idx = pair.indexOf('=') + final idx = pair.indexOf('=') if( idx < 0 ) throw new IllegalArgumentException("Parameter $pair doesn't contain '=' separator") final key = URLDecoder.decode(pair[0.. Date: Fri, 2 May 2025 15:00:36 -0500 Subject: [PATCH 17/19] Update migration notes Signed-off-by: Ben Sherman --- docs/migrations/25-04.md | 8 ++++++++ docs/reference/config.md | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/migrations/25-04.md b/docs/migrations/25-04.md index 205ed72e09..b0696ebdcb 100644 --- a/docs/migrations/25-04.md +++ b/docs/migrations/25-04.md @@ -37,6 +37,14 @@ The third preview of workflow outputs introduces the following breaking changes See {ref}`workflow-output-def` to learn more about the workflow output definition. +

Data lineage

+ +This release introduces built-in provenance tracking, also known as *data lineage*. When `lineage.enabled` is set to `true` in your configuration, Nextflow will record every workflow run, task execution, output file, and the links between them. + +You can explore this lineage from the command line using the {ref}`cli-lineage` command. Additionally, you can refer to files in the lineage store from a Nextflow script using the `lid://` path prefix as well as the {ref}`channel-query-lineage` channel factory. + +See the {ref}`cli-lineage` command and {ref}`config-lineage` config scope for details. + ## Enhancements

Improved inspect command

diff --git a/docs/reference/config.md b/docs/reference/config.md index 4dd1941524..bbac6272d3 100644 --- a/docs/reference/config.md +++ b/docs/reference/config.md @@ -1123,7 +1123,7 @@ See the {ref}`k8s-page` page for more details. ## `lineage` -The `lineage` scope controls the generation of lineage metadata. +The `lineage` scope controls the generation of {ref}`cli-lineage` metadata. The following settings are available: From 251c06e779dff673c736f7d4c3427ce35541b5ef Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 2 May 2025 15:09:40 -0500 Subject: [PATCH 18/19] Rename queryLineage -> fromLineage Signed-off-by: Ben Sherman --- docs/migrations/25-04.md | 2 +- docs/reference/channel.md | 62 +++++++++---------- .../src/main/groovy/nextflow/Channel.groovy | 12 ++-- .../nextflow/extension/LinExtension.groovy | 2 +- .../nextflow/script/types/ChannelFactory.java | 4 +- .../nextflow/lineage/LinExtensionImpl.groovy | 4 +- .../lineage/LinExtensionImplTest.groovy | 6 +- 7 files changed, 46 insertions(+), 46 deletions(-) diff --git a/docs/migrations/25-04.md b/docs/migrations/25-04.md index b0696ebdcb..00eea9829b 100644 --- a/docs/migrations/25-04.md +++ b/docs/migrations/25-04.md @@ -41,7 +41,7 @@ See {ref}`workflow-output-def` to learn more about the workflow output definitio This release introduces built-in provenance tracking, also known as *data lineage*. When `lineage.enabled` is set to `true` in your configuration, Nextflow will record every workflow run, task execution, output file, and the links between them. -You can explore this lineage from the command line using the {ref}`cli-lineage` command. Additionally, you can refer to files in the lineage store from a Nextflow script using the `lid://` path prefix as well as the {ref}`channel-query-lineage` channel factory. +You can explore this lineage from the command line using the {ref}`cli-lineage` command. 
Additionally, you can refer to files in the lineage store from a Nextflow script using the `lid://` path prefix as well as the {ref}`channel-from-lineage` channel factory. See the {ref}`cli-lineage` command and {ref}`config-lineage` config scope for details. diff --git a/docs/reference/channel.md b/docs/reference/channel.md index 838d8315e0..fc824512d1 100644 --- a/docs/reference/channel.md +++ b/docs/reference/channel.md @@ -58,6 +58,37 @@ But when more than one argument is provided, they are always managed as *single* channel.from( [1, 2], [5,6], [7,9] ) ``` +(channel-from-lineage)= + +## fromLineage + +:::{versionadded} 25.04.0 +::: + +:::{warning} *Experimental: may change in a future release.* +::: + +The `channel.fromLineage` factory creates a channel that emits files from the {ref}`cli-lineage` store that match the given key-value params: + +```nextflow +channel + .fromLineage(workflowRun: 'lid://0d1d1622ced3e4edc690bec768919b45', labels: ['alpha', 'beta']) + .view() +``` + +The above snippet emits files published by the given workflow run that are labeled as `alpha` and `beta`. + +Available options: + +`labels` +: List of labels associated with the desired files. + +`taskRun` +: LID of the task run that produced the desired files. + +`workflowRun` +: LID of the workflow run that produced the desired files. + (channel-fromlist)= ## fromList @@ -405,37 +436,6 @@ Y See also: [channel.fromList](#fromlist) factory method. -(channel-query-lineage)= - -## queryLineage - -:::{versionadded} 25.04.0 -::: - -:::{warning} *Experimental: may change in a future release.* -::: - -The `channel.queryLineage` factory creates a channel that emits files from the lineage store that match the given key-value params: - -```nextflow -channel - .queryLineage(workflowRun: 'lid://0d1d1622ced3e4edc690bec768919b45', labels: ['alpha', 'beta']) - .view() -``` - -The above snippet emits files published by the given workflow run that are labeled as `alpha` and `beta`. 
- -Available options: - -`labels` -: List of labels associated with the desired files. - -`taskRun` -: LID of the task run that produced the desired files. - -`workflowRun` -: LID of the workflow run that produced the desired files. - (channel-topic)= ## topic diff --git a/modules/nextflow/src/main/groovy/nextflow/Channel.groovy b/modules/nextflow/src/main/groovy/nextflow/Channel.groovy index b9171ad485..3d3ff69b58 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Channel.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Channel.groovy @@ -659,23 +659,23 @@ class Channel { fromPath0Future = future.exceptionally(Channel.&handlerException) } - static DataflowWriteChannel queryLineage(Map params) { - checkParams('queryLineage', params, LinExtension.PARAMS) + static DataflowWriteChannel fromLineage(Map params) { + checkParams('fromLineage', params, LinExtension.PARAMS) final result = CH.create() if( NF.isDsl2() ) { - session.addIgniter { queryLineage0(result, params) } + session.addIgniter { fromLineage0(result, params) } } else { - queryLineage0(result, params ) + fromLineage0(result, params ) } return result } - private static void queryLineage0(DataflowWriteChannel channel, Map params) { + private static void fromLineage0(DataflowWriteChannel channel, Map params) { final linExt = Plugins.getExtension(LinExtension) if( !linExt ) throw new IllegalStateException("Unable to load lineage extensions.") - final future = CompletableFuture.runAsync(() -> linExt.queryLineage(session, channel, params)) + final future = CompletableFuture.runAsync(() -> linExt.fromLineage(session, channel, params)) future.exceptionally(this.&handlerException) } } diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy index 5c868b918e..d65010131c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy +++ 
b/modules/nextflow/src/main/groovy/nextflow/extension/LinExtension.groovy @@ -39,5 +39,5 @@ interface LinExtension { * @param channel Channel to publish the Lineage Ids matching the query params * @param params Parameters for the lineage metadata query */ - abstract void queryLineage(Session session, DataflowWriteChannel channel, Map params) + abstract void fromLineage(Session session, DataflowWriteChannel channel, Map params) } diff --git a/modules/nf-lang/src/main/java/nextflow/script/types/ChannelFactory.java b/modules/nf-lang/src/main/java/nextflow/script/types/ChannelFactory.java index 6bd81d8626..b9dd747de3 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/types/ChannelFactory.java +++ b/modules/nf-lang/src/main/java/nextflow/script/types/ChannelFactory.java @@ -31,6 +31,8 @@ public interface ChannelFactory { Channel fromFilePairs(Map opts, String pattern, Closure grouping); + Channel fromLineage(Map opts); + Channel fromList(Collection values); Channel fromPath(Map opts, String pattern); @@ -39,8 +41,6 @@ public interface ChannelFactory { Channel of(E... 
values); - Channel queryLineage(Map opts); - Channel topic(String name); Channel value(E value); diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy index 44f2142ecd..ac92fe817c 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinExtensionImpl.groovy @@ -38,7 +38,7 @@ import static nextflow.lineage.fs.LinPath.* class LinExtensionImpl implements LinExtension { @Override - void queryLineage(Session session, DataflowWriteChannel channel, Map opts) { + void fromLineage(Session session, DataflowWriteChannel channel, Map opts) { final queryParams = buildQueryParams(opts) log.trace("Querying lineage with params: $queryParams") new LinPropertyValidator().validateQueryParams(queryParams.keySet()) @@ -47,7 +47,7 @@ class LinExtensionImpl implements LinExtension { channel.bind(Channel.STOP) } - private static Map> buildQueryParams(Map opts){ + private static Map> buildQueryParams(Map opts) { final queryParams = [type: [FileOutput.class.simpleName] ] if( opts.workflowRun ) queryParams['workflowRun'] = [opts.workflowRun as String] diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinExtensionImplTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinExtensionImplTest.groovy index 155fa4f329..3a7d4c4eaf 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/LinExtensionImplTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinExtensionImplTest.groovy @@ -81,7 +81,7 @@ class LinExtensionImplTest extends Specification { def linExt = Spy(new LinExtensionImpl()) when: def results = CH.create() - linExt.queryLineage(session, results, [labels: ["value2", "value3"]]) + linExt.fromLineage(session, results, [labels: ["value2", "value3"]]) then: linExt.getStore(session) >> lidStore and: @@ -90,7 +90,7 @@ class LinExtensionImplTest extends Specification { when: results = 
CH.create() - linExt.queryLineage(session, results, [taskRun: "taskid", labels: ["value4"]]) + linExt.fromLineage(session, results, [taskRun: "taskid", labels: ["value4"]]) then: linExt.getStore(session) >> lidStore and: @@ -99,7 +99,7 @@ class LinExtensionImplTest extends Specification { when: results = CH.create() - linExt.queryLineage(session, results, [workflowRun: "testkey", taskRun: "taskid", labels: ["value2"]]) + linExt.fromLineage(session, results, [workflowRun: "testkey", taskRun: "taskid", labels: ["value2"]]) then: linExt.getStore(session) >> lidStore and: From 8b4ca1e39f5c0a418468f0e8181689dcda16d25a Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 2 May 2025 15:12:39 -0500 Subject: [PATCH 19/19] Rename "metadata object" -> "lineage record" Signed-off-by: Ben Sherman --- docs/reference/cli.md | 12 ++++++------ .../src/main/nextflow/lineage/fs/LinPath.groovy | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 5ecf473683..88db1aebf4 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -703,7 +703,7 @@ $ nextflow lineage SUBCOMMAND [arg ..] **Description** -The `lineage` command is used to inspect lineage metadata. +The `lineage` command is used to inspect lineage metadata. Data lineage can be enabled by setting `lineage.enabled` to `true` in your Nextflow configuration (see the {ref}`config-lineage` config scope for details). **Options** @@ -720,7 +720,7 @@ TIMESTAMP RUN NAME SESSION ID 2025-04-22 14:45:43 backstabbing_heyrovsky 21bc4fad-e8b8-447d-9410-388f926a711f lid://c914d714877cc5c882c55a5428b510b1 ``` -View a metadata record. +View a lineage record.
```console $ nextflow lineage view <lid> @@ -729,20 +729,20 @@ $ nextflow lineage view <lid> The output of a workflow run can be shown by appending `#output` to the workflow run LID: ```console -$ nextflow lineage view <lid> +$ nextflow lineage view lid://c914d714877cc5c882c55a5428b510b1#output ``` :::{tip} -You can use the [jq](https://jqlang.org/) command-line tool to apply further queries and transformations on the resulting metadata record. +You can use the [jq](https://jqlang.org/) command-line tool to apply further queries and transformations on the resulting lineage record. ::: -Find all metadata records that match a set of key-value pairs: +Find all lineage records that match a set of key-value pairs: ```console $ nextflow lineage find <key-1>=<value-1> <key-2>=<value-2> ... ``` -Display a git-style diff between two metadata records. +Display a git-style diff between two lineage records. ```console $ nextflow lineage diff <lid-1> <lid-2> diff --git a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy index ee4bf022a9..d9a9e30d85 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy @@ -453,20 +453,20 @@ class LinPath implements Path, LogicalDataPath { } /** - * Get the path associated to a DataOutput metadata. + * Get the path associated with a FileOutput record. * - * @return Path associated to a DataOutput - * @throws FileNotFoundException if the metadata associated to the LinPath does not exist or its type is not a DataOutput. + * @return Path associated with a FileOutput record + * @throws FileNotFoundException if the record does not exist or its type is not a FileOutput. */ protected Path getTargetPath() { return findTarget(fileSystem, filePath, false, parseChildrenFromFragment(fragment)) } /** - * Get the path associated to any metadata object. + * Get the path associated with a lineage record.
* - * @return Path associated to a DataOutput or LinMetadataFile with the metadata object for other types. - * @throws FileNotFoundException if the metadata associated to the LinPath does not exist + * @return Path associated with a FileOutput record, or LinMetadataFile with the lineage record for other types. + * @throws FileNotFoundException if the record does not exist */ protected Path getTargetOrMetadataPath() { return findTarget(fileSystem, filePath, true, parseChildrenFromFragment(fragment))