Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve format and element sniffing #171

Merged
merged 6 commits into from
Apr 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public static class InputSection {
@CommandLine.Option(names = {"-e", "--element"},
description = {"Top-level element.",
"Choose from {${COMPLETION-CANDIDATES}}",
"Default: phenopacket"})
"Default: an educated guess from the input"})
public PhenopacketElement element = null;

}
Expand Down Expand Up @@ -71,33 +71,29 @@ protected BaseIOCommand() {
protected List<MessageAndPath> readMessagesOrExit(PhenopacketSchemaVersion schemaVersion) {
PhenopacketParser parser = parserFactory.forFormat(schemaVersion);
if (inputs == null) {
// The user did not set `-i | --input` option, assuming a single input is coming from STDIN.
// The user did not provide positional parameters, assuming a single input is coming from STDIN.
InputStream is = System.in;
try {
setFormatAndElement(is, schemaVersion);
setFormatAndElement(is);
Message message = parser.parse(inputSection.format, inputSection.element, is);
return List.of(new MessageAndPath(message, null));
} catch (SniffException e) {
System.err.println("Unable to detect input format from STDIN.\nConsider using the `--format` option.");
} catch (IOException e) {
System.err.println("Unable to read STDIN: " + e.getMessage() + "\nPlease check the input format.");
}
System.exit(1);
} else {
// Assuming a one or more input are provided via `-i | --input`.
// Assuming one or more inputs are provided via positional parameters.
//
// Picocli should ensure that `input` is never an empty list. `input` is `null` if no `-i` was supplied.
// Picocli should ensure that `inputs` is never an empty list.
// `inputs` is `null` if no positional parameters were supplied.
assert !inputs.isEmpty();

List<MessageAndPath> messages = new ArrayList<>();
for (Path input : inputs) {
try (InputStream is = new BufferedInputStream(Files.newInputStream(input))) {
setFormatAndElement(is, schemaVersion);
setFormatAndElement(is);
Message message = parser.parse(inputSection.format, inputSection.element, is);
messages.add(new MessageAndPath(message, input));
} catch (SniffException e) {
System.err.printf("Unable to detect input format of %s.\nConsider using the `--format` option.%n", input.toAbsolutePath());
System.exit(1);
} catch (IOException e) {
System.err.printf("Unable to read input file %s: %s\nPlease check the input format.%n", input.toAbsolutePath(), e.getMessage());
System.exit(1);
Expand All @@ -110,39 +106,54 @@ protected List<MessageAndPath> readMessagesOrExit(PhenopacketSchemaVersion schem

/**
* Peek into the provided {@link InputStream} {@code is} to set {@link InputSection#format}
* and {@link InputSection#element} items
* and {@link InputSection#element} items.
*
* @throws IOException if I/O error happens
* @throws SniffException if we cannot sniff the format
* @throws IOException if the format/element sniffing fails and the user did not set the CLI fields or if an I/O error happens.
*/
private void setFormatAndElement(InputStream is, PhenopacketSchemaVersion schemaVersion) throws IOException, SniffException {
// Set format.
PhenopacketFormat fmt = FormatSniffer.sniff(is);
private void setFormatAndElement(InputStream is) throws IOException {
SniffException se = null;

// Set the format.
PhenopacketFormat fmt = null;
try {
fmt = FormatSniffer.sniff(is);
} catch (SniffException e) {
se = e;
}
if (inputSection.format == null) {
LOGGER.info("Input format was not provided, making an educated guess..");
if (fmt == null)
throw new IOException("Input format (-f | --format) was not provided and format sniffing failed", se);
LOGGER.info("The input looks like a {} file", fmt);
inputSection.format = fmt;
} else {
if (!inputSection.format.equals(fmt))
if (fmt != null && !inputSection.format.equals(fmt))
// This can happen e.g. if processing multiple files at once but one turns out to be a different format.
// We emit warning because this is likely not what the user intended and the code will likely explode
// We emit a warning because this is likely not what the user intended and the code will likely explode
// further downstream.
LOGGER.warn("Input format is set to {} but the current input looks like a {}", inputSection.format, fmt);
}

// Set element.
PhenopacketElement element = ElementSniffer.sniff(is, schemaVersion, inputSection.format);
// Set the element.
PhenopacketElement element = null;
try {
element = ElementSniffer.sniff(is, inputSection.format);
} catch (SniffException e) {
se = e;
}
if (inputSection.element == null) {
LOGGER.info("Input element type (-e | --element) was not provided, making an educated guess..");
LOGGER.info("Input element was not provided, making an educated guess..");
if (element == null)
throw new IOException("Input element (-e | --element) was not provided and element sniffing failed", se);
LOGGER.info("The input looks like a {} ", element);
inputSection.element = element;
}
// else {
// TODO - enable once element sniffing is implemented
// if (!inputSection.element.equals(element))
// Let's go an extra mile and check for the user.
// LOGGER.warn("Input element is set to {} but the current input looks like a {}", inputSection.element, element);
// }
else {
if (element != null && !inputSection.element.equals(element))
// Let's go an extra mile and check for the user.
// Same as above, we emit a warning since the code will likely explode further downstream.
LOGGER.warn("Input element is set to {} but the current input looks like a {}", inputSection.element, element);
}
}

protected record MessageAndPath(Message message, Path path) {}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.phenopackets.phenopackettools.io;

import com.google.protobuf.Message;
import org.phenopackets.phenopackettools.util.format.ElementSniffer;
import org.phenopackets.phenopackettools.util.format.FormatSniffer;
import org.phenopackets.phenopackettools.core.PhenopacketElement;
import org.phenopackets.phenopackettools.core.PhenopacketFormat;
Expand All @@ -27,8 +28,12 @@ default Message parse(PhenopacketFormat format, PhenopacketElement element, Path
// We need to detect the element.

default Message parse(PhenopacketFormat format, InputStream is) throws IOException {
PhenopacketElement element = sniffElement(is);
return parse(format, element, is);
try {
PhenopacketElement element = sniffElement(is, format);
return parse(format, element, is);
} catch (SniffException e) {
throw new IOException(e);
}
}

default Message parse(PhenopacketFormat format, Path path) throws IOException {
Expand Down Expand Up @@ -65,8 +70,8 @@ default Message parse(Path path) throws IOException, SniffException {

/* ******************************************* UTILITY METHODS ******************************************* */

private static PhenopacketElement sniffElement(InputStream is) {
return PhenopacketElement.PHENOPACKET; // TODO - implement
private static PhenopacketElement sniffElement(InputStream is, PhenopacketFormat format) throws SniffException, IOException {
return ElementSniffer.sniff(is, format);
}

private static PhenopacketFormat sniffFormat(InputStream is) throws SniffException, IOException {
Expand Down
1 change: 0 additions & 1 deletion phenopacket-tools-util/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
// The `print` package exposes `JsonFormat.Printer`, hence the transitive export.
requires transitive com.google.protobuf.util;
requires org.phenopackets.schema;
requires org.slf4j;

exports org.phenopackets.phenopackettools.util.format;
exports org.phenopackets.phenopackettools.util.print;
Expand Down
Loading