Skip to content

Commit

Permalink
Merge pull request #171 from ielis/improve_format_and_element_sniffing
Browse files Browse the repository at this point in the history
Improve format and element sniffing
  • Loading branch information
ielis authored Apr 12, 2023
2 parents 4f76b40 + f7f3194 commit f2488ff
Show file tree
Hide file tree
Showing 19 changed files with 932 additions and 314 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public static class InputSection {
@CommandLine.Option(names = {"-e", "--element"},
description = {"Top-level element.",
"Choose from {${COMPLETION-CANDIDATES}}",
"Default: phenopacket"})
"Default: an educated guess from the input"})
public PhenopacketElement element = null;

}
Expand Down Expand Up @@ -71,33 +71,29 @@ protected BaseIOCommand() {
protected List<MessageAndPath> readMessagesOrExit(PhenopacketSchemaVersion schemaVersion) {
PhenopacketParser parser = parserFactory.forFormat(schemaVersion);
if (inputs == null) {
// The user did not set `-i | --input` option, assuming a single input is coming from STDIN.
// The user did not provide positional parameters, assuming a single input is coming from STDIN.
InputStream is = System.in;
try {
setFormatAndElement(is, schemaVersion);
setFormatAndElement(is);
Message message = parser.parse(inputSection.format, inputSection.element, is);
return List.of(new MessageAndPath(message, null));
} catch (SniffException e) {
System.err.println("Unable to detect input format from STDIN.\nConsider using the `--format` option.");
} catch (IOException e) {
System.err.println("Unable to read STDIN: " + e.getMessage() + "\nPlease check the input format.");
}
System.exit(1);
} else {
// Assuming a one or more input are provided via `-i | --input`.
// Assuming one or more inputs are provided via positional parameters.
//
// Picocli should ensure that `input` is never an empty list. `input` is `null` if no `-i` was supplied.
// Picocli should ensure that `inputs` is never an empty list.
// `inputs` is `null` if no positional parameters were supplied.
assert !inputs.isEmpty();

List<MessageAndPath> messages = new ArrayList<>();
for (Path input : inputs) {
try (InputStream is = new BufferedInputStream(Files.newInputStream(input))) {
setFormatAndElement(is, schemaVersion);
setFormatAndElement(is);
Message message = parser.parse(inputSection.format, inputSection.element, is);
messages.add(new MessageAndPath(message, input));
} catch (SniffException e) {
System.err.printf("Unable to detect input format of %s.\nConsider using the `--format` option.%n", input.toAbsolutePath());
System.exit(1);
} catch (IOException e) {
System.err.printf("Unable to read input file %s: %s\nPlease check the input format.%n", input.toAbsolutePath(), e.getMessage());
System.exit(1);
Expand All @@ -110,39 +106,54 @@ protected List<MessageAndPath> readMessagesOrExit(PhenopacketSchemaVersion schem

/**
* Peek into the provided {@link InputStream} {@code is} to set {@link InputSection#format}
* and {@link InputSection#element} items
* and {@link InputSection#element} items.
*
* @throws IOException if I/O error happens
* @throws SniffException if we cannot sniff the format
* @throws IOException if the format/element sniffing fails and the user did not set the CLI fields or if an I/O error happens.
*/
private void setFormatAndElement(InputStream is, PhenopacketSchemaVersion schemaVersion) throws IOException, SniffException {
// Set format.
PhenopacketFormat fmt = FormatSniffer.sniff(is);
private void setFormatAndElement(InputStream is) throws IOException {
SniffException se = null;

// Set the format.
PhenopacketFormat fmt = null;
try {
fmt = FormatSniffer.sniff(is);
} catch (SniffException e) {
se = e;
}
if (inputSection.format == null) {
LOGGER.info("Input format was not provided, making an educated guess..");
if (fmt == null)
throw new IOException("Input format (-f | --format) was not provided and format sniffing failed", se);
LOGGER.info("The input looks like a {} file", fmt);
inputSection.format = fmt;
} else {
if (!inputSection.format.equals(fmt))
if (fmt != null && !inputSection.format.equals(fmt))
// This can happen e.g. if processing multiple files at once but one turns out to be a different format.
// We emit warning because this is likely not what the user intended and the code will likely explode
// We emit a warning because this is likely not what the user intended and the code will likely explode
// further downstream.
LOGGER.warn("Input format is set to {} but the current input looks like a {}", inputSection.format, fmt);
}

// Set element.
PhenopacketElement element = ElementSniffer.sniff(is, schemaVersion, inputSection.format);
// Set the element.
PhenopacketElement element = null;
try {
element = ElementSniffer.sniff(is, inputSection.format);
} catch (SniffException e) {
se = e;
}
if (inputSection.element == null) {
LOGGER.info("Input element type (-e | --element) was not provided, making an educated guess..");
LOGGER.info("Input element was not provided, making an educated guess..");
if (element == null)
throw new IOException("Input element (-e | --element) was not provided and element sniffing failed", se);
LOGGER.info("The input looks like a {} ", element);
inputSection.element = element;
}
// else {
// TODO - enable once element sniffing is implemented
// if (!inputSection.element.equals(element))
// Let's go an extra mile and check for the user.
// LOGGER.warn("Input element is set to {} but the current input looks like a {}", inputSection.element, element);
// }
else {
if (element != null && !inputSection.element.equals(element))
// Let's go an extra mile and check for the user.
// Same as above, we emit a warning since the code will likely explode further downstream.
LOGGER.warn("Input element is set to {} but the current input looks like a {}", inputSection.element, element);
}
}

protected record MessageAndPath(Message message, Path path) {}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.phenopackets.phenopackettools.io;

import com.google.protobuf.Message;
import org.phenopackets.phenopackettools.util.format.ElementSniffer;
import org.phenopackets.phenopackettools.util.format.FormatSniffer;
import org.phenopackets.phenopackettools.core.PhenopacketElement;
import org.phenopackets.phenopackettools.core.PhenopacketFormat;
Expand All @@ -27,8 +28,12 @@ default Message parse(PhenopacketFormat format, PhenopacketElement element, Path
// We need to detect the element.

default Message parse(PhenopacketFormat format, InputStream is) throws IOException {
PhenopacketElement element = sniffElement(is);
return parse(format, element, is);
try {
PhenopacketElement element = sniffElement(is, format);
return parse(format, element, is);
} catch (SniffException e) {
throw new IOException(e);
}
}

default Message parse(PhenopacketFormat format, Path path) throws IOException {
Expand Down Expand Up @@ -65,8 +70,8 @@ default Message parse(Path path) throws IOException, SniffException {

/* ******************************************* UTILITY METHODS ******************************************* */

private static PhenopacketElement sniffElement(InputStream is) {
return PhenopacketElement.PHENOPACKET; // TODO - implement
private static PhenopacketElement sniffElement(InputStream is, PhenopacketFormat format) throws SniffException, IOException {
return ElementSniffer.sniff(is, format);
}

private static PhenopacketFormat sniffFormat(InputStream is) throws SniffException, IOException {
Expand Down
1 change: 0 additions & 1 deletion phenopacket-tools-util/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
// The `print` package exposes `JsonFormat.Printer`, hence the transitive export.
requires transitive com.google.protobuf.util;
requires org.phenopackets.schema;
requires org.slf4j;

exports org.phenopackets.phenopackettools.util.format;
exports org.phenopackets.phenopackettools.util.print;
Expand Down
Loading

0 comments on commit f2488ff

Please sign in to comment.