From d69ba6c94f3224120c050eb352c929f7a5348d69 Mon Sep 17 00:00:00 2001 From: Jules Jacobsen Date: Wed, 15 Nov 2023 12:55:26 +0000 Subject: [PATCH] Update to issues #501 & #473 - Include VariantEffect and geneSymbol annotations on ClinVarData at build time. --- .../genome/JannovarSmallVariantAnnotator.java | 15 +- .../core/model/AlleleProtoAdaptor.java | 128 +++++++++++----- .../core/model/pathogenicity/ClinVarData.java | 110 ++++++++------ exomiser-core/src/main/proto/allele.proto | 73 ++++++++- .../model/pathogenicity/ClinVarDataTest.java | 16 +- exomiser-data-genome/README.md | 49 ++++++ .../exomiser/data/genome/BuildCommand.java | 63 ++++++-- .../data/genome/ClinVarBuildRunner.java | 40 ++++- .../genome/TranscriptDataBuildRunner.java | 6 +- .../data/genome/indexers/AlleleConverter.java | 139 +++++++++++++----- .../src/main/resources/application.properties | 2 +- .../src/main/resources/logback-spring.xml | 26 ++++ .../data/genome/ClinVarBuildRunnerTest.java | 9 +- .../clinvar-test-transcript-data.ser | Bin 0 -> 11558 bytes 14 files changed, 531 insertions(+), 145 deletions(-) create mode 100644 exomiser-data-genome/README.md create mode 100644 exomiser-data-genome/src/main/resources/logback-spring.xml create mode 100644 exomiser-data-genome/src/test/resources/clinvar-test-transcript-data.ser diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/JannovarSmallVariantAnnotator.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/JannovarSmallVariantAnnotator.java index 7c469b437..ab36e4b5a 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/JannovarSmallVariantAnnotator.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/JannovarSmallVariantAnnotator.java @@ -31,14 +31,10 @@ import org.monarchinitiative.exomiser.core.model.TranscriptAnnotation; import org.monarchinitiative.exomiser.core.model.VariantAnnotation; import 
org.monarchinitiative.svart.GenomicVariant; -import org.monarchinitiative.svart.VariantType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import java.util.stream.Stream; import static java.util.stream.Collectors.groupingBy; @@ -127,16 +123,15 @@ private Stream splitAnnotationsByGene(VariantAnnotations var return annotations.stream() .collect(groupingBy(Annotation::getGeneSymbol)) .values().stream() - //.peek(annotationList -> annotationList.forEach(annotation -> logger.info("{}", toAnnotationString(annotation)))) + //.peek(annotationList -> annotationList.forEach(annotation -> logger.info("{}", toAnnotationString(genomeVariant, annotation)))) .map(annos -> new VariantAnnotations(genomeVariant, annos)); } - private String toAnnotationString(VariantType variantType, SVAnnotation annotation) { - return variantType + ", " + - annotation.getVariant() + ", " + + private String toAnnotationString(GenomeVariant genomeVariant, Annotation annotation) { + return genomeVariant + ", " + annotation.getTranscript().getGeneSymbol() + ", " + annotation.getTranscript().getGeneID() + ", " + - annotation.getMostPathogenicVariantEffect() + ", " + + annotation.getMostPathogenicVarType() + ", " + annotation.getPutativeImpact() + ", " + annotation.getTranscript(); } diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/AlleleProtoAdaptor.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/AlleleProtoAdaptor.java index f2bc73266..3c2f82c0b 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/AlleleProtoAdaptor.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/AlleleProtoAdaptor.java @@ -21,6 +21,7 @@ package org.monarchinitiative.exomiser.core.model; import com.google.common.collect.ImmutableMap; +import 
de.charite.compbio.jannovar.annotation.VariantEffect; import org.monarchinitiative.exomiser.core.model.frequency.Frequency; import org.monarchinitiative.exomiser.core.model.frequency.FrequencyData; import org.monarchinitiative.exomiser.core.model.frequency.FrequencySource; @@ -28,6 +29,7 @@ import org.monarchinitiative.exomiser.core.model.pathogenicity.PathogenicityData; import org.monarchinitiative.exomiser.core.model.pathogenicity.PathogenicityScore; import org.monarchinitiative.exomiser.core.model.pathogenicity.PathogenicitySource; +import org.monarchinitiative.exomiser.core.proto.AlleleProto; import org.monarchinitiative.exomiser.core.proto.AlleleProto.AlleleKey; import org.monarchinitiative.exomiser.core.proto.AlleleProto.AlleleProperties; import org.monarchinitiative.exomiser.core.proto.AlleleProto.ClinVar; @@ -160,10 +162,13 @@ public static ClinVarData toClinVarData(ClinVar clinVar) { } ClinVarData.Builder builder = ClinVarData.builder(); builder.alleleId(clinVar.getAlleleId()); + builder.variationId(clinVar.getVariationId()); builder.primaryInterpretation(toClinSig(clinVar.getPrimaryInterpretation())); builder.secondaryInterpretations(toClinSigSet(clinVar.getSecondaryInterpretationsList())); builder.includedAlleles(getToIncludedAlleles(clinVar.getIncludedAllelesMap())); builder.reviewStatus(clinVar.getReviewStatus()); + builder.geneSymbol(clinVar.getGeneSymbol()); + builder.variantEffect(toVariantEffect(clinVar.getVariantEffect())); return builder.build(); } @@ -192,39 +197,94 @@ private static Set toClinSigSet(List proto } private static ClinVarData.ClinSig toClinSig(ClinVar.ClinSig protoClinSig) { - switch (protoClinSig) { - case BENIGN: - return ClinVarData.ClinSig.BENIGN; - case BENIGN_OR_LIKELY_BENIGN: - return ClinVarData.ClinSig.BENIGN_OR_LIKELY_BENIGN; - case LIKELY_BENIGN: - return ClinVarData.ClinSig.LIKELY_BENIGN; - case UNCERTAIN_SIGNIFICANCE: - return ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE; - case LIKELY_PATHOGENIC: - return 
ClinVarData.ClinSig.LIKELY_PATHOGENIC; - case PATHOGENIC_OR_LIKELY_PATHOGENIC: - return ClinVarData.ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC; - case PATHOGENIC: - return ClinVarData.ClinSig.PATHOGENIC; - case CONFLICTING_PATHOGENICITY_INTERPRETATIONS: - return ClinVarData.ClinSig.CONFLICTING_PATHOGENICITY_INTERPRETATIONS; - case AFFECTS: - return ClinVarData.ClinSig.AFFECTS; - case ASSOCIATION: - return ClinVarData.ClinSig.ASSOCIATION; - case DRUG_RESPONSE: - return ClinVarData.ClinSig.DRUG_RESPONSE; - case OTHER: - return ClinVarData.ClinSig.OTHER; - case PROTECTIVE: - return ClinVarData.ClinSig.PROTECTIVE; - case RISK_FACTOR: - return ClinVarData.ClinSig.RISK_FACTOR; - case NOT_PROVIDED: - case UNRECOGNIZED: - default: - return ClinVarData.ClinSig.NOT_PROVIDED; - } + return switch (protoClinSig) { + case BENIGN -> ClinVarData.ClinSig.BENIGN; + case BENIGN_OR_LIKELY_BENIGN -> ClinVarData.ClinSig.BENIGN_OR_LIKELY_BENIGN; + case LIKELY_BENIGN -> ClinVarData.ClinSig.LIKELY_BENIGN; + case UNCERTAIN_SIGNIFICANCE -> ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE; + case LIKELY_PATHOGENIC -> ClinVarData.ClinSig.LIKELY_PATHOGENIC; + case PATHOGENIC_OR_LIKELY_PATHOGENIC -> ClinVarData.ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC; + case PATHOGENIC -> ClinVarData.ClinSig.PATHOGENIC; + case CONFLICTING_PATHOGENICITY_INTERPRETATIONS -> + ClinVarData.ClinSig.CONFLICTING_PATHOGENICITY_INTERPRETATIONS; + case AFFECTS -> ClinVarData.ClinSig.AFFECTS; + case ASSOCIATION -> ClinVarData.ClinSig.ASSOCIATION; + case DRUG_RESPONSE -> ClinVarData.ClinSig.DRUG_RESPONSE; + case OTHER -> ClinVarData.ClinSig.OTHER; + case PROTECTIVE -> ClinVarData.ClinSig.PROTECTIVE; + case RISK_FACTOR -> ClinVarData.ClinSig.RISK_FACTOR; + default -> ClinVarData.ClinSig.NOT_PROVIDED; + }; + } + + private static VariantEffect toVariantEffect(AlleleProto.VariantEffect clinVarVariantEffect) { + return switch (clinVarVariantEffect) { + case SEQUENCE_VARIANT -> VariantEffect.SEQUENCE_VARIANT; + case 
CHROMOSOME_NUMBER_VARIATION -> VariantEffect.CHROMOSOME_NUMBER_VARIATION; + case TRANSCRIPT_ABLATION -> VariantEffect.TRANSCRIPT_ABLATION; + case EXON_LOSS_VARIANT -> VariantEffect.EXON_LOSS_VARIANT; + case INVERSION -> VariantEffect.INVERSION; + case INSERTION -> VariantEffect.INSERTION; + case TRANSLOCATION -> VariantEffect.TRANSLOCATION; + case FRAMESHIFT_ELONGATION -> VariantEffect.FRAMESHIFT_ELONGATION; + case FRAMESHIFT_TRUNCATION -> VariantEffect.FRAMESHIFT_TRUNCATION; + case FRAMESHIFT_VARIANT -> VariantEffect.FRAMESHIFT_VARIANT; + case INTERNAL_FEATURE_ELONGATION -> VariantEffect.INTERNAL_FEATURE_ELONGATION; + case FEATURE_TRUNCATION -> VariantEffect.FEATURE_TRUNCATION; + case TRANSCRIPT_AMPLIFICATION -> VariantEffect.TRANSCRIPT_AMPLIFICATION; + case COPY_NUMBER_CHANGE -> VariantEffect.COPY_NUMBER_CHANGE; + case MNV -> VariantEffect.MNV; + case COMPLEX_SUBSTITUTION -> VariantEffect.COMPLEX_SUBSTITUTION; + case STOP_GAINED -> VariantEffect.STOP_GAINED; + case STOP_LOST -> VariantEffect.STOP_LOST; + case START_LOST -> VariantEffect.START_LOST; + case SPLICE_ACCEPTOR_VARIANT -> VariantEffect.SPLICE_ACCEPTOR_VARIANT; + case SPLICE_DONOR_VARIANT -> VariantEffect.SPLICE_DONOR_VARIANT; + case RARE_AMINO_ACID_VARIANT -> VariantEffect.RARE_AMINO_ACID_VARIANT; + case MISSENSE_VARIANT -> VariantEffect.MISSENSE_VARIANT; + case INFRAME_INSERTION -> VariantEffect.INFRAME_INSERTION; + case DISRUPTIVE_INFRAME_INSERTION -> VariantEffect.DISRUPTIVE_INFRAME_INSERTION; + case INFRAME_DELETION -> VariantEffect.INFRAME_DELETION; + case DISRUPTIVE_INFRAME_DELETION -> VariantEffect.DISRUPTIVE_INFRAME_DELETION; + case FIVE_PRIME_UTR_TRUNCATION -> VariantEffect.FIVE_PRIME_UTR_TRUNCATION; + case THREE_PRIME_UTR_TRUNCATION -> VariantEffect.THREE_PRIME_UTR_TRUNCATION; + case SPLICE_REGION_VARIANT -> VariantEffect.SPLICE_REGION_VARIANT; + case STOP_RETAINED_VARIANT -> VariantEffect.STOP_RETAINED_VARIANT; + case INITIATOR_CODON_VARIANT -> VariantEffect.INITIATOR_CODON_VARIANT; + case 
SYNONYMOUS_VARIANT -> VariantEffect.SYNONYMOUS_VARIANT; + case CODING_TRANSCRIPT_INTRON_VARIANT -> VariantEffect.CODING_TRANSCRIPT_INTRON_VARIANT; + case FIVE_PRIME_UTR_PREMATURE_START_CODON_GAIN_VARIANT -> + VariantEffect.FIVE_PRIME_UTR_PREMATURE_START_CODON_GAIN_VARIANT; + case FIVE_PRIME_UTR_EXON_VARIANT -> VariantEffect.FIVE_PRIME_UTR_EXON_VARIANT; + case THREE_PRIME_UTR_EXON_VARIANT -> VariantEffect.THREE_PRIME_UTR_EXON_VARIANT; + case FIVE_PRIME_UTR_INTRON_VARIANT -> VariantEffect.FIVE_PRIME_UTR_INTRON_VARIANT; + case THREE_PRIME_UTR_INTRON_VARIANT -> VariantEffect.THREE_PRIME_UTR_INTRON_VARIANT; + case NON_CODING_TRANSCRIPT_EXON_VARIANT -> VariantEffect.NON_CODING_TRANSCRIPT_EXON_VARIANT; + case NON_CODING_TRANSCRIPT_INTRON_VARIANT -> VariantEffect.NON_CODING_TRANSCRIPT_INTRON_VARIANT; + case DIRECT_TANDEM_DUPLICATION -> VariantEffect.DIRECT_TANDEM_DUPLICATION; + case MOBILE_ELEMENT_DELETION -> VariantEffect.MOBILE_ELEMENT_DELETION; + case MOBILE_ELEMENT_INSERTION -> VariantEffect.MOBILE_ELEMENT_INSERTION; + case UPSTREAM_GENE_VARIANT -> VariantEffect.UPSTREAM_GENE_VARIANT; + case DOWNSTREAM_GENE_VARIANT -> VariantEffect.DOWNSTREAM_GENE_VARIANT; + case INTERGENIC_VARIANT -> VariantEffect.INTERGENIC_VARIANT; + case TFBS_ABLATION -> VariantEffect.TFBS_ABLATION; + case TFBS_AMPLIFICATION -> VariantEffect.TFBS_AMPLIFICATION; + case TF_BINDING_SITE_VARIANT -> VariantEffect.TF_BINDING_SITE_VARIANT; + case REGULATORY_REGION_VARIANT -> VariantEffect.REGULATORY_REGION_VARIANT; + case REGULATORY_REGION_ABLATION -> VariantEffect.REGULATORY_REGION_ABLATION; + case REGULATORY_REGION_AMPLIFICATION -> VariantEffect.REGULATORY_REGION_AMPLIFICATION; + case CONSERVED_INTRON_VARIANT -> VariantEffect.CONSERVED_INTRON_VARIANT; + case INTRAGENIC_VARIANT -> VariantEffect.INTRAGENIC_VARIANT; + case CONSERVED_INTERGENIC_VARIANT -> VariantEffect.CONSERVED_INTERGENIC_VARIANT; + case STRUCTURAL_VARIANT -> VariantEffect.STRUCTURAL_VARIANT; + case CODING_SEQUENCE_VARIANT -> 
VariantEffect.CODING_SEQUENCE_VARIANT; + case INTRON_VARIANT -> VariantEffect.INTRON_VARIANT; + case EXON_VARIANT -> VariantEffect.EXON_VARIANT; + case SPLICING_VARIANT -> VariantEffect.SPLICING_VARIANT; + case MIRNA -> VariantEffect.MIRNA; + case CODING_TRANSCRIPT_VARIANT -> VariantEffect.CODING_TRANSCRIPT_VARIANT; + case NON_CODING_TRANSCRIPT_VARIANT -> VariantEffect.NON_CODING_TRANSCRIPT_VARIANT; + case UNRECOGNIZED -> VariantEffect.SEQUENCE_VARIANT; + }; } } diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/ClinVarData.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/ClinVarData.java index 8d3350dc4..07a5ecfc2 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/ClinVarData.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/ClinVarData.java @@ -21,8 +21,7 @@ package org.monarchinitiative.exomiser.core.model.pathogenicity; import com.fasterxml.jackson.annotation.JsonIgnore; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Sets; +import de.charite.compbio.jannovar.annotation.VariantEffect; import java.util.*; @@ -65,6 +64,8 @@ public enum ClinSig { private final String reviewStatus; private final Map includedAlleles; + private final String geneSymbol; + private final VariantEffect variantEffect; // https://www.medschool.umaryland.edu/Genetic_Variant_Interpretation_Tool1.html/ // BP1, Missense variant in a gene for which primarily truncating variants are known to cause disease @@ -115,9 +116,11 @@ private ClinVarData(Builder builder) { this.alleleId = builder.alleleId; this.variationId = builder.variationId; this.primaryInterpretation = builder.primaryInterpretation; - this.secondaryInterpretations = Sets.immutableEnumSet(builder.secondaryInterpretations); + this.secondaryInterpretations = Collections.unmodifiableSet(builder.secondaryInterpretations); 
this.reviewStatus = builder.reviewStatus.replace("_", " "); - this.includedAlleles = ImmutableMap.copyOf(builder.includedAlleles); + this.includedAlleles = Collections.unmodifiableMap(builder.includedAlleles); + this.geneSymbol = builder.geneSymbol; + this.variantEffect = builder.variantEffect; } public static ClinVarData empty() { @@ -153,6 +156,14 @@ public Map getIncludedAlleles() { return includedAlleles; } + public String getGeneSymbol() { + return geneSymbol; + } + + public VariantEffect getVariantEffect() { + return variantEffect; + } + /** * @return true if the secondary CLNSIG contains one of 'affects', 'other', 'association', 'risk factor' or * 'protective'. These are considered unimportant from the mendelian disease perspective. The category 'drug response' @@ -163,49 +174,36 @@ public Map getIncludedAlleles() { @JsonIgnore public boolean isSecondaryAssociationRiskFactorOrOther() { for (ClinVarData.ClinSig secondaryClinSig : secondaryInterpretations) { - switch (secondaryClinSig) { - case AFFECTS: - case OTHER: - case ASSOCIATION: - case RISK_FACTOR: - case PROTECTIVE: - return true; - default: - return false; + if (Objects.requireNonNull(secondaryClinSig) == ClinSig.AFFECTS || secondaryClinSig == ClinSig.OTHER || secondaryClinSig == ClinSig.ASSOCIATION || secondaryClinSig == ClinSig.RISK_FACTOR || secondaryClinSig == ClinSig.PROTECTIVE) { + return true; } } return false; } /** - * Returns the ClinVar star rating according to the criteria provided at - * https://www.ncbi.nlm.nih.gov/clinvar/docs/review_status/#revstat_def - *

- * In the VCF CLNREVSTAT the start ratings are mapped as follows: + * Returns the ClinVar star rating. + * In the VCF CLNREVSTAT the star ratings are mapped as follows: *

+ *

      * 1* criteria_provided,_conflicting_interpretations
      * 1* criteria_provided,_single_submitter
      * 2* criteria_provided,_multiple_submitters,_no_conflicts
      * 3* reviewed_by_expert_panel
      * 4* practice_guideline
+     * 
* * @return an integer value between 0 (worst) and 4 (best) * @since 13.0.0 */ public int starRating() { - switch (reviewStatus) { - case "criteria provided, single submitter": - case "criteria provided, conflicting interpretations": - return 1; - case "criteria provided, multiple submitters, no conflicts": - return 2; - case "reviewed by expert panel": - return 3; - case "practice guideline": - return 4; - default: - return 0; - } + return switch (reviewStatus) { + case "criteria provided, single submitter", "criteria provided, conflicting interpretations" -> 1; + case "criteria provided, multiple submitters, no conflicts" -> 2; + case "reviewed by expert panel" -> 3; + case "practice guideline" -> 4; + default -> 0; + }; } @Override @@ -214,10 +212,10 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; ClinVarData that = (ClinVarData) o; return Objects.equals(alleleId, that.alleleId) && - primaryInterpretation == that.primaryInterpretation && - Objects.equals(secondaryInterpretations, that.secondaryInterpretations) && - Objects.equals(reviewStatus, that.reviewStatus) && - Objects.equals(includedAlleles, that.includedAlleles); + primaryInterpretation == that.primaryInterpretation && + Objects.equals(secondaryInterpretations, that.secondaryInterpretations) && + Objects.equals(reviewStatus, that.reviewStatus) && + Objects.equals(includedAlleles, that.includedAlleles); } @Override @@ -228,12 +226,27 @@ public int hashCode() { @Override public String toString() { return "ClinVarData{" + - "alleleId='" + alleleId + '\'' + - ", primaryInterpretation=" + primaryInterpretation + - ", secondaryInterpretations=" + secondaryInterpretations + - ", reviewStatus='" + reviewStatus + '\'' + - ", includedAlleles=" + includedAlleles + - '}'; + "variationId='" + variationId + '\'' + + ", alleleId='" + alleleId + '\'' + + ", geneSymbol='" + geneSymbol + '\'' + + ", variantEffect='" + variantEffect + '\'' + + ", primaryInterpretation=" + 
primaryInterpretation + + ", secondaryInterpretations=" + secondaryInterpretations + + ", reviewStatus='" + reviewStatus + '\'' + + ", includedAlleles=" + includedAlleles + + '}'; + } + + public Builder toBuilder() { + return new Builder() + .variationId(variationId) + .alleleId(alleleId) + .primaryInterpretation(primaryInterpretation) + .secondaryInterpretations(secondaryInterpretations) + .reviewStatus(reviewStatus) + .includedAlleles(includedAlleles) + .geneSymbol(geneSymbol) + .variantEffect(variantEffect); } public static Builder builder() { @@ -249,6 +262,9 @@ public static class Builder { private String reviewStatus = ""; private Map includedAlleles = Collections.emptyMap(); + private String geneSymbol = ""; + private VariantEffect variantEffect = VariantEffect.SEQUENCE_VARIANT; + public Builder alleleId(String alleleId) { Objects.requireNonNull(alleleId); this.alleleId = alleleId; @@ -269,7 +285,7 @@ public Builder primaryInterpretation(ClinSig primaryInterpretation) { public Builder secondaryInterpretations(Set secondaryInterpretations) { Objects.requireNonNull(secondaryInterpretations); - this.secondaryInterpretations = secondaryInterpretations; + this.secondaryInterpretations = secondaryInterpretations.isEmpty() ? 
Set.of() : EnumSet.copyOf(secondaryInterpretations); return this; } @@ -285,6 +301,18 @@ public Builder includedAlleles(Map includedAlleles) { return this; } + public Builder geneSymbol(String geneSymbol) { + Objects.requireNonNull(geneSymbol); + this.geneSymbol = geneSymbol; + return this; + } + + public Builder variantEffect(VariantEffect variantEffect) { + Objects.requireNonNull(variantEffect); + this.variantEffect = variantEffect; + return this; + } + public ClinVarData build() { return new ClinVarData(this); } diff --git a/exomiser-core/src/main/proto/allele.proto b/exomiser-core/src/main/proto/allele.proto index fd95d30ef..61e8146d1 100644 --- a/exomiser-core/src/main/proto/allele.proto +++ b/exomiser-core/src/main/proto/allele.proto @@ -48,7 +48,74 @@ message ClinVar { string reviewStatus = 4; map includedAlleles = 5; string variationId = 6; -// string gene_symbol = 7; -// string gene_id = 8; -// VariantEffect variant_effect = 9; + string gene_symbol = 7; + string gene_id = 8; + VariantEffect variant_effect = 9; +} + +enum VariantEffect { + SEQUENCE_VARIANT = 0; // n.b. 
this is the LOWEST value in the jannovar enum + CHROMOSOME_NUMBER_VARIATION = 1; + TRANSCRIPT_ABLATION = 2; + EXON_LOSS_VARIANT = 3; + INVERSION = 4; + INSERTION = 5; + TRANSLOCATION = 6; + FRAMESHIFT_ELONGATION = 7; + FRAMESHIFT_TRUNCATION = 8; + FRAMESHIFT_VARIANT = 9; + INTERNAL_FEATURE_ELONGATION = 10; + FEATURE_TRUNCATION = 11; + TRANSCRIPT_AMPLIFICATION = 12; + COPY_NUMBER_CHANGE = 13; + MNV = 14; + COMPLEX_SUBSTITUTION = 15; + STOP_GAINED = 16; + STOP_LOST = 17; + START_LOST = 18; + SPLICE_ACCEPTOR_VARIANT = 19; + SPLICE_DONOR_VARIANT = 20; + RARE_AMINO_ACID_VARIANT = 21; + MISSENSE_VARIANT = 22; + INFRAME_INSERTION = 23; + DISRUPTIVE_INFRAME_INSERTION = 24; + INFRAME_DELETION = 25; + DISRUPTIVE_INFRAME_DELETION = 26; + FIVE_PRIME_UTR_TRUNCATION = 27; + THREE_PRIME_UTR_TRUNCATION = 28; + SPLICE_REGION_VARIANT = 30; + STOP_RETAINED_VARIANT = 31; + INITIATOR_CODON_VARIANT = 32; + SYNONYMOUS_VARIANT = 33; + CODING_TRANSCRIPT_INTRON_VARIANT = 34; + FIVE_PRIME_UTR_PREMATURE_START_CODON_GAIN_VARIANT = 35; + FIVE_PRIME_UTR_EXON_VARIANT = 36; + THREE_PRIME_UTR_EXON_VARIANT = 37; + FIVE_PRIME_UTR_INTRON_VARIANT = 38; + THREE_PRIME_UTR_INTRON_VARIANT = 39; + NON_CODING_TRANSCRIPT_EXON_VARIANT = 40; + NON_CODING_TRANSCRIPT_INTRON_VARIANT = 41; + DIRECT_TANDEM_DUPLICATION = 42; + MOBILE_ELEMENT_DELETION = 43; + MOBILE_ELEMENT_INSERTION = 44; + UPSTREAM_GENE_VARIANT = 45; + DOWNSTREAM_GENE_VARIANT = 46; + INTERGENIC_VARIANT = 47; + TFBS_ABLATION = 48; + TFBS_AMPLIFICATION = 49; + TF_BINDING_SITE_VARIANT = 50; + REGULATORY_REGION_VARIANT = 51; + REGULATORY_REGION_ABLATION = 52; + REGULATORY_REGION_AMPLIFICATION = 53; + CONSERVED_INTRON_VARIANT = 54; + INTRAGENIC_VARIANT = 55; + CONSERVED_INTERGENIC_VARIANT = 56; + STRUCTURAL_VARIANT = 57; + CODING_SEQUENCE_VARIANT = 58; + INTRON_VARIANT = 59; + EXON_VARIANT = 60; + SPLICING_VARIANT = 61; + MIRNA = 62; + CODING_TRANSCRIPT_VARIANT = 63; + NON_CODING_TRANSCRIPT_VARIANT = 64; } \ No newline at end of file diff --git 
a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/model/pathogenicity/ClinVarDataTest.java b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/model/pathogenicity/ClinVarDataTest.java index 2d44ca2d4..2bc235eaf 100644 --- a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/model/pathogenicity/ClinVarDataTest.java +++ b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/model/pathogenicity/ClinVarDataTest.java @@ -20,7 +20,7 @@ package org.monarchinitiative.exomiser.core.model.pathogenicity; -import com.google.common.collect.ImmutableMap; +import de.charite.compbio.jannovar.annotation.VariantEffect; import org.junit.jupiter.api.Test; import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData.ClinSig; @@ -52,12 +52,18 @@ public void testEmptyBuilder() { @Test public void testBuilderWithValues() { String alleleId = "12345"; + String variationId = "23456"; + String geneSymbol = "GENE1"; + VariantEffect variantEffect = VariantEffect.MISSENSE_VARIANT; ClinSig clinSig = ClinSig.PATHOGENIC; Set secondaryInterpretations = EnumSet.of(ClinSig.RISK_FACTOR, ClinSig.ASSOCIATION); String reviewStatus = "multiple_submitters,_no_conflict"; - Map included = ImmutableMap.of("54321", ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC); + Map included = Map.of("54321", ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC); ClinVarData instance = ClinVarData.builder() + .variationId(variationId) .alleleId(alleleId) + .geneSymbol(geneSymbol) + .variantEffect(variantEffect) .primaryInterpretation(clinSig) .secondaryInterpretations(secondaryInterpretations) .reviewStatus(reviewStatus) @@ -65,10 +71,14 @@ public void testBuilderWithValues() { .build(); assertThat(instance.getAlleleId(), equalTo(alleleId)); + assertThat(instance.getVariationId(), equalTo(variationId)); + assertThat(instance.getGeneSymbol(), equalTo(geneSymbol)); + assertThat(instance.getVariantEffect(), equalTo(variantEffect)); assertThat(instance.getPrimaryInterpretation(), 
equalTo(clinSig)); assertThat(instance.getSecondaryInterpretations(), equalTo(secondaryInterpretations)); assertThat(instance.getReviewStatus(), equalTo("multiple submitters, no conflict")); assertThat(instance.getIncludedAlleles(), equalTo(included)); + System.out.println(instance); } @Test @@ -77,7 +87,7 @@ public void testStringValue() { ClinSig clinSig = ClinSig.PATHOGENIC; Set secondaryInterpretations = EnumSet.of(ClinSig.RISK_FACTOR, ClinSig.ASSOCIATION); String reviewStatus = "multiple_submitters,_no_conflict"; - Map included = ImmutableMap.of("54321", ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC); + Map included = Map.of("54321", ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC); ClinVarData instance = ClinVarData.builder() .alleleId(alleleId) .primaryInterpretation(clinSig) diff --git a/exomiser-data-genome/README.md b/exomiser-data-genome/README.md new file mode 100644 index 000000000..8016ca0da --- /dev/null +++ b/exomiser-data-genome/README.md @@ -0,0 +1,49 @@ +Exomiser - Genome DB Build += + +This is a Spring Boot CLI application and as such has one idiosyncrasy which will prevent a build from launching, if not +set in the `application.properties`. + +The absolute requirement for anything, even `--help`, to work is for the `--build-dir` variable to be set and this _must_ +be set using an equals sign i.e. + +```shell +$ java -jar exomiser-data-genome-${project.version}.jar --build-dir=. --help +``` + +By default, this is set in the `application.properties` to `.` i.e. the current working directory but can be overridden. + +Also, note that the `--assembly` and `--version` **must come before any other arguments** in order that they are correctly +set for use with other optional arguments. + +Build transcript databases and build/annotate ClinVar data. The ClinVar data build now requires a transcript database so +that the variants can be annotated for gene symbol and variant effect to be in line with the output from Exomiser. 
Whilst +these are available in the `MC` field of the ClinVar VCF file, the effects are not sorted according to reference +transcript order, so in cases where more than one transcript overlaps a variant, the most damaging effect is reported +first, even if the MANE/MANE_Clinical or canonical transcript has a less damaging effect. + +```shell +$ java -jar exomiser-data-genome-${project.version}.jar --assembly hg38 --version 2311 --transcripts ensembl --clinvar +``` +Will create an output directory `2311_hg38` containing the files: + +```shell +2311_hg38/ +├── 2311_hg38_clinvar.mv.db +└── 2311_hg38_transcripts_ensembl.ser +``` + +Create a new ClinVar database from the latest ClinVar release using an existing Exomiser release, in this case 2309_hg38: + +```shell +$ java -jar exomiser-data-genome-${project.version}.jar --assembly hg38 --version 231112 --clinvar /data/exomiser/2309_hg38/2309_hg38_transcripts_ensembl.ser +``` + +Will just create the ClinVar database, annotated using the specified transcript data: + +```shell +231112_hg38/ +└── 231112_hg38_clinvar.mv.db +``` +_n.b._ here the ClinVar data has been created for the 20231112 release, so it is possible to update the ClinVar data for +Exomiser on a weekly basis to keep up with ClinVar. \ No newline at end of file diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/BuildCommand.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/BuildCommand.java index bf61a247b..2292d0e01 100644 --- a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/BuildCommand.java +++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/BuildCommand.java @@ -20,8 +20,10 @@ package org.monarchinitiative.exomiser.data.genome; +import de.charite.compbio.jannovar.data.JannovarData; import org.monarchinitiative.exomiser.core.genome.GenomeAssembly; import 
org.monarchinitiative.exomiser.core.genome.jannovar.JannovarDataFactory; +import org.monarchinitiative.exomiser.core.genome.jannovar.JannovarDataSourceLoader; import org.monarchinitiative.exomiser.core.genome.jannovar.TranscriptSource; import org.monarchinitiative.exomiser.data.genome.config.AssemblyResources; import org.monarchinitiative.exomiser.data.genome.model.AlleleResource; @@ -36,6 +38,8 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -51,24 +55,23 @@ @Component @Command(name = "build", description = "Command to build the Exomiser genome data bundle.") public class BuildCommand implements Callable { - private static final Logger logger = LoggerFactory.getLogger(BuildCommand.class); @Option(names = {"-h", "--help"}, usageHelp = true, description = "display this help message") boolean usageHelpRequested; - @Option(names = "--build-dir", required = true) - private Path buildDir; + @Option(names = "--build-dir", description = "The directory in which to build the data (default: ${DEFAULT-VALUE}).") + private Path buildDir = Path.of(System.getProperty("user.dir")); private final AssemblyResources hg19Resources; private final AssemblyResources hg38Resources; private final Path jannovarIniFile; - @Option(names = "--assembly", required = true, converter = AssemblyConverter.class, description = "Genome assembly to build the data for - one of hg19 or hg38.") + @Option(names = "--assembly", required = true, converter = AssemblyConverter.class, description = "Genome assembly for the build. Either hg19 or hg38.", order = 0) private GenomeAssembly assembly; - @Option(names = "--version", required = true, description = "Data version for this build. Typically this would be of the form YYMM i.e. 
2308 indicates the data was built in August 2023.") - private String version; - @Option(names = "--clinvar", description = "Flag to trigger building of ClinVar data.") - private boolean buildClinVar; + @Option(names = "--version", description = "Data version for this build. Typically this would be of the form yyMM i.e. 2308 indicates the data was built in August 2023 (default: ${DEFAULT-VALUE}).") + private String version = DateTimeFormatter.ofPattern("yyMM").format(LocalDate.now()); + @Option(names = "--clinvar", arity = "0..1", converter = ClinVarOptionConverter.class, description = "Flag to trigger building of ClinVar data using the specified transcript data file. If not specified, the transcript_ensembl.ser for the current build will be used.") + private Path buildClinVar; @Option(names = "--transcripts", converter = TranscriptSourceConverter.class, split = ",", arity = "0..1", fallbackValue = "ensembl,refseq,ucsc", description = "List of transcript databases to build. If specified without parameter, will build all sources: ${FALLBACK-VALUE}") private List transcriptSources; @Option(names = "--variants", split = ",", arity = "0..1", fallbackValue = "esp,exac,uk10k,topmed,dbsnp,gnomad-exome,gnomad-genome,dbnsfp", description = "List of variant data sources to build. 
If specified without parameter, will build all sources: ${FALLBACK-VALUE}") @@ -102,8 +105,9 @@ public Integer call() throws IOException { BuildInfo buildInfo = BuildInfo.of(assembly, version); String buildString = buildInfo.getBuildString(); logger.info("Building version {}", buildString); - Path outPath = buildDir.resolve(buildString); - logger.info("Build directory set to {}", outPath); + Path outPath = getOutPath(buildDir, buildInfo); + logger.info("Build directory set to {}", buildDir); + logger.info("Build artefacts will be written to {}", outPath); if (!outPath.toFile().exists()) { Files.createDirectories(outPath); } @@ -113,8 +117,8 @@ public Integer call() throws IOException { if (shouldBuildAllData()) { logger.info("BUILDING ALLL THIe THINGS!"); - buildClinVarData(buildInfo, outPath, assemblyResources.getClinVarResource()); buildTranscriptData(buildInfo, outPath, List.of(TranscriptSource.values())); + buildClinVarData(buildInfo, outPath, assemblyResources.getClinVarResource()); buildVariantData(buildInfo, outPath, new ArrayList<>(alleleResources.values())); buildGenomeData(buildInfo, outPath, assemblyResources); } @@ -123,7 +127,7 @@ public Integer call() throws IOException { buildTranscriptData(buildInfo, outPath, transcriptSources); } - if (buildClinVar) { + if (buildClinVar != null) { ClinVarAlleleResource clinVarResource = assemblyResources.getClinVarResource(); buildClinVarData(buildInfo, outPath, clinVarResource); } @@ -141,8 +145,12 @@ public Integer call() throws IOException { return 0; } + private Path getOutPath(Path buildDir, BuildInfo buildInfo) { + return buildDir.resolve(buildInfo.getBuildString()); + } + private boolean shouldBuildAllData() { - return !buildGenome && !buildClinVar && transcriptSources == null && variantSources == null; + return !buildGenome && buildClinVar == null && transcriptSources == null && variantSources == null; } private void buildTranscriptData(BuildInfo buildInfo, Path outPath, List transcriptSources) { @@ -160,11 
+168,21 @@ private void buildTranscriptData(BuildInfo buildInfo, Path outPath, List userDefinedAlleleResources) { logger.info("Downloading variant resources - {}", userDefinedAlleleResources.stream() .map(AlleleResource::getName) @@ -202,6 +220,7 @@ public GenomeAssembly convert(String value) throws Exception { return GenomeAssembly.parseAssembly(value); } } + static class TranscriptSourceConverter implements CommandLine.ITypeConverter { @Override @@ -209,4 +228,20 @@ public TranscriptSource convert(String value) throws Exception { return TranscriptSource.parseValue(value.trim()); } } + + private class ClinVarOptionConverter implements ITypeConverter { + @Override + public Path convert(String value) throws Exception { + if (value == null) { + return null; + } + return value.isEmpty() ? fallbackPath() : Path.of(value); + } + + public Path fallbackPath() { + BuildInfo buildInfo = BuildInfo.of(assembly, version); + logger.info("Transcript file path for ClinVar annotation not specified for build {}. 
Using fallback path...", buildInfo.getBuildString()); + return getOutPath(buildDir, buildInfo).resolve(TranscriptDataBuildRunner.transcriptFileName(buildInfo, TranscriptSource.ENSEMBL)); + } + } } diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/ClinVarBuildRunner.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/ClinVarBuildRunner.java index 912f71659..28441cdf9 100644 --- a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/ClinVarBuildRunner.java +++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/ClinVarBuildRunner.java @@ -1,20 +1,33 @@ package org.monarchinitiative.exomiser.data.genome; +import de.charite.compbio.jannovar.data.JannovarData; import org.h2.mvstore.MVMap; import org.h2.mvstore.MVStore; import org.h2.mvstore.MVStoreTool; +import org.monarchinitiative.exomiser.core.genome.GenomeAssembly; +import org.monarchinitiative.exomiser.core.genome.JannovarVariantAnnotator; +import org.monarchinitiative.exomiser.core.genome.VariantAnnotator; import org.monarchinitiative.exomiser.core.genome.dao.serialisers.MvStoreUtil; +import org.monarchinitiative.exomiser.core.model.ChromosomalRegionIndex; +import org.monarchinitiative.exomiser.core.model.VariantAnnotation; +import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData; import org.monarchinitiative.exomiser.core.proto.AlleleProto; import org.monarchinitiative.exomiser.data.genome.indexers.AlleleConverter; import org.monarchinitiative.exomiser.data.genome.model.Allele; import org.monarchinitiative.exomiser.data.genome.model.BuildInfo; import org.monarchinitiative.exomiser.data.genome.model.resource.ClinVarAlleleResource; +import org.monarchinitiative.svart.CoordinateSystem; +import org.monarchinitiative.svart.Coordinates; +import org.monarchinitiative.svart.GenomicVariant; +import org.monarchinitiative.svart.Strand; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.List; +import java.util.Map; import java.util.stream.Stream; @@ -26,12 +39,16 @@ public class ClinVarBuildRunner { private final BuildInfo buildInfo; private final ClinVarAlleleResource clinVarAlleleResource; private final Path outFile; + private final VariantAnnotator variantAnnotator; + private final GenomeAssembly genomeAssembly; - public ClinVarBuildRunner(BuildInfo buildInfo, Path outDir, ClinVarAlleleResource clinVarAlleleResource) { + public ClinVarBuildRunner(BuildInfo buildInfo, Path outDir, ClinVarAlleleResource clinVarAlleleResource, JannovarData jannovarData) { this.outDir = outDir.toAbsolutePath(); this.buildInfo = buildInfo; this.clinVarAlleleResource = clinVarAlleleResource; - outFile = outDir.toAbsolutePath().resolve(buildInfo.getBuildString() + "_clinvar.mv.db"); + this.outFile = outDir.toAbsolutePath().resolve(buildInfo.getBuildString() + "_clinvar.mv.db"); + genomeAssembly = buildInfo.getAssembly(); + variantAnnotator = new JannovarVariantAnnotator(genomeAssembly, jannovarData, ChromosomalRegionIndex.empty()); } public Path getOutFile() { @@ -52,8 +69,11 @@ public void run() { try (Stream alleleStream = clinVarAlleleResource.parseResource()) { alleleStream .forEach(allele -> { + ClinVarData clinVarData = annotateClinvar(allele); + allele.setClinVarData(clinVarData); + logger.debug("{}-{}-{}-{} {}", allele.getChr(), allele.getPos(), allele.getRef(), allele.getAlt(), clinVarData); var alleleKey = AlleleConverter.toAlleleKey(allele); - var clinvarProto = AlleleConverter.toProtoClinVar(allele.getClinVarData()); + var clinvarProto = AlleleConverter.toProtoClinVar(clinVarData); clinVarMap.put(alleleKey, clinvarProto); }); } @@ -64,4 +84,18 @@ public void run() { logger.info("Compacting MVStore"); MVStoreTool.compact(outFileName, true); } + + private ClinVarData annotateClinvar(Allele allele) { + GenomicVariant genomicVariant = 
GenomicVariant.of(genomeAssembly.getContigById(allele.getChr()), Strand.POSITIVE, Coordinates.ofAllele(CoordinateSystem.ONE_BASED, allele.getPos(), allele.getRef()), allele.getRef(), allele.getAlt()); + List variantAnnotations = variantAnnotator.annotate(genomicVariant); + if (!variantAnnotations.isEmpty()) { + VariantAnnotation variantAnnotation = variantAnnotations.get(0); + return allele.getClinVarData() + .toBuilder() + .geneSymbol(variantAnnotation.getGeneSymbol()) + .variantEffect(variantAnnotation.getVariantEffect()) + .build(); + } + return allele.getClinVarData(); + } } diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/TranscriptDataBuildRunner.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/TranscriptDataBuildRunner.java index b9ecdbacc..6095d57da 100644 --- a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/TranscriptDataBuildRunner.java +++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/TranscriptDataBuildRunner.java @@ -48,9 +48,13 @@ public TranscriptDataBuildRunner(BuildInfo buildInfo, JannovarDataFactory jannov this.transcriptSources = transcriptSources; } + public static String transcriptFileName(BuildInfo buildInfo, TranscriptSource transcriptSource) { + return buildInfo.getBuildString() + "_transcripts_" + transcriptSource + ".ser"; + } + public void run() { transcriptSources.forEach(transcriptSource -> { - String outputName = String.format("%s_transcripts_%s.ser", buildInfo.getBuildString(), transcriptSource); + String outputName = transcriptFileName(buildInfo, transcriptSource); logger.info("Building {}", outputName); jannovarDataFactory.buildAndWrite(buildInfo.getAssembly(), transcriptSource, outPath.resolve(outputName)); }); diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverter.java 
b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverter.java index c90b4e556..422b2e096 100644 --- a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverter.java +++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverter.java @@ -20,7 +20,9 @@ package org.monarchinitiative.exomiser.data.genome.indexers; +import de.charite.compbio.jannovar.annotation.VariantEffect; import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData; +import org.monarchinitiative.exomiser.core.proto.AlleleProto; import org.monarchinitiative.exomiser.core.proto.AlleleProto.AlleleKey; import org.monarchinitiative.exomiser.core.proto.AlleleProto.AlleleProperties; import org.monarchinitiative.exomiser.core.proto.AlleleProto.ClinVar; @@ -91,43 +93,114 @@ public static ClinVar toProtoClinVar(ClinVarData clinVarData) { for (Map.Entry entry : clinVarData.getIncludedAlleles().entrySet()) { builder.putIncludedAlleles(entry.getKey(), toProtoClinSig(entry.getValue())); } + builder.setGeneSymbol(clinVarData.getGeneSymbol()); + builder.setVariantEffect(toProtoVariantEffect(clinVarData.getVariantEffect())); return builder.build(); } private static ClinVar.ClinSig toProtoClinSig(ClinVarData.ClinSig clinSig) { - switch (clinSig){ - case BENIGN: - return ClinVar.ClinSig.BENIGN; - case BENIGN_OR_LIKELY_BENIGN: - return ClinVar.ClinSig.BENIGN_OR_LIKELY_BENIGN; - case LIKELY_BENIGN: - return ClinVar.ClinSig.LIKELY_BENIGN; - case UNCERTAIN_SIGNIFICANCE: - return ClinVar.ClinSig.UNCERTAIN_SIGNIFICANCE; - case LIKELY_PATHOGENIC: - return ClinVar.ClinSig.LIKELY_PATHOGENIC; - case PATHOGENIC_OR_LIKELY_PATHOGENIC: - return ClinVar.ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC; - case PATHOGENIC: - return ClinVar.ClinSig.PATHOGENIC; - case CONFLICTING_PATHOGENICITY_INTERPRETATIONS: - return ClinVar.ClinSig.CONFLICTING_PATHOGENICITY_INTERPRETATIONS; - case AFFECTS: - 
return ClinVar.ClinSig.AFFECTS; - case ASSOCIATION: - return ClinVar.ClinSig.ASSOCIATION; - case DRUG_RESPONSE: - return ClinVar.ClinSig.DRUG_RESPONSE; - case NOT_PROVIDED: - return ClinVar.ClinSig.NOT_PROVIDED; - case OTHER: - return ClinVar.ClinSig.OTHER; - case PROTECTIVE: - return ClinVar.ClinSig.PROTECTIVE; - case RISK_FACTOR: - return ClinVar.ClinSig.RISK_FACTOR; - } - throw new IllegalArgumentException(clinSig + " not a recognised value"); + return switch (clinSig) { + case BENIGN -> ClinVar.ClinSig.BENIGN; + case BENIGN_OR_LIKELY_BENIGN -> ClinVar.ClinSig.BENIGN_OR_LIKELY_BENIGN; + case LIKELY_BENIGN -> ClinVar.ClinSig.LIKELY_BENIGN; + case UNCERTAIN_SIGNIFICANCE -> ClinVar.ClinSig.UNCERTAIN_SIGNIFICANCE; + case LIKELY_PATHOGENIC -> ClinVar.ClinSig.LIKELY_PATHOGENIC; + case PATHOGENIC_OR_LIKELY_PATHOGENIC -> ClinVar.ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC; + case PATHOGENIC -> ClinVar.ClinSig.PATHOGENIC; + case CONFLICTING_PATHOGENICITY_INTERPRETATIONS -> ClinVar.ClinSig.CONFLICTING_PATHOGENICITY_INTERPRETATIONS; + case AFFECTS -> ClinVar.ClinSig.AFFECTS; + case ASSOCIATION -> ClinVar.ClinSig.ASSOCIATION; + case DRUG_RESPONSE -> ClinVar.ClinSig.DRUG_RESPONSE; + case NOT_PROVIDED -> ClinVar.ClinSig.NOT_PROVIDED; + case OTHER -> ClinVar.ClinSig.OTHER; + case PROTECTIVE -> ClinVar.ClinSig.PROTECTIVE; + case RISK_FACTOR -> ClinVar.ClinSig.RISK_FACTOR; + }; + } + + public static AlleleProto.VariantEffect toProtoVariantEffect(VariantEffect variantEffect) { + return switch (variantEffect) { + case CHROMOSOME_NUMBER_VARIATION -> AlleleProto.VariantEffect.CHROMOSOME_NUMBER_VARIATION; + case TRANSCRIPT_ABLATION -> AlleleProto.VariantEffect.TRANSCRIPT_ABLATION; + case EXON_LOSS_VARIANT -> AlleleProto.VariantEffect.EXON_LOSS_VARIANT; + case INVERSION -> AlleleProto.VariantEffect.INVERSION; + case INSERTION -> AlleleProto.VariantEffect.INSERTION; + case TRANSLOCATION -> AlleleProto.VariantEffect.TRANSLOCATION; + case FRAMESHIFT_ELONGATION -> 
AlleleProto.VariantEffect.FRAMESHIFT_ELONGATION; + case FRAMESHIFT_TRUNCATION -> AlleleProto.VariantEffect.FRAMESHIFT_TRUNCATION; + case FRAMESHIFT_VARIANT -> AlleleProto.VariantEffect.FRAMESHIFT_VARIANT; + case INTERNAL_FEATURE_ELONGATION -> AlleleProto.VariantEffect.INTERNAL_FEATURE_ELONGATION; + case FEATURE_TRUNCATION -> AlleleProto.VariantEffect.FEATURE_TRUNCATION; + case TRANSCRIPT_AMPLIFICATION -> AlleleProto.VariantEffect.TRANSCRIPT_AMPLIFICATION; + case COPY_NUMBER_CHANGE -> AlleleProto.VariantEffect.COPY_NUMBER_CHANGE; + case MNV -> AlleleProto.VariantEffect.MNV; + case COMPLEX_SUBSTITUTION -> AlleleProto.VariantEffect.COMPLEX_SUBSTITUTION; + case STOP_GAINED -> AlleleProto.VariantEffect.STOP_GAINED; + case STOP_LOST -> AlleleProto.VariantEffect.STOP_LOST; + case START_LOST -> AlleleProto.VariantEffect.START_LOST; + case SPLICE_ACCEPTOR_VARIANT -> AlleleProto.VariantEffect.SPLICE_ACCEPTOR_VARIANT; + case SPLICE_DONOR_VARIANT -> AlleleProto.VariantEffect.SPLICE_DONOR_VARIANT; + case RARE_AMINO_ACID_VARIANT -> AlleleProto.VariantEffect.RARE_AMINO_ACID_VARIANT; + // unused marker + case _SMALLEST_HIGH_IMPACT -> AlleleProto.VariantEffect.SEQUENCE_VARIANT; + case MISSENSE_VARIANT -> AlleleProto.VariantEffect.MISSENSE_VARIANT; + case INFRAME_INSERTION -> AlleleProto.VariantEffect.INFRAME_INSERTION; + case DISRUPTIVE_INFRAME_INSERTION -> AlleleProto.VariantEffect.DISRUPTIVE_INFRAME_INSERTION; + case INFRAME_DELETION -> AlleleProto.VariantEffect.INFRAME_DELETION; + case DISRUPTIVE_INFRAME_DELETION -> AlleleProto.VariantEffect.DISRUPTIVE_INFRAME_DELETION; + case FIVE_PRIME_UTR_TRUNCATION -> AlleleProto.VariantEffect.FIVE_PRIME_UTR_TRUNCATION; + case THREE_PRIME_UTR_TRUNCATION -> AlleleProto.VariantEffect.THREE_PRIME_UTR_TRUNCATION; + // unused marker + case _SMALLEST_MODERATE_IMPACT -> AlleleProto.VariantEffect.SEQUENCE_VARIANT; + case SPLICE_REGION_VARIANT -> AlleleProto.VariantEffect.SPLICE_REGION_VARIANT; + case STOP_RETAINED_VARIANT -> 
AlleleProto.VariantEffect.STOP_RETAINED_VARIANT; + case INITIATOR_CODON_VARIANT -> AlleleProto.VariantEffect.INITIATOR_CODON_VARIANT; + case SYNONYMOUS_VARIANT -> AlleleProto.VariantEffect.SYNONYMOUS_VARIANT; + case CODING_TRANSCRIPT_INTRON_VARIANT -> AlleleProto.VariantEffect.CODING_TRANSCRIPT_INTRON_VARIANT; + case FIVE_PRIME_UTR_PREMATURE_START_CODON_GAIN_VARIANT -> AlleleProto.VariantEffect.FIVE_PRIME_UTR_PREMATURE_START_CODON_GAIN_VARIANT; + case FIVE_PRIME_UTR_EXON_VARIANT -> AlleleProto.VariantEffect.FIVE_PRIME_UTR_EXON_VARIANT; + case THREE_PRIME_UTR_EXON_VARIANT -> AlleleProto.VariantEffect.THREE_PRIME_UTR_EXON_VARIANT; + case FIVE_PRIME_UTR_INTRON_VARIANT -> AlleleProto.VariantEffect.FIVE_PRIME_UTR_INTRON_VARIANT; + case THREE_PRIME_UTR_INTRON_VARIANT -> AlleleProto.VariantEffect.THREE_PRIME_UTR_INTRON_VARIANT; + case NON_CODING_TRANSCRIPT_EXON_VARIANT -> AlleleProto.VariantEffect.NON_CODING_TRANSCRIPT_EXON_VARIANT; + case NON_CODING_TRANSCRIPT_INTRON_VARIANT -> AlleleProto.VariantEffect.NON_CODING_TRANSCRIPT_INTRON_VARIANT; + // unused marker + case _SMALLEST_LOW_IMPACT -> AlleleProto.VariantEffect.SEQUENCE_VARIANT; + case DIRECT_TANDEM_DUPLICATION -> AlleleProto.VariantEffect.DIRECT_TANDEM_DUPLICATION; + case MOBILE_ELEMENT_DELETION -> AlleleProto.VariantEffect.MOBILE_ELEMENT_DELETION; + case MOBILE_ELEMENT_INSERTION -> AlleleProto.VariantEffect.MOBILE_ELEMENT_INSERTION; + // unused + case CUSTOM -> AlleleProto.VariantEffect.SEQUENCE_VARIANT; + case UPSTREAM_GENE_VARIANT -> AlleleProto.VariantEffect.UPSTREAM_GENE_VARIANT; + case DOWNSTREAM_GENE_VARIANT -> AlleleProto.VariantEffect.DOWNSTREAM_GENE_VARIANT; + case INTERGENIC_VARIANT -> AlleleProto.VariantEffect.INTERGENIC_VARIANT; + case TFBS_ABLATION -> AlleleProto.VariantEffect.TFBS_ABLATION; + case TFBS_AMPLIFICATION -> AlleleProto.VariantEffect.TFBS_AMPLIFICATION; + case TF_BINDING_SITE_VARIANT -> AlleleProto.VariantEffect.TF_BINDING_SITE_VARIANT; + case REGULATORY_REGION_VARIANT -> 
AlleleProto.VariantEffect.REGULATORY_REGION_VARIANT; + case REGULATORY_REGION_ABLATION -> AlleleProto.VariantEffect.REGULATORY_REGION_ABLATION; + case REGULATORY_REGION_AMPLIFICATION -> AlleleProto.VariantEffect.REGULATORY_REGION_AMPLIFICATION; + case CONSERVED_INTRON_VARIANT -> AlleleProto.VariantEffect.CONSERVED_INTRON_VARIANT; + case INTRAGENIC_VARIANT -> AlleleProto.VariantEffect.INTRAGENIC_VARIANT; + case CONSERVED_INTERGENIC_VARIANT -> AlleleProto.VariantEffect.CONSERVED_INTERGENIC_VARIANT; + case STRUCTURAL_VARIANT -> AlleleProto.VariantEffect.STRUCTURAL_VARIANT; + case CODING_SEQUENCE_VARIANT -> AlleleProto.VariantEffect.CODING_SEQUENCE_VARIANT; + case INTRON_VARIANT -> AlleleProto.VariantEffect.INTRON_VARIANT; + case EXON_VARIANT -> AlleleProto.VariantEffect.EXON_VARIANT; + case SPLICING_VARIANT -> AlleleProto.VariantEffect.SPLICING_VARIANT; + case MIRNA -> AlleleProto.VariantEffect.MIRNA; + // unused + case GENE_VARIANT -> AlleleProto.VariantEffect.SEQUENCE_VARIANT; + case CODING_TRANSCRIPT_VARIANT -> AlleleProto.VariantEffect.CODING_TRANSCRIPT_VARIANT; + case NON_CODING_TRANSCRIPT_VARIANT -> AlleleProto.VariantEffect.NON_CODING_TRANSCRIPT_VARIANT; + // unused + case TRANSCRIPT_VARIANT -> AlleleProto.VariantEffect.SEQUENCE_VARIANT; + // unused + case INTERGENIC_REGION -> AlleleProto.VariantEffect.SEQUENCE_VARIANT; + // unused + case CHROMOSOME -> AlleleProto.VariantEffect.SEQUENCE_VARIANT; + case SEQUENCE_VARIANT -> AlleleProto.VariantEffect.SEQUENCE_VARIANT; + }; } } diff --git a/exomiser-data-genome/src/main/resources/application.properties b/exomiser-data-genome/src/main/resources/application.properties index 778d1f79c..e0b8cef13 100644 --- a/exomiser-data-genome/src/main/resources/application.properties +++ b/exomiser-data-genome/src/main/resources/application.properties @@ -20,7 +20,7 @@ spring.flyway.enabled=false spring.h2.console.enabled=true # build-dir defines the main directory which will be used to build the databases. -build-dir= +build-dir=. 
build-version=1711 jannovar.ini-file=${build-dir}/default_sources.ini dbsnp-ftp-url=ftp://ftp.ncbi.nlm.nih.gov/snp/latest_release/VCF diff --git a/exomiser-data-genome/src/main/resources/logback-spring.xml b/exomiser-data-genome/src/main/resources/logback-spring.xml new file mode 100644 index 000000000..f69a24573 --- /dev/null +++ b/exomiser-data-genome/src/main/resources/logback-spring.xml @@ -0,0 +1,26 @@ + + + + + + + + \ No newline at end of file diff --git a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/ClinVarBuildRunnerTest.java b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/ClinVarBuildRunnerTest.java index 7e326a2f7..551d33363 100644 --- a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/ClinVarBuildRunnerTest.java +++ b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/ClinVarBuildRunnerTest.java @@ -1,5 +1,6 @@ package org.monarchinitiative.exomiser.data.genome; +import de.charite.compbio.jannovar.data.JannovarData; import org.h2.mvstore.MVMap; import org.h2.mvstore.MVStore; import org.junit.jupiter.api.Test; @@ -7,6 +8,7 @@ import org.monarchinitiative.exomiser.core.genome.GenomeAssembly; import org.monarchinitiative.exomiser.core.genome.dao.ClinVarWhiteListReader; import org.monarchinitiative.exomiser.core.genome.dao.serialisers.MvStoreUtil; +import org.monarchinitiative.exomiser.core.genome.jannovar.JannovarDataSourceLoader; import org.monarchinitiative.exomiser.core.proto.AlleleProto; import org.monarchinitiative.exomiser.data.genome.model.BuildInfo; import org.monarchinitiative.exomiser.data.genome.model.resource.ClinVarAlleleResource; @@ -30,7 +32,10 @@ void run(@TempDir Path tempDir) throws Exception { ClinVarAlleleResource clinVarAlleleResource = new ClinVarAlleleResource("clinvar", new URL("https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.gz"), testResourcePath); 
ResourceDownloader.download(clinVarAlleleResource); - ClinVarBuildRunner instance = new ClinVarBuildRunner(buildInfo, tempDir, clinVarAlleleResource); + Path testJannovarFilePath = Path.of("src/test/resources/clinvar-test-transcript-data.ser"); + JannovarData jannovarData = JannovarDataSourceLoader.loadJannovarData(testJannovarFilePath); + + ClinVarBuildRunner instance = new ClinVarBuildRunner(buildInfo, tempDir, clinVarAlleleResource, jannovarData); instance.run(); Path outputFile = instance.getOutFile(); @@ -40,7 +45,7 @@ void run(@TempDir Path tempDir) throws Exception { // The ClinVar data is used for the ClinVarDao and for building the WhiteList (along with optional user data) MVMap clinvar = MvStoreUtil.openClinVarMVMap(clinvarStore); assertThat(clinvar.size(), equalTo(2000)); - + clinvar.values().forEach(clinvarProto -> assertThat(clinvarProto.getVariantEffect() != AlleleProto.VariantEffect.SEQUENCE_VARIANT, is(true))); Set whiteListAlleleKeys = ClinVarWhiteListReader.readVariantWhiteList(clinvarStore); assertThat(whiteListAlleleKeys.size(), equalTo(23)); } diff --git a/exomiser-data-genome/src/test/resources/clinvar-test-transcript-data.ser b/exomiser-data-genome/src/test/resources/clinvar-test-transcript-data.ser new file mode 100644 index 0000000000000000000000000000000000000000..d68da28ccc88707a55dd7c6a7d57727481c062e0 GIT binary patch literal 11558 zcmV+>E!omaR8T@6iwFP!00000|LuKguw7S~UCU^Adh&4fBuj?wrn;!^)G4R(a__mb zrKp#t{&g`+f{Tbc-r#Ij0rf-g)o%OrY;b_#K&u64Ab>|p%$*oqGdcN07 z-!xqY^Z7V+>33enF1gj}($8I9*6K2x&&R3DptF))a;w#4kh`pGbs5d)5U~na{_m%eb?KU2?0{C0(?izG=El zQvi4=k1%OQ`)I?oICY8>(nVXn?>vO(xRx8T&q)-f#a;xpmV%BN84xI;}og4m#Fn+*z+ax!Ib~I8SJOt5cSQ_10<9nNz3S zY~5&*ZZy~QnPgFzvp&<#2KC9!R-fs7{-WuVWnn|^Q+GQX*~gmA;A)eFjjc{u7B*U^ zUgt!0%FX6RbuX>K~v;-%$dSp2`DUWPd?7W;KvnKoW>SIx{tn>6~}>bN?|lUeg4qS8a4^v}Z=h_Rd&W&R` z&wTTNf0i1@b^iCYPmNOJ_|AvVy`-NSCv-ml*2lZ4(dpd!lXv~&)L7s7^1XNeqtuw| zeEz2Q{XuGM>^%B|vtOASCv{$YhEHg%r9`sR~T-2+=PDr1r;k-{@`Iyg5-DCbQ{|V7f%~!OuqYw6@ 
z+(w^#e)=liC4J#SzO}Pk^5dwN?V@1n|V$0+4=f;WVR%>AI*;DZO;OGvC@4 z%$is>etKLvV!><$?9jf?LvSWt`6Dk?9$dar-l#nKyk*>Ln;eAJV|8q3HmBANv*39x zDduWhk)<2UXJY0l3%s3uZ<8{4ju2|>wX-d>Rn6Mqxr3OF37Tn19;)q`eFn&YeHH8+ zm+zAI)Ak8UlLu*1Y4bkigrwF11ava$PU~$OXV-Xt>i#o!o;tTSH~QV-V7%>Bv-!mB zySANnM%cc4d;aN?p2^Z~-yQdD|C9XFRpaTf+uL^H?3x`R*@5)UY;K&=8%*k9y2(2G zdjIQpZQFAifn?i1on5^voSM)5gIDbeJ5T9%`@LS>KdWgETkY|yw*B60XW#Z$@8AB~ zGtwGu-@kw7p51fD?%s7qreJB)w>V3H6y7YNUZ#WuthtoW))BS!7>p#6Io&JV5 z{QLP{8rJOTyLay0wG9B!ANq<)C z+c^7Mv#Vz=TGP2`&D?Lzsddh#Hh7MwgKh@THG5Cn{+b=Tdj_A+zGGZnd&k(|^Ww9| z)!Y8Z_+)+m)3ee3;-8JHE8dCr4Idg;cYPS`k9}lZz3>sVzy9CG)n8tL_61jts|!Ag z_P0MeuD*K}+F$$c>H^>e014$aNzeLjT4Io%Q72;@VG+gv zz?OB^SU$NSAmZ4OUw$E#0i(OKIYNpTY8&aAl^}l%?3KCqycIM85@#;si|_tUxF5G3+8D< zfZfsS9qmTQT z-7;CpSRV~r16SB?6tET^9X&S9Y!0kHHV?}XAu|E{(oES=7;Iy4eIFx-6#?gFYN8AT zggm%V7DOiAi#6a!SX)?LSSuJ7t{4`rS-OZ#BfBv!BTb9!NPC!PPO}Q`VtQ*!mInh< z3kAXwM2{VSvBJVTZV$DJCJaDYX%ee)DV$}uPpL=nnuYYKj4V*+C%6h$0kNZ_XcL9#KY zF`BSIJp-2m+$R9{LbIE3!O(3_LAnCe2Konqlx#pz1TsZtO>S-208!seUwamVxMPI2 z`T}17U)w5M1#B?z*CrCMcLXi~Jycah8F-8N82`oHBk;^_6R~PZA!8w9g13FOR&6|q^^ef!IB4$cv=76V=8;L9BRA2bKY&45KD zt%cS^j#HGuOE)d9i4bR?cw=F(Z(McE)iomQ%K}1WTmx>-3v-qof@Ozb*&$eV2$mg! z|4)bD;tidPH<&|!){i$YatM0e#li6>6Y;;~&ZQ2)lfR_k`160q&Ow?>{lhte)Ut>0 z8}1?enwk1C3twj8%Pf4Eg)g)4WfuN-%ED>ch4n!zfk|S6BX!OI)@g!em?pX?gYp6_ z4%FG7Wv2gYWcojx>-=!em_AycSi6wv_b1c&!Q85y`$G>kgkN}aT3!Ca)Oh}Po|;xa zcz$}azW?r1Xg_ht&Z%!-#W-gaFQNT2E~dyR45w92xkzIJ{z9_r092k<#X-0bu5>8v zGR+s00ql4PCfVh}4;JTzV!Hx#$pXN3r(m213>*x zst5OrbbKUXPQ(Kk3ID;#^>QReSaCz}vIH5Atm+P411j(gr4~Zes5{x#X&`Dvy94;sb-~+6U2o{Lw-6 zk;l+}z>R-~JTZ4}NY~z57i(~yedWPn^^vcm{iAOTtH1thw6Fi>Fps%@W&;7k(LXRWF<+*T5>JIHkY+Wq za!}}odzGJh6d8MZX#fO39MBKyB|hs_Pt9O}_>V9Ce~CSK8V@2;AbpPXY|Zs_QPsEj2PBL>On@nZ4}$t{Pa_kw zHlBbMJS*2lR`d>@)Z0x7IgA(`}*Z8vO= zp%pucWV%j_k0Bz5dg`bP(M9GK1v&IUdLxRoGX!b|Q@q$cZVP-C{kPf2$S$C_a79WW zBv6dt`^q3`UCB^m%kf`$zJvQi5(_IxkAmS51io5~z0g$WiWTz=!61YrCL9<`YD}%? 
zEWIc=<(UpI+ChOmxV&6RPQUp5jLXpCIyuDVlu}<9@NlseLu5oGh>u4`Y3Q#wI(42# zJIZVFAmucK_IQTpoTM*wW;JETX6O=@S$MkRI@uQ$mWz+SWLV-+5PpaZ?KPrF2wt^f zNVf6_chXy9ECx8^^h0P@l)tTM`m)o&D+Wf07>Ex6pSu;c zLQq9qivca7QlQ)E6-;H|#&{?rcno7i#U!9*QbcN?iLUb?gCdGppeR+0z61q)Y`8Dh zAu{w60w3IAJQC)K3k@NU&F(9xrC26{N{;b(a#TSXeQkyqxuYS@W_Sfk-U1w zo?}q3*A=T^HxvaGPQtrY2-meEU@W_T>1|~~t%V$OqwE(u# zSCJYxFIxrhHSTOE$rOV2IqIfx8W?-g1Uu!Y1B%bsQdiu|Q3X zPTszmK1Ogt;6I*cNkxhi0H3N5dW7&I0|*i>$^j#l^nSPuP7((o*wZn^HvSK;`% z^V0E)*Isw#$*rG1dv-Z~GXMN9-d&Cly(b+%a9KIN@tSme{in+DrPuQM_1&8l8g>k` zfSoZO4<-er3A8o{G|2RO#~7?QLlmV7d|WUvpP>*ssJiG8%?mSHCAy$Cj3y$rUXfC9 zMh&2_oPcL1eHkt-zbiI}aYIa_XkZNTXD~GxGa(;G%s4vl>y=O;qwz?*6nhN&0n~Gh zK@}eZmKqRKTquz#1t7eB44=hLaZQ3q-D7O*qN9VuE(~V?2iecTm2hHrpcqQ+*=Y4!BQl5%!|9pza3aVIlz6&6j7j|sPL4-EIWE4k|J$6r^vbarhFdfY zzv!CuHz^>@%fw>ib((jvT+J*XB5R&bqps=OE8xgMAjJxg+YcQ{x0gQ>ZP#SD%#1`v zsbY{X5lHjt+ailcx3h7>isVS8koPN@slxkZygea8;$@=EbRNmG zl+r|Ypb!>%Zgl${vPX3IwTvR!)*~wUpxh9xw+n@y`$DS`C7?wQgd)*CsbX11x?f^z z!Wb-&9^fvVQVB^c8ZgAPlAfpTqPTD@C!h!EG9s#c>^VxYO}YZfoENCY63GN8F^lAi zo9iX4!AoCaSa5y9Toj5zz6AZgI67P&#$FZ1j$ZPHsk>5#K}RYydZC~=)X%7#FT;?- z;1!`U&p8%S&b`n8T%l=#e+8Nt3ihJMB6Y>Xi^^6(oFe?uP&COTGz$CSSkOl-$ZNB4 z*ultLY`~-FPDx;6&^ay)H!C#J3}wzhA5oxS3&*`6>tvNsl>3Uh3TTci@h|I5J|DF0 z)UYP};ABY~#ew+|cb9B>dQ^@Uh&euGd+Gm>EEIAb9luJ1|jFEwyz(N3!f=C`w6qe(N3SgGhqlPLUF4C28q4;$6_6pcK2e~8}f}i48 zA%y8$-boHFq#!Rp@G3^W(*m}H??Z^NM8V7di_-1Ta^Rs+A!=pqAHIoz;2s38F!N3x zKb1g%h2R>=#ie|iEgdKOAFimmV%l-oTL%PAp!r zB3U$U#ZeMfWb}$xRxFg%j;bES{z4OzrX)DM;DVMw?lNb_=olSrh$4}UD}w#?QX0mo znZI+TAVC@RCUz+6j1I&g0)oaGTN7lUf%jJ2f-y0~*eH>a&NB4hyg$A(1!0^;PT-*4 z_R7Wy+#sh9nem)o>L15f4zP^9-g&tZ?E@HhJ!uh`N`ot z!8&z)`Qn7sjPL!2KmAv|ad$~l%3P!+NvUbw8}$2&e8ge;cCnAxPjj6nC-Vp|l&Gh@ z{^&4nVs|j@FMV^waR2M3<0Gfc50inqdvoXR&E^lHbyeFROf%_?r;Gi;+p@oRSK|-b zamydP?U&&X=IhsoHaC@10CP&e?9j5Gxa=n``-#hb;?bb!iOYWCvY)u@CocPm z|FHc;dw1iqv-tPXSv<74a|q5NTGuZqSL{!Q(_SuDe9dV)PV0@WT=Dzg8dX1jX=LS! 
zcYb?R-TmC?WPN|_chJ84d!y>Z_n`ggJ)`QU{}=7+?j2R{y$|g#{b*F(d_USB`pKwz z_5rkScyLtR`7^XHeP~qO_AuJ_JvORtc^vKcKQXF)@&wxFKQ*ep^fcN}KWk&F-@H+U zzW^9gOjf?p)sQD)Dgh2>M!;Y-z$u{~cvYs!=TtXyNkKCX=; zB7)TNrWk`G%?4iybef3Cu{boIoE5K_g|VT5c654Kn-%54d&!L!79}VF6c4qm7>f%m z+KEJJANBB<2j!==VEIp>;Et!PjzjkZ zVY&ulXV$*gfH`np;Q38SDAU+hZQ0$AEMYD~PXAd`~H5l`1=xKm2!j!8T z4wLr9HF9`KvL|7n2Z&)PFon>SD;$Dx2T#WUg-`$lr;uJV7|YVpy^=6mk(wMiPw(? zK==dClXwgg1W<#CSRB_3hT7y%8tG`1jQg?5Qj(zZeWEut$h1un&q$*xM*-l5Dp8_r z6tM=j!+J#7NCih>0aV1zXlyKA1c+*uO+H!RW%GExgo`E(OaQ8nII&GU0s|vzbg^3qGO5}-%7+E%M%@8Fz zo|qUFgu~q#aCZzt!E&-H(MD7J*2Sr*>8N+OSCS9ANA)ps&xQ4f{r+??Jj^@E>p{2f zFR42Oab8k)I2uo;gUKQ{VOo#s#gAsz-BG_D9-;1VI_anDZ!A8-Z1xex-N|4uINXCy zZ~N(E`*xe(@bHzVRPX%4 zR`VM!|L7^zU);F$WPSg@Ri{|{+#64+F1Q)(%P-qf9lUG{Uchqamg?o=$+)UGWIohaTNhz4LLj?|FPn_1!1X{?LQgUr{4Z^(4!jw2f8xzss|VkL_C0UiTHX2%v_Ess*6Nve zp?$@_#{KH6V+&(`YB3(!9I&$m_&{W;oSy>M&w#6@U7d-2xlflJVS z=cQY#7cNEn;QO{#@3{=^`!3sB-F!LPU-{(L>f_g-{kH4U*seqSx$C#)y{ivQy>C+? zIvc^$d=}q@QF=1>AaAII?AehCtHMTc84++#3?L|%I4`)Bcg=SkLSVc!_=ci-F$Z8D zaLOzE=1R1vymTQb=wr}ds5b~QBR_Zsfsb{Cz{2oiS3Sd9BCU?ZhmW&@u96%0$k#P0 z0$k$ij&y^#k2!k=(GgVv8KIK}f6w_6^O8S@usnj|p|Rnx61D|=Bp7Xl$r&#~KpuwL z9#C+6kqZrOMBokVjWJRYI|mB}pT>!JVb-4BuIQRa7+KLo3Q1w)K~HNiZR!=mm-9;E z6X{AI`lX?O7dSF37nO7^{H(ay-Tj=P3${~z&^qst7FArVm zTZo2_bgnp~;^sAUPFVwEEz}$MXIw^j>d+2SjNl8K35`C&Z$%^$sd2@J^~?t}kn_Uh z28^MTjhF0bXzV$JqKQe+)uPDp!7q7iWWP<^0=_giE|7Sdt4zPdIi-Ouu#J6iQFMn& zM|YeX&}8U4(|nBTwP@kxXZ<}VY;t*kpN>sfvW^J z$f%nVDHNBWaJw(xcu^(@JX3ovGo`Vw%0M8H{; zn=XYy#v{@_ijlFd*6su$S7?x**aBpTiYnn>VlELGU|voDvJ(0^g&<&dp^+}e4H${X zF&(y06e7yt;yOh75@T#lLAwwe#nFOk7aV!IfQ^>GDNw3RE)}YH4}l?5EHVNPQC>Gp zBFVRteN~);?q^BHGFWH>W>E+I$MU8a-82SS!x)7&F0d{{!m6cVIEjw}#unyt%nVNf z?t-i_*Ykh?xhvi+q`p7jKc#HS~0h9{7?q)q}5^g4y@z=%qEW;1C&6 z3SJ>o-|QfbluSJa8w(83sNl<_8#gh8mWnksKTR}{Z^d9h*A|nMPSGb8 znb7Ki+;}G4ioHb5M-ho821K#X!*FFKxB{xYz`Cd)h;}N_j073YP~gln4jgIpqD(3% z4&$oaeZEnPVTd5|BdI2jk~DfOfbzhxgc=mL0>|?rAxiPJfzeSQeGy2+RLVBFpx^}_ 
zc;s{Z3iXKmP*p9ntGF~X)C_-%la;jx0r$$DDryU5q6zf;)3W;%3f>|_=8TzxESCjg z0*yrk4=jwAMtnn-&^s!*<`an=xVr*jjDDb9!o9ds@SeZv4kc0we`C#QIaw*7w*Ukt z0YcG_B5+=cp?9v8mxmQ295mCwW+a;kP#V*5Wl@6XYY`#|Ly1HR7Q7q;uS=AK00e>J z6!h>c*>Gc0?VG?%j*ymhRAO9_T$9>l+z6)uv9*n+nij31orxr49tAl8>1&kbkT(WA z8yf`67H=gnhGhflJ4DpTMVV${Y6Y)jAhfEsgjNuuEGY>jYD&Nr+7P{nB%){9Rm8*y z)FlvVcz{eu(A_NvDpw-{oC%Yxhk>FnsYAQKst>jy=Pnk)jqUOxTE37T4+R3eLzC!z zC|E~W@Un7R9G2QN-%w#(!&yCJnLMqWU4o%{vnz0cOKziNnj&ZesjI->$f!AGz43Ik z$z%+nAiv0ug9BdNnZgEqEYeHwA5g=`+WCk-mQ(XDpw9RpUREO7N0RD7OYsn>j>KMB zTb79Mj*g=<1#aF27)vBS@V9095(@~7Mi9G1PU%Q6J6TYMAz3MJ?xh;6d3K0in zE3O!eXiPn$Z(mhzq?~upB3nfXpadYbiq(hWy|sfU zt*jKDkYnE|CmmmG@jI>t#DG?Uo05*y4K z1Zb@Amh)|Y!v%3eHTO19d`pW zV1j@M8WepJ)24W`_Lh?A99f@ttpu7EH&pCGeuI`mzEc!o@hWZdpk9n+6xtt%Bd|Ms za{{bwp(#ZiynLT1&kIgE&d1)V`Wd{=3o96QD0zE`L{TdYlWCe*RHH;sS<=v`4DhYl zPvDCS(h=|V>t5)%69q%_8Ckw+p2V`L*xCJoi?3=(5H8K_*L z3&vVm#N|W-$kPDwIDnn?ts!|n(Ge+Nuv$(r0>{Y;UJKvw(#ha;Bjzw;Xb7$^5o6;5 zVygDN=S!aHF$J<6mc|&z50q~g8e}5eiQwgil zg%lC?Ud4~((BMN!6sy7KG zth^Q1j?Baa@l7XhGBT%&{fXS6hIEw(lnfG$RY4Sw!6!VjD$k(N<;IcV+Y#?^fC&i5-d6R4! 
zkRduP3*jUH(oik!Xma!$xJ&drIH^=*ZU@gblnfbb6Unh8QGlmX_rL-@k5>-4z{AY;Cc~bhT-}T@UJI?a};*$3?PkO`gVN?FSqow?<=c1JV zcsQPR)0ZXXKVvEXem&i`X>rByDi`+c+?`HdPzYdo@bsn10M_O*0F)Ojc-v{W->WfA-U7R!_h6T0L<3_UF>^ z#p(Fu*3TbJU-I~`o=?Z`f1w=zEH$g!UM$BSc!}4m-?pZ>ES`Tu@k|hgWQ>Yn;O#58 z3CPcReH64Rj6m*^&LtnC%glSid^@kyH;j*5JWHPj`b$12oC-~W<)t@;0TaTUpqu9b zBI#2)DI6c~j4b?R6>rcDvreI|i%v4*oA$T_5jF}>3Am1L877p&C1;FPJT4&E$ZrV5 zh${hj3!#g}r4CAd293goDGMoQ*D2VDrD1$R0EbHlq#r|Zp%`l~;W7Gw4E$&!vRL%4 zyu$p&OhtG%I=`VAa^HC$j9C>_oQkJqb`n8xA5SMi399TE?W@;)LLh03A8EA|P#eNeazl4k5Il+L9n8L?zqZKP&Z=!VzEE`;JbVds)p z=HqqwgKochW&Zf6(44Qd@QIJ%cw#SlZa?v{=8Zc}+wsOheYhH}-e9@}nlmmfEXC?h zx})y4SGG}jeXm!Ky7j0&t2eBNgYgk+vbuFIKQM9_$o)5mslJ--@P2gU=Qa)lxo=z3 zdD|L;T(llq*M7*qo{Y!6Tz%Do+|Oo6J@ktq*M5F9h|T@$^Jf5KOsNW3V{Di)4LlfhMx*f}ynp`ZnL__!yvNU8K?|yr>#v1~kvI@xk@hv4NgaI4$3d<|M#Rq&qwUsVVA-v%m;&r^1Y9c%zwFxNSP#e$p zdsZz3BCo>Qp2Q!h!A^m%!zm=!P)ODRv3BQ#9Ka^K7>nx>RYngc?f?fK(8rn=PK5TS zghmOqQLm~u&W)@v!__V@vd%av^FB=7c*k0msM8Q-Wv`&*jD8wQ5?14kG<%_pKa@8K zA<7w>bdcFmK9=W#Yn3wviB(#ggLY?~p;#FET{#K@pYtMBT1a1R4cI7#vm+d*_+Nk$8xX77u3+e2NPJM1J& zKa^d>-ifO_6#fgy5Prc;SzSV!%`^FWv{IfEgLYgssuBQu##T?2sXEdwC~JlMrN;yi zGK!J4CagS&rFf!I7Az;I1B-lI`)V?0p|rU_t|`yQ%<06*4MDI@Hw(fLZEo--K^e-u zWK6ZSOlNXl7`p^N1}RohWRPScO0)vKw*JX-P_ERXs=DC@P*WvfTM;c#2Mz-9Y=`-Qs0(ZKg6qqBBzkK_DL zZ+O=3o%_z(yLWGIU$1vqe{N{cSucDElsP?1Ujo(db%(u0Pj`36cd7e#|B0RZPc-)r ztvgn>+`r*8dCKGY-rTG^Z8R7rZ+2eq6u$bx8_ujA{_G9%0n72pt>fD+EXR*sl8z5u zQjX6)kdALTP>yfS$5&lijxYa2I=Vg|F?!!bQzaW%FXqKoS+;>EGMpX^ z@p{<=Sf8wi)Rz?tRFj~%j8k|eC_=$d4t|{-OjKkEu{>CO|vInYc3dn&Nqoi*W$UHrt^C1EuJz+}Q($0D`>5ZpH)ZvIiITPcAIidZ0?nuZ01+p2b^rhX literal 0 HcmV?d00001