diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/DbSnpAlleleParser.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/DbSnpAlleleParser.java index 495c34575..15a623387 100644 --- a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/DbSnpAlleleParser.java +++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/DbSnpAlleleParser.java @@ -41,6 +41,7 @@ public class DbSnpAlleleParser extends VcfAlleleParser { @Override List parseInfoField(List alleles, String info) { + System.out.println(info); Map> minorAlleleFrequencies = parseMinorAlleleFrequencies(info); for (Map.Entry> entry : minorAlleleFrequencies.entrySet()) { @@ -91,6 +92,7 @@ private Map> parseMinorAlleleFrequenci // case "ALSPAC": // // http://www.bristol.ac.uk/alspac/researchers/cohort-profile/ // mafMap.put(AlleleProperty.ALSPAC, parseFreqField(frequencyValues)); + default: // do nothing } } } diff --git a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/indexers/MvStoreAlleleIndexerTest.java b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/indexers/MvStoreAlleleIndexerTest.java index cb0331a2a..98ec113d7 100644 --- a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/indexers/MvStoreAlleleIndexerTest.java +++ b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/indexers/MvStoreAlleleIndexerTest.java @@ -431,7 +431,7 @@ public void processAndWriteToDisk(@TempDir Path tempDir) throws Exception { int originalMapSize = alleleMap.size(); logger.info("Map contains {} entries:", originalMapSize); assertThat(originalMapSize, equalTo(10)); - + Map original = Map.copyOf(alleleMap); logger.info("Closing map"); mvStore.close(); @@ -443,30 +443,12 @@ public void processAndWriteToDisk(@TempDir Path tempDir) throws Exception { .open(); MVMap reOpenedAlleleMap = reOpened.openMap("alleles", MvStoreUtil.alleleMapBuilder()); - + var reopened = Map.copyOf(reOpenedAlleleMap); + assertThat(reopened, equalTo(original)); logger.info("Re-opened map contains {} entries:", reOpenedAlleleMap.size()); - assertThat(reOpenedAlleleMap.size(), equalTo(originalMapSize)); - assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10019, "TA", "T").getRsId(), equalTo("rs775809821")); - assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10039, "A", "C").getRsId(), equalTo("rs978760828")); - assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10043, "T", "A").getRsId(), equalTo("rs1008829651")); - assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10051, "A", "G").getRsId(), equalTo("rs1052373574")); - assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10055, "T", "A").getRsId(), equalTo("rs892501864")); - assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10055, "T", "TA").getRsId(), equalTo("rs768019142")); - assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10063, "A", "C").getRsId(), equalTo("rs1010989343")); - assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10077, "C", "G").getRsId(), equalTo("rs1022805358")); - assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10109, "A", "T").getRsId(), equalTo("rs376007522")); - assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10108, "C", "T").getRsId(), equalTo("rs62651026")); - reOpened.close(); } - private AlleleProperties getAlleleProperties(MVMap reOpenedAlleleMap, int chr, int pos, String ref, String alt) { - AlleleKey last = alleleKey(chr, pos, ref, alt); - AlleleProperties lastProperties = reOpenedAlleleMap.get(last); - logger.debug("{}-{}-{}-{} {{} {}}", chr, pos, ref, alt, lastProperties.getRsId(), lastProperties.getPropertiesMap()); - return lastProperties; - } - // @Disabled("Just playing about") // @Test diff --git a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/archive/ArchiveFileReaderTest.java b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/archive/ArchiveFileReaderTest.java index 6fd959c52..37943046a 100644 --- a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/archive/ArchiveFileReaderTest.java +++ b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/archive/ArchiveFileReaderTest.java @@ -21,6 +21,8 @@ package org.monarchinitiative.exomiser.data.genome.model.archive; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; import java.nio.file.Path; @@ -30,29 +32,18 @@ /** * @author Jules Jacobsen */ -public class ArchiveFileReaderTest { - - @Test - void readEmptyLines() { - ArchiveFileReader instance = new SimpleArchiveFileReader(new TabixArchive(Path.of("src/test/resources/test_empty.vcf.gz"))); - long lineCount = instance.lines().count(); - assertThat(lineCount, equalTo(0L)); - } - - @Test - void readLines() { - ArchiveFileReader instance = new SimpleArchiveFileReader(new TabixArchive(Path.of("src/test/resources/test_first_ten_dbsnp.vcf.gz"))); - long lineCount = instance.lines().count(); - // 57 header + 10 allele = 67 lines total in the file - assertThat(lineCount, equalTo(67L)); - } - - @Test - void readLinesFromBgzip() { - ArchiveFileReader instance = new SimpleArchiveFileReader(new TabixArchive(Path.of("src/test/resources/gnomad-test/chr1.vcf.bgz"))); +class ArchiveFileReaderTest { + + @ParameterizedTest + @CsvSource({ + "src/test/resources/test_empty.vcf.gz, 0", // empty + "src/test/resources/test_first_ten_dbsnp.vcf.gz, 79", // gzipped vcf + "src/test/resources/gnomad-test/chr1.vcf.bgz, 62" // bgzipped vcf + }) + void readTabixArchive(Path archiveFile, long expectedLineCount) { + ArchiveFileReader instance = new SimpleArchiveFileReader(new TabixArchive(archiveFile)); long lineCount = instance.lines().count(); - // 57 header + 5 allele = 62 lines total in the file - assertThat(lineCount, equalTo(62L)); + assertThat(lineCount, equalTo(expectedLineCount)); } @Test diff --git a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/parsers/DbSnpAlleleParserTest.java b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/parsers/DbSnpAlleleParserTest.java index 5f675f520..3a3af77f6 100644 --- a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/parsers/DbSnpAlleleParserTest.java +++ b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/parsers/DbSnpAlleleParserTest.java @@ -49,16 +49,15 @@ public void testSingleAlleleSnpNoCaf() { String line = "1\t9446333\trs761066172\tG\tA\t.\t.\tRS=761066172;RSPOS=9446333;dbSNPBuildID=144;SSR=0;SAO=0;VP=0x050000000005000002000100;WGT=1;VC=SNV;ASP"; List alleles = instance.parseLine(line); - assertThat(alleles.size(), equalTo(1)); - Allele allele = alleles.get(0); - - System.out.println(allele); - assertThat(allele.getChr(), equalTo(1)); - assertThat(allele.getPos(), equalTo(9446333)); - assertThat(allele.getRsId(), equalTo("rs761066172")); - assertThat(allele.getRef(), equalTo("G")); - assertThat(allele.getAlt(), equalTo("A")); - assertThat(allele.getFrequencies().isEmpty(), is(true)); + assertThat(alleles.size(), equalTo(0)); + // These are no longer produced as they have no TOPMED frequency information +// Allele allele = alleles.get(0); +// assertThat(allele.getChr(), equalTo(1)); +// assertThat(allele.getPos(), equalTo(9446333)); +// assertThat(allele.getRsId(), equalTo("rs761066172")); +// assertThat(allele.getRef(), equalTo("G")); +// assertThat(allele.getAlt(), equalTo("A")); +// assertThat(allele.getFrequencies().isEmpty(), is(true)); } @Test @@ -67,17 +66,16 @@ public void testSingleAlleleSnpMultiRsId() { String line = "1\t12345\t74640812;rs115693429\tG\tA\t.\t.\t."; List alleles = instance.parseLine(line); - assertThat(alleles.size(), equalTo(1)); - Allele allele = alleles.get(0); - - System.out.println(allele); - assertThat(allele.getChr(), equalTo(1)); - assertThat(allele.getPos(), equalTo(12345)); -// assertThat(allele.getRsId(), equalTo("rs200118651")); - assertThat(allele.getRsId(), equalTo("74640812")); - assertThat(allele.getRef(), equalTo("G")); - assertThat(allele.getAlt(), equalTo("A")); - assertThat(allele.getFrequencies().isEmpty(), is(true)); + assertThat(alleles.size(), equalTo(0)); + // These are no longer produced as they have no TOPMED frequency information +// Allele allele = alleles.get(0); +// assertThat(allele.getChr(), equalTo(1)); +// assertThat(allele.getPos(), equalTo(12345)); +//// assertThat(allele.getRsId(), equalTo("rs200118651")); +// assertThat(allele.getRsId(), equalTo("74640812")); +// assertThat(allele.getRef(), equalTo("G")); +// assertThat(allele.getAlt(), equalTo("A")); +// assertThat(allele.getFrequencies().isEmpty(), is(true)); } @@ -131,7 +129,7 @@ public void testSingleAlleleSnpBuild155() { @Test public void testSingleAlleleDeletion() { DbSnpAlleleParser instance = new DbSnpAlleleParser(); - String line = "1\t10353088\trs763778935\tTC\tT\t.\t.\tRS=763778935;RSPOS=10353089;dbSNPBuildID=144;SSR=0;SAO=0;VP=0x050000080005000002000200;GENEINFO=KIF1B:23095;WGT=1;VC=DIV;INT;ASP"; + String line = "1\t10353088\trs763778935\tTC\tT\t.\t.\tRS=763778935;RSPOS=10353089;dbSNPBuildID=144;SSR=0;SAO=0;VP=0x050000080005000002000200;GENEINFO=KIF1B:23095;WGT=1;VC=DIV;INT;ASP;TOPMED=0.99335818042813455,0.00664181957186544"; List alleles = instance.parseLine(line); assertThat(alleles.size(), equalTo(1)); @@ -143,7 +141,7 @@ public void testSingleAlleleDeletion() { assertThat(allele.getRsId(), equalTo("rs763778935")); assertThat(allele.getRef(), equalTo("TC")); assertThat(allele.getAlt(), equalTo("T")); - assertThat(allele.getFrequencies().isEmpty(), is(true)); + assertThat(allele.getFrequencies(), equalTo(List.of(AlleleData.frequencyOf(TOPMED, 0.664181957186544f)))); } @Test @@ -152,25 +150,25 @@ public void testMultiAlleleNoCaf() { String line = "1\t9633387\trs776815368\tG\tGT,GTT\t.\t.\tRS=776815368;RSPOS=9633387;dbSNPBuildID=144;SSR=0;SAO=0;VP=0x050000080005000002000200;GENEINFO=SLC25A33:84275;WGT=1;VC=DIV;INT;ASP"; List alleles = instance.parseLine(line); - assertThat(alleles.size(), equalTo(2)); - Allele allele1 = alleles.get(0); - - System.out.println(allele1); - assertThat(allele1.getChr(), equalTo(1)); - assertThat(allele1.getPos(), equalTo(9633387)); - assertThat(allele1.getRsId(), equalTo("rs776815368")); - assertThat(allele1.getRef(), equalTo("G")); - assertThat(allele1.getAlt(), equalTo("GT")); - assertThat(allele1.getFrequencies().isEmpty(), is(true)); - - Allele allele2 = alleles.get(1); - System.out.println(allele2); - assertThat(allele2.getChr(), equalTo(1)); - assertThat(allele2.getPos(), equalTo(9633387)); - assertThat(allele2.getRsId(), equalTo("rs776815368")); - assertThat(allele2.getRef(), equalTo("G")); - assertThat(allele2.getAlt(), equalTo("GTT")); - assertThat(allele2.getFrequencies().isEmpty(), is(true)); + assertThat(alleles.size(), equalTo(0)); + // These are no longer produced as they have no TOPMED frequency information +// Allele allele1 = alleles.get(0); +// System.out.println(allele1); +// assertThat(allele1.getChr(), equalTo(1)); +// assertThat(allele1.getPos(), equalTo(9633387)); +// assertThat(allele1.getRsId(), equalTo("rs776815368")); +// assertThat(allele1.getRef(), equalTo("G")); +// assertThat(allele1.getAlt(), equalTo("GT")); +// assertThat(allele1.getFrequencies().isEmpty(), is(true)); +// +// Allele allele2 = alleles.get(1); +// System.out.println(allele2); +// assertThat(allele2.getChr(), equalTo(1)); +// assertThat(allele2.getPos(), equalTo(9633387)); +// assertThat(allele2.getRsId(), equalTo("rs776815368")); +// assertThat(allele2.getRef(), equalTo("G")); +// assertThat(allele2.getAlt(), equalTo("GTT")); +// assertThat(allele2.getFrequencies().isEmpty(), is(true)); } @Test @@ -179,18 +177,19 @@ public void testMultiAlleleWithCaf() { String line = "1\t9973965\trs555705142\tA\tAT,ATTT\t.\t.\tRS=555705142;RSPOS=9973965;dbSNPBuildID=142;SSR=0;SAO=0;VP=0x050000000005150026000200;WGT=1;VC=DIV;ASP;VLD;G5;KGPhase3;CAF=0.87,.,0.13;COMMON=1"; List alleles = instance.parseLine(line); - assertThat(alleles.size(), equalTo(2)); - Allele allele1 = alleles.get(0); - - System.out.println(allele1); - assertThat(allele1.getChr(), equalTo(1)); - assertThat(allele1.getPos(), equalTo(9973965)); - assertThat(allele1.getRsId(), equalTo("rs555705142")); - assertThat(allele1.getRef(), equalTo("A")); - assertThat(allele1.getAlt(), equalTo("AT")); - assertThat(allele1.getFrequencies().isEmpty(), is(true)); + assertThat(alleles.size(), equalTo(1)); - Allele allele2 = alleles.get(1); + // No longer produced as it has no TOPMED frequency information +// Allele allele1 = alleles.get(0); +// System.out.println(allele1); +// assertThat(allele1.getChr(), equalTo(1)); +// assertThat(allele1.getPos(), equalTo(9973965)); +// assertThat(allele1.getRsId(), equalTo("rs555705142")); +// assertThat(allele1.getRef(), equalTo("A")); +// assertThat(allele1.getAlt(), equalTo("AT")); +// assertThat(allele1.getFrequencies().isEmpty(), is(true)); + + Allele allele2 = alleles.get(0); System.out.println(allele2); assertThat(allele2.getChr(), equalTo(1)); assertThat(allele2.getPos(), equalTo(9973965)); @@ -206,7 +205,7 @@ public void testLotsOfMultiAlleleWithCaf() { String line = "3\t134153617\trs56011117\tG\tGT,GTT,GTTGT,GTTGTTTTTTTTTGTTT\t.\t.\tRS=56011117;RSPOS=134153617;dbSNPBuildID=129;SSR=0;SAO=0;VP=0x05000000000504002e000204;WGT=1;VC=DIV;ASP;VLD;KGPhase3;NOV;CAF=0.995,0.004992,.,.,.;COMMON=1"; List alleles = instance.parseLine(line); - assertThat(alleles.size(), equalTo(4)); + assertThat(alleles.size(), equalTo(1)); Allele allele1 = alleles.get(0); assertThat(allele1.getChr(), equalTo(3)); @@ -216,30 +215,31 @@ public void testLotsOfMultiAlleleWithCaf() { assertThat(allele1.getAlt(), equalTo("GT")); assertThat(allele1.getFrequencies(), equalTo(List.of(AlleleData.frequencyOf(KG, 0.4992f)))); - Allele allele2 = alleles.get(1); - assertThat(allele2.getChr(), equalTo(3)); - assertThat(allele2.getPos(), equalTo(134153617)); - assertThat(allele2.getRsId(), equalTo("rs56011117")); - assertThat(allele2.getRef(), equalTo("G")); - assertThat(allele2.getAlt(), equalTo("GTT")); - assertThat(allele2.getFrequencies().isEmpty(), is(true)); - - Allele allele3 = alleles.get(2); - - assertThat(allele3.getChr(), equalTo(3)); - assertThat(allele3.getPos(), equalTo(134153617)); - assertThat(allele3.getRsId(), equalTo("rs56011117")); - assertThat(allele3.getRef(), equalTo("G")); - assertThat(allele3.getAlt(), equalTo("GTTGT")); - assertThat(allele3.getFrequencies().isEmpty(), is(true)); - - Allele allele4 = alleles.get(3); - assertThat(allele4.getChr(), equalTo(3)); - assertThat(allele4.getPos(), equalTo(134153617)); - assertThat(allele4.getRsId(), equalTo("rs56011117")); - assertThat(allele4.getRef(), equalTo("G")); - assertThat(allele4.getAlt(), equalTo("GTTGTTTTTTTTTGTTT")); - assertThat(allele4.getFrequencies().isEmpty(), is(true)); + // These are no longer produced as they have no frequency information +// Allele allele2 = alleles.get(1); +// assertThat(allele2.getChr(), equalTo(3)); +// assertThat(allele2.getPos(), equalTo(134153617)); +// assertThat(allele2.getRsId(), equalTo("rs56011117")); +// assertThat(allele2.getRef(), equalTo("G")); +// assertThat(allele2.getAlt(), equalTo("GTT")); +// assertThat(allele2.getFrequencies().isEmpty(), is(true)); +// +// Allele allele3 = alleles.get(2); +// +// assertThat(allele3.getChr(), equalTo(3)); +// assertThat(allele3.getPos(), equalTo(134153617)); +// assertThat(allele3.getRsId(), equalTo("rs56011117")); +// assertThat(allele3.getRef(), equalTo("G")); +// assertThat(allele3.getAlt(), equalTo("GTTGT")); +// assertThat(allele3.getFrequencies().isEmpty(), is(true)); +// +// Allele allele4 = alleles.get(3); +// assertThat(allele4.getChr(), equalTo(3)); +// assertThat(allele4.getPos(), equalTo(134153617)); +// assertThat(allele4.getRsId(), equalTo("rs56011117")); +// assertThat(allele4.getRef(), equalTo("G")); +// assertThat(allele4.getAlt(), equalTo("GTTGTTTTTTTTTGTTT")); +// assertThat(allele4.getFrequencies().isEmpty(), is(true)); } /** @@ -265,10 +265,11 @@ void testMultiAlleleCafAndTopMed() { allele1.addFrequency(AlleleData.frequencyOf(KG, 0.03994f)); allele1.addFrequency(AlleleData.frequencyOf(TOPMED, 0.0274744f)); - Allele allele2 = new Allele(1, 9974103, "A", "T"); - allele2.setRsId("rs527824753"); + // This is no longer expected as it has no TOPMED frequency information +// Allele allele2 = new Allele(1, 9974103, "A", "T"); +// allele2.setRsId("rs527824753"); - assertParseLineEquals(line, List.of(allele1, allele2)); + assertParseLineEquals(line, List.of(allele1)); } @Test @@ -310,14 +311,14 @@ public void testMitochondrialSnp() { String line = "NC_012920.1\t15061\trs527236205\tA\tG\t.\t.\tRS=527236205;dbSNPBuildID=141;SSR=0;GENEINFO=MT-CYB:4519|MT-ND6:4541;VC=SNV;SYN;R5;GNO;FREQ=MGP:0.9963,0.003745|SGDP_PRJ:0,1|TOMMO:0.9984,0.001628|dbGaP_PopFreq:0.9987,0.001336;CLNVI=.,;CLNORIGIN=.,1073741824;CLNSIG=.,4;CLNDISDB=.,MONDO:MONDO:0021068/MeSH:D010051/MedGen:C0919267/OMIM:167000/Human_Phenotype_Ontology:HP:0100615;CLNDN=.,Neoplasm_of_ovary;CLNREVSTAT=.,no_criteria;CLNACC=.,RCV000133452.1;CLNHGVS=NC_012920.1:m.15061=,NC_012920.1:m.15061A>G"; List alleles = instance.parseLine(line); - assertThat(alleles.size(), equalTo(1)); - Allele allele = alleles.get(0); - - assertThat(allele.getChr(), equalTo(25)); - assertThat(allele.getPos(), equalTo(15061)); - assertThat(allele.getRsId(), equalTo("rs527236205")); - assertThat(allele.getRef(), equalTo("A")); - assertThat(allele.getAlt(), equalTo("G")); - assertThat(allele.getFrequencies().isEmpty(), is(true)); + assertThat(alleles.size(), equalTo(0)); + // These are no longer produced as they have no TOPMED frequency information +// Allele allele = alleles.get(0); +// assertThat(allele.getChr(), equalTo(25)); +// assertThat(allele.getPos(), equalTo(15061)); +// assertThat(allele.getRsId(), equalTo("rs527236205")); +// assertThat(allele.getRef(), equalTo("A")); +// assertThat(allele.getAlt(), equalTo("G")); +// assertThat(allele.getFrequencies().isEmpty(), is(true)); } } \ No newline at end of file diff --git a/exomiser-data-genome/src/test/resources/test_first_ten_dbsnp.vcf.gz b/exomiser-data-genome/src/test/resources/test_first_ten_dbsnp.vcf.gz index 494062a1b..7dcbec848 100644 Binary files a/exomiser-data-genome/src/test/resources/test_first_ten_dbsnp.vcf.gz and b/exomiser-data-genome/src/test/resources/test_first_ten_dbsnp.vcf.gz differ