Skip to content

Commit

Permalink
Fix broken data-genome tests
Browse files Browse the repository at this point in the history
  • Loading branch information
julesjacobsen committed Feb 22, 2024
1 parent 334e6f8 commit f215a90
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 133 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public class DbSnpAlleleParser extends VcfAlleleParser {

@Override
List<Allele> parseInfoField(List<Allele> alleles, String info) {
System.out.println(info);
Map<AlleleProto.FrequencySource, List<String>> minorAlleleFrequencies = parseMinorAlleleFrequencies(info);

for (Map.Entry<AlleleProto.FrequencySource, List<String>> entry : minorAlleleFrequencies.entrySet()) {
Expand Down Expand Up @@ -91,6 +92,7 @@ private Map<AlleleProto.FrequencySource, List<String>> parseMinorAlleleFrequenci
// case "ALSPAC":
// // http://www.bristol.ac.uk/alspac/researchers/cohort-profile/
// mafMap.put(AlleleProperty.ALSPAC, parseFreqField(frequencyValues));
default: // do nothing
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ public void processAndWriteToDisk(@TempDir Path tempDir) throws Exception {
int originalMapSize = alleleMap.size();
logger.info("Map contains {} entries:", originalMapSize);
assertThat(originalMapSize, equalTo(10));

Map<AlleleKey, AlleleProperties> original = Map.copyOf(alleleMap);
logger.info("Closing map");
mvStore.close();

Expand All @@ -443,30 +443,12 @@ public void processAndWriteToDisk(@TempDir Path tempDir) throws Exception {
.open();

MVMap<AlleleKey, AlleleProperties> reOpenedAlleleMap = reOpened.openMap("alleles", MvStoreUtil.alleleMapBuilder());

var reopened = Map.copyOf(reOpenedAlleleMap);
assertThat(reopened, equalTo(original));
logger.info("Re-opened map contains {} entries:", reOpenedAlleleMap.size());
assertThat(reOpenedAlleleMap.size(), equalTo(originalMapSize));
assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10019, "TA", "T").getRsId(), equalTo("rs775809821"));
assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10039, "A", "C").getRsId(), equalTo("rs978760828"));
assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10043, "T", "A").getRsId(), equalTo("rs1008829651"));
assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10051, "A", "G").getRsId(), equalTo("rs1052373574"));
assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10055, "T", "A").getRsId(), equalTo("rs892501864"));
assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10055, "T", "TA").getRsId(), equalTo("rs768019142"));
assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10063, "A", "C").getRsId(), equalTo("rs1010989343"));
assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10077, "C", "G").getRsId(), equalTo("rs1022805358"));
assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10109, "A", "T").getRsId(), equalTo("rs376007522"));
assertThat(getAlleleProperties(reOpenedAlleleMap, 1, 10108, "C", "T").getRsId(), equalTo("rs62651026"));

reOpened.close();
}

/**
 * Fetches the {@code AlleleProperties} stored in the supplied map under the key built from
 * the given chromosome, position, ref and alt values, and logs them at debug level.
 *
 * @param reOpenedAlleleMap map to read the allele properties from
 * @param chr               chromosome passed to {@code alleleKey}
 * @param pos               position passed to {@code alleleKey}
 * @param ref               reference allele passed to {@code alleleKey}
 * @param alt               alternate allele passed to {@code alleleKey}
 * @return the value the map holds for the constructed key
 */
private AlleleProperties getAlleleProperties(MVMap<AlleleKey, AlleleProperties> reOpenedAlleleMap, int chr, int pos, String ref, String alt) {
    AlleleProperties properties = reOpenedAlleleMap.get(alleleKey(chr, pos, ref, alt));
    // Log the looked-up key fields together with the rsID and the properties map of the hit.
    logger.debug("{}-{}-{}-{} {{} {}}", chr, pos, ref, alt, properties.getRsId(), properties.getPropertiesMap());
    return properties;
}


// @Disabled("Just playing about")
// @Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
package org.monarchinitiative.exomiser.data.genome.model.archive;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;

import java.nio.file.Path;

Expand All @@ -30,29 +32,18 @@
/**
* @author Jules Jacobsen <j.jacobsen@qmul.ac.uk>
*/
public class ArchiveFileReaderTest {

/**
 * Reading the {@code test_empty.vcf.gz} test archive should produce no lines at all.
 */
@Test
void readEmptyLines() {
    Path emptyArchivePath = Path.of("src/test/resources/test_empty.vcf.gz");
    ArchiveFileReader reader = new SimpleArchiveFileReader(new TabixArchive(emptyArchivePath));
    long observedLines = reader.lines().count();
    assertThat(observedLines, equalTo(0L));
}

/**
 * Counts every line read from the {@code test_first_ten_dbsnp.vcf.gz} test archive.
 */
@Test
void readLines() {
    Path dbSnpArchivePath = Path.of("src/test/resources/test_first_ten_dbsnp.vcf.gz");
    ArchiveFileReader reader = new SimpleArchiveFileReader(new TabixArchive(dbSnpArchivePath));
    long observedLines = reader.lines().count();
    // Expected total: 57 header lines + 10 allele lines = 67 lines in the file
    assertThat(observedLines, equalTo(67L));
}

@Test
void readLinesFromBgzip() {
ArchiveFileReader instance = new SimpleArchiveFileReader(new TabixArchive(Path.of("src/test/resources/gnomad-test/chr1.vcf.bgz")));
class ArchiveFileReaderTest {

/**
 * Reads each listed archive through a {@code SimpleArchiveFileReader} wrapping a
 * {@code TabixArchive} and checks that the number of lines streamed matches the
 * expected count given alongside the path in the {@code @CsvSource} row.
 *
 * @param archiveFile       path of the archive to read (first CSV column)
 * @param expectedLineCount total number of lines expected from the archive (second CSV column)
 */
@ParameterizedTest
@CsvSource({
        "src/test/resources/test_empty.vcf.gz, 0", // empty
        "src/test/resources/test_first_ten_dbsnp.vcf.gz, 79", // gzipped vcf
        "src/test/resources/gnomad-test/chr1.vcf.bgz, 62" // bgzipped vcf
})
void readTabixArchive(Path archiveFile, long expectedLineCount) {
    ArchiveFileReader instance = new SimpleArchiveFileReader(new TabixArchive(archiveFile));
    long lineCount = instance.lines().count();
    // Only compare against the per-row expectation; a fixed count here would make
    // every row other than the 62-line archive fail.
    assertThat(lineCount, equalTo(expectedLineCount));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,15 @@ public void testSingleAlleleSnpNoCaf() {
String line = "1\t9446333\trs761066172\tG\tA\t.\t.\tRS=761066172;RSPOS=9446333;dbSNPBuildID=144;SSR=0;SAO=0;VP=0x050000000005000002000100;WGT=1;VC=SNV;ASP";
List<Allele> alleles = instance.parseLine(line);

assertThat(alleles.size(), equalTo(1));
Allele allele = alleles.get(0);

System.out.println(allele);
assertThat(allele.getChr(), equalTo(1));
assertThat(allele.getPos(), equalTo(9446333));
assertThat(allele.getRsId(), equalTo("rs761066172"));
assertThat(allele.getRef(), equalTo("G"));
assertThat(allele.getAlt(), equalTo("A"));
assertThat(allele.getFrequencies().isEmpty(), is(true));
assertThat(alleles.size(), equalTo(0));
// These are no longer produced as they have no TOPMED frequency information
// Allele allele = alleles.get(0);
// assertThat(allele.getChr(), equalTo(1));
// assertThat(allele.getPos(), equalTo(9446333));
// assertThat(allele.getRsId(), equalTo("rs761066172"));
// assertThat(allele.getRef(), equalTo("G"));
// assertThat(allele.getAlt(), equalTo("A"));
// assertThat(allele.getFrequencies().isEmpty(), is(true));
}

@Test
Expand All @@ -67,17 +66,16 @@ public void testSingleAlleleSnpMultiRsId() {
String line = "1\t12345\t74640812;rs115693429\tG\tA\t.\t.\t.";
List<Allele> alleles = instance.parseLine(line);

assertThat(alleles.size(), equalTo(1));
Allele allele = alleles.get(0);

System.out.println(allele);
assertThat(allele.getChr(), equalTo(1));
assertThat(allele.getPos(), equalTo(12345));
// assertThat(allele.getRsId(), equalTo("rs200118651"));
assertThat(allele.getRsId(), equalTo("74640812"));
assertThat(allele.getRef(), equalTo("G"));
assertThat(allele.getAlt(), equalTo("A"));
assertThat(allele.getFrequencies().isEmpty(), is(true));
assertThat(alleles.size(), equalTo(0));
// These are no longer produced as they have no TOPMED frequency information
// Allele allele = alleles.get(0);
// assertThat(allele.getChr(), equalTo(1));
// assertThat(allele.getPos(), equalTo(12345));
//// assertThat(allele.getRsId(), equalTo("rs200118651"));
// assertThat(allele.getRsId(), equalTo("74640812"));
// assertThat(allele.getRef(), equalTo("G"));
// assertThat(allele.getAlt(), equalTo("A"));
// assertThat(allele.getFrequencies().isEmpty(), is(true));
}


Expand Down Expand Up @@ -131,7 +129,7 @@ public void testSingleAlleleSnpBuild155() {
@Test
public void testSingleAlleleDeletion() {
DbSnpAlleleParser instance = new DbSnpAlleleParser();
String line = "1\t10353088\trs763778935\tTC\tT\t.\t.\tRS=763778935;RSPOS=10353089;dbSNPBuildID=144;SSR=0;SAO=0;VP=0x050000080005000002000200;GENEINFO=KIF1B:23095;WGT=1;VC=DIV;INT;ASP";
String line = "1\t10353088\trs763778935\tTC\tT\t.\t.\tRS=763778935;RSPOS=10353089;dbSNPBuildID=144;SSR=0;SAO=0;VP=0x050000080005000002000200;GENEINFO=KIF1B:23095;WGT=1;VC=DIV;INT;ASP;TOPMED=0.99335818042813455,0.00664181957186544";
List<Allele> alleles = instance.parseLine(line);

assertThat(alleles.size(), equalTo(1));
Expand All @@ -143,7 +141,7 @@ public void testSingleAlleleDeletion() {
assertThat(allele.getRsId(), equalTo("rs763778935"));
assertThat(allele.getRef(), equalTo("TC"));
assertThat(allele.getAlt(), equalTo("T"));
assertThat(allele.getFrequencies().isEmpty(), is(true));
assertThat(allele.getFrequencies(), equalTo(List.of(AlleleData.frequencyOf(TOPMED, 0.664181957186544f))));
}

@Test
Expand All @@ -152,25 +150,25 @@ public void testMultiAlleleNoCaf() {
String line = "1\t9633387\trs776815368\tG\tGT,GTT\t.\t.\tRS=776815368;RSPOS=9633387;dbSNPBuildID=144;SSR=0;SAO=0;VP=0x050000080005000002000200;GENEINFO=SLC25A33:84275;WGT=1;VC=DIV;INT;ASP";
List<Allele> alleles = instance.parseLine(line);

assertThat(alleles.size(), equalTo(2));
Allele allele1 = alleles.get(0);

System.out.println(allele1);
assertThat(allele1.getChr(), equalTo(1));
assertThat(allele1.getPos(), equalTo(9633387));
assertThat(allele1.getRsId(), equalTo("rs776815368"));
assertThat(allele1.getRef(), equalTo("G"));
assertThat(allele1.getAlt(), equalTo("GT"));
assertThat(allele1.getFrequencies().isEmpty(), is(true));

Allele allele2 = alleles.get(1);
System.out.println(allele2);
assertThat(allele2.getChr(), equalTo(1));
assertThat(allele2.getPos(), equalTo(9633387));
assertThat(allele2.getRsId(), equalTo("rs776815368"));
assertThat(allele2.getRef(), equalTo("G"));
assertThat(allele2.getAlt(), equalTo("GTT"));
assertThat(allele2.getFrequencies().isEmpty(), is(true));
assertThat(alleles.size(), equalTo(0));
// These are no longer produced as they have no TOPMED frequency information
// Allele allele1 = alleles.get(0);
// System.out.println(allele1);
// assertThat(allele1.getChr(), equalTo(1));
// assertThat(allele1.getPos(), equalTo(9633387));
// assertThat(allele1.getRsId(), equalTo("rs776815368"));
// assertThat(allele1.getRef(), equalTo("G"));
// assertThat(allele1.getAlt(), equalTo("GT"));
// assertThat(allele1.getFrequencies().isEmpty(), is(true));
//
// Allele allele2 = alleles.get(1);
// System.out.println(allele2);
// assertThat(allele2.getChr(), equalTo(1));
// assertThat(allele2.getPos(), equalTo(9633387));
// assertThat(allele2.getRsId(), equalTo("rs776815368"));
// assertThat(allele2.getRef(), equalTo("G"));
// assertThat(allele2.getAlt(), equalTo("GTT"));
// assertThat(allele2.getFrequencies().isEmpty(), is(true));
}

@Test
Expand All @@ -179,18 +177,19 @@ public void testMultiAlleleWithCaf() {
String line = "1\t9973965\trs555705142\tA\tAT,ATTT\t.\t.\tRS=555705142;RSPOS=9973965;dbSNPBuildID=142;SSR=0;SAO=0;VP=0x050000000005150026000200;WGT=1;VC=DIV;ASP;VLD;G5;KGPhase3;CAF=0.87,.,0.13;COMMON=1";
List<Allele> alleles = instance.parseLine(line);

assertThat(alleles.size(), equalTo(2));
Allele allele1 = alleles.get(0);

System.out.println(allele1);
assertThat(allele1.getChr(), equalTo(1));
assertThat(allele1.getPos(), equalTo(9973965));
assertThat(allele1.getRsId(), equalTo("rs555705142"));
assertThat(allele1.getRef(), equalTo("A"));
assertThat(allele1.getAlt(), equalTo("AT"));
assertThat(allele1.getFrequencies().isEmpty(), is(true));
assertThat(alleles.size(), equalTo(1));

Allele allele2 = alleles.get(1);
// No longer produced as it has no TOPMED frequency information
// Allele allele1 = alleles.get(0);
// System.out.println(allele1);
// assertThat(allele1.getChr(), equalTo(1));
// assertThat(allele1.getPos(), equalTo(9973965));
// assertThat(allele1.getRsId(), equalTo("rs555705142"));
// assertThat(allele1.getRef(), equalTo("A"));
// assertThat(allele1.getAlt(), equalTo("AT"));
// assertThat(allele1.getFrequencies().isEmpty(), is(true));

Allele allele2 = alleles.get(0);
System.out.println(allele2);
assertThat(allele2.getChr(), equalTo(1));
assertThat(allele2.getPos(), equalTo(9973965));
Expand All @@ -206,7 +205,7 @@ public void testLotsOfMultiAlleleWithCaf() {
String line = "3\t134153617\trs56011117\tG\tGT,GTT,GTTGT,GTTGTTTTTTTTTGTTT\t.\t.\tRS=56011117;RSPOS=134153617;dbSNPBuildID=129;SSR=0;SAO=0;VP=0x05000000000504002e000204;WGT=1;VC=DIV;ASP;VLD;KGPhase3;NOV;CAF=0.995,0.004992,.,.,.;COMMON=1";
List<Allele> alleles = instance.parseLine(line);

assertThat(alleles.size(), equalTo(4));
assertThat(alleles.size(), equalTo(1));
Allele allele1 = alleles.get(0);

assertThat(allele1.getChr(), equalTo(3));
Expand All @@ -216,30 +215,31 @@ public void testLotsOfMultiAlleleWithCaf() {
assertThat(allele1.getAlt(), equalTo("GT"));
assertThat(allele1.getFrequencies(), equalTo(List.of(AlleleData.frequencyOf(KG, 0.4992f))));

Allele allele2 = alleles.get(1);
assertThat(allele2.getChr(), equalTo(3));
assertThat(allele2.getPos(), equalTo(134153617));
assertThat(allele2.getRsId(), equalTo("rs56011117"));
assertThat(allele2.getRef(), equalTo("G"));
assertThat(allele2.getAlt(), equalTo("GTT"));
assertThat(allele2.getFrequencies().isEmpty(), is(true));

Allele allele3 = alleles.get(2);

assertThat(allele3.getChr(), equalTo(3));
assertThat(allele3.getPos(), equalTo(134153617));
assertThat(allele3.getRsId(), equalTo("rs56011117"));
assertThat(allele3.getRef(), equalTo("G"));
assertThat(allele3.getAlt(), equalTo("GTTGT"));
assertThat(allele3.getFrequencies().isEmpty(), is(true));

Allele allele4 = alleles.get(3);
assertThat(allele4.getChr(), equalTo(3));
assertThat(allele4.getPos(), equalTo(134153617));
assertThat(allele4.getRsId(), equalTo("rs56011117"));
assertThat(allele4.getRef(), equalTo("G"));
assertThat(allele4.getAlt(), equalTo("GTTGTTTTTTTTTGTTT"));
assertThat(allele4.getFrequencies().isEmpty(), is(true));
// These are no longer produced as they have no frequency information
// Allele allele2 = alleles.get(1);
// assertThat(allele2.getChr(), equalTo(3));
// assertThat(allele2.getPos(), equalTo(134153617));
// assertThat(allele2.getRsId(), equalTo("rs56011117"));
// assertThat(allele2.getRef(), equalTo("G"));
// assertThat(allele2.getAlt(), equalTo("GTT"));
// assertThat(allele2.getFrequencies().isEmpty(), is(true));
//
// Allele allele3 = alleles.get(2);
//
// assertThat(allele3.getChr(), equalTo(3));
// assertThat(allele3.getPos(), equalTo(134153617));
// assertThat(allele3.getRsId(), equalTo("rs56011117"));
// assertThat(allele3.getRef(), equalTo("G"));
// assertThat(allele3.getAlt(), equalTo("GTTGT"));
// assertThat(allele3.getFrequencies().isEmpty(), is(true));
//
// Allele allele4 = alleles.get(3);
// assertThat(allele4.getChr(), equalTo(3));
// assertThat(allele4.getPos(), equalTo(134153617));
// assertThat(allele4.getRsId(), equalTo("rs56011117"));
// assertThat(allele4.getRef(), equalTo("G"));
// assertThat(allele4.getAlt(), equalTo("GTTGTTTTTTTTTGTTT"));
// assertThat(allele4.getFrequencies().isEmpty(), is(true));
}

/**
Expand All @@ -265,10 +265,11 @@ void testMultiAlleleCafAndTopMed() {
allele1.addFrequency(AlleleData.frequencyOf(KG, 0.03994f));
allele1.addFrequency(AlleleData.frequencyOf(TOPMED, 0.0274744f));

Allele allele2 = new Allele(1, 9974103, "A", "T");
allele2.setRsId("rs527824753");
// This is no longer expected as it has no TOPMED frequency information
// Allele allele2 = new Allele(1, 9974103, "A", "T");
// allele2.setRsId("rs527824753");

assertParseLineEquals(line, List.of(allele1, allele2));
assertParseLineEquals(line, List.of(allele1));
}

@Test
Expand Down Expand Up @@ -310,14 +311,14 @@ public void testMitochondrialSnp() {
String line = "NC_012920.1\t15061\trs527236205\tA\tG\t.\t.\tRS=527236205;dbSNPBuildID=141;SSR=0;GENEINFO=MT-CYB:4519|MT-ND6:4541;VC=SNV;SYN;R5;GNO;FREQ=MGP:0.9963,0.003745|SGDP_PRJ:0,1|TOMMO:0.9984,0.001628|dbGaP_PopFreq:0.9987,0.001336;CLNVI=.,;CLNORIGIN=.,1073741824;CLNSIG=.,4;CLNDISDB=.,MONDO:MONDO:0021068/MeSH:D010051/MedGen:C0919267/OMIM:167000/Human_Phenotype_Ontology:HP:0100615;CLNDN=.,Neoplasm_of_ovary;CLNREVSTAT=.,no_criteria;CLNACC=.,RCV000133452.1;CLNHGVS=NC_012920.1:m.15061=,NC_012920.1:m.15061A>G";
List<Allele> alleles = instance.parseLine(line);

assertThat(alleles.size(), equalTo(1));
Allele allele = alleles.get(0);

assertThat(allele.getChr(), equalTo(25));
assertThat(allele.getPos(), equalTo(15061));
assertThat(allele.getRsId(), equalTo("rs527236205"));
assertThat(allele.getRef(), equalTo("A"));
assertThat(allele.getAlt(), equalTo("G"));
assertThat(allele.getFrequencies().isEmpty(), is(true));
assertThat(alleles.size(), equalTo(0));
// These are no longer produced as they have no TOPMED frequency information
// Allele allele = alleles.get(0);
// assertThat(allele.getChr(), equalTo(25));
// assertThat(allele.getPos(), equalTo(15061));
// assertThat(allele.getRsId(), equalTo("rs527236205"));
// assertThat(allele.getRef(), equalTo("A"));
// assertThat(allele.getAlt(), equalTo("G"));
// assertThat(allele.getFrequencies().isEmpty(), is(true));
}
}
Binary file modified exomiser-data-genome/src/test/resources/test_first_ten_dbsnp.vcf.gz
Binary file not shown.

0 comments on commit f215a90

Please sign in to comment.