diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java index 6d8c84cfa9..b8776bdaf8 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java @@ -61,10 +61,10 @@ public Statement getNextElement() throws SailException { } int subjID = ByteArrayUtil.getInt(nextValue, TripleStore.SUBJ_IDX); - Resource subj = (Resource) valueStore.getValue(subjID); + Resource subj = valueStore.getResource(subjID); int predID = ByteArrayUtil.getInt(nextValue, TripleStore.PRED_IDX); - IRI pred = (IRI) valueStore.getValue(predID); + IRI pred = valueStore.getIRI(predID); int objID = ByteArrayUtil.getInt(nextValue, TripleStore.OBJ_IDX); Value obj = valueStore.getValue(objID); @@ -72,7 +72,7 @@ public Statement getNextElement() throws SailException { Resource context = null; int contextID = ByteArrayUtil.getInt(nextValue, TripleStore.CONTEXT_IDX); if (contextID != 0) { - context = (Resource) valueStore.getValue(contextID); + context = valueStore.getResource(contextID); } return valueStore.createStatement(subj, pred, obj, context); diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java index d24541e3d1..c3f7a835c5 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java @@ -33,12 +33,17 @@ import org.eclipse.rdf4j.model.vocabulary.XSD; import org.eclipse.rdf4j.sail.SailException; import org.eclipse.rdf4j.sail.nativerdf.datastore.DataStore; +import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRI; +import 
org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRIOrBNode; +import org.eclipse.rdf4j.sail.nativerdf.model.CorruptLiteral; import org.eclipse.rdf4j.sail.nativerdf.model.CorruptValue; import org.eclipse.rdf4j.sail.nativerdf.model.NativeBNode; import org.eclipse.rdf4j.sail.nativerdf.model.NativeIRI; import org.eclipse.rdf4j.sail.nativerdf.model.NativeLiteral; import org.eclipse.rdf4j.sail.nativerdf.model.NativeResource; import org.eclipse.rdf4j.sail.nativerdf.model.NativeValue; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * File-based indexed storage and retrieval of RDF values. ValueStore maps RDF values to integer IDs and vice-versa. @@ -50,9 +55,7 @@ @InternalUseOnly public class ValueStore extends SimpleValueFactory { - /*-----------* - * Constants * - *-----------*/ + private static final Logger logger = LoggerFactory.getLogger(ValueStore.class); /** * The default value cache size. @@ -127,7 +130,8 @@ public class ValueStore extends SimpleValueFactory { /** * Do not throw an exception in case a value cannot be loaded, e.g. due to a corrupt value store. */ - private final boolean softFailOnCorruptData; + public static boolean SOFT_FAIL_ON_CORRUPT_DATA = "true" + .equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData"));; /*--------------* * Constructors * @@ -153,14 +157,6 @@ public ValueStore(File dataDir, boolean forceSync, int valueCacheSize, int value setNewRevision(); - /* - * Soft failure when a ValueStore is corrupt (i.e., one or more NativeValues cannot be read properly) can be - * enabled using the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData (boolean). The - * default behavior is that ValueStore will fail hard with a SailException, whereas softFaileOnCorruptData set - * to true will make ValueStore return instances of CorruptValue if NativeValue cannot be read. 
- */ - this.softFailOnCorruptData = "true" - .equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData")); } /*---------* @@ -195,6 +191,7 @@ public Lock getReadLock() throws InterruptedException { * @throws IOException If an I/O error occurred. */ public NativeValue getValue(int id) throws IOException { + // Check value cache Integer cacheID = id; NativeValue resultValue = valueCache.get(cacheID); @@ -206,12 +203,55 @@ public NativeValue getValue(int id) throws IOException { if (data != null) { resultValue = data2value(id, data); - // Store value in cache - valueCache.put(cacheID, resultValue); + if (!(resultValue instanceof CorruptValue)) { + // Store value in cache + valueCache.put(cacheID, resultValue); + } } } return resultValue; + + } + + /** + * Gets the Resource for the specified ID. + * + * @param id A value ID. + * @return The Resource for the ID, or null if no such value could be found. + * @throws IOException If an I/O error occurred. + */ + public T getResource(int id) throws IOException { + + NativeValue resultValue = getValue(id); + + if (!(resultValue instanceof Resource)) { + if (SOFT_FAIL_ON_CORRUPT_DATA && resultValue instanceof CorruptValue) { + return (T) new CorruptIRIOrBNode(revision, id, ((CorruptValue) resultValue).getData()); + } + } + + return (T) resultValue; + } + + /** + * Gets the IRI for the specified ID. + * + * @param id A value ID. + * @return The IRI for the ID, or null if no such value could be found. + * @throws IOException If an I/O error occurred. 
+ */ + public T getIRI(int id) throws IOException { + + NativeValue resultValue = getValue(id); + + if (!(resultValue instanceof Resource)) { + if (SOFT_FAIL_ON_CORRUPT_DATA && resultValue instanceof CorruptValue) { + return (T) new CorruptIRIOrBNode(revision, id, ((CorruptValue) resultValue).getData()); + } + } + + return (T) resultValue; } /** @@ -542,7 +582,8 @@ private boolean isNamespaceData(byte[] data) { private NativeValue data2value(int id, byte[] data) throws IOException { if (data.length == 0) { - if (softFailOnCorruptData) { + if (SOFT_FAIL_ON_CORRUPT_DATA) { + logger.error("Soft fail on corrupt data: Empty data array for value with id {}", id); return new CorruptValue(revision, id, data); } throw new SailException("Empty data array for value with id " + id); @@ -555,20 +596,29 @@ private NativeValue data2value(int id, byte[] data) throws IOException { case LITERAL_VALUE: return data2literal(id, data); default: - if (softFailOnCorruptData) { + if (SOFT_FAIL_ON_CORRUPT_DATA) { + logger.error("Soft fail on corrupt data: Invalid type {} for value with id {}", data[0], id); return new CorruptValue(revision, id, data); } throw new SailException("Invalid type " + data[0] + " for value with id " + id); } } - private NativeIRI data2uri(int id, byte[] data) throws IOException { - int nsID = ByteArrayUtil.getInt(data, 1); - String namespace = getNamespace(nsID); + private T data2uri(int id, byte[] data) throws IOException { + try { + int nsID = ByteArrayUtil.getInt(data, 1); + String namespace = getNamespace(nsID); + + String localName = new String(data, 5, data.length - 5, StandardCharsets.UTF_8); - String localName = new String(data, 5, data.length - 5, StandardCharsets.UTF_8); + return (T) new NativeIRI(revision, namespace, localName, id); + } catch (Throwable e) { + if (SOFT_FAIL_ON_CORRUPT_DATA && (e instanceof Exception || e instanceof AssertionError)) { + return (T) new CorruptIRI(revision, id, data); + } + throw e; + } - return new NativeIRI(revision, 
namespace, localName, id); } private NativeBNode data2bnode(int id, byte[] data) { @@ -576,31 +626,39 @@ private NativeBNode data2bnode(int id, byte[] data) { return new NativeBNode(revision, nodeID, id); } - private NativeLiteral data2literal(int id, byte[] data) throws IOException { - // Get datatype - int datatypeID = ByteArrayUtil.getInt(data, 1); - IRI datatype = null; - if (datatypeID != NativeValue.UNKNOWN_ID) { - datatype = (IRI) getValue(datatypeID); - } + private T data2literal(int id, byte[] data) throws IOException { + try { + // Get datatype + int datatypeID = ByteArrayUtil.getInt(data, 1); + IRI datatype = null; + if (datatypeID != NativeValue.UNKNOWN_ID) { + datatype = (IRI) getValue(datatypeID); + } - // Get language tag - String lang = null; - int langLength = data[5]; - if (langLength > 0) { - lang = new String(data, 6, langLength, StandardCharsets.UTF_8); - } + // Get language tag + String lang = null; + int langLength = data[5]; + if (langLength > 0) { + lang = new String(data, 6, langLength, StandardCharsets.UTF_8); + } - // Get label - String label = new String(data, 6 + langLength, data.length - 6 - langLength, StandardCharsets.UTF_8); + // Get label + String label = new String(data, 6 + langLength, data.length - 6 - langLength, StandardCharsets.UTF_8); - if (lang != null) { - return new NativeLiteral(revision, label, lang, id); - } else if (datatype != null) { - return new NativeLiteral(revision, label, datatype, id); - } else { - return new NativeLiteral(revision, label, CoreDatatype.XSD.STRING, id); + if (lang != null) { + return (T) new NativeLiteral(revision, label, lang, id); + } else if (datatype != null) { + return (T) new NativeLiteral(revision, label, datatype, id); + } else { + return (T) new NativeLiteral(revision, label, CoreDatatype.XSD.STRING, id); + } + } catch (Throwable e) { + if (SOFT_FAIL_ON_CORRUPT_DATA && (e instanceof Exception || e instanceof AssertionError)) { + return (T) new CorruptLiteral(revision, id, data); + } + 
throw e; } + } private String data2namespace(byte[] data) { diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java new file mode 100644 index 0000000000..2a0f633a19 --- /dev/null +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java @@ -0,0 +1,64 @@ +/******************************************************************************* + * Copyright (c) 2024 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.nativerdf.model; + +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision; + +/** + * CorruptIRI is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see + * ValueStore#SOFT_FAIL_ON_CORRUPT_DATA). + * + * @author Håvard M. 
Ottestad + */ +public class CorruptIRI extends CorruptValue implements IRI { + + private static final long serialVersionUID = -6995615243794525852L; + + public CorruptIRI(ValueStoreRevision revision, int internalID, byte[] data) { + super(revision, internalID, data); + } + + public String stringValue() { + return "CorruptIRI_with_ID_" + getInternalID(); + } + + @Override + public String getNamespace() { + return "CORRUPT"; + } + + @Override + public String getLocalName() { + return "CORRUPT"; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o instanceof CorruptIRI && getInternalID() != NativeValue.UNKNOWN_ID) { + CorruptIRI otherCorruptValue = (CorruptIRI) o; + + if (otherCorruptValue.getInternalID() != NativeValue.UNKNOWN_ID + && getValueStoreRevision().equals(otherCorruptValue.getValueStoreRevision())) { + // CorruptValue is from the same revision of the same native store with both IDs set + return getInternalID() == otherCorruptValue.getInternalID(); + } + } + + return super.equals(o); + } + +} diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java new file mode 100644 index 0000000000..f06c6ad016 --- /dev/null +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java @@ -0,0 +1,70 @@ +/******************************************************************************* + * Copyright (c) 2024 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.nativerdf.model; + +import org.eclipse.rdf4j.model.BNode; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision; + +/** + * CorruptIRIOrBNode is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see + * ValueStore#SOFT_FAIL_ON_CORRUPT_DATA). + * + * @author Håvard M. Ottestad + */ +public class CorruptIRIOrBNode extends CorruptValue implements IRI, BNode { + + private static final long serialVersionUID = 3709784393454516043L; + + public CorruptIRIOrBNode(ValueStoreRevision revision, int internalID, byte[] data) { + super(revision, internalID, data); + } + + public String stringValue() { + return "CorruptIRI_with_ID_" + getInternalID(); + } + + @Override + public String getNamespace() { + return "CORRUPT"; + } + + @Override + public String getLocalName() { + return "CORRUPT"; + } + + @Override + public String getID() { + return ""; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o instanceof CorruptIRIOrBNode && getInternalID() != NativeValue.UNKNOWN_ID) { + CorruptIRIOrBNode otherCorruptValue = (CorruptIRIOrBNode) o; + + if (otherCorruptValue.getInternalID() != NativeValue.UNKNOWN_ID + && getValueStoreRevision().equals(otherCorruptValue.getValueStoreRevision())) { + // CorruptValue is from the same revision of the same native store with both IDs set + return getInternalID() == otherCorruptValue.getInternalID(); + } + } + + return super.equals(o); + } + +} diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java new file mode 100644 index 0000000000..00d3f8cc91 --- /dev/null +++ 
b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java @@ -0,0 +1,132 @@ +/******************************************************************************* + * Copyright (c) 2024 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.nativerdf.model; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.util.Optional; + +import javax.xml.datatype.XMLGregorianCalendar; + +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.base.CoreDatatype; +import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision; + +/** + * CorruptLiteral is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see + * ValueStore#SOFT_FAIL_ON_CORRUPT_DATA). + * + * @author Håvard M. 
Ottestad + */ +public class CorruptLiteral extends CorruptValue implements Literal { + + private static final long serialVersionUID = -2510885288827542623L; + + public CorruptLiteral(ValueStoreRevision revision, int internalID, byte[] data) { + super(revision, internalID, data); + } + + public String stringValue() { + return "CorruptLiteral_with_ID_" + getInternalID(); + } + + @Override + public String getLabel() { + return ""; + } + + @Override + public Optional getLanguage() { + return Optional.empty(); + } + + @Override + public IRI getDatatype() { + return null; + } + + @Override + public boolean booleanValue() { + return false; + } + + @Override + public byte byteValue() { + return 0; + } + + @Override + public short shortValue() { + return 0; + } + + @Override + public int intValue() { + return 0; + } + + @Override + public long longValue() { + return 0; + } + + @Override + public BigInteger integerValue() { + return null; + } + + @Override + public BigDecimal decimalValue() { + return null; + } + + @Override + public float floatValue() { + return 0; + } + + @Override + public double doubleValue() { + return 0; + } + + @Override + public XMLGregorianCalendar calendarValue() { + return null; + } + + @Override + public CoreDatatype getCoreDatatype() { + return null; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o instanceof CorruptLiteral && getInternalID() != NativeValue.UNKNOWN_ID) { + CorruptLiteral otherCorruptValue = (CorruptLiteral) o; + + if (otherCorruptValue.getInternalID() != NativeValue.UNKNOWN_ID + && getValueStoreRevision().equals(otherCorruptValue.getValueStoreRevision())) { + // CorruptValue is from the same revision of the same native store with both IDs set + return getInternalID() == otherCorruptValue.getInternalID(); + } + } + + return super.equals(o); + } + +} diff --git a/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java 
b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java new file mode 100644 index 0000000000..ba8be3038c --- /dev/null +++ b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java @@ -0,0 +1,224 @@ +/******************************************************************************* + * Copyright (c) 2024 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.nativerdf; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.util.ArrayList; +import java.util.List; + +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.util.Values; +import org.eclipse.rdf4j.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.vocabulary.RDFS; +import org.eclipse.rdf4j.repository.Repository; +import org.eclipse.rdf4j.repository.RepositoryConnection; +import org.eclipse.rdf4j.repository.RepositoryResult; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.jetbrains.annotations.NotNull; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Tests how the NativeStore handles 
corruption in the data files. + */ +public class NativeSailStoreCorruptionTest { + + private static final Logger logger = LoggerFactory.getLogger(NativeSailStoreCorruptionTest.class); + + @TempDir + File tempFolder; + + protected Repository repo; + + protected final ValueFactory F = SimpleValueFactory.getInstance(); + + private File dataDir; + + @BeforeEach + public void before() throws IOException { + this.dataDir = new File(tempFolder, "dbmodel"); + dataDir.mkdir(); + repo = new SailRepository(new NativeStore(dataDir, "spoc,posc")); + repo.init(); + + IRI CTX_1 = F.createIRI("urn:one"); + IRI CTX_2 = F.createIRI("urn:two"); + + Statement S0 = F.createStatement(F.createIRI("http://example.org/a0"), RDFS.LABEL, F.createLiteral("zero")); + Statement S1 = F.createStatement(F.createIRI("http://example.org/b1"), RDFS.LABEL, F.createLiteral("one")); + Statement S2 = F.createStatement(F.createIRI("http://example.org/c2"), RDFS.LABEL, F.createLiteral("two")); + Statement S3 = F.createStatement(Values.bnode(), RDF.TYPE, Values.bnode()); + Statement S4 = F.createStatement(F.createIRI("http://example.org/c2"), RDFS.LABEL, + F.createLiteral("two", "en")); + Statement S5 = F.createStatement(F.createIRI("http://example.org/c2"), RDFS.LABEL, F.createLiteral(1.2)); + + try (RepositoryConnection conn = repo.getConnection()) { + conn.add(S0); + conn.add(S1, CTX_1); + conn.add(S2, CTX_2); + conn.add(S2, CTX_2); + conn.add(S3, CTX_2); + conn.add(S4, CTX_2); + conn.add(S5, CTX_2); + } + backupFile(dataDir, "values.dat"); + } + + public static void overwriteByteInFile(File valuesFile, long pos, int newVal) throws IOException { + + // Use RandomAccessFile in "rw" mode to read and write to the file + try (RandomAccessFile raf = new RandomAccessFile(valuesFile, "rw")) { + // Get the length of the file + long fileLength = raf.length(); + + // Check if the position is within the file bounds + if (pos >= fileLength) { + throw new IOException( + "Attempt to write outside the existing file 
bounds: " + pos + " >= " + fileLength); + } + + // Move the file pointer to the requested byte position + raf.seek(pos); + + // Write the new byte value at the current position + raf.writeByte(newVal); + } + } + + public static void backupFile(File dataDir, String s) throws IOException { + File valuesFile = new File(dataDir, s); + File backupFile = new File(dataDir, s + ".bak"); + + if (!valuesFile.exists()) { + throw new IOException("values.dat does not exist and cannot be backed up."); + } + + // Copy the file to its .bak backup + Files.copy(valuesFile.toPath(), backupFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + } + + public static void restoreFile(File dataDir, String s) throws IOException { + File valuesFile = new File(dataDir, s); + File backupFile = new File(dataDir, s + ".bak"); + + if (!backupFile.exists()) { + throw new IOException("Backup file values.dat.bak does not exist."); + } + + // Copy the .bak backup over the file + Files.copy(backupFile.toPath(), valuesFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + } + + @Test + public void testCorruptValuesDatFileNamespace() throws IOException { + repo.shutDown(); + + overwriteByteInFile(new File(dataDir, "values.dat"), 12, 0x0); + + repo.init(); + + List list = getStatements(); + assertEquals(6, list.size()); + } + + @Test + public void testCorruptValuesDatFileNamespaceDatatype() throws IOException { + repo.shutDown(); + + overwriteByteInFile(new File(dataDir, "values.dat"), 96, 0x0); + + repo.init(); + + List list = getStatements(); + assertEquals(6, list.size()); + } + + @Test + public void testCorruptValuesDatFileEmptyDataArrayError() throws IOException { + repo.shutDown(); + + overwriteByteInFile(new File(dataDir, "values.dat"), 173, 0x0); + + repo.init(); + + List list = getStatements(); + assertEquals(6, list.size()); + } + + @Test + public void testCorruptValuesDatFileInvalidTypeError() throws IOException { + repo.shutDown(); + + overwriteByteInFile(new File(dataDir, "values.dat"), 174, 0x0); 
+ + repo.init(); + + List list = getStatements(); + assertEquals(6, list.size()); + } + + @Test + public void testCorruptValuesDatFileEntireValuesDatFile() throws IOException { + for (int i = 4; i < 437; i++) { + logger.debug("Corrupting byte at position " + i); + repo.shutDown(); + restoreFile(dataDir, "values.dat"); + + overwriteByteInFile(new File(dataDir, "values.dat"), i, 0x0); + + repo.init(); + + List list = getStatements(); + assertEquals(6, list.size()); + + } + + } + + @NotNull + private List getStatements() { + List list = new ArrayList<>(); + + ValueStore.SOFT_FAIL_ON_CORRUPT_DATA = true; + + try (RepositoryConnection conn = repo.getConnection()) { + try (RepositoryResult statements = conn.getStatements(null, null, null, false)) { + while (statements.hasNext()) { + Statement next = statements.next(); + list.add(next); + logger.debug(next.toString()); + } + } + return list; + } finally { + ValueStore.SOFT_FAIL_ON_CORRUPT_DATA = false; + } + } + + @AfterEach + public void after() { + repo.shutDown(); + } +} diff --git a/site/content/documentation/programming/repository.md b/site/content/documentation/programming/repository.md index d1fa4f8324..b8ebd6bdb8 100644 --- a/site/content/documentation/programming/repository.md +++ b/site/content/documentation/programming/repository.md @@ -98,6 +98,9 @@ import org.eclipse.rdf4j.sail.nativerdf.NativeStore; Repository repo = new SailRepository(new NativeStore()); ``` +In the unlikely event of data corruption, the system property `org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData` can be set to `true` to +allow the NativeStore to return `CorruptValue`/`CorruptIRI`/`CorruptIRIOrBNode`/`CorruptLiteral` objects in place of values it cannot read, instead of failing with an exception. + ### Elasticsearch RDF Repository {{< tag " New in RDF4J 3.1" >}}