diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java
index 6d8c84cfa9..b8776bdaf8 100644
--- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java
+++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java
@@ -61,10 +61,10 @@ public Statement getNextElement() throws SailException {
}
int subjID = ByteArrayUtil.getInt(nextValue, TripleStore.SUBJ_IDX);
- Resource subj = (Resource) valueStore.getValue(subjID);
+ Resource subj = valueStore.getResource(subjID);
int predID = ByteArrayUtil.getInt(nextValue, TripleStore.PRED_IDX);
- IRI pred = (IRI) valueStore.getValue(predID);
+ IRI pred = valueStore.getIRI(predID);
int objID = ByteArrayUtil.getInt(nextValue, TripleStore.OBJ_IDX);
Value obj = valueStore.getValue(objID);
@@ -72,7 +72,7 @@ public Statement getNextElement() throws SailException {
Resource context = null;
int contextID = ByteArrayUtil.getInt(nextValue, TripleStore.CONTEXT_IDX);
if (contextID != 0) {
- context = (Resource) valueStore.getValue(contextID);
+ context = valueStore.getResource(contextID);
}
return valueStore.createStatement(subj, pred, obj, context);
diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java
index d24541e3d1..c3f7a835c5 100644
--- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java
+++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java
@@ -33,12 +33,17 @@
import org.eclipse.rdf4j.model.vocabulary.XSD;
import org.eclipse.rdf4j.sail.SailException;
import org.eclipse.rdf4j.sail.nativerdf.datastore.DataStore;
+import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRI;
+import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRIOrBNode;
+import org.eclipse.rdf4j.sail.nativerdf.model.CorruptLiteral;
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptValue;
import org.eclipse.rdf4j.sail.nativerdf.model.NativeBNode;
import org.eclipse.rdf4j.sail.nativerdf.model.NativeIRI;
import org.eclipse.rdf4j.sail.nativerdf.model.NativeLiteral;
import org.eclipse.rdf4j.sail.nativerdf.model.NativeResource;
import org.eclipse.rdf4j.sail.nativerdf.model.NativeValue;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* File-based indexed storage and retrieval of RDF values. ValueStore maps RDF values to integer IDs and vice-versa.
@@ -50,9 +55,7 @@
@InternalUseOnly
public class ValueStore extends SimpleValueFactory {
- /*-----------*
- * Constants *
- *-----------*/
+ private static final Logger logger = LoggerFactory.getLogger(ValueStore.class);
/**
* The default value cache size.
@@ -127,7 +130,8 @@ public class ValueStore extends SimpleValueFactory {
/**
* Do not throw an exception in case a value cannot be loaded, e.g. due to a corrupt value store.
*/
- private final boolean softFailOnCorruptData;
+ public static boolean SOFT_FAIL_ON_CORRUPT_DATA = "true"
+ .equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData"));
/*--------------*
* Constructors *
@@ -153,14 +157,6 @@ public ValueStore(File dataDir, boolean forceSync, int valueCacheSize, int value
setNewRevision();
- /*
- * Soft failure when a ValueStore is corrupt (i.e., one or more NativeValues cannot be read properly) can be
- * enabled using the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData (boolean). The
- * default behavior is that ValueStore will fail hard with a SailException, whereas softFaileOnCorruptData set
- * to true will make ValueStore return instances of CorruptValue if NativeValue cannot be read.
- */
- this.softFailOnCorruptData = "true"
- .equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData"));
}
/*---------*
@@ -195,6 +191,7 @@ public Lock getReadLock() throws InterruptedException {
* @throws IOException If an I/O error occurred.
*/
public NativeValue getValue(int id) throws IOException {
+
// Check value cache
Integer cacheID = id;
NativeValue resultValue = valueCache.get(cacheID);
@@ -206,12 +203,55 @@ public NativeValue getValue(int id) throws IOException {
if (data != null) {
resultValue = data2value(id, data);
- // Store value in cache
- valueCache.put(cacheID, resultValue);
+ if (!(resultValue instanceof CorruptValue)) {
+ // Store value in cache
+ valueCache.put(cacheID, resultValue);
+ }
}
}
return resultValue;
+
+ }
+
+ /**
+ * Looks up the value stored under the given ID for use in a resource position (e.g. subject or context).
+ *
+ * @param id A value ID.
+ * @return The Resource for the ID (a {@link CorruptIRIOrBNode} placeholder if soft failure is enabled and the stored
+ *         value is corrupt and not a Resource), or null if no such value could be found.
+ * @throws IOException If an I/O error occurred.
+ */
+ public T getResource(int id) throws IOException {
+ NativeValue value = getValue(id);
+
+ // Only a corrupt, non-Resource value is swapped for a placeholder, and only in soft-fail mode.
+ boolean corruptNonResource = !(value instanceof Resource) && value instanceof CorruptValue;
+ if (corruptNonResource && SOFT_FAIL_ON_CORRUPT_DATA) {
+ byte[] rawData = ((CorruptValue) value).getData();
+ return (T) new CorruptIRIOrBNode(revision, id, rawData);
+ }
+ return (T) value;
+ }
+
+ /**
+ * Gets the IRI for the specified ID.
+ *
+ * @param id A value ID.
+ * @return The IRI for the ID (a {@link CorruptIRI} placeholder if soft failure is enabled and the stored value is
+ *         corrupt and not an IRI), or null if no such value could be found.
+ * @throws IOException If an I/O error occurred.
+ */
+ public T getIRI(int id) throws IOException {
+ NativeValue resultValue = getValue(id);
+
+ // Test against IRI rather than Resource so the placeholder is IRI-only and can never pose as a blank node.
+ if (!(resultValue instanceof IRI)) {
+ if (SOFT_FAIL_ON_CORRUPT_DATA && resultValue instanceof CorruptValue) {
+ return (T) new CorruptIRI(revision, id, ((CorruptValue) resultValue).getData());
+ }
+ }
+ return (T) resultValue;
}
/**
@@ -542,7 +582,8 @@ private boolean isNamespaceData(byte[] data) {
private NativeValue data2value(int id, byte[] data) throws IOException {
if (data.length == 0) {
- if (softFailOnCorruptData) {
+ if (SOFT_FAIL_ON_CORRUPT_DATA) {
+ logger.error("Soft fail on corrupt data: Empty data array for value with id {}", id);
return new CorruptValue(revision, id, data);
}
throw new SailException("Empty data array for value with id " + id);
@@ -555,20 +596,29 @@ private NativeValue data2value(int id, byte[] data) throws IOException {
case LITERAL_VALUE:
return data2literal(id, data);
default:
- if (softFailOnCorruptData) {
+ if (SOFT_FAIL_ON_CORRUPT_DATA) {
+ logger.error("Soft fail on corrupt data: Invalid type {} for value with id {}", data[0], id);
return new CorruptValue(revision, id, data);
}
throw new SailException("Invalid type " + data[0] + " for value with id " + id);
}
}
- private NativeIRI data2uri(int id, byte[] data) throws IOException {
- int nsID = ByteArrayUtil.getInt(data, 1);
- String namespace = getNamespace(nsID);
+ private T data2uri(int id, byte[] data) throws IOException {
+ try {
+ int nsID = ByteArrayUtil.getInt(data, 1);
+ String namespace = getNamespace(nsID);
+ String localName = new String(data, 5, data.length - 5, StandardCharsets.UTF_8);
- String localName = new String(data, 5, data.length - 5, StandardCharsets.UTF_8);
+ return (T) new NativeIRI(revision, namespace, localName, id);
+ } catch (Throwable e) {
+ if (SOFT_FAIL_ON_CORRUPT_DATA && (e instanceof Exception || e instanceof AssertionError)) {
+ logger.error("Soft fail on corrupt data: Unable to read IRI for value with id {}", id, e);
+ return (T) new CorruptIRI(revision, id, data);
+ }
+ throw e; // hard-fail mode, or an Error other than AssertionError: propagate unchanged
+ }
- return new NativeIRI(revision, namespace, localName, id);
}
private NativeBNode data2bnode(int id, byte[] data) {
@@ -576,31 +626,39 @@ private NativeBNode data2bnode(int id, byte[] data) {
return new NativeBNode(revision, nodeID, id);
}
- private NativeLiteral data2literal(int id, byte[] data) throws IOException {
- // Get datatype
- int datatypeID = ByteArrayUtil.getInt(data, 1);
- IRI datatype = null;
- if (datatypeID != NativeValue.UNKNOWN_ID) {
- datatype = (IRI) getValue(datatypeID);
- }
+ private T data2literal(int id, byte[] data) throws IOException {
+ try {
+ // Get datatype
+ int datatypeID = ByteArrayUtil.getInt(data, 1);
+ IRI datatype = null;
+ if (datatypeID != NativeValue.UNKNOWN_ID) {
+ datatype = (IRI) getValue(datatypeID);
+ }
- // Get language tag
- String lang = null;
- int langLength = data[5];
- if (langLength > 0) {
- lang = new String(data, 6, langLength, StandardCharsets.UTF_8);
- }
+ // Get language tag
+ String lang = null;
+ int langLength = data[5];
+ if (langLength > 0) {
+ lang = new String(data, 6, langLength, StandardCharsets.UTF_8);
+ }
- // Get label
- String label = new String(data, 6 + langLength, data.length - 6 - langLength, StandardCharsets.UTF_8);
+ // Get label
+ String label = new String(data, 6 + langLength, data.length - 6 - langLength, StandardCharsets.UTF_8);
- if (lang != null) {
- return new NativeLiteral(revision, label, lang, id);
- } else if (datatype != null) {
- return new NativeLiteral(revision, label, datatype, id);
- } else {
- return new NativeLiteral(revision, label, CoreDatatype.XSD.STRING, id);
+ if (lang != null) {
+ return (T) new NativeLiteral(revision, label, lang, id);
+ } else if (datatype != null) {
+ return (T) new NativeLiteral(revision, label, datatype, id);
+ } else {
+ return (T) new NativeLiteral(revision, label, CoreDatatype.XSD.STRING, id);
+ }
+ } catch (Throwable e) {
+ if (SOFT_FAIL_ON_CORRUPT_DATA && (e instanceof Exception || e instanceof AssertionError)) {
+ logger.error("Soft fail on corrupt data: Unable to read literal for value with id {}", id, e);
+ return (T) new CorruptLiteral(revision, id, data);
+ }
+ throw e; // hard-fail mode, or an Error other than AssertionError: propagate unchanged
}
}
private String data2namespace(byte[] data) {
diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java
new file mode 100644
index 0000000000..2a0f633a19
--- /dev/null
+++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java
@@ -0,0 +1,64 @@
+/*******************************************************************************
+ * Copyright (c) 2024 Eclipse RDF4J contributors.
+ *
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Distribution License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ ******************************************************************************/
+
+package org.eclipse.rdf4j.sail.nativerdf.model;
+
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;
+
+/**
+ * Placeholder {@link IRI} for a value that could not be read from the ValueStore. ValueStore only produces it when
+ * soft failure is enabled (see ValueStore#SOFT_FAIL_ON_CORRUPT_DATA).
+ *
+ * @author Håvard M. Ottestad
+ */
+public class CorruptIRI extends CorruptValue implements IRI {
+
+ private static final long serialVersionUID = -6995615243794525852L;
+
+ public CorruptIRI(ValueStoreRevision revision, int internalID, byte[] data) {
+ super(revision, internalID, data);
+ }
+
+ @Override
+ public String stringValue() {
+ return "CorruptIRI_with_ID_" + getInternalID();
+ }
+
+ @Override
+ public String getNamespace() {
+ return "CORRUPT";
+ }
+
+ @Override
+ public String getLocalName() {
+ return "CORRUPT";
+ }
+
+ /**
+ * Two CorruptIRIs with known internal IDs from the same value store revision are equal when their IDs match; every
+ * other comparison is delegated to the superclass.
+ */
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof CorruptIRI) || getInternalID() == NativeValue.UNKNOWN_ID) {
+ return super.equals(o);
+ }
+ CorruptIRI that = (CorruptIRI) o;
+ boolean sameKnownRevision = that.getInternalID() != NativeValue.UNKNOWN_ID
+ && getValueStoreRevision().equals(that.getValueStoreRevision());
+ return sameKnownRevision ? getInternalID() == that.getInternalID() : super.equals(o);
+ }
+
+}
diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java
new file mode 100644
index 0000000000..f06c6ad016
--- /dev/null
+++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java
@@ -0,0 +1,70 @@
+/*******************************************************************************
+ * Copyright (c) 2024 Eclipse RDF4J contributors.
+ *
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Distribution License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ ******************************************************************************/
+
+package org.eclipse.rdf4j.sail.nativerdf.model;
+
+import org.eclipse.rdf4j.model.BNode;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;
+
+/**
+ * Placeholder that is both an {@link IRI} and a {@link BNode}, standing in for a resource that could not be read from
+ * the ValueStore while soft failure is enabled (see ValueStore#SOFT_FAIL_ON_CORRUPT_DATA).
+ *
+ * @author Håvard M. Ottestad
+ */
+public class CorruptIRIOrBNode extends CorruptValue implements IRI, BNode {
+
+ private static final long serialVersionUID = 3709784393454516043L;
+
+ public CorruptIRIOrBNode(ValueStoreRevision revision, int internalID, byte[] data) {
+ super(revision, internalID, data);
+ }
+
+ @Override
+ public String stringValue() {
+ return "CorruptIRI_with_ID_" + getInternalID();
+ }
+
+ @Override
+ public String getNamespace() {
+ return "CORRUPT";
+ }
+
+ @Override
+ public String getLocalName() {
+ return "CORRUPT";
+ }
+
+ @Override
+ public String getID() {
+ return "";
+ }
+
+ /**
+ * Two CorruptIRIOrBNodes with known internal IDs from the same value store revision are equal when their IDs match;
+ * every other comparison is delegated to the superclass.
+ */
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof CorruptIRIOrBNode) || getInternalID() == NativeValue.UNKNOWN_ID) {
+ return super.equals(o);
+ }
+ CorruptIRIOrBNode that = (CorruptIRIOrBNode) o;
+ boolean sameKnownRevision = that.getInternalID() != NativeValue.UNKNOWN_ID
+ && getValueStoreRevision().equals(that.getValueStoreRevision());
+ return sameKnownRevision ? getInternalID() == that.getInternalID() : super.equals(o);
+ }
+
+}
diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java
new file mode 100644
index 0000000000..00d3f8cc91
--- /dev/null
+++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java
@@ -0,0 +1,132 @@
+/*******************************************************************************
+ * Copyright (c) 2024 Eclipse RDF4J contributors.
+ *
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Distribution License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ ******************************************************************************/
+
+package org.eclipse.rdf4j.sail.nativerdf.model;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.util.Optional;
+
+import javax.xml.datatype.XMLGregorianCalendar;
+
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.Literal;
+import org.eclipse.rdf4j.model.base.CoreDatatype;
+import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;
+
+/**
+ * Placeholder {@link Literal} for a value that could not be read from the ValueStore while soft failure is enabled (see
+ * ValueStore#SOFT_FAIL_ON_CORRUPT_DATA). It reports xsd:string as its datatype so that the datatype is never null.
+ *
+ * @author Håvard M. Ottestad
+ */
+public class CorruptLiteral extends CorruptValue implements Literal {
+
+ private static final long serialVersionUID = -2510885288827542623L;
+
+ public CorruptLiteral(ValueStoreRevision revision, int internalID, byte[] data) {
+ super(revision, internalID, data);
+ }
+
+ @Override
+ public String stringValue() {
+ return "CorruptLiteral_with_ID_" + getInternalID();
+ }
+
+ @Override
+ public String getLabel() {
+ return "";
+ }
+
+ @Override
+ public Optional getLanguage() {
+ return Optional.empty();
+ }
+
+ @Override
+ public IRI getDatatype() {
+ return CoreDatatype.XSD.STRING.getIri();
+ }
+
+ @Override
+ public boolean booleanValue() {
+ return false;
+ }
+
+ @Override
+ public byte byteValue() {
+ return 0;
+ }
+
+ @Override
+ public short shortValue() {
+ return 0;
+ }
+
+ @Override
+ public int intValue() {
+ return 0;
+ }
+
+ @Override
+ public long longValue() {
+ return 0;
+ }
+
+ @Override
+ public BigInteger integerValue() {
+ return null;
+ }
+
+ @Override
+ public BigDecimal decimalValue() {
+ return null;
+ }
+
+ @Override
+ public float floatValue() {
+ return 0;
+ }
+
+ @Override
+ public double doubleValue() {
+ return 0;
+ }
+
+ @Override
+ public XMLGregorianCalendar calendarValue() {
+ return null;
+ }
+
+ @Override
+ public CoreDatatype getCoreDatatype() {
+ return CoreDatatype.XSD.STRING;
+ }
+
+ /**
+ * Two CorruptLiterals with known internal IDs from the same value store revision are equal when their IDs match;
+ * every other comparison is delegated to the superclass.
+ */
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof CorruptLiteral) || getInternalID() == NativeValue.UNKNOWN_ID) {
+ return super.equals(o);
+ }
+ CorruptLiteral that = (CorruptLiteral) o;
+ boolean sameKnownRevision = that.getInternalID() != NativeValue.UNKNOWN_ID
+ && getValueStoreRevision().equals(that.getValueStoreRevision());
+ return sameKnownRevision ? getInternalID() == that.getInternalID() : super.equals(o);
+ }
+
+}
diff --git a/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java
new file mode 100644
index 0000000000..ba8be3038c
--- /dev/null
+++ b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java
@@ -0,0 +1,224 @@
+/*******************************************************************************
+ * Copyright (c) 2024 Eclipse RDF4J contributors.
+ *
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Distribution License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ ******************************************************************************/
+
+package org.eclipse.rdf4j.sail.nativerdf;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.file.Files;
+import java.nio.file.StandardCopyOption;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.Statement;
+import org.eclipse.rdf4j.model.ValueFactory;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
+import org.eclipse.rdf4j.model.util.Values;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.vocabulary.RDFS;
+import org.eclipse.rdf4j.repository.Repository;
+import org.eclipse.rdf4j.repository.RepositoryConnection;
+import org.eclipse.rdf4j.repository.RepositoryResult;
+import org.eclipse.rdf4j.repository.sail.SailRepository;
+import org.jetbrains.annotations.NotNull;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Tests how the NativeStore handles corruption in the data files: bytes of values.dat are overwritten and all six statements must still be readable with soft failure enabled.
+ */
+public class NativeSailStoreCorruptionTest {
+
+ private static final Logger logger = LoggerFactory.getLogger(NativeSailStoreCorruptionTest.class);
+
+ @TempDir
+ File tempFolder;
+
+ protected Repository repo;
+
+ protected final ValueFactory F = SimpleValueFactory.getInstance();
+
+ private File dataDir;
+
+ @BeforeEach
+ public void before() throws IOException {
+ this.dataDir = new File(tempFolder, "dbmodel");
+ dataDir.mkdir();
+ repo = new SailRepository(new NativeStore(dataDir, "spoc,posc"));
+ repo.init();
+
+ IRI CTX_1 = F.createIRI("urn:one");
+ IRI CTX_2 = F.createIRI("urn:two");
+
+ Statement S0 = F.createStatement(F.createIRI("http://example.org/a0"), RDFS.LABEL, F.createLiteral("zero"));
+ Statement S1 = F.createStatement(F.createIRI("http://example.org/b1"), RDFS.LABEL, F.createLiteral("one"));
+ Statement S2 = F.createStatement(F.createIRI("http://example.org/c2"), RDFS.LABEL, F.createLiteral("two"));
+ Statement S3 = F.createStatement(Values.bnode(), RDF.TYPE, Values.bnode());
+ Statement S4 = F.createStatement(F.createIRI("http://example.org/c2"), RDFS.LABEL,
+ F.createLiteral("two", "en"));
+ Statement S5 = F.createStatement(F.createIRI("http://example.org/c2"), RDFS.LABEL, F.createLiteral(1.2));
+
+ try (RepositoryConnection conn = repo.getConnection()) {
+ conn.add(S0);
+ conn.add(S1, CTX_1);
+ conn.add(S2, CTX_2);
+ conn.add(S2, CTX_2);
+ conn.add(S3, CTX_2);
+ conn.add(S4, CTX_2);
+ conn.add(S5, CTX_2);
+ }
+ backupFile(dataDir, "values.dat");
+ }
+
+ public static void overwriteByteInFile(File valuesFile, long pos, int newVal) throws IOException {
+ // Overwrites the single byte at offset pos with newVal; never grows the file.
+ // Use RandomAccessFile in "rw" mode to read and write to the file
+ try (RandomAccessFile raf = new RandomAccessFile(valuesFile, "rw")) {
+ // Get the length of the file
+ long fileLength = raf.length();
+
+ // Check if the position is within the file bounds
+ if (pos >= fileLength) {
+ throw new IOException(
+ "Attempt to write outside the existing file bounds: " + pos + " >= " + fileLength);
+ }
+
+ // Move the file pointer to the requested byte position
+ raf.seek(pos);
+
+ // Overwrite the byte at the current position with newVal
+ raf.writeByte(newVal);
+ }
+ }
+
+ public static void backupFile(File dataDir, String s) throws IOException {
+ File valuesFile = new File(dataDir, s);
+ File backupFile = new File(dataDir, s + ".bak");
+
+ if (!valuesFile.exists()) {
+ throw new IOException(s + " does not exist and cannot be backed up.");
+ }
+
+ // Copy the file to its .bak sibling, replacing any earlier backup
+ Files.copy(valuesFile.toPath(), backupFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
+ }
+
+ public static void restoreFile(File dataDir, String s) throws IOException {
+ File valuesFile = new File(dataDir, s);
+ File backupFile = new File(dataDir, s + ".bak");
+
+ if (!backupFile.exists()) {
+ throw new IOException("Backup file " + s + ".bak does not exist.");
+ }
+
+ // Copy the .bak sibling back over the file
+ Files.copy(backupFile.toPath(), valuesFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
+ }
+
+ @Test
+ public void testCorruptValuesDatFileNamespace() throws IOException {
+ repo.shutDown();
+
+ overwriteByteInFile(new File(dataDir, "values.dat"), 12, 0x0);
+
+ repo.init();
+
+ List list = getStatements();
+ assertEquals(6, list.size());
+ }
+
+ @Test
+ public void testCorruptValuesDatFileNamespaceDatatype() throws IOException {
+ repo.shutDown();
+
+ overwriteByteInFile(new File(dataDir, "values.dat"), 96, 0x0);
+
+ repo.init();
+
+ List list = getStatements();
+ assertEquals(6, list.size());
+ }
+
+ @Test
+ public void testCorruptValuesDatFileEmptyDataArrayError() throws IOException {
+ repo.shutDown();
+
+ overwriteByteInFile(new File(dataDir, "values.dat"), 173, 0x0);
+
+ repo.init();
+
+ List list = getStatements();
+ assertEquals(6, list.size());
+ }
+
+ @Test
+ public void testCorruptValuesDatFileInvalidTypeError() throws IOException {
+ repo.shutDown();
+
+ overwriteByteInFile(new File(dataDir, "values.dat"), 174, 0x0);
+
+ repo.init();
+
+ List list = getStatements();
+ assertEquals(6, list.size());
+ }
+
+ @Test
+ public void testCorruptValuesDatFileEntireValuesDatFile() throws IOException {
+ for (int i = 4; i < 437; i++) {
+ logger.debug("Corrupting byte at position {}", i);
+ repo.shutDown();
+ restoreFile(dataDir, "values.dat");
+
+ overwriteByteInFile(new File(dataDir, "values.dat"), i, 0x0);
+
+ repo.init();
+
+ List list = getStatements();
+ assertEquals(6, list.size());
+
+ }
+
+ }
+
+ @NotNull
+ private List getStatements() {
+ List list = new ArrayList<>();
+ // Soft failure is a static flag on ValueStore: enable it only while reading and always reset it in finally.
+ ValueStore.SOFT_FAIL_ON_CORRUPT_DATA = true;
+
+ try (RepositoryConnection conn = repo.getConnection()) {
+ try (RepositoryResult statements = conn.getStatements(null, null, null, false)) {
+ while (statements.hasNext()) {
+ Statement next = statements.next();
+ list.add(next);
+ logger.debug(next.toString());
+ }
+ }
+ return list;
+ } finally {
+ ValueStore.SOFT_FAIL_ON_CORRUPT_DATA = false;
+ }
+ }
+
+ @AfterEach
+ public void after() {
+ repo.shutDown();
+ }
+}
diff --git a/site/content/documentation/programming/repository.md b/site/content/documentation/programming/repository.md
index d1fa4f8324..b8ebd6bdb8 100644
--- a/site/content/documentation/programming/repository.md
+++ b/site/content/documentation/programming/repository.md
@@ -98,6 +98,9 @@ import org.eclipse.rdf4j.sail.nativerdf.NativeStore;
Repository repo = new SailRepository(new NativeStore());
```
+In the unlikely event of data corruption, the system property `org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData` can be set to `true` so that
+the NativeStore returns CorruptValue/CorruptIRI/CorruptIRIOrBNode/CorruptLiteral placeholder objects for unreadable values instead of failing with an exception.
+
### Elasticsearch RDF Repository
{{< tag " New in RDF4J 3.1" >}}