Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

ZOOKEEPER-4846: Failure to reload database due to missing ACL #2183

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ public void createNode(final String path, byte[] data, List<ACL> acl, long ephem
}
List<ACL> parentAcl;
synchronized (parent) {
parentAcl = getACL(parent);
parentAcl = getACL(parent, false);

// Add the ACL to ACL cache first, to avoid the ACL not being
// created race condition during fuzzy snapshot sync.
Expand Down Expand Up @@ -566,7 +566,7 @@ public void deleteNode(String path, long zxid) throws NoNodeException {
List<ACL> acl;
nodes.remove(path);
synchronized (node) {
acl = getACL(node);
acl = getACL(node, false);
aclCache.removeUsage(node.acl);
nodeDataSize.addAndGet(-getNodeSize(path, node.data));
}
Expand All @@ -576,7 +576,7 @@ public void deleteNode(String path, long zxid) throws NoNodeException {
// separate patch.
List<ACL> parentAcl;
synchronized (parent) {
parentAcl = getACL(parent);
parentAcl = getACL(parent, false);
long owner = node.stat.getEphemeralOwner();
EphemeralType ephemeralType = EphemeralType.get(owner);
if (ephemeralType == EphemeralType.CONTAINER) {
Expand Down Expand Up @@ -638,7 +638,7 @@ public Stat setData(String path, byte[] data, int version, long zxid, long time)
List<ACL> acl;
byte[] lastData;
synchronized (n) {
acl = getACL(n);
acl = getACL(n, false);
lastData = n.data;
nodes.preChange(path, n);
n.data = data;
Expand Down Expand Up @@ -790,8 +790,12 @@ public List<ACL> getACL(String path, Stat stat) throws NoNodeException {
}

public List<ACL> getACL(DataNode node) {
return getACL(node, true);
}

private List<ACL> getACL(DataNode node, boolean mustSucceed) {
synchronized (node) {
return aclCache.convertLong(node.acl);
return aclCache.convertLong(node.acl, mustSucceed);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,11 @@ public synchronized Long convertAcls(List<ACL> acls) {
* @param longVal
* @return a list of ACLs that map to the long
*/
public synchronized List<ACL> convertLong(Long longVal) {
public List<ACL> convertLong(Long longVal) {
return convertLong(longVal, true);
}

public synchronized List<ACL> convertLong(Long longVal, boolean mustSucceed) {
if (longVal == null) {
return null;
}
Expand All @@ -89,8 +93,12 @@ public synchronized List<ACL> convertLong(Long longVal) {
}
List<ACL> acls = longKeyMap.get(longVal);
if (acls == null) {
LOG.error("ERROR: ACL not available for long {}", longVal);
throw new RuntimeException("Failed to fetch acls for " + longVal);
if (!mustSucceed) {
LOG.warn("ACL not available for long {}", longVal);
} else {
LOG.error("ERROR: ACL not available for long {}", longVal);
throw new RuntimeException("Failed to fetch acls for " + longVal);
}
}
return acls;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
Expand Down Expand Up @@ -186,7 +187,7 @@ public WatcherOrBitSet triggerWatch(String path, EventType type, long zxid, List
continue;
}
if (w instanceof ServerWatcher) {
((ServerWatcher) w).process(e, acl);
((ServerWatcher) w).process(e, Objects.requireNonNull(acl));
} else {
w.process(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;
Expand Down Expand Up @@ -236,7 +237,7 @@ public WatcherOrBitSet triggerWatch(String path, EventType type, long zxid, List
}

if (w instanceof ServerWatcher) {
((ServerWatcher) w).process(e, acl);
((ServerWatcher) w).process(e, Objects.requireNonNull(acl));
} else {
w.process(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,24 @@
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.server.DataNode;
import org.apache.zookeeper.server.DataTree;
import org.apache.zookeeper.server.ReferenceCountedACLCache;
import org.apache.zookeeper.server.Request;
import org.apache.zookeeper.server.ZooKeeperServer;
import org.apache.zookeeper.test.ClientBase;
import org.apache.zookeeper.test.TestUtils;
import org.apache.zookeeper.txn.CreateTxn;
import org.apache.zookeeper.txn.DeleteTxn;
import org.apache.zookeeper.txn.SetDataTxn;
import org.apache.zookeeper.txn.TxnDigest;
import org.apache.zookeeper.txn.TxnHeader;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class FileTxnSnapLogTest {
private static final Logger LOG = LoggerFactory.getLogger(FileTxnSnapLogTest.class);

private File tmpDir;

Expand Down Expand Up @@ -363,6 +368,79 @@ public void testACLCreatedDuringFuzzySnapshotSync() throws IOException {
assertEquals(ZooDefs.Ids.CREATOR_ALL_ACL, followerDataTree.getACL(a1));
}

// ZOOKEEPER-4846
@Test
public void testDeleteWithMissingACL() throws IOException {
File dataDir = ClientBase.createEmptyTestDir();
FileTxnSnapLog snapLog = new FileTxnSnapLog(dataDir, dataDir);

DataTree dataTree = new DataTree();
int zxid = 1;

TxnHeader hdr;
Record txn;
Request req;

// Fully-processed node.
hdr = new TxnHeader(1, zxid, zxid, zxid, ZooDefs.OpCode.create);
txn = new CreateTxn("/bar", "bar".getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, false, -1);
req = new Request(0, 0, 0, hdr, txn, 0);

dataTree.processTxn(hdr, txn);
snapLog.append(req);
++zxid;

hdr = new TxnHeader(1, zxid, zxid, zxid, ZooDefs.OpCode.delete);
txn = new DeleteTxn("/foo");
req = new Request(0, 0, 0, hdr, txn, 0);

// The removal of /foo is added to the log, but NOT included
// in the DataTree
// dataTree.processTxn(hdr, txn);
snapLog.append(req);
++zxid;

// Close log.1
snapLog.txnLog.rollLog();

// Ensure we have a /foo node in the tree...
hdr = new TxnHeader(1, zxid, zxid, zxid, ZooDefs.OpCode.create);
txn = new CreateTxn("/foo", "foo".getBytes(), ZooDefs.Ids.CREATOR_ALL_ACL, false, -1);
req = new Request(0, 0, 0, hdr, txn, 0);

dataTree.processTxn(hdr, txn);
snapLog.append(req);
++zxid;

// ... but simulate a "fuzzy" snapshot starting with the
// creation of /bar.
dataTree.lastProcessedZxid -= 2;

// Remove /foo's ACL from the cache, as it was not known at
// the beginning of the "fuzzy" snapshot simulation.
ReferenceCountedACLCache aclCache = dataTree.getReferenceCountedAclCache();
Long aclNr = aclCache.convertAcls(ZooDefs.Ids.CREATOR_ALL_ACL);
aclCache.removeUsage(aclNr);
aclCache.removeUsage(aclNr);

// Write the snapshot.
ConcurrentHashMap<Long, Integer> sessions = new ConcurrentHashMap<>();
File snapshotFile = snapLog.save(dataTree, sessions, true);
LOG.info("Snapshot written to {}", snapshotFile);

// Close everything.
snapLog.close();

// Reload DB.
FileTxnSnapLog snapLog2 = new FileTxnSnapLog(dataDir, dataDir);
DataTree dataTree2 = new DataTree();
long restoredZxid = snapLog2.restore(dataTree2, sessions,
(TxnHeader _hdr, Record _txn, TxnDigest _digest) ->
LOG.info("Loaded {} {} {}", _hdr, _txn, _digest));

assertEquals(zxid - 1, restoredZxid);
}

@Test
public void testEmptySnapshotSerialization() throws IOException {
File dataDir = ClientBase.createEmptyTestDir();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.Watcher.Event.EventType;
import org.apache.zookeeper.ZKTestCase;
import org.apache.zookeeper.ZooDefs.Ids;
import org.apache.zookeeper.metrics.MetricsUtils;
import org.apache.zookeeper.server.DumbWatcher;
import org.apache.zookeeper.server.ServerCnxn;
Expand Down Expand Up @@ -133,7 +134,7 @@ public WatcherTriggerWorker(
public void run() {
while (!stopped) {
String path = PATH_PREFIX + r.nextInt(paths);
WatcherOrBitSet s = manager.triggerWatch(path, EventType.NodeDeleted, -1, null);
WatcherOrBitSet s = manager.triggerWatch(path, EventType.NodeDeleted, -1, Ids.OPEN_ACL_UNSAFE);
if (s != null) {
triggeredCount.addAndGet(s.size());
}
Expand Down Expand Up @@ -756,27 +757,27 @@ public void testWatcherMetrics(String className) throws IOException {
//path2 is watched by watcher1
manager.addWatch(path2, watcher1);

manager.triggerWatch(path3, EventType.NodeCreated, 1, null);
manager.triggerWatch(path3, EventType.NodeCreated, 1, Ids.OPEN_ACL_UNSAFE);
//path3 is not being watched so metric is 0
checkMetrics("node_created_watch_count", 0L, 0L, 0D, 0L, 0L);
// Watchers shouldn't have received any events yet so the zxid should be -1.
checkMostRecentWatchedEvent(watcher1, null, null, -1);
checkMostRecentWatchedEvent(watcher2, null, null, -1);

//path1 is watched by two watchers so two fired
manager.triggerWatch(path1, EventType.NodeCreated, 2, null);
manager.triggerWatch(path1, EventType.NodeCreated, 2, Ids.OPEN_ACL_UNSAFE);
checkMetrics("node_created_watch_count", 2L, 2L, 2D, 1L, 2L);
checkMostRecentWatchedEvent(watcher1, path1, EventType.NodeCreated, 2);
checkMostRecentWatchedEvent(watcher2, path1, EventType.NodeCreated, 2);

//path2 is watched by one watcher so one fired now total is 3
manager.triggerWatch(path2, EventType.NodeCreated, 3, null);
manager.triggerWatch(path2, EventType.NodeCreated, 3, Ids.OPEN_ACL_UNSAFE);
checkMetrics("node_created_watch_count", 1L, 2L, 1.5D, 2L, 3L);
checkMostRecentWatchedEvent(watcher1, path2, EventType.NodeCreated, 3);
checkMostRecentWatchedEvent(watcher2, path1, EventType.NodeCreated, 2);

//watches on path1 are no longer there so zero fired
manager.triggerWatch(path1, EventType.NodeDataChanged, 4, null);
manager.triggerWatch(path1, EventType.NodeDataChanged, 4, Ids.OPEN_ACL_UNSAFE);
checkMetrics("node_changed_watch_count", 0L, 0L, 0D, 0L, 0L);
checkMostRecentWatchedEvent(watcher1, path2, EventType.NodeCreated, 3);
checkMostRecentWatchedEvent(watcher2, path1, EventType.NodeCreated, 2);
Expand All @@ -788,12 +789,12 @@ public void testWatcherMetrics(String className) throws IOException {
//path2 is watched by watcher1
manager.addWatch(path2, watcher1);

manager.triggerWatch(path1, EventType.NodeDataChanged, 5, null);
manager.triggerWatch(path1, EventType.NodeDataChanged, 5, Ids.OPEN_ACL_UNSAFE);
checkMetrics("node_changed_watch_count", 2L, 2L, 2D, 1L, 2L);
checkMostRecentWatchedEvent(watcher1, path1, EventType.NodeDataChanged, 5);
checkMostRecentWatchedEvent(watcher2, path1, EventType.NodeDataChanged, 5);

manager.triggerWatch(path2, EventType.NodeDeleted, 6, null);
manager.triggerWatch(path2, EventType.NodeDeleted, 6, Ids.OPEN_ACL_UNSAFE);
checkMetrics("node_deleted_watch_count", 1L, 1L, 1D, 1L, 1L);
checkMostRecentWatchedEvent(watcher1, path2, EventType.NodeDeleted, 6);
checkMostRecentWatchedEvent(watcher2, path1, EventType.NodeDataChanged, 5);
Expand Down
Loading