Skip to content

Commit

Permalink
Prevent recursive DSE snapshots (#704)
Browse files Browse the repository at this point in the history
  • Loading branch information
rzvoncek authored Jan 15, 2024
1 parent 4ed1fd5 commit c8609c8
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 7 deletions.
12 changes: 11 additions & 1 deletion medusa/cassandra_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,15 @@ def rebuild_search_index(self):
logging.debug(f'Rebuilding search index for {fqtn}')
session.execute(f"REBUILD SEARCH INDEX ON {fqtn}")

@staticmethod
def _ignore_snapshots(folder, contents):
ignored = set()
if folder.endswith('metadata/snapshots'):
logging.info(f'Ignoring {contents} in folder {folder}')
for c in contents:
ignored.add(c)
return ignored

def create_dse_snapshot(self, backup_name):
"""
There is no good way of making snapshot of DSE files
Expand All @@ -445,11 +454,12 @@ def create_dse_snapshot(self, backup_name):
That folder is nested in the parent folder, just like for regular tables
This way, we can reuse a lot of code later on
"""

tag = "{}{}".format(self.SNAPSHOT_PREFIX, backup_name)
if not self.dse_snapshot_exists(tag):
src_path = self._dse_root / self._dse_metadata_folder
dst_path = self._dse_root / self._dse_metadata_folder / 'snapshots' / tag
shutil.copytree(src_path, dst_path)
shutil.copytree(src_path, dst_path, ignore=Cassandra._ignore_snapshots)

return Cassandra.DseSnapshot(self, tag)

Expand Down
6 changes: 5 additions & 1 deletion medusa/service/snapshot/nodetool_snapshot_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,11 @@ def create_snapshot(self, *, tag):
# create the Nodetool command
cmd = self._nodetool.nodetool + ['snapshot', '-t', tag]
logging.debug('Executing: {}'.format(' '.join(cmd)))
subprocess.check_call(cmd, stdout=subprocess.DEVNULL, universal_newlines=True)
try:
subprocess.check_output(cmd, universal_newlines=True)
except subprocess.CalledProcessError as e:
logging.error('nodetool output: {}'.format(e.output))
logging.error('Creating snapshot failed and without a snapshot we cannot do a backup')

def delete_snapshot(self, *, tag):
# create the Nodetool command
Expand Down
22 changes: 22 additions & 0 deletions tests/cassandra_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,5 +942,27 @@ def test_is_open_failed_close(self, mock_socket):
assert mock_instance.shutdown.call_count == 1
assert mock_instance.close.call_count == 1

def test_ignore_snapshots(self):
# none of these combinations is ignored
folder = 'keyspace/table'
contents = ['file1', 'file2']
expected_ignored = set()
actual_ignored = medusa.cassandra_utils.Cassandra._ignore_snapshots(folder, contents)
self.assertEqual(expected_ignored, actual_ignored)

# none of these combinations is ignored
folder = 'keyspace/table/snapshots'
contents = ['snapshot1', 'snapshot2']
expected_ignored = set()
actual_ignored = medusa.cassandra_utils.Cassandra._ignore_snapshots(folder, contents)
self.assertEqual(expected_ignored, actual_ignored)

# we only ignore stuff in this specific folder
folder = 'metadata/snapshots'
contents = ['snapshot1', 'snapshot2']
expected_ignored = {'snapshot1', 'snapshot2'}
actual_ignored = medusa.cassandra_utils.Cassandra._ignore_snapshots(folder, contents)
self.assertEqual(expected_ignored, actual_ignored)

if __name__ == '__main__':
unittest.main()
20 changes: 15 additions & 5 deletions tests/integration/features/integration_tests.feature
Original file line number Diff line number Diff line change
Expand Up @@ -1058,16 +1058,26 @@ Feature: Integration tests
@29
Scenario Outline: Backup and restore a DSE cluster with search enabled
Given I have a fresh DSE cluster version "6.8.38" with "<client encryption>" running named "scenario29"
Given I am using "<storage>" as storage provider in ccm cluster "<client encryption>"
Given I am using "<storage>" as storage provider in ccm cluster "<client encryption>" with gRPC server
Then the gRPC server is up
When I create the "test" table in keyspace "medusa"
And I create a search index on the "test" table in keyspace "medusa"
And I load 100 rows in the "medusa.test" table
When I run a DSE "nodetool flush" command
Then I can make a search query against the "medusa"."test" table
When I perform a backup in "differential" mode of the node named "first_backup" with md5 checks "disabled"
Then I can see the backup named "first_backup" when I list the backups
And the backup "first_backup" has server_type "dse" in its metadata
When I restore the backup named "first_backup"
When I perform an async backup over gRPC in "differential" mode of the node named "backup29-1"
Then the backup index exists
Then I can see the backup named "backup29-1" when I list the backups
And the backup "backup29-1" has server_type "dse" in its metadata
Then I verify over gRPC that the backup "backup29-1" exists and is of type "differential"
Then I verify over gRPC that the backup "backup29-1" has expected status SUCCESS
When I perform an async backup over gRPC in "differential" mode of the node named "backup29-2"
Then the backup index exists
Then I can see the backup named "backup29-2" when I list the backups
And the backup "backup29-2" has server_type "dse" in its metadata
Then I verify over gRPC that the backup "backup29-2" exists and is of type "differential"
Then I verify over gRPC that the backup "backup29-2" has expected status SUCCESS
When I restore the backup named "backup29-2"
And I wait for the DSE Search indexes to be rebuilt
Then I have 100 rows in the "medusa.test" table in ccm cluster "<client encryption>"
Then I can make a search query against the "medusa"."test" table
Expand Down

0 comments on commit c8609c8

Please # to comment.