diff --git a/medusa/cassandra_utils.py b/medusa/cassandra_utils.py index 04b348dca..0878c68a7 100644 --- a/medusa/cassandra_utils.py +++ b/medusa/cassandra_utils.py @@ -437,6 +437,15 @@ def rebuild_search_index(self): logging.debug(f'Rebuilding search index for {fqtn}') session.execute(f"REBUILD SEARCH INDEX ON {fqtn}") + @staticmethod + def _ignore_snapshots(folder, contents): + ignored = set() + if folder.endswith('metadata/snapshots'): + logging.info(f'Ignoring {contents} in folder {folder}') + for c in contents: + ignored.add(c) + return ignored + def create_dse_snapshot(self, backup_name): """ There is no good way of making snapshot of DSE files @@ -445,11 +454,12 @@ def create_dse_snapshot(self, backup_name): That folder is nested in the parent folder, just like for regular tables This way, we can reuse a lot of code later on """ + tag = "{}{}".format(self.SNAPSHOT_PREFIX, backup_name) if not self.dse_snapshot_exists(tag): src_path = self._dse_root / self._dse_metadata_folder dst_path = self._dse_root / self._dse_metadata_folder / 'snapshots' / tag - shutil.copytree(src_path, dst_path) + shutil.copytree(src_path, dst_path, ignore=Cassandra._ignore_snapshots) return Cassandra.DseSnapshot(self, tag) diff --git a/medusa/service/snapshot/nodetool_snapshot_service.py b/medusa/service/snapshot/nodetool_snapshot_service.py index 024202831..0936e4ffe 100644 --- a/medusa/service/snapshot/nodetool_snapshot_service.py +++ b/medusa/service/snapshot/nodetool_snapshot_service.py @@ -29,7 +29,11 @@ def create_snapshot(self, *, tag): # create the Nodetool command cmd = self._nodetool.nodetool + ['snapshot', '-t', tag] logging.debug('Executing: {}'.format(' '.join(cmd))) - subprocess.check_call(cmd, stdout=subprocess.DEVNULL, universal_newlines=True) + try: + subprocess.check_output(cmd, universal_newlines=True) + except subprocess.CalledProcessError as e: + logging.error('nodetool output: {}'.format(e.output)) + logging.error('Creating snapshot failed and without a snapshot we cannot do a backup') def delete_snapshot(self, *, tag): # create the Nodetool command diff --git a/tests/cassandra_utils_test.py b/tests/cassandra_utils_test.py index 409468b96..6169f2dee 100644 --- a/tests/cassandra_utils_test.py +++ b/tests/cassandra_utils_test.py @@ -942,5 +942,27 @@ def test_is_open_failed_close(self, mock_socket): assert mock_instance.shutdown.call_count == 1 assert mock_instance.close.call_count == 1 + def test_ignore_snapshots(self): + # none of these combinations is ignored + folder = 'keyspace/table' + contents = ['file1', 'file2'] + expected_ignored = set() + actual_ignored = medusa.cassandra_utils.Cassandra._ignore_snapshots(folder, contents) + self.assertEqual(expected_ignored, actual_ignored) + + # none of these combinations is ignored + folder = 'keyspace/table/snapshots' + contents = ['snapshot1', 'snapshot2'] + expected_ignored = set() + actual_ignored = medusa.cassandra_utils.Cassandra._ignore_snapshots(folder, contents) + self.assertEqual(expected_ignored, actual_ignored) + + # we only ignore stuff in this specific folder + folder = 'metadata/snapshots' + contents = ['snapshot1', 'snapshot2'] + expected_ignored = {'snapshot1', 'snapshot2'} + actual_ignored = medusa.cassandra_utils.Cassandra._ignore_snapshots(folder, contents) + self.assertEqual(expected_ignored, actual_ignored) + if __name__ == '__main__': unittest.main() diff --git a/tests/integration/features/integration_tests.feature b/tests/integration/features/integration_tests.feature index 9c8bb6152..c1b25f5e0 100644 --- a/tests/integration/features/integration_tests.feature +++ b/tests/integration/features/integration_tests.feature @@ -1058,16 +1058,26 @@ Feature: Integration tests @29 Scenario Outline: Backup and restore a DSE cluster with search enabled Given I have a fresh DSE cluster version "6.8.38" with "" running named "scenario29" - Given I am using "" as storage provider in ccm cluster "" + Given I am using "" as storage provider in ccm cluster "" with gRPC server + Then the gRPC server is up When I create the "test" table in keyspace "medusa" And I create a search index on the "test" table in keyspace "medusa" And I load 100 rows in the "medusa.test" table When I run a DSE "nodetool flush" command Then I can make a search query against the "medusa"."test" table - When I perform a backup in "differential" mode of the node named "first_backup" with md5 checks "disabled" - Then I can see the backup named "first_backup" when I list the backups - And the backup "first_backup" has server_type "dse" in its metadata - When I restore the backup named "first_backup" + When I perform an async backup over gRPC in "differential" mode of the node named "backup29-1" + Then the backup index exists + Then I can see the backup named "backup29-1" when I list the backups + And the backup "backup29-1" has server_type "dse" in its metadata + Then I verify over gRPC that the backup "backup29-1" exists and is of type "differential" + Then I verify over gRPC that the backup "backup29-1" has expected status SUCCESS + When I perform an async backup over gRPC in "differential" mode of the node named "backup29-2" + Then the backup index exists + Then I can see the backup named "backup29-2" when I list the backups + And the backup "backup29-2" has server_type "dse" in its metadata + Then I verify over gRPC that the backup "backup29-2" exists and is of type "differential" + Then I verify over gRPC that the backup "backup29-2" has expected status SUCCESS + When I restore the backup named "backup29-2" And I wait for the DSE Search indexes to be rebuilt Then I have 100 rows in the "medusa.test" table in ccm cluster "" Then I can make a search query against the "medusa"."test" table