From 192dfc5d3821bd566a8d0edd5d7360ef3951f4fc Mon Sep 17 00:00:00 2001 From: ajanikow <12255597+ajanikow@users.noreply.github.com> Date: Thu, 3 Oct 2024 06:47:38 +0000 Subject: [PATCH] [Feature] Require ResignLeadership during upgrade --- CHANGELOG.md | 1 + README.md | 1 + docs/cli/arangodb_operator.md | 1 + docs/generated/actions.md | 2 + internal/actions.config.go.tmpl | 2 +- internal/actions.go.tmpl | 2 +- internal/actions.register.go.tmpl | 2 +- internal/actions.register.test.go.tmpl | 2 +- internal/actions.yaml | 3 + pkg/apis/deployment/v1/actions.generated.go | 14 +++- .../deployment/v2alpha1/actions.generated.go | 14 +++- pkg/deployment/agency/state/state.go | 44 +++++++++- pkg/deployment/agency/state/state_test.go | 54 +++++++++++- pkg/deployment/features/resign_leadership.go | 13 +++ .../reconcile/action.config.generated.go | 2 +- .../reconcile/action.register.generated.go | 19 ++++- .../action.register.generated_test.go | 12 ++- ...action_ensure_secured_resign_leadership.go | 84 +++++++++++++++++++ .../action_resign_leadership_utils.go | 2 +- pkg/deployment/reconcile/helper_wrap.go | 2 +- .../reconcile/plan_builder_rotate_upgrade.go | 3 +- .../reconcile/plan_builder_storage.go | 3 +- .../reconcile/plan_builder_utils.go | 3 +- 23 files changed, 269 insertions(+), 16 deletions(-) create mode 100644 pkg/deployment/reconcile/action_ensure_secured_resign_leadership.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 64058cc22..098701dc4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ - (Feature) (Scheduler) Additional types - (Feature) Alternative Upgrade Order Feature - (Feature) (Scheduler) SchedV1 Integration +- (Feature) Require ResignLeadership during upgrade ## [1.2.42](https://github.com/arangodb/kube-arangodb/tree/1.2.42) (2024-07-23) - (Maintenance) Go 1.22.4 & Kubernetes 1.29.6 libraries diff --git a/README.md b/README.md index 5643abcbd..ce1f37bc0 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,7 @@ Flags: 
--deployment.feature.backup-cleanup Cleanup imported backups if required - Required ArangoDB 3.8.0 or higher --deployment.feature.deployment-spec-defaults-restore Restore defaults from last accepted state of deployment - Required ArangoDB 3.8.0 or higher (default true) --deployment.feature.enforced-resign-leadership Enforce ResignLeadership and ensure that Leaders are moved from restarted DBServer - Required ArangoDB 3.8.0 or higher (default true) + --deployment.feature.ensure-secured-resign-leadership Ensures that even if ResignLeadership job timeouted, data is still replicated on other servers - Required ArangoDB 3.8.0 or higher (default true) --deployment.feature.ephemeral-volumes Enables ephemeral volumes for apps and tmp directory - Required ArangoDB 3.8.0 or higher --deployment.feature.failover-leadership Support for leadership in fail-over mode - Required ArangoDB 3.8.0 or higher --deployment.feature.init-containers-copy-resources Copy resources spec to built-in init containers if they are not specified - Required ArangoDB 3.8.0 or higher (default true) diff --git a/docs/cli/arangodb_operator.md b/docs/cli/arangodb_operator.md index 390ac513b..b680b236a 100644 --- a/docs/cli/arangodb_operator.md +++ b/docs/cli/arangodb_operator.md @@ -47,6 +47,7 @@ Flags: --deployment.feature.backup-cleanup Cleanup imported backups if required - Required ArangoDB 3.8.0 or higher --deployment.feature.deployment-spec-defaults-restore Restore defaults from last accepted state of deployment - Required ArangoDB 3.8.0 or higher (default true) --deployment.feature.enforced-resign-leadership Enforce ResignLeadership and ensure that Leaders are moved from restarted DBServer - Required ArangoDB 3.8.0 or higher (default true) + --deployment.feature.ensure-secured-resign-leadership Ensures that even if ResignLeadership job timeouted, data is still replicated on other servers - Required ArangoDB 3.8.0 or higher (default true) --deployment.feature.ephemeral-volumes Enables ephemeral 
volumes for apps and tmp directory - Required ArangoDB 3.8.0 or higher --deployment.feature.failover-leadership Support for leadership in fail-over mode - Required ArangoDB 3.8.0 or higher --deployment.feature.init-containers-copy-resources Copy resources spec to built-in init containers if they are not specified - Required ArangoDB 3.8.0 or higher (default true) diff --git a/docs/generated/actions.md b/docs/generated/actions.md index 46fd22b02..7579fdef6 100644 --- a/docs/generated/actions.md +++ b/docs/generated/actions.md @@ -37,6 +37,7 @@ nav_order: 11 | EncryptionKeyRemove | no | 10m0s | no | Enterprise Only | Remove the encryption key to the pool | | EncryptionKeyStatusUpdate | no | 10m0s | no | Enterprise Only | Update status of encryption propagation | | EnforceResignLeadership | no | 45m0s | yes | Community & Enterprise | Run the ResignLeadership job on DBServer and checks data compatibility after | +| EnsureSecuredResignLeadership | no | 10m0s | no | Community & Enterprise | Ensures that data is still replicated on other servers | | Idle | no | 10m0s | no | Community & Enterprise | Define idle operation in case if preconditions are not meet | | JWTAdd | no | 10m0s | no | Enterprise Only | Adds new JWT to the pool | | JWTClean | no | 10m0s | no | Enterprise Only | Remove JWT key from the pool | @@ -133,6 +134,7 @@ spec: EncryptionKeyRemove: 10m0s EncryptionKeyStatusUpdate: 10m0s EnforceResignLeadership: 45m0s + EnsureSecuredResignLeadership: 10m0s Idle: 10m0s JWTAdd: 10m0s JWTClean: 10m0s diff --git a/internal/actions.config.go.tmpl b/internal/actions.config.go.tmpl index 9713495d0..7ae2e5552 100644 --- a/internal/actions.config.go.tmpl +++ b/internal/actions.config.go.tmpl @@ -1,6 +1,6 @@ {{- $root := . -}} // -// Copyright 2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2023-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/internal/actions.go.tmpl b/internal/actions.go.tmpl index 46704e451..b04a8d890 100644 --- a/internal/actions.go.tmpl +++ b/internal/actions.go.tmpl @@ -1,6 +1,6 @@ {{- $root := . -}} // -// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/internal/actions.register.go.tmpl b/internal/actions.register.go.tmpl index b4cf26820..eebc75a57 100644 --- a/internal/actions.register.go.tmpl +++ b/internal/actions.register.go.tmpl @@ -1,6 +1,6 @@ {{- $root := . -}} // -// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/internal/actions.register.test.go.tmpl b/internal/actions.register.test.go.tmpl index 655a5a036..69908ccf3 100644 --- a/internal/actions.register.test.go.tmpl +++ b/internal/actions.register.test.go.tmpl @@ -1,6 +1,6 @@ {{- $root := . -}} // -// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/internal/actions.yaml b/internal/actions.yaml index 0cec2f9cb..1d781f4f3 100644 --- a/internal/actions.yaml +++ b/internal/actions.yaml @@ -33,6 +33,9 @@ actions: description: Run the ResignLeadership job on DBServer and checks data compatibility after timeout: 45m optional: true + EnsureSecuredResignLeadership: + description: Ensures that data is still replicated on other servers + timeout: 10m KillMemberPod: description: Execute Delete on Pod (put pod in Terminating state) scopes: diff --git a/pkg/apis/deployment/v1/actions.generated.go b/pkg/apis/deployment/v1/actions.generated.go index f9b6abee5..65211bec2 100644 --- a/pkg/apis/deployment/v1/actions.generated.go +++ b/pkg/apis/deployment/v1/actions.generated.go @@ -1,5 +1,5 @@ // -// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -101,6 +101,9 @@ const ( // ActionEnforceResignLeadershipDefaultTimeout define default timeout for action ActionEnforceResignLeadership ActionEnforceResignLeadershipDefaultTimeout time.Duration = 2700 * time.Second // 45m0s + // ActionEnsureSecuredResignLeadershipDefaultTimeout define default timeout for action ActionEnsureSecuredResignLeadership + ActionEnsureSecuredResignLeadershipDefaultTimeout time.Duration = 600 * time.Second // 10m0s + // ActionIdleDefaultTimeout define default timeout for action ActionIdle ActionIdleDefaultTimeout time.Duration = ActionsDefaultTimeout @@ -362,6 +365,9 @@ const ( // ActionTypeEnforceResignLeadership in scopes Normal. Run the ResignLeadership job on DBServer and checks data compatibility after ActionTypeEnforceResignLeadership ActionType = "EnforceResignLeadership" + // ActionTypeEnsureSecuredResignLeadership in scopes Normal. 
Ensures that data is still replicated on other servers + ActionTypeEnsureSecuredResignLeadership ActionType = "EnsureSecuredResignLeadership" + // ActionTypeIdle in scopes Normal. Define idle operation in case if preconditions are not meet ActionTypeIdle ActionType = "Idle" @@ -601,6 +607,8 @@ func (a ActionType) DefaultTimeout() time.Duration { return ActionEncryptionKeyStatusUpdateDefaultTimeout case ActionTypeEnforceResignLeadership: return ActionEnforceResignLeadershipDefaultTimeout + case ActionTypeEnsureSecuredResignLeadership: + return ActionEnsureSecuredResignLeadershipDefaultTimeout case ActionTypeIdle: return ActionIdleDefaultTimeout case ActionTypeJWTAdd: @@ -779,6 +787,8 @@ func (a ActionType) Priority() ActionPriority { return ActionPriorityNormal case ActionTypeEnforceResignLeadership: return ActionPriorityNormal + case ActionTypeEnsureSecuredResignLeadership: + return ActionPriorityNormal case ActionTypeIdle: return ActionPriorityNormal case ActionTypeJWTAdd: @@ -969,6 +979,8 @@ func (a ActionType) Optional() bool { return false case ActionTypeEnforceResignLeadership: return true + case ActionTypeEnsureSecuredResignLeadership: + return false case ActionTypeIdle: return false case ActionTypeJWTAdd: diff --git a/pkg/apis/deployment/v2alpha1/actions.generated.go b/pkg/apis/deployment/v2alpha1/actions.generated.go index 070033fc9..81cc82236 100644 --- a/pkg/apis/deployment/v2alpha1/actions.generated.go +++ b/pkg/apis/deployment/v2alpha1/actions.generated.go @@ -1,5 +1,5 @@ // -// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -101,6 +101,9 @@ const ( // ActionEnforceResignLeadershipDefaultTimeout define default timeout for action ActionEnforceResignLeadership ActionEnforceResignLeadershipDefaultTimeout time.Duration = 2700 * time.Second // 45m0s + // ActionEnsureSecuredResignLeadershipDefaultTimeout define default timeout for action ActionEnsureSecuredResignLeadership + ActionEnsureSecuredResignLeadershipDefaultTimeout time.Duration = 600 * time.Second // 10m0s + // ActionIdleDefaultTimeout define default timeout for action ActionIdle ActionIdleDefaultTimeout time.Duration = ActionsDefaultTimeout @@ -362,6 +365,9 @@ const ( // ActionTypeEnforceResignLeadership in scopes Normal. Run the ResignLeadership job on DBServer and checks data compatibility after ActionTypeEnforceResignLeadership ActionType = "EnforceResignLeadership" + // ActionTypeEnsureSecuredResignLeadership in scopes Normal. Ensures that data is still replicated on other servers + ActionTypeEnsureSecuredResignLeadership ActionType = "EnsureSecuredResignLeadership" + // ActionTypeIdle in scopes Normal. 
Define idle operation in case if preconditions are not meet ActionTypeIdle ActionType = "Idle" @@ -601,6 +607,8 @@ func (a ActionType) DefaultTimeout() time.Duration { return ActionEncryptionKeyStatusUpdateDefaultTimeout case ActionTypeEnforceResignLeadership: return ActionEnforceResignLeadershipDefaultTimeout + case ActionTypeEnsureSecuredResignLeadership: + return ActionEnsureSecuredResignLeadershipDefaultTimeout case ActionTypeIdle: return ActionIdleDefaultTimeout case ActionTypeJWTAdd: @@ -779,6 +787,8 @@ func (a ActionType) Priority() ActionPriority { return ActionPriorityNormal case ActionTypeEnforceResignLeadership: return ActionPriorityNormal + case ActionTypeEnsureSecuredResignLeadership: + return ActionPriorityNormal case ActionTypeIdle: return ActionPriorityNormal case ActionTypeJWTAdd: @@ -969,6 +979,8 @@ func (a ActionType) Optional() bool { return false case ActionTypeEnforceResignLeadership: return true + case ActionTypeEnsureSecuredResignLeadership: + return false case ActionTypeIdle: return false case ActionTypeJWTAdd: diff --git a/pkg/deployment/agency/state/state.go b/pkg/deployment/agency/state/state.go index 658fb863c..66631de6c 100644 --- a/pkg/deployment/agency/state/state.go +++ b/pkg/deployment/agency/state/state.go @@ -1,7 +1,7 @@ // // DISCLAIMER // -// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -262,6 +262,48 @@ func (s State) PlanLeaderServersWithFailOver() Servers { return r } +// IsServerWithShardBackup returns true if server can be restarted without risk: it returns false only when, for a shard planned on more than one server, the given server is the sole server listed in Current +func (s State) IsServerWithShardBackup(server Server) bool { + for db, dbData := range s.Plan.Collections { + for collection, collectionData := range dbData { + for shard, shardDetails := range collectionData.Shards { + if len(shardDetails) <= 1 { + // RF is 1, nothing to do + continue + } + + // Find current state + currentDBs, ok := s.Current.Collections[db] + if !ok { + continue + } + + currentCollection, ok := currentDBs[collection] + if !ok { + continue + } + + currentShard, ok := currentCollection[shard] + if !ok { + continue + } + + if len(currentShard.Servers) == 0 { + continue + } + + if currentShard.Servers[0] == server { + if len(currentShard.Servers) == 1 { + return false + } + } + } + } + } + + return true +} + type CollectionShardDetails []CollectionShardDetail type CollectionShardDetail struct { diff --git a/pkg/deployment/agency/state/state_test.go b/pkg/deployment/agency/state/state_test.go index 2eaafab00..67f8f2e00 100644 --- a/pkg/deployment/agency/state/state_test.go +++ b/pkg/deployment/agency/state/state_test.go @@ -1,7 +1,7 @@ // // DISCLAIMER // -// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -307,6 +307,58 @@ func Test_IsDBServerReadyToRestart(t *testing.T) { } } +func Test_IsServerWithShardBackup(t *testing.T) { + type testCase struct { + generator Generator + ready bool + server Server + } + newDBWithCol := func(writeConcern int) CollectionGeneratorInterface { + return NewDatabaseRandomGenerator().RandomCollection().WithWriteConcern(writeConcern) + } + tcs := map[string]testCase{ + "missing replica": { + generator: newDBWithCol(1).WithShard().WithPlan("A", "B").WithCurrent("A").Add().Add().Add(), + ready: false, + server: "A", + }, + "ready replica": { + generator: newDBWithCol(1).WithShard().WithPlan("A", "B").WithCurrent("A", "B").Add().Add().Add(), + ready: true, + server: "A", + }, + "not affected replica": { + generator: newDBWithCol(1).WithShard().WithPlan("A", "B").WithCurrent("A").Add().Add().Add(), + ready: true, + server: "B", + }, + "not affected nonexisting replica": { + generator: newDBWithCol(1).WithShard().WithPlan("A", "B").WithCurrent("A").Add().Add().Add(), + ready: true, + server: "C", + }, + "rf1": { + generator: newDBWithCol(1).WithShard().WithPlan("A").WithCurrent("A").Add().Add().Add(), + ready: true, + server: "A", + }, + } + + for name, tc := range tcs { + t.Run(name, func(t *testing.T) { + s := GenerateState(t, tc.generator) + + res := s.IsServerWithShardBackup(tc.server) + + if tc.ready { + require.True(t, res) + } else { + require.False(t, res) + } + }) + } +} + func Test_GetCollectionDatabaseByID(t *testing.T) { var s DumpState require.NoError(t, json.Unmarshal(agencyDump39, &s)) diff --git a/pkg/deployment/features/resign_leadership.go b/pkg/deployment/features/resign_leadership.go index 8e44bfb04..69a751f3f 100644 --- a/pkg/deployment/features/resign_leadership.go +++ b/pkg/deployment/features/resign_leadership.go @@ -22,6 +22,7 @@ package features func init() { registerFeature(enforcedResignLeadership) + registerFeature(ensureSecuredResignLeadership) } var enforcedResignLeadership = &feature{ @@ -31,7 +32,19 @@ var 
enforcedResignLeadership = &feature{ enabledByDefault: true, } +var ensureSecuredResignLeadership = &feature{ + name: "ensure-secured-resign-leadership", + description: "Ensures that even if ResignLeadership job timeouted, data is still replicated on other servers", + enterpriseRequired: false, + enabledByDefault: true, +} + // EnforcedResignLeadership returns enforced ResignLeadership. func EnforcedResignLeadership() Feature { return enforcedResignLeadership } + +// EnsureSecuredResignLeadership returns the feature which ensures that data is still replicated on other servers. +func EnsureSecuredResignLeadership() Feature { + return ensureSecuredResignLeadership +} diff --git a/pkg/deployment/reconcile/action.config.generated.go b/pkg/deployment/reconcile/action.config.generated.go index 40e7e737a..ed48f68ad 100644 --- a/pkg/deployment/reconcile/action.config.generated.go +++ b/pkg/deployment/reconcile/action.config.generated.go @@ -1,5 +1,5 @@ // -// Copyright 2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2023-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/deployment/reconcile/action.register.generated.go b/pkg/deployment/reconcile/action.register.generated.go index 2ec26c2a6..7a6cfaaa4 100644 --- a/pkg/deployment/reconcile/action.register.generated.go +++ b/pkg/deployment/reconcile/action.register.generated.go @@ -1,5 +1,5 @@ // -// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -96,6 +96,9 @@ var ( _ Action = &actionEnforceResignLeadership{} _ actionFactory = newEnforceResignLeadershipAction + _ Action = &actionEnsureSecuredResignLeadership{} + _ actionFactory = newEnsureSecuredResignLeadershipAction + _ Action = &actionIdle{} _ actionFactory = newIdleAction @@ -619,6 +622,20 @@ func init() { registerAction(action, function) } + // EnsureSecuredResignLeadership + { + // Get Action type + action := api.ActionTypeEnsureSecuredResignLeadership + + // Get Action defition + function := newEnsureSecuredResignLeadershipAction + + // Wrap action main function + + // Register action + registerAction(action, function) + } + // Idle { // Get Action type diff --git a/pkg/deployment/reconcile/action.register.generated_test.go b/pkg/deployment/reconcile/action.register.generated_test.go index 7cec257f4..2a30ed9aa 100644 --- a/pkg/deployment/reconcile/action.register.generated_test.go +++ b/pkg/deployment/reconcile/action.register.generated_test.go @@ -1,5 +1,5 @@ // -// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -286,6 +286,16 @@ func Test_Actions(t *testing.T) { }) }) + t.Run("EnsureSecuredResignLeadership", func(t *testing.T) { + ActionsExistence(t, api.ActionTypeEnsureSecuredResignLeadership) + t.Run("Internal", func(t *testing.T) { + require.False(t, api.ActionTypeEnsureSecuredResignLeadership.Internal()) + }) + t.Run("Optional", func(t *testing.T) { + require.False(t, api.ActionTypeEnsureSecuredResignLeadership.Optional()) + }) + }) + t.Run("Idle", func(t *testing.T) { ActionsExistence(t, api.ActionTypeIdle) t.Run("Internal", func(t *testing.T) { diff --git a/pkg/deployment/reconcile/action_ensure_secured_resign_leadership.go b/pkg/deployment/reconcile/action_ensure_secured_resign_leadership.go new file mode 100644 index 000000000..c56851ceb --- /dev/null +++ b/pkg/deployment/reconcile/action_ensure_secured_resign_leadership.go @@ -0,0 +1,84 @@ +// +// DISCLAIMER +// +// Copyright 2023-2024 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// + +package reconcile + +import ( + "context" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" + "github.com/arangodb/kube-arangodb/pkg/deployment/agency/state" + "github.com/arangodb/kube-arangodb/pkg/deployment/features" +) + +// newEnsureSecuredResignLeadershipAction creates a new Action that implements the given +// planned EnsureSecuredResignLeadership action. 
+func newEnsureSecuredResignLeadershipAction(action api.Action, actionCtx ActionContext) Action { + a := &actionEnsureSecuredResignLeadership{} + + a.actionImpl = newActionImplDefRef(action, actionCtx) + + return a +} + +// actionEnsureSecuredResignLeadership implements an EnsureSecuredResignLeadership. +type actionEnsureSecuredResignLeadership struct { + actionImpl + + actionEmptyStart +} + +// CheckProgress finishes the action when the feature is disabled, the member is unknown or not a DBServer, maintenance is enabled, or the agency state reports a shard backup for the member; otherwise it reports not-done so the check is repeated +func (a *actionEnsureSecuredResignLeadership) CheckProgress(ctx context.Context) (bool, bool, error) { + if !features.EnsureSecuredResignLeadership().Enabled() { + a.log.Info("Skipped") + return true, false, nil + } + + group := a.action.Group + m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID) + if !ok { + a.log.Error("No such member") + return true, false, nil + } + + if group != api.ServerGroupDBServers { + // Only DBServers can use ResignLeadership job + return true, false, nil + } + + agencyState, agencyOK := a.actionCtx.GetAgencyCache() + if !agencyOK { + a.log.Error("Unable to get maintenance mode") + return false, false, nil + } else if agencyState.Supervision.Maintenance.Exists() { + a.log.Warn("Maintenance is enabled, skipping action") + // We are done, action cannot be handled on maintenance mode + return true, false, nil + } + + if agencyState.IsServerWithShardBackup(state.Server(m.ID)) { + return true, false, nil + } + + a.log.Warn("Restart with a potential dataloss!") + + return false, false, nil +} diff --git a/pkg/deployment/reconcile/action_resign_leadership_utils.go b/pkg/deployment/reconcile/action_resign_leadership_utils.go index 824bf0d3d..4bad57a24 100644 --- a/pkg/deployment/reconcile/action_resign_leadership_utils.go +++ b/pkg/deployment/reconcile/action_resign_leadership_utils.go @@ -1,7 +1,7 @@ // // DISCLAIMER // -// Copyright 2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2023-2024 ArangoDB GmbH, Cologne, Germany // // 
Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/deployment/reconcile/helper_wrap.go b/pkg/deployment/reconcile/helper_wrap.go index e50f6de9d..b48523912 100644 --- a/pkg/deployment/reconcile/helper_wrap.go +++ b/pkg/deployment/reconcile/helper_wrap.go @@ -1,7 +1,7 @@ // // DISCLAIMER // -// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/deployment/reconcile/plan_builder_rotate_upgrade.go b/pkg/deployment/reconcile/plan_builder_rotate_upgrade.go index 8843bee35..166c2e93c 100644 --- a/pkg/deployment/reconcile/plan_builder_rotate_upgrade.go +++ b/pkg/deployment/reconcile/plan_builder_rotate_upgrade.go @@ -557,7 +557,8 @@ func (r *Reconciler) createUpgradeMemberPlan(member api.MemberStatus, } func withSecureWrap(member api.MemberStatus, - group api.ServerGroup, spec api.DeploymentSpec, rebootID *int, plan ...api.Action) api.Plan { + group api.ServerGroup, spec api.DeploymentSpec, rebootID *int, + plan ...api.Action) api.Plan { image := member.Image if image == nil { return plan diff --git a/pkg/deployment/reconcile/plan_builder_storage.go b/pkg/deployment/reconcile/plan_builder_storage.go index e27201870..390c032b3 100644 --- a/pkg/deployment/reconcile/plan_builder_storage.go +++ b/pkg/deployment/reconcile/plan_builder_storage.go @@ -1,7 +1,7 @@ // // DISCLAIMER // -// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -176,6 +176,7 @@ func (r *Reconciler) pvcResizePlan(group api.ServerGroup, member api.MemberStatu case api.PVCResizeModeRotate: return withWaitForMember(api.Plan{ actions.NewAction(getResignLeadershipActionType(), group, member), + actions.NewAction(api.ActionTypeEnsureSecuredResignLeadership, group, member), actions.NewAction(api.ActionTypeKillMemberPod, group, member), actions.NewAction(api.ActionTypeRotateStartMember, group, member), actions.NewAction(api.ActionTypePVCResize, group, member), diff --git a/pkg/deployment/reconcile/plan_builder_utils.go b/pkg/deployment/reconcile/plan_builder_utils.go index 90e5ae701..e19fb2fbe 100644 --- a/pkg/deployment/reconcile/plan_builder_utils.go +++ b/pkg/deployment/reconcile/plan_builder_utils.go @@ -1,7 +1,7 @@ // // DISCLAIMER // -// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -51,6 +51,7 @@ func createRotateMemberPlanWithAction(member api.MemberStatus, plan = withSecureWrap(member, group, spec, rebootId, plan...) plan = plan.After( + actions.NewAction(api.ActionTypeEnsureSecuredResignLeadership, group, member, reason), actions.NewAction(api.ActionTypeKillMemberPod, group, member, reason), actions.NewAction(action, group, member, reason), actions.NewAction(api.ActionTypeCleanMemberService, group, member, "Remove server service and enforce renewal/recreation"),