Skip to content

Commit

Permalink
fix(operator): remove unnecessary pod from autofailover list (#530)
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinliu24 authored Oct 21, 2024
1 parent 8c13da1 commit 77f0684
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 6 deletions.
8 changes: 6 additions & 2 deletions pkg/controller/component/graphd_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,12 +292,16 @@ func (c *graphdCluster) syncNebulaClusterStatus(
return err
}
thriftPort := nc.GraphdComponent().GetPort(v1alpha1.GraphdPortNameThrift)
for i := range hostItems {
host := hostItems[i]
klog.Infof("Current graphd state: %v. Current number of replicas: %v", nc.Status.Graphd.Phase, pointer.Int32Deref(newReplicas, 0))
for _, host := range hostItems {
klog.Infof("Currently looking at host: %v with status %v", strings.Split(host.HostAddr.Host, ".")[0], host.Status)
if host.Status == meta.HostStatus_OFFLINE && host.HostAddr.Port == thriftPort {
podName := strings.Split(host.HostAddr.Host, ".")[0]
ordinal := getPodOrdinal(podName)
if int32(ordinal) >= pointer.Int32Deref(nc.Spec.Graphd.Replicas, 0) {
klog.Infof("graphd pod [%s/%s] has already been terminated by the sts. Skipping failover and/or removing from auto failover list", nc.Namespace, podName)
// delete is a no-op if FailureHosts is nil or has no entry for podName
delete(nc.Status.Graphd.FailureHosts, podName)
continue
}
if nc.Status.Graphd.FailureHosts == nil {
Expand Down
10 changes: 8 additions & 2 deletions pkg/controller/component/metad_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,10 +257,16 @@ func (c *metadCluster) syncNebulaClusterStatus(nc *v1alpha1.NebulaCluster, oldWo
return err
}
thriftPort := nc.MetadComponent().GetPort(v1alpha1.MetadPortNameThrift)
for i := range hostItems {
host := hostItems[i]
for _, host := range hostItems {
if host.Status == meta.HostStatus_OFFLINE && host.HostAddr.Port == thriftPort {
podName := strings.Split(host.HostAddr.Host, ".")[0]
ordinal := getPodOrdinal(podName)
if int32(ordinal) >= pointer.Int32Deref(nc.Spec.Metad.Replicas, 0) {
klog.Infof("metad pod [%s/%s] has already been terminated by the sts. Skipping failover and/or removing from auto failover list", nc.Namespace, podName)
// delete is a no-op if FailureHosts is nil or has no entry for podName
delete(nc.Status.Metad.FailureHosts, podName)
continue
}
if nc.Status.Metad.FailureHosts == nil {
nc.Status.Metad.FailureHosts = make(map[string]v1alpha1.FailureHost)
}
Expand Down
6 changes: 4 additions & 2 deletions pkg/controller/component/storaged_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,12 +335,14 @@ func (c *storagedCluster) syncNebulaClusterStatus(
return err
}
thriftPort := nc.StoragedComponent().GetPort(v1alpha1.StoragedPortNameThrift)
for i := range hostItems {
host := hostItems[i]
for _, host := range hostItems {
if host.Status == meta.HostStatus_OFFLINE && host.HostAddr.Port == thriftPort {
podName := strings.Split(host.HostAddr.Host, ".")[0]
ordinal := getPodOrdinal(podName)
if int32(ordinal) >= pointer.Int32Deref(nc.Spec.Storaged.Replicas, 0) {
klog.Infof("storaged pod [%s/%s] has already been terminated by the sts. Skipping failover and/or removing from auto failover list", nc.Namespace, podName)
// delete is a no-op if FailureHosts is nil or has no entry for podName
delete(nc.Status.Storaged.FailureHosts, podName)
continue
}
if nc.Status.Storaged.FailureHosts == nil {
Expand Down

0 comments on commit 77f0684

Please sign in to comment.