Skip to content

Commit 27dd84c

Browse files
srikar-jilugu authored and vladvildanov committed
fix node routing in slotClosestNode (#3043)
* fix node routing when all nodes are failing
* fix minlatency zero value
1 parent 51b6e2c commit 27dd84c

File tree

1 file changed

+32
-10
lines changed

1 file changed

+32
-10
lines changed

osscluster.go

+32-10
Original file line number | Diff line number | Diff line change
@@ -341,6 +341,8 @@ func (n *clusterNode) Close() error {
341341
return n.Client.Close()
342342
}
343343

344+
const maximumNodeLatency = 1 * time.Minute
345+
344346
func (n *clusterNode) updateLatency() {
345347
const numProbe = 10
346348
var dur uint64
@@ -361,7 +363,7 @@ func (n *clusterNode) updateLatency() {
361363
if successes == 0 {
362364
// If none of the pings worked, set latency to some arbitrarily high value so this node gets
363365
// least priority.
364-
latency = float64((1 * time.Minute) / time.Microsecond)
366+
latency = float64((maximumNodeLatency) / time.Microsecond)
365367
} else {
366368
latency = float64(dur) / float64(successes)
367369
}
@@ -735,20 +737,40 @@ func (c *clusterState) slotClosestNode(slot int) (*clusterNode, error) {
735737
return c.nodes.Random()
736738
}
737739

738-
var node *clusterNode
740+
var allNodesFailing = true
741+
var (
742+
closestNonFailingNode *clusterNode
743+
closestNode *clusterNode
744+
minLatency time.Duration
745+
)
746+
747+
// initialize minLatency to the maximum possible duration so that any node's latency compares lower
748+
minLatency = time.Duration(math.MaxInt64)
749+
739750
for _, n := range nodes {
740-
if n.Failing() {
741-
continue
742-
}
743-
if node == nil || n.Latency() < node.Latency() {
744-
node = n
751+
if closestNode == nil || n.Latency() < minLatency {
752+
closestNode = n
753+
minLatency = n.Latency()
754+
if !n.Failing() {
755+
closestNonFailingNode = n
756+
allNodesFailing = false
757+
}
745758
}
746759
}
747-
if node != nil {
748-
return node, nil
760+
761+
// pick the healthy node with the lowest latency
762+
if !allNodesFailing && closestNonFailingNode != nil {
763+
return closestNonFailingNode, nil
764+
}
765+
766+
// if all nodes are failing, we will pick the temporarily failing node with lowest latency
767+
if minLatency < maximumNodeLatency && closestNode != nil {
768+
internal.Logger.Printf(context.TODO(), "redis: all nodes are marked as failed, picking the temporarily failing node with lowest latency")
769+
return closestNode, nil
749770
}
750771

751-
// If all nodes are failing - return random node
772+
// If all nodes are having the maximum latency(all pings are failing) - return a random node across the cluster
773+
internal.Logger.Printf(context.TODO(), "redis: pings to all nodes are failing, picking a random node across the cluster")
752774
return c.nodes.Random()
753775
}
754776

0 commit comments

Comments
 (0)