From d0fadbf206d4c8868d0d70ef40feb8fdb94b4d99 Mon Sep 17 00:00:00 2001 From: Stephen Salinas Date: Wed, 10 Jul 2019 08:30:52 -0400 Subject: [PATCH 1/2] tweak cooldown thresholds and evaluation logic --- .../singularity/config/SingularityConfiguration.java | 4 ++-- .../singularity/scheduler/SingularityCooldown.java | 5 +++-- .../singularity/scheduler/SingularityScheduler.java | 9 ++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/config/SingularityConfiguration.java b/SingularityService/src/main/java/com/hubspot/singularity/config/SingularityConfiguration.java index 607c963b3b..74db0b1823 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/config/SingularityConfiguration.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/config/SingularityConfiguration.java @@ -120,7 +120,7 @@ public class SingularityConfiguration extends Configuration { private int fastFailureCooldownCount = 3; - private long fastFailureCooldownMs = 60000; + private long fastFailureCooldownMs = 30000; private long fastCooldownExpiresMinutesWithoutFailure = 5; @@ -128,7 +128,7 @@ public class SingularityConfiguration extends Configuration { private long slowFailureCooldownMs = 600000; - private long slowCooldownExpiresMinutesWithoutFailure = 8; + private long slowCooldownExpiresMinutesWithoutFailure = 5; private long cooldownMinScheduleSeconds = 120; diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityCooldown.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityCooldown.java index 7c04173e63..d4cb78200f 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityCooldown.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityCooldown.java @@ -67,8 +67,9 @@ private boolean hasFailureLoop(SingularityDeployStatistics deployStatistics, Opt .count(); java.util.Optional mostRecentFailure = failureTimestamps.stream().max(Comparator.comparingLong(Long::valueOf)); - return failureCount >= cooldownCount - && (!mostRecentFailure.isPresent() || mostRecentFailure.get() > System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(expiresAfterMins)); + boolean mostRecentFailureOutsideWindow = mostRecentFailure.isPresent() && mostRecentFailure.get() > System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(expiresAfterMins); + + return failureCount >= cooldownCount && !mostRecentFailureOutsideWindow; } boolean hasCooldownExpired(SingularityDeployStatistics deployStatistics, Optional recentFailureTimestamp) { diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityScheduler.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityScheduler.java index 61bf220336..f9afa7d8f8 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityScheduler.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityScheduler.java @@ -591,7 +591,10 @@ private Optional handleCompletedTaskWithStatistics(Optional Date: Wed, 10 Jul 2019 08:46:20 -0400 Subject: [PATCH 2/2] fix failure window condition --- .../com/hubspot/singularity/scheduler/SingularityCooldown.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityCooldown.java b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityCooldown.java index d4cb78200f..e3b553e4a6 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityCooldown.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/scheduler/SingularityCooldown.java @@ -67,7 +67,7 @@ private boolean hasFailureLoop(SingularityDeployStatistics deployStatistics, Opt .count(); java.util.Optional mostRecentFailure = failureTimestamps.stream().max(Comparator.comparingLong(Long::valueOf)); - boolean mostRecentFailureOutsideWindow = mostRecentFailure.isPresent() && mostRecentFailure.get() > System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(expiresAfterMins); + boolean mostRecentFailureOutsideWindow = !mostRecentFailure.isPresent() || mostRecentFailure.get() < System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(expiresAfterMins); return failureCount >= cooldownCount && !mostRecentFailureOutsideWindow; }