Skip to content

Commit

Permalink
Merge pull request #1973 from HubSpot/cooldown_tweaks_2
Browse files: browse the repository at this point in the history
tweak cooldown thresholds and evaluation logic
  • Loading branch information
ssalinas authored Jul 11, 2019
2 parents 4c0be03 + 96e42ec commit 25115be
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -120,15 +120,15 @@ public class SingularityConfiguration extends Configuration {

private int fastFailureCooldownCount = 3;

private long fastFailureCooldownMs = 60000;
private long fastFailureCooldownMs = 30000;

private long fastCooldownExpiresMinutesWithoutFailure = 5;

private int slowFailureCooldownCount = 5;

private long slowFailureCooldownMs = 600000;

private long slowCooldownExpiresMinutesWithoutFailure = 8;
private long slowCooldownExpiresMinutesWithoutFailure = 5;

private long cooldownMinScheduleSeconds = 120;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,9 @@ private boolean hasFailureLoop(SingularityDeployStatistics deployStatistics, Opt
.count();
java.util.Optional<Long> mostRecentFailure = failureTimestamps.stream().max(Comparator.comparingLong(Long::valueOf));

return failureCount >= cooldownCount
&& (!mostRecentFailure.isPresent() || mostRecentFailure.get() > System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(expiresAfterMins));
boolean mostRecentFailureOutsideWindow = !mostRecentFailure.isPresent() || mostRecentFailure.get() < System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(expiresAfterMins);

return failureCount >= cooldownCount && !mostRecentFailureOutsideWindow;
}

boolean hasCooldownExpired(SingularityDeployStatistics deployStatistics, Optional<Long> recentFailureTimestamp) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,10 @@ private Optional<PendingType> handleCompletedTaskWithStatistics(Optional<Singula
}

if (!status.hasReason() || !status.getReason().equals(Reason.REASON_INVALID_OFFERS)) {
if (!state.isSuccess() && taskHistoryUpdateCreateResult == SingularityCreateResult.CREATED && cooldown.shouldEnterCooldown(request, requestState, deployStatistics, timestamp)) {
if (state != ExtendedTaskState.TASK_KILLED
&& !state.isSuccess()
&& taskHistoryUpdateCreateResult == SingularityCreateResult.CREATED
&& cooldown.shouldEnterCooldown(request, requestState, deployStatistics, timestamp)) {
LOG.info("Request {} is entering cooldown due to task {}", request.getId(), taskId);
requestState = RequestState.SYSTEM_COOLDOWN;
requestManager.cooldown(request, System.currentTimeMillis());
Expand Down Expand Up @@ -715,10 +718,6 @@ private void updateDeployStatistics(SingularityDeployStatistics deployStatistics
} else {
bldr.setAverageSchedulingDelayMillis(Optional.of(startedAt - dueTime));
}

final SingularityDeployStatistics newStatistics = bldr.build();

deployManager.saveDeployStatistics(newStatistics);
}

bldr.setNumTasks(bldr.getNumTasks() + 1);
Expand Down

0 comments on commit 25115be

Please sign in to comment.