Skip to content

Commit

Permalink
Merge pull request #2011 from HubSpot/task_lost_attempts
Browse files Browse the repository at this point in the history
Retry more TASK_LOST cases on deploy
  • Loading branch information
ssalinas authored Sep 6, 2019
2 parents c943348 + 37a7424 commit 51da837
Showing 1 changed file with 13 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import com.hubspot.baragon.models.BaragonRequestState;
import com.hubspot.mesos.JavaUtils;
import com.hubspot.singularity.DeployState;
import com.hubspot.singularity.ExtendedTaskState;
import com.hubspot.singularity.LoadBalancerRequestType;
import com.hubspot.singularity.LoadBalancerRequestType.LoadBalancerRequestId;
import com.hubspot.singularity.RequestState;
Expand All @@ -47,6 +48,7 @@
import com.hubspot.singularity.SingularityRequestWithState;
import com.hubspot.singularity.SingularityTask;
import com.hubspot.singularity.SingularityTaskCleanup;
import com.hubspot.singularity.SingularityTaskHistoryUpdate;
import com.hubspot.singularity.SingularityTaskId;
import com.hubspot.singularity.SingularityTaskShellCommandRequestId;
import com.hubspot.singularity.SingularityUpdatePendingDeployRequest;
Expand Down Expand Up @@ -602,7 +604,17 @@ private SingularityDeployResult getDeployResult(final SingularityRequest request

/**
 * Decides whether the failed tasks of a deploy are still within the deploy's retry budget.
 *
 * <p>The budget is the deploy's own max-task-retries value, falling back to the configured
 * default when the deploy does not set one. An inactive task is NOT counted against the
 * budget when its history contains a TASK_LOST update whose status reason does not start
 * with "REASON_CONTAINER"; every other inactive task is counted.
 *
 * @param deploy the deploy being checked; when absent, retries are not allowed
 * @param inactiveDeployMatchingTasks inactive tasks belonging to the deploy
 * @return {@code true} when the retry budget is positive and the number of counted
 *         inactive tasks does not exceed it; {@code false} otherwise
 */
private boolean canRetryTasks(Optional<SingularityDeploy> deploy, Collection<SingularityTaskId> inactiveDeployMatchingTasks) {
  // Check presence before dereferencing: calling get() on an empty Optional would throw.
  if (!deploy.isPresent()) {
    return false;
  }

  int maxRetries = deploy.get().getMaxTaskRetries().orElse(configuration.getDefaultDeployMaxTaskRetries());
  // With no retry budget the answer is already known; skip the per-task history lookups.
  if (maxRetries <= 0) {
    return false;
  }

  long matchingInactiveTasks = inactiveDeployMatchingTasks.stream()
      .filter((t) -> {
        // All TASK_LOSTs that are not resource limit related should be able to be retried,
        // so a task with such an update is dropped from the count.
        for (SingularityTaskHistoryUpdate historyUpdate : taskManager.getTaskHistoryUpdates(t)) {
          if (historyUpdate.getTaskState() == ExtendedTaskState.TASK_LOST
              && !historyUpdate.getStatusReason().orElse("").startsWith("REASON_CONTAINER")) {
            return false;
          }
        }
        return true;
      })
      .count();

  return matchingInactiveTasks <= maxRetries;
}

private Set<SingularityTaskId> getNewInactiveDeployTasks(SingularityPendingDeploy pendingDeploy, Collection<SingularityTaskId> inactiveDeployMatchingTasks) {
Expand Down

0 comments on commit 51da837

Please sign in to comment.