Skip to content

YARN-3477 TimelineClientImpl swallows exceptions #47

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ private void addTimelineDelegationToken(
}
credentials.addToken(timelineService, timelineDelegationToken);
if (LOG.isDebugEnabled()) {
LOG.debug("Add timline delegation token into credentials: "
LOG.debug("Add timeline delegation token to credentials: "
+ timelineDelegationToken);
}
DataOutputBuffer dob = new DataOutputBuffer();
Expand All @@ -354,8 +354,10 @@ private void addTimelineDelegationToken(
return timelineClient.getDelegationToken(timelineDTRenewer);
} catch (Exception e ) {
if (timelineServiceBestEffort) {
LOG.warn("Failed to get delegation token from the timeline server: "
LOG.warn("Failed to get delegation token from the timeline server;" +
" timeline client no longer publishing data: "
+ e.getMessage());
LOG.debug("Full exception details", e);
return null;
}
throw e;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import java.io.File;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.lang.reflect.UndeclaredThrowableException;
import java.net.ConnectException;
import java.net.HttpURLConnection;
Expand Down Expand Up @@ -90,6 +91,10 @@ public class TimelineClientImpl extends TimelineClient {
private static final String RESOURCE_URI_STR = "/ws/v1/timeline/";
private static final Joiner JOINER = Joiner.on("");
public final static int DEFAULT_SOCKET_TIMEOUT = 1 * 60 * 1000; // 1 minute
public static final String ERROR_NO_ATS_RESPONSE
= "Failed to get the response from the timeline server";
public static final String ERROR_RETRIES_EXCEEDED =
"Failed to connect to timeline server";

private static Options opts;
private static final String ENTITY_DATA_TYPE = "entity";
Expand Down Expand Up @@ -177,11 +182,13 @@ public Object retryOn(TimelineClientRetryOp op)
retried = false;

// keep trying
Exception lastException;
while (true) {
try {
// try perform the op, if fail, keep retrying
return op.run();
} catch (IOException | RuntimeException e) {
lastException = e;
// break if there's no retries left
if (leftRetries == 0) {
break;
Expand All @@ -201,23 +208,32 @@ public Object retryOn(TimelineClientRetryOp op)
Thread.sleep(retryInterval);
} catch (InterruptedException ie) {
LOG.warn("Client retry sleep interrupted! ");
throw (InterruptedIOException)
(new InterruptedIOException(ie.toString()).initCause(ie));
}
}
throw new RuntimeException("Failed to connect to timeline server. "
+ "Connection retries limit exceeded. "
+ "The posted timeline event may be missing");
};
// reached only if the retry count has been exceeded;
// therefore, lastException is non-null
String message = ERROR_RETRIES_EXCEEDED
+ " Connection retries limit (" + maxRetries + ") exceeded."
+ " The posted timeline event may be missing : " + lastException;
LOG.warn(message, lastException);

throw new RuntimeException(message, lastException);
}

private void logException(Exception e, int leftRetries) {
if (leftRetries > 0) {
LOG.info("Exception caught by TimelineClientConnectionRetry,"
+ " will try " + leftRetries + " more time(s).\nMessage: "
+ e.getMessage());
+ e);
LOG.debug("Failure", e);
} else {
// note that maxRetries may be -1 at the very beginning
LOG.info("ConnectionException caught by TimelineClientConnectionRetry,"
+ " will keep retrying.\nMessage: "
+ e.getMessage());
+ e);
LOG.debug("Failure", e);
}
}
}
Expand All @@ -244,8 +260,8 @@ public boolean shouldRetryOn(Exception e) {
try {
return (ClientResponse) connectionRetry.retryOn(jerseyRetryOp);
} catch (IOException e) {
throw new ClientHandlerException("Jersey retry failed!\nMessage: "
+ e.getMessage());
throw new ClientHandlerException("Jersey retry failed against " + resURI
+ "\nException: " + e, e);
}
}
}
Expand Down Expand Up @@ -324,20 +340,29 @@ public ClientResponse run() throws Exception {
}
});
} catch (UndeclaredThrowableException e) {
throw new IOException(e.getCause());
Throwable cause = e.getCause();
if (cause instanceof IOException) {
throw (IOException) cause;
} else {
throw new IOException(cause);
}
} catch (InterruptedException ie) {
throw new IOException(ie);
throw (InterruptedIOException)
(new InterruptedIOException(ie.toString()).initCause(ie));
}
if (resp == null ||
resp.getClientResponseStatus() != ClientResponse.Status.OK) {
String msg =
"Failed to get the response from the timeline server.";
LOG.error(msg);
if (LOG.isDebugEnabled() && resp != null) {
String output = resp.getEntity(String.class);
LOG.debug("HTTP error code: " + resp.getStatus()
+ " Server response : \n" + output);
String msg = ERROR_NO_ATS_RESPONSE +" at " + resURI;
if (resp != null) {
int status = resp.getStatus();
msg += " -status code=" + status;
if (LOG.isDebugEnabled()) {
String output = resp.getEntity(String.class);
LOG.debug("HTTP error code: " + status
+ " Server response : \n" + output);
}
}
LOG.error(msg);
throw new YarnException(msg);
}
return resp;
Expand Down Expand Up @@ -452,7 +477,8 @@ public Object run() throws IOException {
} catch (UndeclaredThrowableException e) {
throw new IOException(e.getCause());
} catch (InterruptedException e) {
throw new IOException(e);
throw (InterruptedIOException)
(new InterruptedIOException(e.toString()).initCause(e));
}
}

Expand Down
Loading