diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java index 29fd4174deaa0..7bad0e72bc210 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java @@ -338,7 +338,7 @@ private void addTimelineDelegationToken( } credentials.addToken(timelineService, timelineDelegationToken); if (LOG.isDebugEnabled()) { - LOG.debug("Add timline delegation token into credentials: " + LOG.debug("Add timeline delegation token to credentials: " + timelineDelegationToken); } DataOutputBuffer dob = new DataOutputBuffer(); @@ -354,8 +354,10 @@ private void addTimelineDelegationToken( return timelineClient.getDelegationToken(timelineDTRenewer); } catch (Exception e ) { if (timelineServiceBestEffort) { - LOG.warn("Failed to get delegation token from the timeline server: " + LOG.warn("Failed to get delegation token from the timeline server;" + + " timeline client no longer publishing data: " + e.getMessage()); + LOG.debug("Full exception details", e); return null; } throw e; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java index 04c84ca5f731d..782a4353d199f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java @@ -20,6 +20,7 @@ import java.io.File; import java.io.IOException; +import java.io.InterruptedIOException; import java.lang.reflect.UndeclaredThrowableException; import java.net.ConnectException; import java.net.HttpURLConnection; @@ -90,6 +91,10 @@ public class TimelineClientImpl extends TimelineClient { private static final String RESOURCE_URI_STR = "/ws/v1/timeline/"; private static final Joiner JOINER = Joiner.on(""); public final static int DEFAULT_SOCKET_TIMEOUT = 1 * 60 * 1000; // 1 minute + public static final String ERROR_NO_ATS_RESPONSE + = "Failed to get the response from the timeline server"; + public static final String ERROR_RETRIES_EXCEEDED = + "Failed to connect to timeline server"; private static Options opts; private static final String ENTITY_DATA_TYPE = "entity"; @@ -177,11 +182,13 @@ public Object retryOn(TimelineClientRetryOp op) retried = false; // keep trying + Exception lastException; while (true) { try { // try perform the op, if fail, keep retrying return op.run(); } catch (IOException | RuntimeException e) { + lastException = e; // break if there's no retries left if (leftRetries == 0) { break; @@ -201,23 +208,32 @@ public Object retryOn(TimelineClientRetryOp op) Thread.sleep(retryInterval); } catch (InterruptedException ie) { LOG.warn("Client retry sleep interrupted! "); + throw (InterruptedIOException) + (new InterruptedIOException(ie.toString()).initCause(ie)); } } - throw new RuntimeException("Failed to connect to timeline server. " - + "Connection retries limit exceeded. " - + "The posted timeline event may be missing"); - }; + // reached only if the retry count has been exceeded. + // therefore, lastException no-null + String message = ERROR_RETRIES_EXCEEDED + + " Connection retries limit (" + maxRetries + ") exceeded." + + " The posted timeline event may be missing : " + lastException; + LOG.warn(message, lastException); + + throw new RuntimeException(message, lastException); + } private void logException(Exception e, int leftRetries) { if (leftRetries > 0) { LOG.info("Exception caught by TimelineClientConnectionRetry," + " will try " + leftRetries + " more time(s).\nMessage: " - + e.getMessage()); + + e); + LOG.debug("Failure", e); } else { // note that maxRetries may be -1 at the very beginning LOG.info("ConnectionException caught by TimelineClientConnectionRetry," + " will keep retrying.\nMessage: " - + e.getMessage()); + + e); + LOG.debug("Failure", e); } } } @@ -244,8 +260,8 @@ public boolean shouldRetryOn(Exception e) { try { return (ClientResponse) connectionRetry.retryOn(jerseyRetryOp); } catch (IOException e) { - throw new ClientHandlerException("Jersey retry failed!\nMessage: " - + e.getMessage()); + throw new ClientHandlerException("Jersey retry failed against " + resURI + + "\nException: " + e, e); } } } @@ -324,20 +340,29 @@ public ClientResponse run() throws Exception { } }); } catch (UndeclaredThrowableException e) { - throw new IOException(e.getCause()); + Throwable cause = e.getCause(); + if (cause instanceof IOException) { + throw (IOException) cause; + } else { + throw new IOException(cause); + } } catch (InterruptedException ie) { - throw new IOException(ie); + throw (InterruptedIOException) + (new InterruptedIOException(ie.toString()).initCause(ie)); } if (resp == null || resp.getClientResponseStatus() != ClientResponse.Status.OK) { - String msg = - "Failed to get the response from the timeline server."; - LOG.error(msg); - if (LOG.isDebugEnabled() && resp != null) { - String output = resp.getEntity(String.class); - LOG.debug("HTTP error code: " + resp.getStatus() - + " Server response : \n" + output); + String msg = ERROR_NO_ATS_RESPONSE +" at " + resURI; + if (resp != null) { + int status = resp.getStatus(); + msg += " -status code=" + status; + if (LOG.isDebugEnabled()) { + String output = resp.getEntity(String.class); + LOG.debug("HTTP error code: " + status + + " Server response : \n" + output); + } } + LOG.error(msg); throw new YarnException(msg); } return resp; @@ -452,7 +477,8 @@ public Object run() throws IOException { } catch (UndeclaredThrowableException e) { throw new IOException(e.getCause()); } catch (InterruptedException e) { - throw new IOException(e); + throw (InterruptedIOException) + (new InterruptedIOException(e.toString()).initCause(e)); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java index 859a6c9e22260..df15dd5f8f2a3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java @@ -18,20 +18,14 @@ package org.apache.hadoop.yarn.client.api.impl; -import static org.mockito.Matchers.any; -import static org.mockito.Mockito.doReturn; -import static org.mockito.Mockito.doThrow; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.spy; -import static org.mockito.Mockito.when; - -import java.net.ConnectException; - +import com.sun.jersey.api.client.ClientHandlerException; +import com.sun.jersey.api.client.ClientResponse; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; +import org.apache.hadoop.service.ServiceOperations; import org.apache.hadoop.yarn.api.records.timeline.TimelineDomain; import org.apache.hadoop.yarn.api.records.timeline.TimelineEntities; import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity; @@ -46,8 +40,14 @@ import org.junit.Before; import org.junit.Test; -import com.sun.jersey.api.client.ClientHandlerException; -import com.sun.jersey.api.client.ClientResponse; +import java.net.ConnectException; + +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; public class TestTimelineClient { @@ -70,29 +70,21 @@ public void tearDown() { @Test public void testPostEntities() throws Exception { mockEntityClientResponse(client, ClientResponse.Status.OK, false, false); - try { - TimelinePutResponse response = client.putEntities(generateEntity()); - Assert.assertEquals(0, response.getErrors().size()); - } catch (YarnException e) { - Assert.fail("Exception is not expected"); - } + TimelinePutResponse response = client.putEntities(generateEntity()); + Assert.assertEquals(0, response.getErrors().size()); } @Test public void testPostEntitiesWithError() throws Exception { mockEntityClientResponse(client, ClientResponse.Status.OK, true, false); - try { - TimelinePutResponse response = client.putEntities(generateEntity()); - Assert.assertEquals(1, response.getErrors().size()); - Assert.assertEquals("test entity id", response.getErrors().get(0) - .getEntityId()); - Assert.assertEquals("test entity type", response.getErrors().get(0) - .getEntityType()); - Assert.assertEquals(TimelinePutResponse.TimelinePutError.IO_EXCEPTION, - response.getErrors().get(0).getErrorCode()); - } catch (YarnException e) { - Assert.fail("Exception is not expected"); - } + TimelinePutResponse response = client.putEntities(generateEntity()); + Assert.assertEquals(1, response.getErrors().size()); + Assert.assertEquals("test entity id", response.getErrors().get(0) + .getEntityId()); + Assert.assertEquals("test entity type", response.getErrors().get(0) + .getEntityType()); + Assert.assertEquals(TimelinePutResponse.TimelinePutError.IO_EXCEPTION, + response.getErrors().get(0).getErrorCode()); } @Test @@ -103,8 +95,7 @@ public void testPostEntitiesNoResponse() throws Exception { client.putEntities(generateEntity()); Assert.fail("Exception is expected"); } catch (YarnException e) { - Assert.assertTrue(e.getMessage().contains( - "Failed to get the response from the timeline server.")); + assertExceptionTextContains(e, TimelineClientImpl.ERROR_NO_ATS_RESPONSE); } } @@ -113,7 +104,7 @@ public void testPostEntitiesConnectionRefused() throws Exception { mockEntityClientResponse(client, null, false, true); try { client.putEntities(generateEntity()); - Assert.fail("RuntimeException is expected"); + Assert.fail("Exception is expected"); } catch (RuntimeException re) { Assert.assertTrue(re instanceof ClientHandlerException); } @@ -122,11 +113,7 @@ public void testPostEntitiesConnectionRefused() throws Exception { @Test public void testPutDomain() throws Exception { mockDomainClientResponse(client, ClientResponse.Status.OK, false); - try { - client.putDomain(generateDomain()); - } catch (YarnException e) { - Assert.fail("Exception is not expected"); - } + client.putDomain(generateDomain()); } @Test @@ -136,8 +123,13 @@ public void testPutDomainNoResponse() throws Exception { client.putDomain(generateDomain()); Assert.fail("Exception is expected"); } catch (YarnException e) { - Assert.assertTrue(e.getMessage().contains( - "Failed to get the response from the timeline server.")); + assertExceptionTextContains(e, TimelineClientImpl.ERROR_NO_ATS_RESPONSE); + } + } + + private void assertExceptionTextContains(Exception e, String text) { + if (!e.toString().contains(text)) { + throw new AssertionError("Did not find \"" + text + "\" in " + e, e); } } @@ -146,9 +138,9 @@ public void testPutDomainConnectionRefused() throws Exception { mockDomainClientResponse(client, null, true); try { client.putDomain(generateDomain()); - Assert.fail("RuntimeException is expected"); - } catch (RuntimeException re) { - Assert.assertTrue(re instanceof ClientHandlerException); + Assert.fail("Exception is expected"); + } catch (ClientHandlerException re) { + // expected } } @@ -160,44 +152,40 @@ public void testCheckRetryCount() throws Exception { conf.setInt(YarnConfiguration.TIMELINE_SERVICE_CLIENT_MAX_RETRIES, -2); createTimelineClient(conf); - Assert.fail(); + Assert.fail("IllegalArgumentException is expected"); } catch(IllegalArgumentException e) { - Assert.assertTrue(e.getMessage().contains( - YarnConfiguration.TIMELINE_SERVICE_CLIENT_MAX_RETRIES)); + assertExceptionTextContains(e, + YarnConfiguration.TIMELINE_SERVICE_CLIENT_MAX_RETRIES); } try { YarnConfiguration conf = new YarnConfiguration(); conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true); conf.setLong(YarnConfiguration.TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS, - 0); + 0); createTimelineClient(conf); - Assert.fail(); + Assert.fail("Exception is expected"); } catch(IllegalArgumentException e) { - Assert.assertTrue(e.getMessage().contains( - YarnConfiguration.TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS)); + assertExceptionTextContains(e, + YarnConfiguration.TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS); } int newMaxRetries = 5; long newIntervalMs = 500; YarnConfiguration conf = new YarnConfiguration(); conf.setInt(YarnConfiguration.TIMELINE_SERVICE_CLIENT_MAX_RETRIES, - newMaxRetries); + newMaxRetries); conf.setLong(YarnConfiguration.TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS, - newIntervalMs); + newIntervalMs); conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true); - TimelineClientImpl client = createTimelineClient(conf); + ServiceOperations.stop(client); + client = createTimelineClient(conf); try { // This call should fail because there is no timeline server client.putEntities(generateEntity()); Assert.fail("Exception expected! " + "Timeline server should be off to run this test. "); } catch (RuntimeException ce) { - Assert.assertTrue( - "Handler exception for reason other than retry: " + ce.getMessage(), - ce.getMessage().contains("Connection retries limit exceeded")); - // we would expect this exception here, check if the client has retried - Assert.assertTrue("Retry filter didn't perform any retries! ", client - .connectionRetry.getRetired()); + assertRetryException(client, ce); } } @@ -207,18 +195,18 @@ public void testDelegationTokenOperationsRetry() throws Exception { long newIntervalMs = 500; YarnConfiguration conf = new YarnConfiguration(); conf.setInt(YarnConfiguration.TIMELINE_SERVICE_CLIENT_MAX_RETRIES, - newMaxRetries); + newMaxRetries); conf.setLong(YarnConfiguration.TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS, - newIntervalMs); + newIntervalMs); conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true); // use kerberos to bypass the issue in HADOOP-11215 conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos"); UserGroupInformation.setConfiguration(conf); - - TimelineClientImpl client = createTimelineClient(conf); - TestTimlineDelegationTokenSecretManager dtManager = - new TestTimlineDelegationTokenSecretManager(); + ServiceOperations.stop(client); + client = createTimelineClient(conf); + TestTimelineDelegationTokenSecretManager dtManager = + new TestTimelineDelegationTokenSecretManager(); try { dtManager.startThreads(); Thread.sleep(3000); @@ -229,7 +217,7 @@ public void testDelegationTokenOperationsRetry() throws Exception { UserGroupInformation.getCurrentUser().getShortUserName()); assertFail(); } catch (RuntimeException ce) { - assertException(client, ce); + assertRetryException(client, ce); } try { @@ -244,7 +232,7 @@ public void testDelegationTokenOperationsRetry() throws Exception { new Text("0.0.0.0:8188"))); assertFail(); } catch (RuntimeException ce) { - assertException(client, ce); + assertRetryException(client, ce); } try { @@ -259,10 +247,9 @@ public void testDelegationTokenOperationsRetry() throws Exception { new Text("0.0.0.0:8188"))); assertFail(); } catch (RuntimeException ce) { - assertException(client, ce); + assertRetryException(client, ce); } } finally { - client.stop(); dtManager.stopThreads(); } } @@ -272,13 +259,11 @@ private static void assertFail() { + "Timeline server should be off to run this test."); } - private void assertException(TimelineClientImpl client, RuntimeException ce) { - Assert.assertTrue( - "Handler exception for reason other than retry: " + ce.toString(), ce - .getMessage().contains("Connection retries limit exceeded")); + private void assertRetryException(TimelineClientImpl timelineClient, Exception ce) { + assertExceptionTextContains(ce, TimelineClientImpl.ERROR_RETRIES_EXCEEDED); // we would expect this exception here, check if the client has retried Assert.assertTrue("Retry filter didn't perform any retries! ", - client.connectionRetry.getRetired()); + timelineClient.connectionRetry.getRetired()); } private static ClientResponse mockEntityClientResponse( @@ -365,10 +350,10 @@ private static TimelineClientImpl createTimelineClient( return client; } - private static class TestTimlineDelegationTokenSecretManager extends + private static class TestTimelineDelegationTokenSecretManager extends AbstractDelegationTokenSecretManager { - public TestTimlineDelegationTokenSecretManager() { + public TestTimelineDelegationTokenSecretManager() { super(100000, 100000, 100000, 100000); }