Skip to content

Commit

Permalink
Merge pull request #4714 from meeseeksmachine/auto-backport-of-pr-456…
Browse files Browse the repository at this point in the history
…1-on-4.x

Backport PR #4561 on branch 4.x (Improve debugging when waiting for servers)
  • Loading branch information
minrk authored Feb 29, 2024
2 parents d4c7d97 + 05e569c commit 63e118f
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 21 deletions.
81 changes: 62 additions & 19 deletions jupyterhub/proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
from . import utils
from .metrics import CHECK_ROUTES_DURATION_SECONDS, PROXY_POLL_DURATION_SECONDS
from .objects import Server
from .utils import AnyTimeoutError, exponential_backoff, url_escape_path, url_path_join
from .utils import exponential_backoff, url_escape_path, url_path_join


def _one_at_a_time(method):
Expand Down Expand Up @@ -767,24 +767,67 @@ async def start(self):

self._write_pid_file()

def _check_process():
status = self.proxy_process.poll()
if status is not None:
with self.proxy_process:
e = RuntimeError("Proxy failed to start with exit code %i" % status)
raise e from None

for server in (public_server, api_server):
for i in range(10):
_check_process()
try:
await server.wait_up(1)
except AnyTimeoutError:
continue
else:
break
await server.wait_up(1)
_check_process()
async def wait_for_process():
"""Watch proxy process for early termination
Runs forever, checking every 0.5s if the process has exited
so we don't keep waiting for endpoints after the proxy has stopped.
Raises RuntimeError if/when the proxy process exits,
otherwise runs forever.
Should be cancelled when servers become ready.
"""
while True:
status = self.proxy_process.poll()
if status is not None:
with self.proxy_process:
e = RuntimeError(
f"Proxy failed to start with exit code {status}"
)
raise e from None
await asyncio.sleep(0.5)

# process_exited can only resolve with a RuntimeError when the process has exited,
# otherwise it must be cancelled.
process_exited = asyncio.ensure_future(wait_for_process())

# wait for both servers to be ready (or one server to fail)
server_futures = [
asyncio.ensure_future(server.wait_up(10))
for server in (public_server, api_server)
]
servers_ready = asyncio.gather(*server_futures)

# wait for process to crash or servers to be ready,
# whichever comes first
wait_timeout = 15
ready, pending = await asyncio.wait(
[
process_exited,
servers_ready,
],
return_when=asyncio.FIRST_COMPLETED,
timeout=wait_timeout,
)
for task in [servers_ready, process_exited] + server_futures:
# cancel any pending tasks
if not task.done():
task.cancel()
if not ready:
# timeouts passed to wait_up should prevent this,
# but weird things like DNS delays may result in
# wait_up taking a lot longer than it should
raise TimeoutError(
f"Waiting for proxy endpoints didn't complete in {wait_timeout}s"
)
if process_exited in ready:
# process exited, this will raise RuntimeError
await process_exited
else:
# if we got here, servers_ready is done
# await it to make sure exceptions are raised
await servers_ready

self.log.debug("Proxy started and appears to be up")
pc = PeriodicCallback(self.check_running, 1e3 * self.check_running_interval)
self._check_running_callback = pc
Expand Down
14 changes: 12 additions & 2 deletions jupyterhub/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import ssl
import sys
import threading
import time
import uuid
import warnings
from binascii import b2a_hex
Expand Down Expand Up @@ -90,6 +91,8 @@ def can_connect(ip, port):
except OSError as e:
if e.errno not in {errno.ECONNREFUSED, errno.ETIMEDOUT}:
app_log.error("Unexpected error connecting to %s:%i %s", ip, port, e)
else:
app_log.debug("Server at %s:%i not ready: %s", ip, port, e)
return False
else:
return True
Expand Down Expand Up @@ -242,26 +245,31 @@ async def wait_for_server(ip, port, timeout=10):
"""Wait for any server to show up at ip:port."""
if ip in {'', '0.0.0.0', '::'}:
ip = '127.0.0.1'
app_log.debug("Waiting %ss for server at %s:%s", timeout, ip, port)
tic = time.perf_counter()
await exponential_backoff(
lambda: can_connect(ip, port),
"Server at {ip}:{port} didn't respond in {timeout} seconds".format(
ip=ip, port=port, timeout=timeout
),
timeout=timeout,
)
toc = time.perf_counter()
app_log.debug("Server at %s:%s responded in %.2fs", ip, port, toc - tic)


async def wait_for_http_server(url, timeout=10, ssl_context=None):
"""Wait for an HTTP Server to respond at url.
Any non-5XX response code will do, even 404.
"""
loop = ioloop.IOLoop.current()
tic = loop.time()
client = AsyncHTTPClient()
if ssl_context:
client.ssl_options = ssl_context

app_log.debug("Waiting %ss for server at %s", timeout, url)
tic = time.perf_counter()

async def is_reachable():
try:
r = await client.fetch(url, follow_redirects=False)
Expand Down Expand Up @@ -294,6 +302,8 @@ async def is_reachable():
),
timeout=timeout,
)
toc = time.perf_counter()
app_log.debug("Server at %s responded in %.2fs", url, toc - tic)
return re


Expand Down

0 comments on commit 63e118f

Please # to comment.