Skip to content
This repository has been archived by the owner on Oct 31, 2023. It is now read-only.

Commit

Permalink
Cherry pick of: Keep Golem running after Docker error at startup (#4009)
Browse files Browse the repository at this point in the history
  • Loading branch information
kmazurek authored Mar 15, 2019
1 parent ef87825 commit f6ce313
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 17 deletions.
8 changes: 4 additions & 4 deletions golem/docker/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ def check_environment(self):
***************************************************************
No supported VM hypervisor was found.
Golem will not be able to compute anything.
hypervisor.setup() returned {}
hypervisor.setup() returned {!r}
***************************************************************
""".format(e)
)
raise EnvironmentError
raise EnvironmentError("No VM hypervisor found.")

try:
# We're checking the availability of "docker" command line utility
Expand All @@ -69,11 +69,11 @@ def check_environment(self):
***************************************************************
Docker is not available, not building images.
Golem will not be able to compute anything.
Command 'docker info' returned {}
Command 'docker info' returned {!r}
***************************************************************
""".format(err)
)
raise EnvironmentError
raise EnvironmentError("Docker unavailable.")

try:
self.pull_images()
Expand Down
16 changes: 14 additions & 2 deletions golem/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from pathlib import Path
from twisted.internet import threads
from twisted.internet.defer import gatherResults, Deferred
from twisted.internet.defer import gatherResults, Deferred, FirstError
from twisted.python.failure import Failure

from apps.appsmanager import AppsManager
Expand Down Expand Up @@ -161,9 +161,21 @@ def on_rpc_ready() -> Deferred:
docker = self._start_docker()
return gatherResults([terms_, keys, docker], consumeErrors=True)

def on_start_error(failure: FirstError):
sub_failure: Failure = failure.value.subFailure
sub_failure.trap(EnvironmentError)

logger.error(
"""
There was a problem setting up the environment: {}
Golem will run with limited functionality to support
communication with local clients.
""".format(sub_failure.getErrorMessage())
)

chain_function(rpc, on_rpc_ready).addCallbacks(
self._setup_client,
self._error('keys or docker'),
on_start_error,
).addErrback(self._error('setup client'))
self._reactor.run()
except Exception: # pylint: disable=broad-except
Expand Down
2 changes: 1 addition & 1 deletion golem/task/rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def validate_client(client: Client):
if client.config_desc.in_shutdown:
raise CreateTaskError(
'Can not enqueue task: shutdown is in progress, '
'toggle shutdown mode off to create a new tasks.')
'toggle shutdown mode off to create new tasks.')
if client.task_server is None:
raise CreateTaskError("Golem is not ready")

Expand Down
79 changes: 69 additions & 10 deletions tests/golem/test_opt_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
from unittest.mock import patch, Mock, ANY, MagicMock

from click.testing import CliRunner
from twisted.internet.defer import Deferred, succeed
from twisted.internet.defer import Deferred, succeed, maybeDeferred, FirstError
from twisted.python.failure import Failure

import golem.argsparser as argsparser
from golem.appconfig import AppConfig
Expand Down Expand Up @@ -481,10 +482,8 @@ def mock_async_run(req, success=None, error=None):
return deferred


def done_deferred(*_):
deferred = Deferred()
deferred.callback(True)
return deferred
def done_deferred(f, *args, **kwargs):
return succeed(f(*args, **kwargs))


def chain_function(_, fn, *args, **kwargs):
Expand Down Expand Up @@ -519,13 +518,12 @@ def target(self):

@patch('golem.client.node_info_str')
@patch('golem.node.Node._start_keys_auth', set_keys_auth)
@patch('golem.node.Node._start_docker')
@patch('golem.core.golem_async.async_run', mock_async_run)
@patch('golem.node.chain_function', chain_function)
@patch('golem.node.threads.deferToThread', done_deferred)
@patch('golem.node.CrossbarRouter', Mock(_start_node=done_deferred))
@patch('golem.terms.TermsOfUse.are_accepted', return_value=True)
@patch('golem.node.Session')
@patch('golem.node.gatherResults')
@patch('twisted.internet.reactor', create=True)
class TestOptNode(TempDirFixture):

Expand Down Expand Up @@ -567,8 +565,66 @@ def test_start_rpc_router(self, reactor, *_):
assert reactor.addSystemEventTrigger.call_args[0] == (
'before', 'shutdown', self.node.rpc_router.stop)

@patch('golem.node.DockerManager')
def test_start_docker_mgr(self, *_):
# when
self.node_kwargs['use_docker_manager'] = True
self.node = Node(**self.node_kwargs)
self.node._setup_client = Mock()
self.node.start()

# then
assert self.node._docker_manager
assert self.node._docker_manager.check_environment.called # noqa # pylint: disable=no-member
assert self.node._docker_manager.apply_config.called # noqa # pylint: disable=no-member

@patch('golem.node.DockerManager')
def test_not_start_docker_mgr(self, *_):
# when
self.node_kwargs['use_docker_manager'] = False
self.node = Node(**self.node_kwargs)
self.node._setup_client = Mock()
self.node.start()

# then
assert not self.node._docker_manager

@patch('golem.node.DockerManager')
def test_start_docker_unavailable(self, mock_dm, *_):
self.node_kwargs['use_docker_manager'] = True
self.node = Node(**self.node_kwargs)
setup_client_mock = Mock()
self.node._setup_client = setup_client_mock
# This needs to be wrapped in FirstError since the exception is raised
# from a Deferred in gatherResults
mock_dm.check_environment.side_effect = FirstError(
Failure(EnvironmentError()), 0)

with patch('golem.node.DockerManager.install', return_value=mock_dm):
self.node.start()

setup_client_mock.assert_not_called()
self.node._docker_manager.apply_config.assert_not_called() # noqa # pylint: disable=no-member
self.node._docker_manager.check_environment.assert_called() # noqa # pylint: disable=no-member

@patch('golem.node.DockerManager')
def test_start_docker_other_error(self, mock_dm, *_):
self.node_kwargs['use_docker_manager'] = True
self.node = Node(**self.node_kwargs)
error_msg = 'just a test'
mock_dm.check_environment.side_effect = FirstError(
Failure(Exception(error_msg)), 0)

with patch('golem.node.DockerManager.install', return_value=mock_dm), \
self.assertLogs('golem.node', level='INFO') as logs:
self.node.start()
output = "\n".join(logs.output)

assert error_msg in output

@patch('golem.node.gatherResults')
@patch('golem.node.TransactionSystem')
def test_start_creates_client(self, _ets, reactor, mock_gather_results, *_):
def test_start_creates_client(self, _ets, mock_gather_results, reactor, *_):
mock_gather_results.return_value = mock_gather_results
mock_gather_results.addCallbacks.side_effect = \
lambda callback, _: callback([])
Expand All @@ -588,14 +644,15 @@ def test_start_creates_client(self, _ets, reactor, mock_gather_results, *_):
assert reactor.addSystemEventTrigger.call_args_list[1][0] == (
'before', 'shutdown', self.node.client.quit)

@patch('golem.node.gatherResults')
@patch('golem.node.TransactionSystem')
@patch('golem.node.Node._run')
def test_start_creates_client_and_calls_run(
self,
mock_run,
_ets,
reactor,
mock_gather_results,
reactor,
mock_session,
*_):
# given
Expand All @@ -621,8 +678,9 @@ def test_start_creates_client_and_calls_run(
assert mock_run.called
assert reactor.addSystemEventTrigger.call_count == 2

@patch('golem.node.gatherResults')
def test_start_starts_client(
self, reactor, mock_gather_results, mock_session, *_):
self, mock_gather_results, reactor, mock_session, *_):

# given
mock_gather_results.return_value = mock_gather_results
Expand Down Expand Up @@ -656,6 +714,7 @@ def test_start_starts_client(
self.node.client.connect.assert_called_with(parsed_peer[0])
assert reactor.addSystemEventTrigger.call_count == 2

@patch('golem.node.gatherResults')
def test_start_prints_exception_message(self, *_):
# given
self.node = Node(**self.node_kwargs)
Expand Down

0 comments on commit f6ce313

Please # to comment.