From 6bc8b9a88a6e2b16559ea8be76a64a64238c8766 Mon Sep 17 00:00:00 2001 From: Junchao-Mellanox <57339448+Junchao-Mellanox@users.noreply.github.com> Date: Thu, 26 Aug 2021 10:22:59 +0800 Subject: [PATCH] [config reload] Call systemctl reset-failed for snmp,telemetry,mgmt-framework services (#1773) When the `config reload -y` or `config load_minigraph -y` command is issued, most of the sonic services are reset with the command `systemctl reset-failed <service>`. The purpose is to keep services from reaching their start retry limit, after which they can no longer be started. However, `systemctl reset-failed` is only called for services that belong to sonic.target; snmp, telemetry and mgmt-framework are not among them. So if we run `config reload -y` or `config load_minigraph -y` repeatedly, the snmp, telemetry and mgmt-framework services might enter the failed state. This PR fixes the issue. I would like to cherry-pick this fix to the 202012 branch, but this fix also depends on PR https://github.com/Azure/sonic-buildimage/pull/7846. So if we decide to cherry-pick this PR to 202012, we need to cherry-pick https://github.com/Azure/sonic-buildimage/pull/7846 first. Also call `systemctl reset-failed` for services like snmp, telemetry and mgmt-framework. Manual test. 
+ --- config/main.py | 10 +++++++--- tests/config_test.py | 14 ++++++++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/config/main.py b/config/main.py index c52df1bd2b..b6531eb78e 100644 --- a/config/main.py +++ b/config/main.py @@ -701,12 +701,16 @@ def _stop_services(): def _get_sonic_services(): out = clicommon.run_command("systemctl list-dependencies --plain sonic.target | sed '1d'", return_cmd=True) - return [unit.strip() for unit in out.splitlines()] + return (unit.strip() for unit in out.splitlines()) + + +def _get_delayed_sonic_services(): + out = clicommon.run_command("systemctl list-dependencies --plain sonic-delayed.target | sed '1d'", return_cmd=True) + return (unit.strip().rstrip('.timer') for unit in out.splitlines()) def _reset_failed_services(): - for service in _get_sonic_services(): - click.echo("Resetting failed status on {}".format(service)) + for service in itertools.chain(_get_sonic_services(), _get_delayed_sonic_services()): clicommon.run_command("systemctl reset-failed {}".format(service)) diff --git a/tests/config_test.py b/tests/config_test.py index d2d100a35c..826184664e 100644 --- a/tests/config_test.py +++ b/tests/config_test.py @@ -28,7 +28,13 @@ def mock_run_command_side_effect(*args, **kwargs): click.echo(click.style("Running command: ", fg='cyan') + click.style(command, fg='green')) if kwargs.get('return_cmd'): - return '' + if command == "systemctl list-dependencies --plain sonic-delayed.target | sed '1d'": + return 'snmp.timer' + elif command == "systemctl list-dependencies --plain sonic.target | sed '1d'": + return 'swss' + else: + return '' + class TestLoadMinigraph(object): @classmethod @@ -48,7 +54,11 @@ def test_load_minigraph(self, get_cmd_module, setup_single_broadcom_asic): traceback.print_tb(result.exc_info[2]) assert result.exit_code == 0 assert "\n".join([l.rstrip() for l in result.output.split('\n')]) == load_minigraph_command_output - assert mock_run_command.call_count == 7 + # Verify "systemctl 
reset-failed" is called for services under sonic.target + mock_run_command.assert_any_call('systemctl reset-failed swss') + # Verify "systemctl reset-failed" is called for services under sonic-delayed.target + mock_run_command.assert_any_call('systemctl reset-failed snmp') + assert mock_run_command.call_count == 10 def test_load_minigraph_with_port_config_bad_format(self, get_cmd_module, setup_single_broadcom_asic): with mock.patch(