Skip to content

Commit

Permalink
Merge pull request #4 from amazonlinux/dev
Browse files Browse the repository at this point in the history
Merging dev into main.
  • Loading branch information
ThatStasGuy authored Feb 19, 2024
2 parents 20574c1 + 6bce5f8 commit 7565d8d
Show file tree
Hide file tree
Showing 9 changed files with 333 additions and 70 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
._UG#_STORE
.code-workspace
._UG#_Store
*.code-workspace
88 changes: 80 additions & 8 deletions bin/smart-restart.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ readonly REBOOT_HINT_PATH=${REBOOT_HINT_PATH:-/run/smart-restart}
readonly REBOOT_HINT_MARKER="${REBOOT_HINT_PATH}"/reboot-hint-marker
readonly CONF_PATH=${CONF_PATH:-/etc/smart-restart-conf.d}
readonly DENYLISTS=("${CONF_PATH}"/*-denylist)
# shellcheck disable=SC2207
readonly PRE_RESTART=($(ls "$CONF_PATH"/*pre-restart | sort -n))
# shellcheck disable=SC2207
readonly POST_RESTART=($(ls "$CONF_PATH"/*post-restart | sort -n))

SYSCTL_COMMAND="${SYSCTL_COMMAND:-systemctl}"
Expand All @@ -25,14 +27,26 @@ DEBUG=${DEBUG:-}

SERVICES=()
BLOCKED_SERVICES=()
PRE_RESTART_HEALTHY=0
POST_RESTART_HEALTHY=0

# Logging helpers. Each takes the message to print as $1.
#   INF  - writes the message to stdout.
#   DBG  - writes the message to stderr, but only when $DEBUG is non-empty.
#   CRIT - writes the message to stderr, prefixed with "*** ERROR: ".
INF() { echo "$1"; }
DBG() {
    # An explicit `if` is used instead of `[[ ... ]] && echo`: with the
    # short-circuit form the function returned 1 whenever DEBUG was empty, so a
    # silenced debug line looked like a failed command to the caller.
    if [[ "${DEBUG:-}" != "" ]]; then
        >&2 echo "$1"
    fi
    return 0
}
CRIT() { >&2 echo "*** ERROR: $1"; }

# Abort the script unless it is running with an effective UID of 0.
# Returns 0 for root; otherwise reports the problem through CRIT and exits
# the current shell with status 1 (it does not return to the caller).
assert_root() {
    local current_uid
    current_uid="$(id -u)"

    if [[ "$current_uid" == "0" ]]; then
        return 0
    fi

    CRIT "Please run this script as root."
    exit 1
}

assemble_service_list() {
# shellcheck disable=SC2207
local all_services=($($NEEDS_RESTARTING_COMMAND -s | xargs))

# shellcheck disable=SC2048
BLOCKED_SERVICES=("$(sed "s/#.*//g" ${DENYLISTS[*]})")

DBG "Denylist: ${DENYLISTS[*]}"
Expand Down Expand Up @@ -60,14 +74,16 @@ execute_pre_hooks() {
restart_services() {
local -i retval=0

if [[ ${#SERVICES[@]} != 0 ]]; then
# shellcheck disable=SC2086
if [[ ${#SERVICES[@]} != 0 ]]; then
DBG "Attempting to restart services: ${SERVICES[*]}"
# shellcheck disable=SC2048
# shellcheck disable=SC2048,SC2086
$SYSCTL_COMMAND restart ${SERVICES[*]} || retval=$?
else
DBG "No services to restart"
fi



if [[ ! "${BLOCKED_SERVICES[*]}" =~ "systemd" ]]; then
DBG "Attempting to restart systemd itself"
$SYSCTL_COMMAND daemon-reexec || retval=$?
Expand All @@ -77,6 +93,42 @@ restart_services() {
return $retval
}

# The {pre,post} health checks could live in restart_services(), but they are kept as separate functions so they can be tested in isolation.
# Record how many of the services in SERVICES report "active (running)" in
# `$SYSCTL_COMMAND status` output, before anything is restarted.
# Globals:
#   SERVICES            (read)    - services that are about to be restarted
#   SYSCTL_COMMAND      (read)    - systemctl binary (or a stand-in for it)
#   PRE_RESTART_HEALTHY (written) - number of matching "Active" lines; left
#                                   untouched when SERVICES is empty
# Returns: always 0. The result is the counter, not the exit status.
count_pre_restart_health() {
    DBG "No of services to check (pre-restart): ${#SERVICES[@]}"

    if [[ ${#SERVICES[@]} != 0 ]]; then
        # `grep -c` prints "0" AND exits with status 1 when nothing matches.
        # Without the `|| true` that status became the function's own status,
        # so "zero healthy services" was reported as a failed call.
        # shellcheck disable=SC2048,SC2086
        PRE_RESTART_HEALTHY=$($SYSCTL_COMMAND status ${SERVICES[*]} | grep "Active" | grep -cE "active \(running\)") || true
    fi

    return 0
}

# Count the services in SERVICES that report "active (running)" after the
# restart and compare that number with the pre-restart count.
# Globals:
#   SERVICES             (read)    - services that were restarted
#   SYSCTL_COMMAND       (read)    - systemctl binary (or a stand-in for it)
#   PRE_RESTART_HEALTHY  (read)    - count taken before the restart
#   POST_RESTART_HEALTHY (written) - count taken now; left untouched when
#                                    SERVICES is empty
# Outputs: on a mismatch, reports via CRIT every service for which
#          `$SYSCTL_COMMAND is-active` returns non-zero.
# Returns: 0 when both counts are equal, 1 otherwise.
count_post_restart_health() {
    # Loop variable and result list are local so the function no longer
    # overwrites global variables named SERVICE / S.
    local service
    local -a failed_services=()

    DBG "No of services to check (post-restart): ${#SERVICES[@]}"

    if [[ ${#SERVICES[@]} != 0 ]]; then
        # `grep -c` exits 1 when the count is 0; that is a valid result here,
        # not an error, so the status is deliberately ignored.
        # shellcheck disable=SC2048,SC2086
        POST_RESTART_HEALTHY=$($SYSCTL_COMMAND status ${SERVICES[*]} | grep "Active" | grep -cE "active \(running\)") || true
    fi
    DBG "Pre restart cnt: $PRE_RESTART_HEALTHY"
    DBG "Post restart cnt: $POST_RESTART_HEALTHY"

    # NOTE(review): any difference is treated as a failure, including the case
    # where MORE services are running after the restart than before - confirm
    # that this is intended.
    if [[ "$PRE_RESTART_HEALTHY" == "$POST_RESTART_HEALTHY" ]]; then
        return 0
    fi

    for service in "${SERVICES[@]}"; do
        if ! $SYSCTL_COMMAND is-active "${service}"; then
            failed_services+=("${service}")
        fi
    done

    CRIT "Not all services could be successfully started. Failed services: ${failed_services[*]}"

    return 1
}

execute_post_hooks() {
DBG "Executing post-restart hooks: ${POST_RESTART[*]}"
for HOOK in "${POST_RESTART[@]}"; do
Expand All @@ -90,12 +142,14 @@ execute_post_hooks() {
# This means, we need to consolidate a few information sources here to be sure.
# 1) Check if processes actually got restarted (and ignore the "denylisted" services)
# 2) Remove userspace components from the reboot-hint output
readonly OS_VERSION=$(cut -d ":" -f6 /etc/system-release-cpe)
# shellcheck disable=SC2155
LOCAL_OS_VERSION=$(cut -d ":" -f6 /etc/system-release-cpe)
OS_VERSION=${OS_VERSION:-$LOCAL_OS_VERSION}

generate_reboot_hint_marker() {
local -i reboot_hint=0
local -i retval=0

# shellcheck disable=SC2155
local post_restart_services=$($NEEDS_RESTARTING_COMMAND -s | xargs)
local failed_services=()
for SERVICE in $post_restart_services; do
Expand All @@ -106,6 +160,8 @@ generate_reboot_hint_marker() {
fi
done

count_post_restart_health || retval=$?

local reboothint_separator=""

# Consistency is key, that's why the output of needs-restarting --reboothint has different styles for yum & dnf (output for glibc):
Expand All @@ -117,11 +173,12 @@ generate_reboot_hint_marker() {
reboothint_separator="*"
else
CRIT "ERROR: Could not determine OS. I won't create a reboot hint marker"
exit 1
return 1
fi

# Those are the packages `needs-restarting` is scanning for. We're going to ignore the ones we know we can't restart
# ['kernel', 'kernel-rt', 'glibc', 'linux-firmware', 'systemd', 'udev', 'openssl-libs', 'gnutls', 'dbus']
# shellcheck disable=SC2155
local updated_components=$($NEEDS_RESTARTING_COMMAND --reboothint | grep -v "glibc\|systemd\|openssl-libs\|gnutls\|dbus\|udev" | grep -- "${reboothint_separator}")
# At this point $updated_components should only report in case kernel* or linux-* was updated.

Expand All @@ -130,6 +187,19 @@ generate_reboot_hint_marker() {
DBG "Encountered updates we cannot restart without a reboot: $updated_components"
fi

if [[ "$PRE_RESTART_HEALTHY" != "$POST_RESTART_HEALTHY" ]]; then
reboot_hint=1
# shellcheck disable=SC2048
for SERVICE in ${SERVICES[*]}; do
$SYSCTL_COMMAND status "$SERVICE"

if [[ $? != 0 ]]; then

CRIT "Service \"${SERVICE}\" failed to start again."
fi
done
fi


if [[ $reboot_hint == 1 ]]; then
mkdir -p "$REBOOT_HINT_PATH"
Expand All @@ -144,9 +214,11 @@ generate_reboot_hint_marker() {
}

if [[ -z "$IS_TESTING" ]]; then
assert_root
assemble_service_list
count_pre_restart_health
execute_pre_hooks
restart_services
execute_post_hooks
generate_reboot_hint_marker
fi
generate_reboot_hint_marker || exit $?
fi
1 change: 1 addition & 0 deletions tests/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
.PHONY: all

all:
./test-common.sh
./test-reboot-hint.sh
./test-hooks.sh
./test-restart.sh
5 changes: 4 additions & 1 deletion tests/mocks/needs-restarting
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
#!/usr/bin/env bash

S=""
readonly OS_VERSION=$(cut -d ":" -f6 /etc/system-release-cpe)
# Required for testing on an Ubuntu-based machine, specifically on
# GitHub-hosted test runners.
LOCAL_OS_VERSION=$(cut -d ":" -f6 /etc/system-release-cpe)
OS_VERSION=${OS_VERSION:-$LOCAL_OS_VERSION}

if [[ "$OS_VERSION" -eq "2" ]]; then
S="->"
Expand Down
69 changes: 60 additions & 9 deletions tests/mocks/systemctl
Original file line number Diff line number Diff line change
@@ -1,27 +1,78 @@
#!/usr/bin/env bash

SERVICE_STATUS_RUNNING="● chronicled.service - chronicled
Loaded: loaded (/usr/lib/systemd/system/chronicled.service; enabled; vendor preset: disabled)
Active: active (running) since Thu 2023-11-16 09:22:15 CET; 2 weeks 1 days ago
Main PID: 10200 (chronicled)
Tasks: 37
Memory: 256.7M
CGroup: /system.slice/chronicled.service
└─10200 /usr/local/chronicle/sbin/chronicled
Dec 01 10:06:19 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:4] Unknow...it'
Dec 01 10:06:19 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:5] Unknow...it'
Dec 01 10:06:19 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:4] Unknow...it'
Dec 01 10:06:19 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:5] Unknow...it'
Dec 01 10:29:42 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:4] Unknow...it'
Dec 01 10:29:42 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:5] Unknow...it'
Dec 01 10:29:42 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:4] Unknow...it'
Dec 01 10:29:42 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:5] Unknow...it'
Dec 01 10:29:42 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:4] Unknow...it'
Dec 01 10:29:42 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:5] Unknow...it'
Hint: Some lines were ellipsized, use -l to show in full.
"

SERVICE_STATUS_FAILED="● network.service - LSB: Bring up/down networking
Loaded: loaded (/etc/rc.d/init.d/network; bad; vendor preset: disabled)
Active: failed (Result: timeout) since Thu 2023-11-16 09:22:14 CET; 2 weeks 1 days ago
Docs: man:systemd-sysv-generator(8)
CGroup: /system.slice/network.service
├─10737 /sbin/dhclient -q -lf /var/lib/dhclient/dhclient--eth0.lease -pf /var/run/dhclient-eth0.pid -H dev-dsk-suschako-1a-...
└─10789 /sbin/dhclient -6 -nw -lf /var/lib/dhclient/dhclient6--eth0.lease -pf /var/run/dhclient6-eth0.pid eth0 -H dev-dsk-s...
Dec 01 10:46:41 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10789]: XMT: Solicit on eth0, interval 123790ms.
Dec 01 10:48:45 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10789]: XMT: Solicit on eth0, interval 110330ms.
Dec 01 10:50:35 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10789]: XMT: Solicit on eth0, interval 124670ms.
Dec 01 10:52:40 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10789]: XMT: Solicit on eth0, interval 118210ms.
Dec 01 10:54:38 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10789]: XMT: Solicit on eth0, interval 130040ms.
Dec 01 10:56:48 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10789]: XMT: Solicit on eth0, interval 125680ms.
Dec 01 10:58:01 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10737]: DHCPREQUEST on eth0 to 10.15.96.1 port 67 (xid=0...74)
Dec 01 10:58:01 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10737]: DHCPACK from 10.15.96.1 (xid=0x66783e74)
Dec 01 10:58:01 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10737]: bound to 10.15.105.243 -- renewal in 1676 seconds.
Dec 01 10:58:54 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10789]: XMT: Solicit on eth0, interval 130260ms.
Hint: Some lines were ellipsized, use -l to show in full.
"



# Debug helper for the systemctl mock: prints all arguments to stderr when
# $DEBUG is non-empty. Returns 1 without printing anything when $DEBUG is empty.
DBG() {
    if [[ -z "$DEBUG" ]]; then
        return 1
    fi
    echo "$@" >&2
}

DBG "systemctl-mock invoced with: ${@}"

if [[ "$1" == "daemon-reexec" ]]; then
[[ $# == 1 ]] && exit 0
[[ "$DEBUG" != "" ]] && echo "systemd daemon-reexec called incorrectly: (No params: $# -> $@)"
DBG "systemd daemon-reexec called incorrectly: (No params: $# -> $@)"
exit 1
elif [[ "$1" == "restart" ]]; then
[[ "$DEBUG" != "" ]] && echo "(No. params: $# -> ${@})"
if [[ $SYS_EXPECT_2 == 1 ]]; then
[[ $# == 3 ]] && [[ "$2" == "dummy.service" ]] && [[ "$3" == "dummy2.service" ]] && exit 0

exit 98
elif [[ $SYS_EXPECT_1 == 1 ]]; then
[[ $# == 2 ]] && [[ "$2" == "dummy2.service" ]] && exit 0

exit 98
elif [[ $SYS_EXPECT_0 == 1 ]]; then
[[ $# == 1 ]] && exit 0;
else
exit 98
fi
elif [[ "$1" == "status" ]]; then
if [[ $SYS_RESTART_FAILED == "1" ]]; then
echo "$SERVICE_STATUS_FAILED"
exit 3
else
echo "$SERVICE_STATUS_RUNNING"
exit 0
fi
else
echo "systemd called incorrectly: (No params: $# -> $@)"
exit 1
DBG "systemd called incorrectly: (No params: $# -> $@)"
fi

exit 98
exit 98
15 changes: 13 additions & 2 deletions tests/setup_test
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# This file needs to be sourced in all tests. Additionally, `TEST_NAME` can be set:
# TEST_NAME="MY NEW TEST"
# . "$(pwd)"/setup_test
# Also, to reset the state before test execution, call reset_test_environment in every test.

RED='\033[1;91m'
GREEN='\033[1;92m'
Expand All @@ -28,12 +29,22 @@ echo -e "=========================== ${YELLOW}$TEST_NAME${COLOR_OFF} started "==

. $UUT

# The scripts under test are sourced, so their global variables keep their values between tests.
# reset_test_environment resets them so that every test starts from a clean state.
# Put the sourced script's global state and the on-disk fixtures back to their
# defaults: empties both service arrays, zeroes both health counters, truncates
# the two denylists under conf/ to a single empty line, and removes the
# reboot-hint-marker from the current working directory.
reset_test_environment() {
    local denylist
    local marker

    PRE_RESTART_HEALTHY="0"
    POST_RESTART_HEALTHY="0"
    SERVICES=()
    BLOCKED_SERVICES=()

    for denylist in conf/default-denylist conf/custom-denylist; do
        echo "" > "${denylist}"
    done

    marker="$(pwd)/reboot-hint-marker"
    rm -rf "${marker}"
}

# Print a "PASSED" result line for the current test.
# $1 is the description appended after the verdict; TEST_NAME, GREEN and
# COLOR_OFF are read from the surrounding environment.
PASSED() {
    local verdict="${GREEN}PASSED${COLOR_OFF}"
    echo -e " test ${TEST_NAME} [${verdict}]: $1"
}

# Print a "FAILED" result line for the current test.
# $1 is the description appended after the verdict; TEST_NAME, RED and
# COLOR_OFF are read from the surrounding environment.
FAILED() {
    local verdict="${RED}FAILED${COLOR_OFF}"
    echo -e " test ${TEST_NAME} [${verdict}]: $1"
}


27 changes: 27 additions & 0 deletions tests/test-common.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash

# This is the place to test common functionality that does not fit into any other test AND
# does not warrant a test file of its own.

# cannot follow "$(pwd)/setup_test"
# shellcheck disable=SC1091

# Unused variables like TEST_NAME
# shellcheck disable=SC2034
TEST_NAME="Common"
. "$(pwd)"/setup_test

# Verifies that assert_root refuses to continue for a non-root user.
# Prints PASSED when assert_root terminates with a non-zero status and FAILED
# when it succeeds (e.g. when the test suite itself is run as root).
function test_assert_root() {
    # Must start at 0: the status is only captured on failure, so an unset or
    # stale $retval would otherwise make a successful assert_root look like a
    # pass.
    local retval=0

    DESCRIPTION="Root assert fails for user"
    reset_test_environment
    # Need a subshell here since assert_root exits instead of returning
    (assert_root) || retval=$?

    if [[ $retval != 0 ]]; then
        PASSED "$DESCRIPTION"
    else
        FAILED "$DESCRIPTION (error: $retval)"
    fi
}

test_assert_root
Loading

0 comments on commit 7565d8d

Please # to comment.