Skip to content

Commit 7565d8d

Browse files
authored
Merge pull request #4 from amazonlinux/dev
Merging dev into main.
2 parents 20574c1 + 6bce5f8 commit 7565d8d

File tree

9 files changed

+333
-70
lines changed

9 files changed

+333
-70
lines changed

.gitignore

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
._UG#_STORE
2-
.code-workspace
1+
._UG#_Store
2+
*.code-workspace

bin/smart-restart.sh

Lines changed: 80 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@ readonly REBOOT_HINT_PATH=${REBOOT_HINT_PATH:-/run/smart-restart}
1414
readonly REBOOT_HINT_MARKER="${REBOOT_HINT_PATH}"/reboot-hint-marker
1515
readonly CONF_PATH=${CONF_PATH:-/etc/smart-restart-conf.d}
1616
readonly DENYLISTS=("${CONF_PATH}"/*-denylist)
17+
# shellcheck disable=SC2207
1718
readonly PRE_RESTART=($(ls "$CONF_PATH"/*pre-restart | sort -n))
19+
# shellcheck disable=SC2207
1820
readonly POST_RESTART=($(ls "$CONF_PATH"/*post-restart | sort -n))
1921

2022
SYSCTL_COMMAND="${SYSCTL_COMMAND:-systemctl}"
@@ -25,14 +27,26 @@ DEBUG=${DEBUG:-}
2527

2628
SERVICES=()
2729
BLOCKED_SERVICES=()
30+
PRE_RESTART_HEALTHY=0
31+
POST_RESTART_HEALTHY=0
2832

2933
INF() { echo "$1"; }
3034
DBG() { [[ "$DEBUG" != "" ]] && >&2 echo "$1"; }
3135
CRIT() { >&2 echo "*** ERROR: $1"; }
3236

37+
assert_root() {
38+
if [[ "$(id -u)" != "0" ]]; then
39+
CRIT "Please run this script as root."
40+
exit 1
41+
fi
42+
return 0
43+
}
44+
3345
assemble_service_list() {
46+
# shellcheck disable=SC2207
3447
local all_services=($($NEEDS_RESTARTING_COMMAND -s | xargs))
3548

49+
# shellcheck disable=SC2048
3650
BLOCKED_SERVICES=("$(sed "s/#.*//g" ${DENYLISTS[*]})")
3751

3852
DBG "Denylist: ${DENYLISTS[*]}"
@@ -60,14 +74,16 @@ execute_pre_hooks() {
6074
restart_services() {
6175
local -i retval=0
6276

63-
if [[ ${#SERVICES[@]} != 0 ]]; then
77+
# shellcheck disable=SC2086
78+
if [[ ${#SERVICES[@]} != 0 ]]; then
6479
DBG "Attempting to restart services: ${SERVICES[*]}"
65-
# shellcheck disable=SC2048
80+
# shellcheck disable=SC2048,SC2086
6681
$SYSCTL_COMMAND restart ${SERVICES[*]} || retval=$?
6782
else
6883
DBG "No services to restart"
6984
fi
70-
85+
86+
7187
if [[ ! "${BLOCKED_SERVICES[*]}" =~ "systemd" ]]; then
7288
DBG "Attempting to restart systemd itself"
7389
$SYSCTL_COMMAND daemon-reexec || retval=$?
@@ -77,6 +93,42 @@ restart_services() {
7793
return $retval
7894
}
7995

96+
# Although the {pre,post} health checks could live in restart_services(), they are kept in separate functions so that they can be tested on their own.
97+
count_pre_restart_health() {
98+
DBG "No of services to check (pre-restart): ${#SERVICES[@]}"
99+
if [[ ${#SERVICES[@]} != 0 ]]; then
100+
# shellcheck disable=SC2048,SC2086
101+
PRE_RESTART_HEALTHY=$($SYSCTL_COMMAND status ${SERVICES[*]} | grep "Active" | grep -cE "active \(running\)")
102+
fi
103+
}
104+
105+
count_post_restart_health() {
106+
DBG "No of services to check (post-restart): ${#SERVICES[@]}"
107+
108+
if [[ ${#SERVICES[@]} != 0 ]]; then
109+
# shellcheck disable=SC2048,SC2086
110+
POST_RESTART_HEALTHY=$($SYSCTL_COMMAND status ${SERVICES[*]} | grep "Active" | grep -cE "active \(running\)")
111+
fi
112+
DBG "Pre restart cnt: $PRE_RESTART_HEALTHY"
113+
DBG "Post restart cnt: $POST_RESTART_HEALTHY"
114+
115+
if [[ "$PRE_RESTART_HEALTHY" != "$POST_RESTART_HEALTHY" ]]; then
116+
S=()
117+
for SERVICE in "${SERVICES[@]}"; do
118+
$SYSCTL_COMMAND is-active ${SERVICE}
119+
local -i retval=$?
120+
121+
if [[ $retval -gt 0 ]]; then
122+
S+=("${SERVICE}")
123+
fi
124+
done
125+
126+
CRIT "Not all services could be successfully started. Failed services: ${S[*]}"
127+
128+
return 1
129+
fi
130+
}
131+
80132
execute_post_hooks() {
81133
DBG "Executing post-restart hooks: ${POST_RESTART[*]}"
82134
for HOOK in "${POST_RESTART[@]}"; do
@@ -90,12 +142,14 @@ execute_post_hooks() {
90142
# This means, we need to consolidate a few information sources here to be sure.
91143
# 1) Check if processes actually got restarted (and ignore the "denylisted" services)
92144
# 2) Remove userspace components from the reboot-hint output
93-
readonly OS_VERSION=$(cut -d ":" -f6 /etc/system-release-cpe)
145+
# shellcheck disable=SC2155
146+
LOCAL_OS_VERSION=$(cut -d ":" -f6 /etc/system-release-cpe)
147+
OS_VERSION=${OS_VERSION:-$LOCAL_OS_VERSION}
94148

95149
generate_reboot_hint_marker() {
96150
local -i reboot_hint=0
97151
local -i retval=0
98-
152+
# shellcheck disable=SC2155
99153
local post_restart_services=$($NEEDS_RESTARTING_COMMAND -s | xargs)
100154
local failed_services=()
101155
for SERVICE in $post_restart_services; do
@@ -106,6 +160,8 @@ generate_reboot_hint_marker() {
106160
fi
107161
done
108162

163+
count_post_restart_health || retval=$?
164+
109165
local reboothint_separator=""
110166

111167
# Consistency is key, that's why the output of needs-restarting --reboothint has different styles for yum & dnf (output for glibc):
@@ -117,11 +173,12 @@ generate_reboot_hint_marker() {
117173
reboothint_separator="*"
118174
else
119175
CRIT "ERROR: Could not determine OS. I won't create a reboot hint marker"
120-
exit 1
176+
return 1
121177
fi
122178

123179
# Those are the packages `needs-restarting` is scanning for. We're going to ignore the ones we know we can't restart
124180
# ['kernel', 'kernel-rt', 'glibc', 'linux-firmware', 'systemd', 'udev', 'openssl-libs', 'gnutls', 'dbus']
181+
# shellcheck disable=SC2155
125182
local updated_components=$($NEEDS_RESTARTING_COMMAND --reboothint | grep -v "glibc\|systemd\|openssl-libs\|gnutls\|dbus\|udev" | grep -- "${reboothint_separator}")
126183
# At this point $updated_components should only report in case kernel* or linux-* was updated.
127184

@@ -130,6 +187,19 @@ generate_reboot_hint_marker() {
130187
DBG "Encountered updates we cannot restart without a reboot: $updated_components"
131188
fi
132189

190+
if [[ "$PRE_RESTART_HEALTHY" != "$POST_RESTART_HEALTHY" ]]; then
191+
reboot_hint=1
192+
# shellcheck disable=SC2048
193+
for SERVICE in ${SERVICES[*]}; do
194+
$SYSCTL_COMMAND status "$SERVICE"
195+
196+
if [[ $? != 0 ]]; then
197+
198+
CRIT "Service \"${SERVICE}\" failed to start again."
199+
fi
200+
done
201+
fi
202+
133203

134204
if [[ $reboot_hint == 1 ]]; then
135205
mkdir -p "$REBOOT_HINT_PATH"
@@ -144,9 +214,11 @@ generate_reboot_hint_marker() {
144214
}
145215

146216
if [[ -z "$IS_TESTING" ]]; then
217+
assert_root
147218
assemble_service_list
219+
count_pre_restart_health
148220
execute_pre_hooks
149221
restart_services
150222
execute_post_hooks
151-
generate_reboot_hint_marker
152-
fi
223+
generate_reboot_hint_marker || exit $?
224+
fi

tests/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
.PHONY: all
22

33
all:
4+
./test-common.sh
45
./test-reboot-hint.sh
56
./test-hooks.sh
67
./test-restart.sh

tests/mocks/needs-restarting

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
#!/usr/bin/env bash
22

33
S=""
4-
readonly OS_VERSION=$(cut -d ":" -f6 /etc/system-release-cpe)
4+
# Required for testing on an Ubuntu-based machine, specifically on
5+
# GitHub-hosted test runners.
6+
LOCAL_OS_VERSION=$(cut -d ":" -f6 /etc/system-release-cpe)
7+
OS_VERSION=${OS_VERSION:-$LOCAL_OS_VERSION}
58

69
if [[ "$OS_VERSION" -eq "2" ]]; then
710
S="->"

tests/mocks/systemctl

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,78 @@
11
#!/usr/bin/env bash
22

3+
SERVICE_STATUS_RUNNING="● chronicled.service - chronicled
4+
Loaded: loaded (/usr/lib/systemd/system/chronicled.service; enabled; vendor preset: disabled)
5+
Active: active (running) since Thu 2023-11-16 09:22:15 CET; 2 weeks 1 days ago
6+
Main PID: 10200 (chronicled)
7+
Tasks: 37
8+
Memory: 256.7M
9+
CGroup: /system.slice/chronicled.service
10+
└─10200 /usr/local/chronicle/sbin/chronicled
11+
12+
Dec 01 10:06:19 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:4] Unknow...it'
13+
Dec 01 10:06:19 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:5] Unknow...it'
14+
Dec 01 10:06:19 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:4] Unknow...it'
15+
Dec 01 10:06:19 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:5] Unknow...it'
16+
Dec 01 10:29:42 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:4] Unknow...it'
17+
Dec 01 10:29:42 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:5] Unknow...it'
18+
Dec 01 10:29:42 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:4] Unknow...it'
19+
Dec 01 10:29:42 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:5] Unknow...it'
20+
Dec 01 10:29:42 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:4] Unknow...it'
21+
Dec 01 10:29:42 HOST systemd[1]: [/usr/lib/systemd/system/chronicled.service:5] Unknow...it'
22+
Hint: Some lines were ellipsized, use -l to show in full.
23+
"
24+
25+
SERVICE_STATUS_FAILED="● network.service - LSB: Bring up/down networking
26+
Loaded: loaded (/etc/rc.d/init.d/network; bad; vendor preset: disabled)
27+
Active: failed (Result: timeout) since Thu 2023-11-16 09:22:14 CET; 2 weeks 1 days ago
28+
Docs: man:systemd-sysv-generator(8)
29+
CGroup: /system.slice/network.service
30+
├─10737 /sbin/dhclient -q -lf /var/lib/dhclient/dhclient--eth0.lease -pf /var/run/dhclient-eth0.pid -H dev-dsk-suschako-1a-...
31+
└─10789 /sbin/dhclient -6 -nw -lf /var/lib/dhclient/dhclient6--eth0.lease -pf /var/run/dhclient6-eth0.pid eth0 -H dev-dsk-s...
32+
33+
Dec 01 10:46:41 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10789]: XMT: Solicit on eth0, interval 123790ms.
34+
Dec 01 10:48:45 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10789]: XMT: Solicit on eth0, interval 110330ms.
35+
Dec 01 10:50:35 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10789]: XMT: Solicit on eth0, interval 124670ms.
36+
Dec 01 10:52:40 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10789]: XMT: Solicit on eth0, interval 118210ms.
37+
Dec 01 10:54:38 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10789]: XMT: Solicit on eth0, interval 130040ms.
38+
Dec 01 10:56:48 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10789]: XMT: Solicit on eth0, interval 125680ms.
39+
Dec 01 10:58:01 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10737]: DHCPREQUEST on eth0 to 10.15.96.1 port 67 (xid=0...74)
40+
Dec 01 10:58:01 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10737]: DHCPACK from 10.15.96.1 (xid=0x66783e74)
41+
Dec 01 10:58:01 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10737]: bound to 10.15.105.243 -- renewal in 1676 seconds.
42+
Dec 01 10:58:54 dev-dsk-suschako-1a-32125e6f.eu-west-1.amazon.com dhclient[10789]: XMT: Solicit on eth0, interval 130260ms.
43+
Hint: Some lines were ellipsized, use -l to show in full.
44+
"
45+
46+
47+
48+
DBG() { [[ "$DEBUG" != "" ]] && >&2 echo "$@"; }
49+
50+
DBG "systemctl-mock invoced with: ${@}"
51+
352
if [[ "$1" == "daemon-reexec" ]]; then
453
[[ $# == 1 ]] && exit 0
5-
[[ "$DEBUG" != "" ]] && echo "systemd daemon-reexec called incorrectly: (No params: $# -> $@)"
54+
DBG "systemd daemon-reexec called incorrectly: (No params: $# -> $@)"
655
exit 1
756
elif [[ "$1" == "restart" ]]; then
8-
[[ "$DEBUG" != "" ]] && echo "(No. params: $# -> ${@})"
957
if [[ $SYS_EXPECT_2 == 1 ]]; then
1058
[[ $# == 3 ]] && [[ "$2" == "dummy.service" ]] && [[ "$3" == "dummy2.service" ]] && exit 0
11-
12-
exit 98
1359
elif [[ $SYS_EXPECT_1 == 1 ]]; then
1460
[[ $# == 2 ]] && [[ "$2" == "dummy2.service" ]] && exit 0
15-
16-
exit 98
1761
elif [[ $SYS_EXPECT_0 == 1 ]]; then
1862
[[ $# == 1 ]] && exit 0;
1963
else
2064
exit 98
2165
fi
66+
elif [[ "$1" == "status" ]]; then
67+
if [[ $SYS_RESTART_FAILED == "1" ]]; then
68+
echo "$SERVICE_STATUS_FAILED"
69+
exit 3
70+
else
71+
echo "$SERVICE_STATUS_RUNNING"
72+
exit 0
73+
fi
2274
else
23-
echo "systemd called incorrectly: (No params: $# -> $@)"
24-
exit 1
75+
DBG "systemd called incorrectly: (No params: $# -> $@)"
2576
fi
2677

27-
exit 98
78+
exit 98

tests/setup_test

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# This file needs to be sourced in all tests. Additionally, `TEST_NAME` can be set:
66
# TEST_NAME="MY NEW TEST"
77
# . "$(pwd)"/setup_test
8+
# Also, to reset the state before test execution, call reset_test_environment in every test.
89

910
RED='\033[1;91m'
1011
GREEN='\033[1;92m'
@@ -28,12 +29,22 @@ echo -e "=========================== ${YELLOW}$TEST_NAME${COLOR_OFF} started "==
2829

2930
. $UUT
3031

32+
# We are testing sourced bash scripts here, so global variables keep their values between tests.
33+
# reset_test_environment resets them so that every test starts from a clean state.
34+
reset_test_environment() {
35+
SERVICES=()
36+
BLOCKED_SERVICES=()
37+
PRE_RESTART_HEALTHY="0"
38+
POST_RESTART_HEALTHY="0"
39+
echo "" > conf/default-denylist
40+
echo "" > conf/custom-denylist
41+
rm -rf "$(pwd)/reboot-hint-marker"
42+
}
43+
3144
PASSED() {
3245
echo -e " test ${TEST_NAME} [${GREEN}PASSED${COLOR_OFF}]: $1"
3346
}
3447

3548
FAILED() {
3649
echo -e " test ${TEST_NAME} [${RED}FAILED${COLOR_OFF}]: $1"
3750
}
38-
39-

tests/test-common.sh

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/usr/bin/env bash
2+
3+
# This is the place to test common functionality that does not fit into any other test AND
4+
# does not warrant a test file of its own.
5+
6+
# cannot follow "$(pwd)/setup_test"
7+
# shellcheck disable=SC1091
8+
9+
# Unused variables like TEST_NAME
10+
# shellcheck disable=SC2034
11+
TEST_NAME="Common"
12+
. "$(pwd)"/setup_test
13+
14+
function test_assert_root() {
15+
DESCRIPTION="Root assert fails for user"
16+
reset_test_environment
17+
# Need a subshell here since assert_root exits instead of returning
18+
(assert_root) || retval=$?
19+
20+
if [[ $retval != 0 ]]; then
21+
PASSED "$DESCRIPTION"
22+
else
23+
FAILED "$DESCRIPTION (error: $retval)"
24+
fi
25+
}
26+
27+
test_assert_root

0 commit comments

Comments
 (0)