-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsysbox.sh
executable file
·318 lines (263 loc) · 8.71 KB
/
sysbox.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
#!/bin/bash -e
# NOTE: This file has been taken from https://github.com/nestybox/sysbox/blob/master/sysbox-in-docker/sysbox
#
# Launch Sysbox (requires root privileges)
#
# Note: normally Sysbox is installed via a distro-specific package
# which sets up the Sysbox systemd units. This script is meant as a
# quick alternative to the installer to help launch Sysbox manually
# (e.g. for development & testing).
#
# Usage: ./sysbox [testing-on]
#
# Note: "testing-on" attribute is useful to launch Sysbox inside a privileged
# container. In this context Sysbox may be unable to reach certain procfs
# nodes, due to kernel not exposing them in non-init namespaces. This flag
# alerts Sysbox of the need to obviate these potential errors when running
# in testing mode.
#
# Max number of user-namespaces to configure in distros supporting this knob.
sysbox_max_user_ns=10000
# Default mtu value associated to container's egress-interface.
default_mtu=1500
# UID-shifting module
shiftfs_module="shiftfs"
# Dockerd default configuration dir/file.
dockerCfgDir="/etc/docker"
dockerCfgFile="${dockerCfgDir}/daemon.json"
# Temp file for jq write operations.
tmpfile=$(mktemp /tmp/init-scr.XXXXXX)
trap 'rm -f "${tmpfile}"' EXIT
# Retry a command $1 times until it succeeds. Wait $2 seconds between retries.
# (copied from runc/tests/integration/helpers.bash)
function retry() {
local attempts=$1
shift
local delay=$1
shift
local i
for ((i = 0; i < attempts; i++)); do
$@
if [ "$?" -eq 0 ]; then
return 0
fi
sleep $delay
done
echo "Command \"$@\" failed $attempts times. Output: $?"
false
}
# Ensure that kernel-modules expected by system-level apps (running within sys
# containers) are properly loaded.
function load_required_modules() {
# Sysbox requires 'configfs' module to ensure proper operation of containerized
# apps requiring access to kernel's config file (e.g. kubeadm).
if ! modprobe configfs &> /dev/null; then
echo "Could not load configfs kernel module. Exiting ..."
return 1
fi
return 0
}
# Returns linux distro running in the system.
function get_host_distro() {
local distro=$(cat /etc/os-release | awk -F"=" '/^ID=/ {print $2}' | tr -d '"')
echo $distro
}
# Ensures user "sysbox" is present on the host
function setup_sysbox_user() {
local exit_code=0
grep -q "^sysbox:" /etc/passwd || exit_code=$?
if (( $exit_code == 1 )); then
useradd -r -s /usr/sbin/nologin sysbox
fi
}
# Checks if Sysbox is running on a supported distro
function check_distro() {
local distro=$1
if [[ "${distro}" != "ubuntu" ]] &&
[[ "${distro}" != "debian" ]] &&
[[ "${distro}" != "centos" ]] &&
[[ "${distro}" != "fedora" ]]; then
echo "Unsupported Linux distribution: $distro. Sysbox may not operate as expected."
fi
}
# Ensures unprivileged user-ns's are allowed.
function userns_setup() {
local distro=$1
if [[ "${distro}" == "ubuntu" ]] ||
[[ "${distro}" == "debian" ]]; then
echo "1" > /proc/sys/kernel/unprivileged_userns_clone
fi
# Setting user-ns max value.
max_user_ns=$(</proc/sys/user/max_user_namespaces)
if [[ $max_user_ns =~ ^[0-9]+$ ]] && [[ $max_user_ns -lt $sysbox_max_user_ns ]]; then
echo $sysbox_max_user_ns > /proc/sys/user/max_user_namespaces
fi
}
# Identifies kernel-header's expected path based on distro.
function kernel_header_path() {
local distro=$1
local path
if [[ "${distro}" == "centos" ]] ||
[[ "${distro}" == "fedora" ]] ||
[[ "${distro}" == "redhat" ]]; then
path="/usr/src/kernels/$(uname -r)"
else
path="/usr/src/linux-headers-$(uname -r)"
fi
echo "$path"
}
# Verifies that a kernel configuration file is found, and if that's not the case,
# copy it from the original "/boot" folder. Notice that this may not work when
# running sysbox within a test-priv container, as "/boot" folder may not be around;
# in those cases initialization will proceed as normal and a log will be dumped to
# alert the user.
function kernel_config_setup() {
local distro=$1
local kernel_path=$(kernel_header_path $distro)
if [[ ! -f "${kernel_path}"/.config ]]; then
# Attempt to find kernel config in /boot path.
if [[ -d /boot ]] && [[ -f /boot/config-$(uname -r) ]]; then
cp /boot/config-$(uname -r) "${kernel_path}"/.config
return
fi
echo -e "\nUnable to find a kernel config file. This may affect some system" \
"level apps running within sys-containers. As a solution, identify your" \
"kernel config file in the host (typically: \"/boot/config-$(uname -r)\")" \
"and copy it into your distro's expected kernel-headers path" \
"(usually: \"$(kernel_header_path $distro)\").\n"
fi
}
# Obtain the MTU value to be configured for the docker interface within the test
# container. This value shall be the lowest among these ...
#
# * The privileged-container's egress interface (host-facing iface).
# * The 'default' mtu value (1500 bytes) supported by most NICs.
function docker_iface_mtu() {
# Identify default egress iface.
local egress_iface=$(ip route show | awk '/default via/ {print $5}')
if [ -z "${egress_iface}" ]; then
return
fi
# Obtain mtu value associated to the privileged-container's egress interface.
local egress_mtu=$(ip link show ${egress_iface} | awk '/mtu / {print $5}')
if [ ! -z "${egress_mtu}" ] &&
[ "${egress_mtu}" -lt "${default_mtu}" ]; then
echo ${egress_mtu}
else
echo ${default_mtu}
fi
}
# Create docker configuration.
function docker_setup() {
# Configure dockerd
mkdir -p "${dockerCfgDir}"
# If shiftfs module is not present then consider configuring docker in userns
# mode.
if ! modprobe "${shiftfs_module}" &> /dev/null ; then
cat <<EOF > "${dockerCfgFile}"
{
"debug": false,
"userns-remap": "sysbox",
"runtimes": {
"sysbox-runc": {
"path": "/usr/bin/sysbox-runc"
}
},
"bip": "172.25.0.1/16",
"default-address-pools": [
{
"base": "172.32.0.0/16",
"size": 24
}
]
}
EOF
else
cat <<EOF > "${dockerCfgFile}"
{
"debug": false,
"userns-remap": "",
"runtimes": {
"sysbox-runc": {
"path": "/usr/bin/sysbox-runc",
"runtimeArgs": [
"--no-kernel-check"
]
}
},
"bip": "172.25.0.1/16",
"default-address-pools": [
{
"base": "172.32.0.0/16",
"size": 24
}
]
}
EOF
fi
# Adjust docker's interface mtu configuration. This is required to avoid forwarding issues
# in containers seating behind an egress-interface with lower-than-default mtu.
egress_mtu=$(docker_iface_mtu)
if [ ! -z "${egress_mtu}" ] && [ "${egress_mtu}" -ne "${default_mtu}" ]; then
jq --arg mtu "${egress_mtu}" --indent 4 '. + {"mtu": $mtu|tonumber}' "${dockerCfgFile}" \
> ${tmpfile} && cp ${tmpfile} "${dockerCfgFile}"
fi
}
# Increases system-level resources to satisfy Sysbox requirements.
function maxs_setup() {
# Increase default inotify resources to meet sys container's demands.
sysctl -w fs.inotify.max_queued_events=1048576 &> /dev/null
sysctl -w fs.inotify.max_user_watches=1048576 &> /dev/null
sysctl -w fs.inotify.max_user_instances=1048576 &> /dev/null
# Increase default keyring resources to meet sys container demands.
# For a k8s cluster:
# keys = 35 + (workers * 23) + (2 * pods)
# maxbytes = 20 bytes * maxkeys
sysctl -w kernel.keys.maxkeys=20000 &> /dev/null
sysctl -w kernel.keys.maxbytes=$((20*20000)) &> /dev/null
}
# Sysbox performs certain operations that require read-write access to sysfs. In
# the scenario where this script executes (i.e. privileged containers), sysfs is
# usually mounted in read-write mode, but there is one bug (apparently in
# containerd -- see https://github.com/containerd/containerd/issues/3221), that
# prevents this from happening. This function ensures that sysfs is properly
# mounted in sysbox-in-docker scenarios.
function sysfs_mount_setup() {
if mount | egrep -q "/sys type sysfs.*ro" && mount -o remount,rw /sys; then
printf "\nUnable to remount /sys in read-write mode.\n"
exit 1
fi
}
# Completes Sysbox's setup process.
function sysbox_setup() {
local distro=$(get_host_distro)
check_distro "$distro"
if ! load_required_modules; then
exit 1
fi
setup_sysbox_user
userns_setup "$distro"
kernel_config_setup "$distro"
sysfs_mount_setup
docker_setup
maxs_setup
}
# Start Docker.
function docker_start() {
if ! pidof dockerd; then
dockerd > /var/log/dockerd.log 2>&1 &
RES=$(retry 10 1 docker ps > /dev/null 2>&1)
if [ $? -ne 0 ]; then
printf "\dockerd failed to start.\n"
exit 1
fi
fi
}
function main() {
if [ "$EUID" -ne 0 ]; then
echo "Please run as root."
exit
fi
sysbox_setup
}
main "$@"