|
1 |
| -use std::fs::File; |
| 1 | +use std::fs::{File, Permissions}; |
2 | 2 | use std::io::{BufRead, BufReader, Seek};
|
| 3 | +use std::os::fd::AsFd; |
| 4 | +use std::os::unix::fs::{FileTypeExt, MetadataExt, PermissionsExt}; |
3 | 5 | use std::path::Path;
|
4 | 6 |
|
5 | 7 | use anyhow::{bail, Context, Result};
|
6 |
| -use rustix::fs::{FileType, Mode}; |
| 8 | +use rustix::fs::{FileType, Mode, UnmountFlags}; |
| 9 | +use rustix::mount::{FsMountFlags, FsOpenFlags, MountAttrFlags, MoveMountFlags}; |
7 | 10 | use rustix::process::{Pid, Signal};
|
8 | 11 | use tokio::io::unix::AsyncFd;
|
9 | 12 | use tokio::io::Interest;
|
@@ -89,13 +92,113 @@ impl Container {
|
89 | 92 | Box::new(DeviceAccessControllerV2::new(&state.cgroup_paths.unified)?)
|
90 | 93 | };
|
91 | 94 |
|
92 |
| - Ok(Self { |
| 95 | + let container = Self { |
93 | 96 | uid: config.process.user.uid,
|
94 | 97 | gid: config.process.user.gid,
|
95 | 98 | pid: Pid::from_raw(state.init_process_pid.try_into()?).context("Invalid PID")?,
|
96 | 99 | wait: recv,
|
97 | 100 | cgroup_device_filter: Mutex::new(cgroup_device_filter),
|
98 |
| - }) |
| 101 | + }; |
| 102 | + |
| 103 | + container.remount_dev()?; |
| 104 | + |
| 105 | + Ok(container) |
| 106 | + } |
| 107 | + |
| 108 | + /// Remount /dev inside the init namespace. |
| 109 | + /// |
| 110 | + /// When user namespace is used, the /dev created by runc will be mounted inside the user namespace, |
| 111 | + /// and will automatically gain SB_I_NODEV flag as a kernel security measure. |
| 112 | + /// |
| 113 | + /// This is doing no favour for us because that flag will cause device node within it to be unopenable. |
| 114 | + fn remount_dev(&self) -> Result<()> { |
| 115 | + let ns = crate::util::namespace::MntNamespace::of_pid(self.pid)?; |
| 116 | + if !ns.in_user_ns() { |
| 117 | + return Ok(()); |
| 118 | + } |
| 119 | + |
| 120 | + log::info!("Remount /dev to allow device node access"); |
| 121 | + |
| 122 | + // Create a tmpfs and mount in the init namespace. |
| 123 | + // Note that while we have "mounted" it, it is not associated with any mount point yet. |
| 124 | + // The actual mounting will happen after we moved into the mount namespace. |
| 125 | + let dev_fs = rustix::mount::fsopen("tmpfs", FsOpenFlags::empty())?; |
| 126 | + rustix::mount::fsconfig_create(dev_fs.as_fd())?; |
| 127 | + let dev_mnt = rustix::mount::fsmount( |
| 128 | + dev_fs.as_fd(), |
| 129 | + FsMountFlags::FSMOUNT_CLOEXEC, |
| 130 | + MountAttrFlags::empty(), |
| 131 | + )?; |
| 132 | + |
| 133 | + ns.enter(|| -> Result<_> { |
| 134 | + // Don't interfere us setting the desired mode! |
| 135 | + rustix::process::umask(Mode::empty()); |
| 136 | + |
| 137 | + // Move the existing mount elsewhere. |
| 138 | + std::fs::create_dir("/olddev")?; |
| 139 | + rustix::mount::mount_move("/dev", "/olddev")?; |
| 140 | + |
| 141 | + // Move to our newly created `/dev` mount. |
| 142 | + rustix::mount::move_mount( |
| 143 | + dev_mnt.as_fd(), |
| 144 | + "", |
| 145 | + rustix::fs::CWD, |
| 146 | + "/dev", |
| 147 | + MoveMountFlags::MOVE_MOUNT_F_EMPTY_PATH, |
| 148 | + )?; |
| 149 | + |
| 150 | + // Make sure the /dev is now owned by the container root not host root. |
| 151 | + std::os::unix::fs::chown("/dev", Some(ns.uid(0)?), Some(ns.gid(0)?))?; |
| 152 | + std::fs::set_permissions("/dev", Permissions::from_mode(0o755))?; |
| 153 | + |
| 154 | + for file in std::fs::read_dir("/olddev")? { |
| 155 | + let file = file?; |
| 156 | + let metadata = file.metadata()?; |
| 157 | + let new_path = Path::new("/dev").join(file.file_name()); |
| 158 | + |
| 159 | + if file.file_name() == "console" { |
| 160 | + // `console` is special, it's a file but it should be bind-mounted. |
| 161 | + drop( |
| 162 | + std::fs::OpenOptions::new() |
| 163 | + .create(true) |
| 164 | + .write(true) |
| 165 | + .open(&new_path)?, |
| 166 | + ); |
| 167 | + rustix::mount::mount_move(file.path(), new_path)?; |
| 168 | + } else if metadata.file_type().is_dir() { |
| 169 | + // This is a mount point, e.g. pts, mqueue, shm. |
| 170 | + std::fs::create_dir(&new_path)?; |
| 171 | + rustix::mount::mount_move(file.path(), new_path)?; |
| 172 | + } else if metadata.file_type().is_symlink() { |
| 173 | + // Recreate symlinks |
| 174 | + let target = std::fs::read_link(file.path())?; |
| 175 | + std::os::unix::fs::symlink(target, new_path)?; |
| 176 | + } else if metadata.file_type().is_char_device() { |
| 177 | + // Recreate device |
| 178 | + let dev = metadata.rdev(); |
| 179 | + rustix::fs::mknodat( |
| 180 | + rustix::fs::CWD, |
| 181 | + &new_path, |
| 182 | + FileType::CharacterDevice, |
| 183 | + Mode::from_raw_mode(metadata.mode()), |
| 184 | + dev, |
| 185 | + )?; |
| 186 | + |
| 187 | + // The old file might be a bind mount. Try umount it. |
| 188 | + let _ = rustix::mount::unmount(file.path(), UnmountFlags::DETACH); |
| 189 | + } else { |
| 190 | + anyhow::bail!("Unknown file present in /dev"); |
| 191 | + } |
| 192 | + } |
| 193 | + |
| 194 | + // Now we have moved everything to the new /dev, obliterate the old one. |
| 195 | + rustix::mount::unmount("/olddev", UnmountFlags::DETACH)?; |
| 196 | + std::fs::remove_dir("/olddev")?; |
| 197 | + |
| 198 | + Ok(()) |
| 199 | + })??; |
| 200 | + |
| 201 | + Ok(()) |
99 | 202 | }
|
100 | 203 |
|
101 | 204 | pub async fn kill(&self, signal: Signal) -> Result<()> {
|
|
0 commit comments