1
1
use std:: fs:: File ;
2
2
use std:: os:: fd:: AsFd ;
3
+ use std:: path:: Path ;
3
4
4
- use anyhow:: Result ;
5
+ use anyhow:: { Context , Result } ;
6
+ use rustix:: fs:: { Gid , Uid } ;
5
7
use rustix:: process:: Pid ;
6
- use rustix:: thread:: { LinkNameSpaceType , UnshareFlags } ;
8
+ use rustix:: thread:: { CapabilitiesSecureBits , LinkNameSpaceType , UnshareFlags } ;
9
+
10
+ pub struct IdMap {
11
+ map : Vec < ( u32 , u32 , u32 ) > ,
12
+ }
13
+
14
+ impl IdMap {
15
+ fn read ( path : & Path ) -> Result < Self > {
16
+ Self :: parse ( & std:: fs:: read_to_string ( path) ?)
17
+ }
18
+
19
+ fn parse ( content : & str ) -> Result < Self > {
20
+ let mut map = Vec :: new ( ) ;
21
+ for line in content. lines ( ) {
22
+ let mut words = line. split_ascii_whitespace ( ) ;
23
+ let inside = words. next ( ) . context ( "unexpected id_map" ) ?. parse ( ) ?;
24
+ let outside = words. next ( ) . context ( "unexpected id_map" ) ?. parse ( ) ?;
25
+ let count = words. next ( ) . context ( "unexpected id_map" ) ?. parse ( ) ?;
26
+ map. push ( ( inside, outside, count) ) ;
27
+ }
28
+ Ok ( Self { map } )
29
+ }
30
+
31
+ fn translate ( & self , id : u32 ) -> Option < u32 > {
32
+ for & ( inside, outside, count) in self . map . iter ( ) {
33
+ if ( inside..inside. checked_add ( count) ?) . contains ( & id) {
34
+ return ( id - inside) . checked_add ( outside) ;
35
+ }
36
+ }
37
+ None
38
+ }
39
+ }
7
40
8
41
pub struct MntNamespace {
9
- fd : File ,
42
+ mnt_fd : File ,
43
+ uid_map : IdMap ,
44
+ gid_map : IdMap ,
10
45
}
11
46
12
47
impl MntNamespace {
13
48
/// Open the mount namespace of a process.
14
49
pub fn of_pid ( pid : Pid ) -> Result < MntNamespace > {
15
- let path = format ! ( "/proc/{}/ns/mnt" , pid. as_raw_nonzero( ) ) ;
16
- let fd = File :: open ( path) ?;
17
- Ok ( MntNamespace { fd } )
50
+ let mnt_fd = File :: open ( format ! ( "/proc/{}/ns/mnt" , pid. as_raw_nonzero( ) ) ) ?;
51
+ let uid_map = IdMap :: read ( format ! ( "/proc/{}/uid_map" , pid. as_raw_nonzero( ) ) . as_ref ( ) ) ?;
52
+ let gid_map = IdMap :: read ( format ! ( "/proc/{}/gid_map" , pid. as_raw_nonzero( ) ) . as_ref ( ) ) ?;
53
+ Ok ( MntNamespace {
54
+ mnt_fd,
55
+ uid_map,
56
+ gid_map,
57
+ } )
58
+ }
59
+
60
+ /// Translate user ID into a UID in the namespace.
61
+ pub fn uid ( & self , uid : u32 ) -> Result < u32 > {
62
+ Ok ( self . uid_map . translate ( uid) . context ( "UID overflows" ) ?)
63
+ }
64
+
65
+ /// Translate group ID into a GID in the namespace.
66
+ pub fn gid ( & self , gid : u32 ) -> Result < u32 > {
67
+ Ok ( self . gid_map . translate ( gid) . context ( "GID overflows" ) ?)
18
68
}
19
69
20
70
/// Enter the mount namespace.
@@ -30,9 +80,35 @@ impl MntNamespace {
30
80
31
81
// Switch this particular thread to the container's mount namespace.
32
82
rustix:: thread:: move_into_link_name_space (
33
- self . fd . as_fd ( ) ,
83
+ self . mnt_fd . as_fd ( ) ,
34
84
Some ( LinkNameSpaceType :: Mount ) ,
35
85
) ?;
86
+
87
+ // If a user namespace is used, we must act like the root user *inside* the
88
+ // namespace to be able to create files properly (otherwise EOVERFLOW
89
+ // will be returned when creating a file).
90
+ //
91
+ // Entering the user namespace turns out to be problematic.
92
+ // The reason seems to be this line [1]:
93
+ // which means `CAP_MKNOD` capability of the *init* namespace is needed.
94
+ // However task's associated security context is all relative to its current
95
+ // user namespace [2], so once you enter a user namespace there's no way of getting
96
+ // back `CAP_MKNOD` of the init namespace anymore.
97
+ // (Yes this means that even if CAP_MKNOD is granted to the container, you cannot
98
+ // create device nodes within it.)
99
+ //
100
+ // [1]: https://elixir.bootlin.com/linux/v6.11.1/source/fs/namei.c#L4073
101
+ // [2]: https://elixir.bootlin.com/linux/v6.11.1/source/include/linux/cred.h#L111
102
+
103
+ // By default `setuid` will drop capabilities when transitioning from root
104
+ // to a non-root user. This bit prevents that, so our code keeps its superpowers.
105
+ rustix:: thread:: set_capabilities_secure_bits (
106
+ CapabilitiesSecureBits :: NO_SETUID_FIXUP ,
107
+ ) ?;
108
+
109
+ rustix:: thread:: set_thread_uid ( unsafe { Uid :: from_raw ( self . uid ( 0 ) ?) } ) ?;
110
+ rustix:: thread:: set_thread_gid ( unsafe { Gid :: from_raw ( self . gid ( 0 ) ?) } ) ?;
111
+
36
112
Ok ( f ( ) )
37
113
} )
38
114
. join ( )
0 commit comments