|
| 1 | +From 7362cd5afe9d40131fb62cb075092025c7c71064 Mon Sep 17 00:00:00 2001 |
| 2 | +From: "hang.jiang" <hang.jiang@daocloud.io> |
| 3 | +Date: Fri, 1 Sep 2023 16:17:13 +0800 |
| 4 | +Subject: [runc v1.1.z PATCH 1/6] Fix File to Close |
| 5 | + |
| 6 | +(This is a cherry-pick of 937ca107c3d22da77eb8e8030f2342253b980980.) |
| 7 | + |
| 8 | +Signed-off-by: hang.jiang <hang.jiang@daocloud.io> |
| 9 | +Fixes: GHSA-xr7r-f8xq-vfvv CVE-2024-21626 |
| 10 | +Signed-off-by: Aleksa Sarai <cyphar@cyphar.com> |
| 11 | +--- |
| 12 | + libcontainer/cgroups/fs/paths.go | 1 + |
| 13 | + update.go | 1 + |
| 14 | + 2 files changed, 2 insertions(+) |
| 15 | + |
| 16 | +diff --git a/libcontainer/cgroups/fs/paths.go b/libcontainer/cgroups/fs/paths.go |
| 17 | +index 1092331b25d8..2cb970a3d55b 100644 |
| 18 | +--- a/libcontainer/cgroups/fs/paths.go |
| 19 | ++++ b/libcontainer/cgroups/fs/paths.go |
| 20 | +@@ -83,6 +83,7 @@ func tryDefaultCgroupRoot() string { |
| 21 | + if err != nil { |
| 22 | + return "" |
| 23 | + } |
| 24 | ++ defer dir.Close() |
| 25 | + names, err := dir.Readdirnames(1) |
| 26 | + if err != nil { |
| 27 | + return "" |
| 28 | +diff --git a/update.go b/update.go |
| 29 | +index 9ce5a2e835b2..6d582ddddecb 100644 |
| 30 | +--- a/update.go |
| 31 | ++++ b/update.go |
| 32 | +@@ -174,6 +174,7 @@ other options are ignored. |
| 33 | + if err != nil { |
| 34 | + return err |
| 35 | + } |
| 36 | ++ defer f.Close() |
| 37 | + } |
| 38 | + err = json.NewDecoder(f).Decode(&r) |
| 39 | + if err != nil { |
| 40 | +-- |
| 41 | +2.43.0 |
| 42 | + |
| 43 | +From 2ed79a6c91e56dcd2d1da47f8ffd663066153746 Mon Sep 17 00:00:00 2001 |
| 44 | +From: Aleksa Sarai <cyphar@cyphar.com> |
| 45 | +Date: Tue, 26 Dec 2023 23:53:07 +1100 |
| 46 | +Subject: [runc v1.1.z PATCH 2/6] init: verify after chdir that cwd is inside |
| 47 | + the container |
| 48 | + |
| 49 | +If a file descriptor of a directory in the host's mount namespace is |
| 50 | +leaked to runc init, a malicious config.json could use /proc/self/fd/... |
| 51 | +as a working directory to allow for host filesystem access after the |
| 52 | +container runs. This can also be exploited by a container process if it |
| 53 | +knows that an administrator will use "runc exec --cwd" and the target |
| 54 | +--cwd (the attacker can change that cwd to be a symlink pointing to |
| 55 | +/proc/self/fd/... and wait for the process to exec and then snoop on |
| 56 | +/proc/$pid/cwd to get access to the host). The former issue can lead to |
| 57 | +a critical vulnerability in Docker and Kubernetes, while the latter is a |
| 58 | +container breakout. |
| 59 | + |
| 60 | +We can (ab)use the fact that getcwd(2) on Linux detects this exact case, |
| 61 | +and getcwd(3) and Go's Getwd() return an error as a result. Thus, if we |
| 62 | +just do os.Getwd() after chdir we can easily detect this case and error |
| 63 | +out. |
| 64 | + |
| 65 | +In runc 1.1, a /sys/fs/cgroup handle happens to be leaked to "runc |
| 66 | +init", making this exploitable. On runc main it just so happens that the |
| 67 | +leaked /sys/fs/cgroup gets clobbered and thus this is only consistently |
| 68 | +exploitable for runc 1.1. |
| 69 | + |
| 70 | +Fixes: GHSA-xr7r-f8xq-vfvv CVE-2024-21626 |
| 71 | +Co-developed-by: lifubang <lifubang@acmcoder.com> |
| 72 | +Signed-off-by: lifubang <lifubang@acmcoder.com> |
| 73 | +[refactored the implementation and added more comments] |
| 74 | +Signed-off-by: Aleksa Sarai <cyphar@cyphar.com> |
| 75 | +--- |
| 76 | + libcontainer/init_linux.go | 31 ++++++++++++++++++++++++ |
| 77 | + libcontainer/integration/seccomp_test.go | 20 +++++++-------- |
| 78 | + 2 files changed, 41 insertions(+), 10 deletions(-) |
| 79 | + |
| 80 | +diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go |
| 81 | +index 5b88c71fc83a..057b30669811 100644 |
| 82 | +--- a/libcontainer/init_linux.go |
| 83 | ++++ b/libcontainer/init_linux.go |
| 84 | +@@ -8,6 +8,7 @@ import ( |
| 85 | + "io" |
| 86 | + "net" |
| 87 | + "os" |
| 88 | ++ "path/filepath" |
| 89 | + "strings" |
| 90 | + "unsafe" |
| 91 | + |
| 92 | +@@ -135,6 +136,32 @@ func populateProcessEnvironment(env []string) error { |
| 93 | + return nil |
| 94 | + } |
| 95 | + |
| 96 | ++// verifyCwd ensures that the current directory is actually inside the mount |
| 97 | ++// namespace root of the current process. |
| 98 | ++func verifyCwd() error { |
| 99 | ++ // getcwd(2) on Linux detects if cwd is outside of the rootfs of the |
| 100 | ++ // current mount namespace root, and in that case prefixes "(unreachable)" |
| 101 | ++ // to the returned string. glibc's getcwd(3) and Go's Getwd() both detect |
| 102 | ++ // when this happens and return ENOENT rather than returning a non-absolute |
| 103 | ++ // path. In both cases we can therefore easily detect if we have an invalid |
| 104 | ++ // cwd by checking the return value of getcwd(3). See getcwd(3) for more |
| 105 | ++ // details, and CVE-2024-21626 for the security issue that motivated this |
| 106 | ++ // check. |
| 107 | ++ // |
| 108 | ++ // We have to use unix.Getwd() here because os.Getwd() has a workaround for |
| 109 | ++ // $PWD which involves doing stat(.), which can fail if the current |
| 110 | ++ // directory is inaccessible to the container process. |
| 111 | ++ if wd, err := unix.Getwd(); err == unix.ENOENT { |
| 112 | ++ return errors.New("current working directory is outside of container mount namespace root -- possible container breakout detected") |
| 113 | ++ } else if err != nil { |
| 114 | ++ return fmt.Errorf("failed to verify if current working directory is safe: %w", err) |
| 115 | ++ } else if !filepath.IsAbs(wd) { |
| 116 | ++ // We shouldn't ever hit this, but check just in case. |
| 117 | ++ return fmt.Errorf("current working directory is not absolute -- possible container breakout detected: cwd is %q", wd) |
| 118 | ++ } |
| 119 | ++ return nil |
| 120 | ++} |
| 121 | ++ |
| 122 | + // finalizeNamespace drops the caps, sets the correct user |
| 123 | + // and working dir, and closes any leaked file descriptors |
| 124 | + // before executing the command inside the namespace |
| 125 | +@@ -193,6 +220,10 @@ func finalizeNamespace(config *initConfig) error { |
| 126 | + return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err) |
| 127 | + } |
| 128 | + } |
| 129 | ++ // Make sure our final working directory is inside the container. |
| 130 | ++ if err := verifyCwd(); err != nil { |
| 131 | ++ return err |
| 132 | ++ } |
| 133 | + if err := system.ClearKeepCaps(); err != nil { |
| 134 | + return fmt.Errorf("unable to clear keep caps: %w", err) |
| 135 | + } |
| 136 | +diff --git a/libcontainer/integration/seccomp_test.go b/libcontainer/integration/seccomp_test.go |
| 137 | +index 31092a0a5d21..ecdfa7957df1 100644 |
| 138 | +--- a/libcontainer/integration/seccomp_test.go |
| 139 | ++++ b/libcontainer/integration/seccomp_test.go |
| 140 | +@@ -13,7 +13,7 @@ import ( |
| 141 | + libseccomp "github.com/seccomp/libseccomp-golang" |
| 142 | + ) |
| 143 | + |
| 144 | +-func TestSeccompDenyGetcwdWithErrno(t *testing.T) { |
| 145 | ++func TestSeccompDenySyslogWithErrno(t *testing.T) { |
| 146 | + if testing.Short() { |
| 147 | + return |
| 148 | + } |
| 149 | +@@ -25,7 +25,7 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) { |
| 150 | + DefaultAction: configs.Allow, |
| 151 | + Syscalls: []*configs.Syscall{ |
| 152 | + { |
| 153 | +- Name: "getcwd", |
| 154 | ++ Name: "syslog", |
| 155 | + Action: configs.Errno, |
| 156 | + ErrnoRet: &errnoRet, |
| 157 | + }, |
| 158 | +@@ -39,7 +39,7 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) { |
| 159 | + buffers := newStdBuffers() |
| 160 | + pwd := &libcontainer.Process{ |
| 161 | + Cwd: "/", |
| 162 | +- Args: []string{"pwd"}, |
| 163 | ++ Args: []string{"dmesg"}, |
| 164 | + Env: standardEnvironment, |
| 165 | + Stdin: buffers.Stdin, |
| 166 | + Stdout: buffers.Stdout, |
| 167 | +@@ -65,17 +65,17 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) { |
| 168 | + } |
| 169 | + |
| 170 | + if exitCode == 0 { |
| 171 | +- t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode) |
| 172 | ++ t.Fatalf("dmesg should fail with negative exit code, instead got %d!", exitCode) |
| 173 | + } |
| 174 | + |
| 175 | +- expected := "pwd: getcwd: No such process" |
| 176 | ++ expected := "dmesg: klogctl: No such process" |
| 177 | + actual := strings.Trim(buffers.Stderr.String(), "\n") |
| 178 | + if actual != expected { |
| 179 | + t.Fatalf("Expected output %s but got %s\n", expected, actual) |
| 180 | + } |
| 181 | + } |
| 182 | + |
| 183 | +-func TestSeccompDenyGetcwd(t *testing.T) { |
| 184 | ++func TestSeccompDenySyslog(t *testing.T) { |
| 185 | + if testing.Short() { |
| 186 | + return |
| 187 | + } |
| 188 | +@@ -85,7 +85,7 @@ func TestSeccompDenyGetcwd(t *testing.T) { |
| 189 | + DefaultAction: configs.Allow, |
| 190 | + Syscalls: []*configs.Syscall{ |
| 191 | + { |
| 192 | +- Name: "getcwd", |
| 193 | ++ Name: "syslog", |
| 194 | + Action: configs.Errno, |
| 195 | + }, |
| 196 | + }, |
| 197 | +@@ -98,7 +98,7 @@ func TestSeccompDenyGetcwd(t *testing.T) { |
| 198 | + buffers := newStdBuffers() |
| 199 | + pwd := &libcontainer.Process{ |
| 200 | + Cwd: "/", |
| 201 | +- Args: []string{"pwd"}, |
| 202 | ++ Args: []string{"dmesg"}, |
| 203 | + Env: standardEnvironment, |
| 204 | + Stdin: buffers.Stdin, |
| 205 | + Stdout: buffers.Stdout, |
| 206 | +@@ -124,10 +124,10 @@ func TestSeccompDenyGetcwd(t *testing.T) { |
| 207 | + } |
| 208 | + |
| 209 | + if exitCode == 0 { |
| 210 | +- t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode) |
| 211 | ++ t.Fatalf("dmesg should fail with negative exit code, instead got %d!", exitCode) |
| 212 | + } |
| 213 | + |
| 214 | +- expected := "pwd: getcwd: Operation not permitted" |
| 215 | ++ expected := "dmesg: klogctl: Operation not permitted" |
| 216 | + actual := strings.Trim(buffers.Stderr.String(), "\n") |
| 217 | + if actual != expected { |
| 218 | + t.Fatalf("Expected output %s but got %s\n", expected, actual) |
| 219 | +-- |
| 220 | +2.43.0 |
| 221 | + |
| 222 | +From b2b5754eb34174e032f1048beb1b27db83e77c5a Mon Sep 17 00:00:00 2001 |
| 223 | +From: Aleksa Sarai <cyphar@cyphar.com> |
| 224 | +Date: Fri, 5 Jan 2024 01:42:32 +1100 |
| 225 | +Subject: [runc v1.1.z PATCH 3/6] setns init: do explicit lookup of execve |
| 226 | + argument early |
| 227 | + |
| 228 | +(This is a partial backport of a minor change included in commit |
| 229 | +dac41717465462b21fab5b5942fe4cb3f47d7e53.) |
| 230 | + |
| 231 | +This mirrors the logic in standard_init_linux.go, and also ensures that |
| 232 | +we do not call exec.LookPath in the final execve step. |
| 233 | + |
| 234 | +While this is okay for regular binaries, it seems exec.LookPath calls |
| 235 | +os.Getenv which tries to emit a log entry to the test harness when |
| 236 | +running in "go test" mode. In a future patch (in order to fix |
| 237 | +CVE-2024-21626), we will close all of the file descriptors immediately |
| 238 | +before execve, which would mean the file descriptor for test harness |
| 239 | +logging would be closed at execve time. So, moving exec.LookPath earlier |
| 240 | +is necessary. |
| 241 | + |
| 242 | +Ref: dac417174654 ("runc-dmz: reduce memfd binary cloning cost with small C binary") |
| 243 | +Signed-off-by: Aleksa Sarai <cyphar@cyphar.com> |
| 244 | +--- |
| 245 | + libcontainer/setns_init_linux.go | 18 +++++++++++++++++- |
| 246 | + 1 file changed, 17 insertions(+), 1 deletion(-) |
| 247 | + |
| 248 | +diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go |
| 249 | +index 09ab552b3d12..e891773ec578 100644 |
| 250 | +--- a/libcontainer/setns_init_linux.go |
| 251 | ++++ b/libcontainer/setns_init_linux.go |
| 252 | +@@ -4,6 +4,7 @@ import ( |
| 253 | + "errors" |
| 254 | + "fmt" |
| 255 | + "os" |
| 256 | ++ "os/exec" |
| 257 | + "strconv" |
| 258 | + |
| 259 | + "github.com/opencontainers/selinux/go-selinux" |
| 260 | +@@ -82,6 +83,21 @@ func (l *linuxSetnsInit) Init() error { |
| 261 | + if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { |
| 262 | + return err |
| 263 | + } |
| 264 | ++ |
| 265 | ++ // Check for the arg before waiting to make sure it exists and it is |
| 266 | ++ // returned as a create time error. |
| 267 | ++ name, err := exec.LookPath(l.config.Args[0]) |
| 268 | ++ if err != nil { |
| 269 | ++ return err |
| 270 | ++ } |
| 271 | ++ // exec.LookPath in Go < 1.20 might return no error for an executable |
| 272 | ++ // residing on a file system mounted with noexec flag, so perform this |
| 273 | ++ // extra check now while we can still return a proper error. |
| 274 | ++ // TODO: remove this once go < 1.20 is not supported. |
| 275 | ++ if err := eaccess(name); err != nil { |
| 276 | ++ return &os.PathError{Op: "eaccess", Path: name, Err: err} |
| 277 | ++ } |
| 278 | ++ |
| 279 | + // Set seccomp as close to execve as possible, so as few syscalls take |
| 280 | + // place afterward (reducing the amount of syscalls that users need to |
| 281 | + // enable in their seccomp profiles). |
| 282 | +@@ -101,5 +117,5 @@ func (l *linuxSetnsInit) Init() error { |
| 283 | + return &os.PathError{Op: "close log pipe", Path: "fd " + strconv.Itoa(l.logFd), Err: err} |
| 284 | + } |
| 285 | + |
| 286 | +- return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) |
| 287 | ++ return system.Exec(name, l.config.Args[0:], os.Environ()) |
| 288 | + } |
| 289 | +-- |
| 290 | +2.43.0 |
| 291 | + |
0 commit comments