diff options
| -rw-r--r-- | meta/recipes-core/systemd/systemd/0001-nspawn-make-sure-host-root-can-write-to-the-uidmappe.patch | 216 | ||||
| -rw-r--r-- | meta/recipes-core/systemd/systemd_250.5.bb | 1 |
2 files changed, 217 insertions, 0 deletions
diff --git a/meta/recipes-core/systemd/systemd/0001-nspawn-make-sure-host-root-can-write-to-the-uidmappe.patch b/meta/recipes-core/systemd/systemd/0001-nspawn-make-sure-host-root-can-write-to-the-uidmappe.patch new file mode 100644 index 0000000000..8715019c99 --- /dev/null +++ b/meta/recipes-core/systemd/systemd/0001-nspawn-make-sure-host-root-can-write-to-the-uidmappe.patch | |||
| @@ -0,0 +1,216 @@ | |||
| 1 | From e34fb1a4568bd080032065bb1506ab9b6c6606f1 Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Lennart Poettering <lennart@poettering.net> | ||
| 3 | Date: Thu, 17 Mar 2022 13:46:12 +0100 | ||
| 4 | Subject: [PATCH] nspawn: make sure host root can write to the uidmapped mounts | ||
| 5 | we prepare for the container payload | ||
| 6 | MIME-Version: 1.0 | ||
| 7 | Content-Type: text/plain; charset=UTF-8 | ||
| 8 | Content-Transfer-Encoding: 8bit | ||
| 9 | |||
| 10 | When using user namespaces in conjunction with uidmapped mounts, nspawn | ||
| 11 | so far set up two uidmappings: | ||
| 12 | |||
| 13 | 1. One that is used for the uidmapped mount and that maps the UID range | ||
| 14 | 0…65535 on the backing fs to some high UID range X…X+65535 on the | ||
| 15 | uidmapped fs. (Let's call this mapping the "mount mapping") | ||
| 16 | |||
| 17 | 2. One that is used for the userns namespace the container payload | ||
| 18 | processes run in, that maps X…X+65535 back to 0…65535. (Let's call | ||
| 19 | this one the "process mapping"). | ||
| 20 | |||
| 21 | These mappings hence are pretty much identical, one just moves things up | ||
| 22 | and one back down. (Reminder: we do all this so that the processes can | ||
| 23 | run under high UIDs while running off file systems that require no | ||
| 24 | recursive chown()ing, i.e. we want processes with high UID range but | ||
| 25 | files with low UID range.) | ||
| 26 | |||
| 27 | This creates one problem, i.e. issue #20989: if nspawn (which runs as | ||
| 28 | host root, i.e. host UID 0) wants to add inodes to the uidmapped mount | ||
| 29 | it can't do that, since host UID 0 is not defined in the mount mapping | ||
| 30 | (only the X…X+65535 range is, after all, and X > 0), and processes whose | ||
| 31 | UID is not mapped in a uidmapped fs cannot create inodes in it since | ||
| 32 | those would be owned by an unmapped UID, which then triggers | ||
| 33 | the famous EOVERFLOW error. | ||
| 34 | |||
| 35 | Let's fix this, by explicitly including an entry for the host UID 0 in | ||
| 36 | the mount mapping. Specifically, we'll extend the mount mapping to map | ||
| 37 | UID 2147483646 (which is INT32_MAX-1, see code for an explanation why I | ||
| 38 | picked this one) of the backing fs to UID 0 on the uidmapped fs. This | ||
| 39 | way nspawn can create inodes on the uidmapped fs as it likes (which will | ||
| 40 | then actually be owned by UID 2147483646 on the backing fs), and as it | ||
| 41 | always did. Note that we do *not* create a similar entry in the process | ||
| 42 | mapping. Thus any files created by nspawn that way (and not chown()ed to | ||
| 43 | something better) will appear as unmapped (i.e. as overflowuid/"nobody") | ||
| 44 | in the container payload. And that's good. Of course, the latter is | ||
| 45 | mostly theoretic, as nspawn should generally chown() the inodes it | ||
| 46 | creates to UID ranges that actually make sense for the container (and we | ||
| 47 | generally already do this correctly), but it's good to know that we are | ||
| 48 | safe here, given we might accidentally forget to chown() some inodes we | ||
| 49 | create. | ||
| 50 | |||
| 51 | Net effect: the two mappings will not be identical anymore. The mount | ||
| 52 | mapping has one entry more, and the only reason it exists is so that | ||
| 53 | nspawn can access the uidmapped fs reasonably independently from any | ||
| 54 | process mapping. | ||
| 55 | |||
| 56 | Fixes: #20989 | ||
| 57 | |||
| 58 | Upstream-Status: Backport [50ae2966d20b0b4a19def060de3b966b7a70b54a] | ||
| 59 | Signed-off-by: Marek Vasut <marex@denx.de> | ||
| 60 | --- | ||
| 61 | src/basic/user-util.h | 13 +++++++++++++ | ||
| 62 | src/nspawn/nspawn-mount.c | 2 +- | ||
| 63 | src/nspawn/nspawn.c | 2 +- | ||
| 64 | src/shared/dissect-image.c | 2 +- | ||
| 65 | src/shared/mount-util.c | 28 +++++++++++++++++++++++----- | ||
| 66 | src/shared/mount-util.h | 13 ++++++++++++- | ||
| 67 | 6 files changed, 51 insertions(+), 9 deletions(-) | ||
| 68 | |||
| 69 | diff --git a/src/basic/user-util.h b/src/basic/user-util.h | ||
| 70 | index ab1ce48b2d..0b9749ef8b 100644 | ||
| 71 | --- a/src/basic/user-util.h | ||
| 72 | +++ b/src/basic/user-util.h | ||
| 73 | @@ -59,6 +59,19 @@ int take_etc_passwd_lock(const char *root); | ||
| 74 | #define UID_NOBODY ((uid_t) 65534U) | ||
| 75 | #define GID_NOBODY ((gid_t) 65534U) | ||
| 76 | |||
| 77 | +/* If REMOUNT_IDMAP_HOST_ROOT is set for remount_idmap() we'll include a mapping here that maps the host root | ||
| 78 | + * user accessing the idmapped mount to this user ID on the backing fs. This is the last valid UID in the | ||
| 79 | + * *signed* 32bit range. You might wonder why precisely use this specific UID for this purpose? Well, we | ||
| 80 | + * definitely cannot use the first 0…65535 UIDs for that, since in most cases that's precisely the file range | ||
| 81 | + * we intend to map to some high UID range, and since UID mappings have to be bijective we thus cannot use | ||
| 82 | + * them at all. Furthermore the UID range beyond INT32_MAX (i.e. the range above the signed 32bit range) is | ||
| 83 | + * icky, since many APIs cannot use it (example: setfsuid() returns the old UID as signed integer). Following | ||
| 84 | + * our usual logic of assigning a 16bit UID range to each container, so that the upper 16bit of a 32bit UID | ||
| 85 | + * value indicate kind of a "container ID" and the lower 16bit map directly to the intended user you can read | ||
| 86 | + * this specific UID as the "nobody" user of the container with ID 0x7FFF, which is kinda nice. */ | ||
| 87 | +#define UID_MAPPED_ROOT ((uid_t) (INT32_MAX-1)) | ||
| 88 | +#define GID_MAPPED_ROOT ((gid_t) (INT32_MAX-1)) | ||
| 89 | + | ||
| 90 | #define ETC_PASSWD_LOCK_PATH "/etc/.pwd.lock" | ||
| 91 | |||
| 92 | /* The following macros add 1 when converting things, since UID 0 is a valid UID, while the pointer | ||
| 93 | diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c | ||
| 94 | index 40773d90c1..f2fad0f462 100644 | ||
| 95 | --- a/src/nspawn/nspawn-mount.c | ||
| 96 | +++ b/src/nspawn/nspawn-mount.c | ||
| 97 | @@ -780,7 +780,7 @@ static int mount_bind(const char *dest, CustomMount *m, uid_t uid_shift, uid_t u | ||
| 98 | } | ||
| 99 | |||
| 100 | if (idmapped) { | ||
| 101 | - r = remount_idmap(where, uid_shift, uid_range); | ||
| 102 | + r = remount_idmap(where, uid_shift, uid_range, REMOUNT_IDMAP_HOST_ROOT); | ||
| 103 | if (r < 0) | ||
| 104 | return log_error_errno(r, "Failed to map ids for bind mount %s: %m", where); | ||
| 105 | } | ||
| 106 | diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c | ||
| 107 | index 8f17ab8810..fe0af8e42d 100644 | ||
| 108 | --- a/src/nspawn/nspawn.c | ||
| 109 | +++ b/src/nspawn/nspawn.c | ||
| 110 | @@ -3779,7 +3779,7 @@ static int outer_child( | ||
| 111 | IN_SET(arg_userns_ownership, USER_NAMESPACE_OWNERSHIP_MAP, USER_NAMESPACE_OWNERSHIP_AUTO) && | ||
| 112 | arg_uid_shift != 0) { | ||
| 113 | |||
| 114 | - r = remount_idmap(directory, arg_uid_shift, arg_uid_range); | ||
| 115 | + r = remount_idmap(directory, arg_uid_shift, arg_uid_range, REMOUNT_IDMAP_HOST_ROOT); | ||
| 116 | if (r == -EINVAL || ERRNO_IS_NOT_SUPPORTED(r)) { | ||
| 117 | /* This might fail because the kernel or file system doesn't support idmapping. We | ||
| 118 | * can't really distinguish this nicely, nor do we have any guarantees about the | ||
| 119 | diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c | ||
| 120 | index 39a7f4c3f2..471c165257 100644 | ||
| 121 | --- a/src/shared/dissect-image.c | ||
| 122 | +++ b/src/shared/dissect-image.c | ||
| 123 | @@ -1807,7 +1807,7 @@ static int mount_partition( | ||
| 124 | (void) fs_grow(node, p); | ||
| 125 | |||
| 126 | if (remap_uid_gid) { | ||
| 127 | - r = remount_idmap(p, uid_shift, uid_range); | ||
| 128 | + r = remount_idmap(p, uid_shift, uid_range, REMOUNT_IDMAP_HOST_ROOT); | ||
| 129 | if (r < 0) | ||
| 130 | return r; | ||
| 131 | } | ||
| 132 | diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c | ||
| 133 | index c75c02f5be..fb2e9a0711 100644 | ||
| 134 | --- a/src/shared/mount-util.c | ||
| 135 | +++ b/src/shared/mount-util.c | ||
| 136 | @@ -1049,14 +1049,31 @@ int make_mount_point(const char *path) { | ||
| 137 | return 1; | ||
| 138 | } | ||
| 139 | |||
| 140 | -static int make_userns(uid_t uid_shift, uid_t uid_range) { | ||
| 141 | - char line[DECIMAL_STR_MAX(uid_t)*3+3+1]; | ||
| 142 | +static int make_userns(uid_t uid_shift, uid_t uid_range, RemountIdmapFlags flags) { | ||
| 143 | _cleanup_close_ int userns_fd = -1; | ||
| 144 | + _cleanup_free_ char *line = NULL; | ||
| 145 | |||
| 146 | /* Allocates a userns file descriptor with the mapping we need. For this we'll fork off a child | ||
| 147 | * process whose only purpose is to give us a new user namespace. It's killed when we got it. */ | ||
| 148 | |||
| 149 | - xsprintf(line, UID_FMT " " UID_FMT " " UID_FMT "\n", 0, uid_shift, uid_range); | ||
| 150 | + if (asprintf(&line, UID_FMT " " UID_FMT " " UID_FMT "\n", 0, uid_shift, uid_range) < 0) | ||
| 151 | + return log_oom_debug(); | ||
| 152 | + | ||
| 153 | + /* If requested we'll include an entry in the mapping so that the host root user can make changes to | ||
| 154 | + * the uidmapped mount like it normally would. Specifically, we'll map the user with UID_MAPPED_ROOT on | ||
| 155 | + * the backing fs to UID 0. This is useful, since nspawn code wants to create various missing inodes | ||
| 156 | + * in the OS tree before booting into it, and this becomes very easy and straightforward to do if it | ||
| 157 | + * can just do it under its own regular UID. Note that in that case the container's runtime uidmap | ||
| 158 | + * (i.e. the one the container payload processes run in) will leave this UID unmapped, i.e. if we | ||
| 159 | + * accidentally leave files owned by host root in the already uidmapped tree around they'll show up | ||
| 160 | + * as owned by 'nobody', which is safe. (Of course, we shouldn't leave such inodes around, but always | ||
| 161 | + * chown() them to the container's own UID range, but it's good to have a safety net, in case we | ||
| 162 | + * forget it.) */ | ||
| 163 | + if (flags & REMOUNT_IDMAP_HOST_ROOT) | ||
| 164 | + if (strextendf(&line, | ||
| 165 | + UID_FMT " " UID_FMT " " UID_FMT "\n", | ||
| 166 | + UID_MAPPED_ROOT, 0, 1) < 0) | ||
| 167 | + return log_oom_debug(); | ||
| 168 | |||
| 169 | /* We always assign the same UID and GID ranges */ | ||
| 170 | userns_fd = userns_acquire(line, line); | ||
| 171 | @@ -1069,7 +1086,8 @@ static int make_userns(uid_t uid_shift, uid_t uid_range) { | ||
| 172 | int remount_idmap( | ||
| 173 | const char *p, | ||
| 174 | uid_t uid_shift, | ||
| 175 | - uid_t uid_range) { | ||
| 176 | + uid_t uid_range, | ||
| 177 | + RemountIdmapFlags flags) { | ||
| 178 | |||
| 179 | _cleanup_close_ int mount_fd = -1, userns_fd = -1; | ||
| 180 | int r; | ||
| 181 | @@ -1085,7 +1103,7 @@ int remount_idmap( | ||
| 182 | return log_debug_errno(errno, "Failed to open tree of mounted filesystem '%s': %m", p); | ||
| 183 | |||
| 184 | /* Create a user namespace mapping */ | ||
| 185 | - userns_fd = make_userns(uid_shift, uid_range); | ||
| 186 | + userns_fd = make_userns(uid_shift, uid_range, flags); | ||
| 187 | if (userns_fd < 0) | ||
| 188 | return userns_fd; | ||
| 189 | |||
| 190 | diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h | ||
| 191 | index ce73aebd4b..f53a64186f 100644 | ||
| 192 | --- a/src/shared/mount-util.h | ||
| 193 | +++ b/src/shared/mount-util.h | ||
| 194 | @@ -112,7 +112,18 @@ int mount_image_in_namespace(pid_t target, const char *propagate_path, const cha | ||
| 195 | |||
| 196 | int make_mount_point(const char *path); | ||
| 197 | |||
| 198 | -int remount_idmap(const char *p, uid_t uid_shift, uid_t uid_range); | ||
| 199 | +typedef enum RemountIdmapFlags { | ||
| 200 | + /* Include a mapping from UID_MAPPED_ROOT (i.e. UID 2^31-2) on the backing fs to UID 0 on the | ||
| 201 | + * uidmapped fs. This is useful to ensure that the host root user can safely add inodes to the | ||
| 202 | + * uidmapped fs (which otherwise wouldn't work as the host root user is not defined on the uidmapped | ||
| 203 | + * mount and any attempts to create inodes will then be refused with EOVERFLOW). The idea is that | ||
| 204 | + * these inodes are quickly re-chown()ed to more suitable UIDs/GIDs. Any code that intends to be able | ||
| 205 | + * to add inodes to file systems mapped this way should set this flag, but given it comes with | ||
| 206 | + * certain security implications it defaults to off, and requires explicit opt-in. */ | ||
| 207 | + REMOUNT_IDMAP_HOST_ROOT = 1 << 0, | ||
| 208 | +} RemountIdmapFlags; | ||
| 209 | + | ||
| 210 | +int remount_idmap(const char *p, uid_t uid_shift, uid_t uid_range, RemountIdmapFlags flags); | ||
| 211 | |||
| 212 | /* Creates a mount point (not parents) based on the source path or stat - ie, a file or a directory */ | ||
| 213 | int make_mount_point_inode_from_stat(const struct stat *st, const char *dest, mode_t mode); | ||
| 214 | -- | ||
| 215 | 2.40.1 | ||
| 216 | |||
diff --git a/meta/recipes-core/systemd/systemd_250.5.bb b/meta/recipes-core/systemd/systemd_250.5.bb index 21a09d8594..c35557471a 100644 --- a/meta/recipes-core/systemd/systemd_250.5.bb +++ b/meta/recipes-core/systemd/systemd_250.5.bb | |||
| @@ -31,6 +31,7 @@ SRC_URI += "file://touchscreen.rules \ | |||
| 31 | file://CVE-2022-4415-1.patch \ | 31 | file://CVE-2022-4415-1.patch \ |
| 32 | file://CVE-2022-4415-2.patch \ | 32 | file://CVE-2022-4415-2.patch \ |
| 33 | file://0001-network-remove-only-managed-configs-on-reconfigure-o.patch \ | 33 | file://0001-network-remove-only-managed-configs-on-reconfigure-o.patch \ |
| 34 | file://0001-nspawn-make-sure-host-root-can-write-to-the-uidmappe.patch \ | ||
| 34 | " | 35 | " |
| 35 | 36 | ||
| 36 | # patches needed by musl | 37 | # patches needed by musl |
