Merge pull request #109342 from Mic92/wrappers

authored by

Jörg Thalheim and committed by
GitHub
0998756d 4f73437e

+179 -172
+2 -10
nixos/modules/security/wrappers/default.nix
··· 10 10 (n: v: (if v ? program then v else v // {program=n;})) 11 11 wrappers); 12 12 13 - securityWrapper = pkgs.stdenv.mkDerivation { 14 - name = "security-wrapper"; 15 - phases = [ "installPhase" "fixupPhase" ]; 16 - buildInputs = [ pkgs.libcap pkgs.libcap_ng pkgs.linuxHeaders ]; 17 - hardeningEnable = [ "pie" ]; 18 - installPhase = '' 19 - mkdir -p $out/bin 20 - $CC -Wall -O2 -DWRAPPER_DIR=\"${parentWrapperDir}\" \ 21 - -lcap-ng -lcap ${./wrapper.c} -o $out/bin/security-wrapper 22 - ''; 13 + securityWrapper = pkgs.callPackage ./wrapper.nix { 14 + inherit parentWrapperDir; 23 15 }; 24 16 25 17 ###### Activation script for the setcap wrappers
+156 -162
nixos/modules/security/wrappers/wrapper.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/xattr.h>
#include <fcntl.h>
#include <dirent.h>

// Make sure assertions are not compiled out, we use them to codify
// invariants about this program and we want it to fail fast and
// loudly when one is violated. NDEBUG only has an effect at the point
// where <assert.h> is included, so it is undefined right before it.
#undef NDEBUG
#include <assert.h>
#include <errno.h>
#include <endian.h>
#include <linux/capability.h>
#include <sys/prctl.h>
#include <limits.h>
#include <stdint.h>
#include <syscall.h>
#include <byteswap.h>

// Environment handed on unchanged to the wrapped program by execve().
extern char **environ;

// The WRAPPER_DIR macro is supplied at compile time so that it cannot
// be changed at runtime
static char *wrapper_dir = WRAPPER_DIR;

// Wrapper debug variable name
static char *wrapper_debug = "WRAPPER_DEBUG";

// Number of the cap_setpcap capability; <linux/capability.h> normally
// provides it already, this is only a fallback.
#ifndef CAP_SETPCAP
#define CAP_SETPCAP 8
#endif

// File capabilities are stored little-endian on disk. LE32_TO_H converts
// one 32-bit word to host byte order. Refuse to build when the host byte
// order is unknown: inside `#if`, two undefined macros both evaluate to 0
// and would silently select the byte-swapping variant.
#if !defined(__BYTE_ORDER) || !defined(__BIG_ENDIAN)
#error "host byte order is unknown, <endian.h> did not define __BYTE_ORDER"
#endif

#if __BYTE_ORDER == __BIG_ENDIAN
#define LE32_TO_H(x) bswap_32(x)
#else
#define LE32_TO_H(x) (x)
#endif

// Read the number of the highest capability known to the running kernel
// from /proc/sys/kernel/cap_last_cap.
//
// On success stores the number in *last_cap and returns 0. On failure
// prints a diagnostic to stderr, leaves *last_cap untouched and returns
// a negative errno value.
int get_last_cap(unsigned *last_cap) {
    static const char path[] = "/proc/sys/kernel/cap_last_cap";

    FILE *file = fopen(path, "r");
    if (file == NULL) {
        int saved_errno = errno;
        fprintf(stderr, "failed to open %s: %s\n", path, strerror(saved_errno));
        return -saved_errno;
    }

    unsigned value = 0;
    errno = 0;
    int res = fscanf(file, "%u", &value);
    // Capture errno before fclose() gets a chance to overwrite it.
    int saved_errno = errno;
    // Close on every path; the stream is not needed past this point.
    fclose(file);

    // Anything but exactly one converted item is a failure: EOF covers
    // read errors and an empty file, 0 covers non-numeric content.
    if (res != 1) {
        if (saved_errno == 0) {
            // Nothing parsable without an I/O error; still report failure
            // instead of returning -0, which callers would read as success.
            saved_errno = EINVAL;
        }
        fprintf(stderr, "could not read number from %s: %s\n", path, strerror(saved_errno));
        return -saved_errno;
    }

    *last_cap = value;
    return 0;
}

// Given the path to this program, fetch its configured capability set
// (as set by `setcap ... /path/to/file`) and raise those capabilities
// into the Ambient set.
55 - static int make_caps_ambient(const char *selfPath) 56 - { 57 - cap_t caps = cap_get_file(selfPath); 58 - 59 - if(!caps) 60 - { 61 - if(getenv(wrapperDebug)) 62 - fprintf(stderr, "no caps set or could not retrieve the caps for this file, not doing anything..."); 61 + static int make_caps_ambient(const char *self_path) { 62 + struct vfs_ns_cap_data data = {}; 63 + int r = getxattr(self_path, "security.capability", &data, sizeof(data)); 63 64 65 + if (r < 0) { 66 + if (errno == ENODATA) { 67 + // no capabilities set 68 + return 0; 69 + } 70 + fprintf(stderr, "cannot get capabilities for %s: %s", self_path, strerror(errno)); 64 71 return 1; 65 72 } 66 73 67 - // We use `cap_to_text` and iteration over the tokenized result 68 - // string because, as of libcap's current release, there is no 69 - // facility for retrieving an array of `cap_value_t`'s that can be 70 - // given to `prctl` in order to lift that capability into the 71 - // Ambient set. 72 - // 73 - // Some discussion was had around shot-gunning all of the 74 - // capabilities we know about into the Ambient set but that has a 75 - // security smell and I deemed the risk of the current 76 - // implementation crashing the program to be lower than the risk 77 - // of a privilege escalation security hole being introduced by 78 - // raising all capabilities, even ones we didn't intend for the 79 - // program, into the Ambient set. 80 - // 81 - // `cap_t` which is returned by `cap_get_*` is an opaque type and 82 - // even if we could retrieve the bitmasks (which, as far as I can 83 - // tell we cannot) in order to get the `cap_value_t` 84 - // representation for each capability we would have to take the 85 - // total number of capabilities supported and iterate over the 86 - // sequence of integers up-to that maximum total, testing each one 87 - // against the bitmask ((bitmask >> n) & 1) to see if it's set and 88 - // aggregating each "capability integer n" that is set in the 89 - // bitmask. 
90 - // 91 - // That, combined with the fact that we can't easily get the 92 - // bitmask anyway seemed much more brittle than fetching the 93 - // `cap_t`, transforming it into a textual representation, 94 - // tokenizing the string, and using `cap_from_name` on the token 95 - // to get the `cap_value_t` that we need for `prctl`. There is 96 - // indeed risk involved if the output string format of 97 - // `cap_to_text` ever changes but at this time the combination of 98 - // factors involving the below list have led me to the conclusion 99 - // that the best implementation at this time is reading then 100 - // parsing with *lots of documentation* about why we're doing it 101 - // this way. 102 - // 103 - // 1. No explicit API for fetching an array of `cap_value_t`'s or 104 - // for transforming a `cap_t` into such a representation 105 - // 2. The risk of a crash is lower than lifting all capabilities 106 - // into the Ambient set 107 - // 3. libcap is depended on heavily in the Linux ecosystem so 108 - // there is a high chance that the output representation of 109 - // `cap_to_text` will not change which reduces our risk that 110 - // this parsing step will cause a crash 111 - // 112 - // The preferred method, should it ever be available in the 113 - // future, would be to use libcap API's to transform the result 114 - // from a `cap_get_*` into an array of `cap_value_t`'s that can 115 - // then be given to prctl. 116 - // 117 - // - Parnell 118 - ssize_t capLen; 119 - char* capstr = cap_to_text(caps, &capLen); 120 - cap_free(caps); 121 - 122 - // TODO: For now, we assume that cap_to_text always starts its 123 - // result string with " =" and that the first capability is listed 124 - // immediately after that. We should verify this. 
125 - assert(capLen >= 2); 126 - capstr += 2; 74 + size_t size; 75 + uint32_t version = LE32_TO_H(data.magic_etc) & VFS_CAP_REVISION_MASK; 76 + switch (version) { 77 + case VFS_CAP_REVISION_1: 78 + size = VFS_CAP_U32_1; 79 + break; 80 + case VFS_CAP_REVISION_2: 81 + case VFS_CAP_REVISION_3: 82 + size = VFS_CAP_U32_3; 83 + break; 84 + default: 85 + fprintf(stderr, "BUG! Unsupported capability version 0x%x on %s. Report to NixOS bugtracker\n", version, self_path); 86 + return 1; 87 + } 127 88 128 - char* saveptr = NULL; 129 - for(char* tok = strtok_r(capstr, ",", &saveptr); tok; tok = strtok_r(NULL, ",", &saveptr)) 130 - { 131 - cap_value_t capnum; 132 - if (cap_from_name(tok, &capnum)) 133 - { 134 - if(getenv(wrapperDebug)) 135 - fprintf(stderr, "cap_from_name failed, skipping: %s", tok); 136 - } 137 - else if (capnum == CAP_SETPCAP) 138 - { 139 - // Check for the cap_setpcap capability, we set this on the 140 - // wrapper so it can elevate the capabilities to the Ambient 141 - // set but we do not want to propagate it down into the 142 - // wrapped program. 143 - // 144 - // TODO: what happens if that's the behavior you want 145 - // though???? I'm preferring a strict vs. loose policy here. 
146 - if(getenv(wrapperDebug)) 147 - fprintf(stderr, "cap_setpcap in set, skipping it\n"); 148 - } 149 - else 150 - { 151 - set_ambient_cap(capnum); 89 + const struct __user_cap_header_struct header = { 90 + .version = _LINUX_CAPABILITY_VERSION_3, 91 + .pid = getpid(), 92 + }; 93 + struct __user_cap_data_struct user_data[2] = {}; 152 94 153 - if(getenv(wrapperDebug)) 154 - fprintf(stderr, "raised %s into the Ambient capability set\n", tok); 155 - } 95 + for (size_t i = 0; i < size; i++) { 96 + // merge inheritable & permitted into one 97 + user_data[i].permitted = user_data[i].inheritable = 98 + LE32_TO_H(data.data[i].inheritable) | LE32_TO_H(data.data[i].permitted); 156 99 } 157 - cap_free(capstr); 100 + 101 + if (syscall(SYS_capset, &header, &user_data) < 0) { 102 + fprintf(stderr, "failed to inherit capabilities: %s", strerror(errno)); 103 + return 1; 104 + } 105 + unsigned last_cap; 106 + r = get_last_cap(&last_cap); 107 + if (r < 0) { 108 + return 1; 109 + } 110 + uint64_t set = user_data[0].permitted | (uint64_t)user_data[1].permitted << 32; 111 + for (unsigned cap = 0; cap < last_cap; cap++) { 112 + if (!(set & (1ULL << cap))) { 113 + continue; 114 + } 115 + 116 + // Check for the cap_setpcap capability, we set this on the 117 + // wrapper so it can elevate the capabilities to the Ambient 118 + // set but we do not want to propagate it down into the 119 + // wrapped program. 120 + // 121 + // TODO: what happens if that's the behavior you want 122 + // though???? I'm preferring a strict vs. loose policy here. 
123 + if (cap == CAP_SETPCAP) { 124 + if(getenv(wrapper_debug)) { 125 + fprintf(stderr, "cap_setpcap in set, skipping it\n"); 126 + } 127 + continue; 128 + } 129 + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, (unsigned long) cap, 0, 0)) { 130 + fprintf(stderr, "cannot raise the capability %d into the ambient set: %s\n", cap, strerror(errno)); 131 + return 1; 132 + } 133 + if (getenv(wrapper_debug)) { 134 + fprintf(stderr, "raised %d into the ambient capability set\n", cap); 135 + } 136 + } 158 137 159 138 return 0; 160 139 } 161 140 162 - int main(int argc, char * * argv) 163 - { 164 - // I *think* it's safe to assume that a path from a symbolic link 165 - // should safely fit within the PATH_MAX system limit. Though I'm 166 - // not positive it's safe... 167 - char selfPath[PATH_MAX]; 168 - int selfPathSize = readlink("/proc/self/exe", selfPath, sizeof(selfPath)); 141 + int readlink_malloc(const char *p, char **ret) { 142 + size_t l = FILENAME_MAX+1; 143 + int r; 169 144 170 - assert(selfPathSize > 0); 145 + for (;;) { 146 + char *c = calloc(l, sizeof(char)); 147 + if (!c) { 148 + return -ENOMEM; 149 + } 150 + 151 + ssize_t n = readlink(p, c, l-1); 152 + if (n < 0) { 153 + r = -errno; 154 + free(c); 155 + return r; 156 + } 157 + 158 + if ((size_t) n < l-1) { 159 + c[n] = 0; 160 + *ret = c; 161 + return 0; 162 + } 171 163 172 - // Assert we have room for the zero byte, this ensures the path 173 - // isn't being truncated because it's too big for the buffer. 174 - // 175 - // A better way to handle this might be to use something like the 176 - // whereami library (https://github.com/gpakosz/whereami) or a 177 - // loop that resizes the buffer and re-reads the link if the 178 - // contents are being truncated. 179 - assert(selfPathSize < sizeof(selfPath)); 164 + free(c); 165 + l *= 2; 166 + } 167 + } 180 168 181 - // Set the zero byte since readlink doesn't do that for us. 
182 - selfPath[selfPathSize] = '\0'; 169 + int main(int argc, char **argv) { 170 + char *self_path = NULL; 171 + int self_path_size = readlink_malloc("/proc/self/exe", &self_path); 172 + if (self_path_size < 0) { 173 + fprintf(stderr, "cannot readlink /proc/self/exe: %s", strerror(-self_path_size)); 174 + } 183 175 184 176 // Make sure that we are being executed from the right location, 185 - // i.e., `safeWrapperDir'. This is to prevent someone from creating 177 + // i.e., `safe_wrapper_dir'. This is to prevent someone from creating 186 178 // hard link `X' from some other location, along with a false 187 179 // `X.real' file, to allow arbitrary programs from being executed 188 180 // with elevated capabilities. 189 - int len = strlen(wrapperDir); 190 - if (len > 0 && '/' == wrapperDir[len - 1]) 181 + int len = strlen(wrapper_dir); 182 + if (len > 0 && '/' == wrapper_dir[len - 1]) 191 183 --len; 192 - assert(!strncmp(selfPath, wrapperDir, len)); 193 - assert('/' == wrapperDir[0]); 194 - assert('/' == selfPath[len]); 184 + assert(!strncmp(self_path, wrapper_dir, len)); 185 + assert('/' == wrapper_dir[0]); 186 + assert('/' == self_path[len]); 195 187 196 188 // Make *really* *really* sure that we were executed as 197 - // `selfPath', and not, say, as some other setuid program. That 189 + // `self_path', and not, say, as some other setuid program. That 198 190 // is, our effective uid/gid should match the uid/gid of 199 - // `selfPath'. 191 + // `self_path'. 200 192 struct stat st; 201 - assert(lstat(selfPath, &st) != -1); 193 + assert(lstat(self_path, &st) != -1); 202 194 203 195 assert(!(st.st_mode & S_ISUID) || (st.st_uid == geteuid())); 204 196 assert(!(st.st_mode & S_ISGID) || (st.st_gid == getegid())); ··· 207 199 assert(!(st.st_mode & (S_IWGRP | S_IWOTH))); 208 200 209 201 // Read the path of the real (wrapped) program from <self>.real. 
210 - char realFN[PATH_MAX + 10]; 211 - int realFNSize = snprintf (realFN, sizeof(realFN), "%s.real", selfPath); 212 - assert (realFNSize < sizeof(realFN)); 202 + char real_fn[PATH_MAX + 10]; 203 + int real_fn_size = snprintf(real_fn, sizeof(real_fn), "%s.real", self_path); 204 + assert(real_fn_size < sizeof(real_fn)); 213 205 214 - int fdSelf = open(realFN, O_RDONLY); 215 - assert (fdSelf != -1); 206 + int fd_self = open(real_fn, O_RDONLY); 207 + assert(fd_self != -1); 216 208 217 - char sourceProg[PATH_MAX]; 218 - len = read(fdSelf, sourceProg, PATH_MAX); 219 - assert (len != -1); 220 - assert (len < sizeof(sourceProg)); 221 - assert (len > 0); 222 - sourceProg[len] = 0; 209 + char source_prog[PATH_MAX]; 210 + len = read(fd_self, source_prog, PATH_MAX); 211 + assert(len != -1); 212 + assert(len < sizeof(source_prog)); 213 + assert(len > 0); 214 + source_prog[len] = 0; 223 215 224 - close(fdSelf); 216 + close(fd_self); 225 217 226 218 // Read the capabilities set on the wrapper and raise them in to 227 - // the Ambient set so the program we're wrapping receives the 219 + // the ambient set so the program we're wrapping receives the 228 220 // capabilities too! 229 - make_caps_ambient(selfPath); 221 + if (make_caps_ambient(self_path) != 0) { 222 + free(self_path); 223 + return 1; 224 + } 225 + free(self_path); 230 226 231 - execve(sourceProg, argv, environ); 227 + execve(source_prog, argv, environ); 232 228 233 229 fprintf(stderr, "%s: cannot run `%s': %s\n", 234 - argv[0], sourceProg, strerror(errno)); 230 + argv[0], source_prog, strerror(errno)); 235 231 236 - exit(1); 232 + return 1; 237 233 } 238 - 239 -
+21
nixos/modules/security/wrappers/wrapper.nix
··· 1 + { stdenv, linuxHeaders, parentWrapperDir, debug ? false }: 2 + # For testing: 3 + # $ nix-build -E 'with import <nixpkgs> {}; pkgs.callPackage ./wrapper.nix { parentWrapperDir = "/run/wrappers"; debug = true; }' 4 + stdenv.mkDerivation { 5 + name = "security-wrapper"; 6 + buildInputs = [ linuxHeaders ]; 7 + dontUnpack = true; 8 + hardeningEnable = [ "pie" ]; 9 + CFLAGS = [ 10 + ''-DWRAPPER_DIR="${parentWrapperDir}"'' 11 + ] ++ (if debug then [ 12 + "-Werror" "-Og" "-g" 13 + ] else [ 14 + "-Wall" "-O2" 15 + ]); 16 + dontStrip = debug; 17 + installPhase = '' 18 + mkdir -p $out/bin 19 + $CC $CFLAGS ${./wrapper.c} -o $out/bin/security-wrapper 20 + ''; 21 + }