Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: Don't return EINVAL from {get,set}sockopt when optlen > PAGE_SIZE

Attaching to these hooks can break iptables because its optval is
usually quite big, or at least bigger than the current PAGE_SIZE limit.
David also mentioned some SCTP options can be big (around 256k).

For such optvals we expose only the first PAGE_SIZE bytes to
the BPF program. The BPF program then has two options:
1. Set ctx->optlen to 0 to indicate that the BPF program's optval
should be ignored and the kernel should use the original userspace
value.
2. Set ctx->optlen to something that's smaller than PAGE_SIZE, in
which case the kernel uses the BPF program's optval with that length.

v5:
* use ctx->optlen == 0 with trimmed buffer (Alexei Starovoitov)
* update the docs accordingly

v4:
* use a temporary buffer to avoid optval == optval_end == NULL;
this removes the corner case in the verifier that might assume
non-zero PTR_TO_PACKET/PTR_TO_PACKET_END.

v3:
* don't increase the limit, bypass the argument

v2:
* proper comments formatting (Jakub Kicinski)

Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks")
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: David Laight <David.Laight@ACULAB.COM>
Link: https://lore.kernel.org/bpf/20200617010416.93086-1-sdf@google.com

Authored by Stanislav Fomichev and committed by Alexei Starovoitov
d8fe449a 99c51064

+33 -20
+33 -20
kernel/bpf/cgroup.c
··· 1276 1276 1277 1277 static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen) 1278 1278 { 1279 - if (unlikely(max_optlen > PAGE_SIZE) || max_optlen < 0) 1279 + if (unlikely(max_optlen < 0)) 1280 1280 return -EINVAL; 1281 + 1282 + if (unlikely(max_optlen > PAGE_SIZE)) { 1283 + /* We don't expose optvals that are greater than PAGE_SIZE 1284 + * to the BPF program. 1285 + */ 1286 + max_optlen = PAGE_SIZE; 1287 + } 1281 1288 1282 1289 ctx->optval = kzalloc(max_optlen, GFP_USER); 1283 1290 if (!ctx->optval) ··· 1292 1285 1293 1286 ctx->optval_end = ctx->optval + max_optlen; 1294 1287 1295 - return 0; 1288 + return max_optlen; 1296 1289 } 1297 1290 1298 1291 static void sockopt_free_buf(struct bpf_sockopt_kern *ctx) ··· 1326 1319 */ 1327 1320 max_optlen = max_t(int, 16, *optlen); 1328 1321 1329 - ret = sockopt_alloc_buf(&ctx, max_optlen); 1330 - if (ret) 1331 - return ret; 1322 + max_optlen = sockopt_alloc_buf(&ctx, max_optlen); 1323 + if (max_optlen < 0) 1324 + return max_optlen; 1332 1325 1333 1326 ctx.optlen = *optlen; 1334 1327 1335 - if (copy_from_user(ctx.optval, optval, *optlen) != 0) { 1328 + if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) { 1336 1329 ret = -EFAULT; 1337 1330 goto out; 1338 1331 } ··· 1360 1353 /* export any potential modifications */ 1361 1354 *level = ctx.level; 1362 1355 *optname = ctx.optname; 1363 - *optlen = ctx.optlen; 1364 - *kernel_optval = ctx.optval; 1356 + 1357 + /* optlen == 0 from BPF indicates that we should 1358 + * use original userspace data. 
1359 + */ 1360 + if (ctx.optlen != 0) { 1361 + *optlen = ctx.optlen; 1362 + *kernel_optval = ctx.optval; 1363 + } 1365 1364 } 1366 1365 1367 1366 out: ··· 1398 1385 __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT)) 1399 1386 return retval; 1400 1387 1401 - ret = sockopt_alloc_buf(&ctx, max_optlen); 1402 - if (ret) 1403 - return ret; 1404 - 1405 1388 ctx.optlen = max_optlen; 1389 + 1390 + max_optlen = sockopt_alloc_buf(&ctx, max_optlen); 1391 + if (max_optlen < 0) 1392 + return max_optlen; 1406 1393 1407 1394 if (!retval) { 1408 1395 /* If kernel getsockopt finished successfully, ··· 1417 1404 goto out; 1418 1405 } 1419 1406 1420 - if (ctx.optlen > max_optlen) 1421 - ctx.optlen = max_optlen; 1422 - 1423 - if (copy_from_user(ctx.optval, optval, ctx.optlen) != 0) { 1407 + if (copy_from_user(ctx.optval, optval, 1408 + min(ctx.optlen, max_optlen)) != 0) { 1424 1409 ret = -EFAULT; 1425 1410 goto out; 1426 1411 } ··· 1447 1436 goto out; 1448 1437 } 1449 1438 1450 - if (copy_to_user(optval, ctx.optval, ctx.optlen) || 1451 - put_user(ctx.optlen, optlen)) { 1452 - ret = -EFAULT; 1453 - goto out; 1439 + if (ctx.optlen != 0) { 1440 + if (copy_to_user(optval, ctx.optval, ctx.optlen) || 1441 + put_user(ctx.optlen, optlen)) { 1442 + ret = -EFAULT; 1443 + goto out; 1444 + } 1454 1445 } 1455 1446 1456 1447 ret = ctx.retval;