Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
bpf 2022-07-08

We've added 3 non-merge commits during the last 2 days which contain
a total of 7 files changed, 40 insertions(+), 24 deletions(-).

The main changes are:

1) Fix cBPF splat triggered by skb not having a mac header, from Eric Dumazet.

2) Fix spurious packet loss in generic XDP when pushing packets out (note
that native XDP is not affected by the issue), from Johan Almbladh.

3) Fix bpf_dynptr_{read,write}() helper signatures by adding a flags argument
before they are set in stone as UAPI, from Joanne Koong.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
bpf: Add flags arg to bpf_dynptr_read and bpf_dynptr_write APIs
bpf: Make sure mac_header was set before using it
xdp: Fix spurious packet loss in generic XDP TX path
====================

Link: https://lore.kernel.org/r/20220708213418.19626-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+40 -24
+7 -4
include/uapi/linux/bpf.h
··· 5222 5222 * Return 5223 5223 * Nothing. Always succeeds. 5224 5224 * 5225 - * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset) 5225 + * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags) 5226 5226 * Description 5227 5227 * Read *len* bytes from *src* into *dst*, starting from *offset* 5228 5228 * into *src*. 5229 + * *flags* is currently unused. 5229 5230 * Return 5230 5231 * 0 on success, -E2BIG if *offset* + *len* exceeds the length 5231 - * of *src*'s data, -EINVAL if *src* is an invalid dynptr. 5232 + * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if 5233 + * *flags* is not 0. 5232 5234 * 5233 - * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len) 5235 + * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) 5234 5236 * Description 5235 5237 * Write *len* bytes from *src* into *dst*, starting from *offset* 5236 5238 * into *dst*. 5239 + * *flags* is currently unused. 5237 5240 * Return 5238 5241 * 0 on success, -E2BIG if *offset* + *len* exceeds the length 5239 5242 * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* 5240 - * is a read-only dynptr. 5243 + * is a read-only dynptr or if *flags* is not 0. 5241 5244 * 5242 5245 * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) 5243 5246 * Description
+5 -3
kernel/bpf/core.c
··· 68 68 { 69 69 u8 *ptr = NULL; 70 70 71 - if (k >= SKF_NET_OFF) 71 + if (k >= SKF_NET_OFF) { 72 72 ptr = skb_network_header(skb) + k - SKF_NET_OFF; 73 - else if (k >= SKF_LL_OFF) 73 + } else if (k >= SKF_LL_OFF) { 74 + if (unlikely(!skb_mac_header_was_set(skb))) 75 + return NULL; 74 76 ptr = skb_mac_header(skb) + k - SKF_LL_OFF; 75 - 77 + } 76 78 if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) 77 79 return ptr; 78 80
+8 -4
kernel/bpf/helpers.c
··· 1497 1497 .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT, 1498 1498 }; 1499 1499 1500 - BPF_CALL_4(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src, u32, offset) 1500 + BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src, 1501 + u32, offset, u64, flags) 1501 1502 { 1502 1503 int err; 1503 1504 1504 - if (!src->data) 1505 + if (!src->data || flags) 1505 1506 return -EINVAL; 1506 1507 1507 1508 err = bpf_dynptr_check_off_len(src, offset, len); ··· 1522 1521 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 1523 1522 .arg3_type = ARG_PTR_TO_DYNPTR, 1524 1523 .arg4_type = ARG_ANYTHING, 1524 + .arg5_type = ARG_ANYTHING, 1525 1525 }; 1526 1526 1527 - BPF_CALL_4(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src, u32, len) 1527 + BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src, 1528 + u32, len, u64, flags) 1528 1529 { 1529 1530 int err; 1530 1531 1531 - if (!dst->data || bpf_dynptr_is_rdonly(dst)) 1532 + if (!dst->data || flags || bpf_dynptr_is_rdonly(dst)) 1532 1533 return -EINVAL; 1533 1534 1534 1535 err = bpf_dynptr_check_off_len(dst, offset, len); ··· 1550 1547 .arg2_type = ARG_ANYTHING, 1551 1548 .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, 1552 1549 .arg4_type = ARG_CONST_SIZE_OR_ZERO, 1550 + .arg5_type = ARG_ANYTHING, 1553 1551 }; 1554 1552 1555 1553 BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
+6 -2
net/core/dev.c
··· 4863 4863 } 4864 4864 4865 4865 /* When doing generic XDP we have to bypass the qdisc layer and the 4866 - * network taps in order to match in-driver-XDP behavior. 4866 + * network taps in order to match in-driver-XDP behavior. This also means 4867 + * that XDP packets are able to starve other packets going through a qdisc, 4868 + * and DDOS attacks will be more effective. In-driver-XDP use dedicated TX 4869 + * queues, so they do not have this starvation issue. 4867 4870 */ 4868 4871 void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) 4869 4872 { ··· 4878 4875 txq = netdev_core_pick_tx(dev, skb, NULL); 4879 4876 cpu = smp_processor_id(); 4880 4877 HARD_TX_LOCK(dev, txq, cpu); 4881 - if (!netif_xmit_stopped(txq)) { 4878 + if (!netif_xmit_frozen_or_drv_stopped(txq)) { 4882 4879 rc = netdev_start_xmit(skb, dev, txq, 0); 4883 4880 if (dev_xmit_complete(rc)) 4884 4881 free_skb = false; ··· 4886 4883 HARD_TX_UNLOCK(dev, txq); 4887 4884 if (free_skb) { 4888 4885 trace_xdp_exception(dev, xdp_prog, XDP_TX); 4886 + dev_core_stats_tx_dropped_inc(dev); 4889 4887 kfree_skb(skb); 4890 4888 } 4891 4889 }
+7 -4
tools/include/uapi/linux/bpf.h
··· 5222 5222 * Return 5223 5223 * Nothing. Always succeeds. 5224 5224 * 5225 - * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset) 5225 + * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags) 5226 5226 * Description 5227 5227 * Read *len* bytes from *src* into *dst*, starting from *offset* 5228 5228 * into *src*. 5229 + * *flags* is currently unused. 5229 5230 * Return 5230 5231 * 0 on success, -E2BIG if *offset* + *len* exceeds the length 5231 - * of *src*'s data, -EINVAL if *src* is an invalid dynptr. 5232 + * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if 5233 + * *flags* is not 0. 5232 5234 * 5233 - * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len) 5235 + * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) 5234 5236 * Description 5235 5237 * Write *len* bytes from *src* into *dst*, starting from *offset* 5236 5238 * into *dst*. 5239 + * *flags* is currently unused. 5237 5240 * Return 5238 5241 * 0 on success, -E2BIG if *offset* + *len* exceeds the length 5239 5242 * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* 5240 - * is a read-only dynptr. 5243 + * is a read-only dynptr or if *flags* is not 0. 5241 5244 * 5242 5245 * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) 5243 5246 * Description
+5 -5
tools/testing/selftests/bpf/progs/dynptr_fail.c
··· 140 140 141 141 bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(read_data), 0, &ptr); 142 142 143 - bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); 143 + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0); 144 144 145 145 bpf_ringbuf_submit_dynptr(&ptr, 0); 146 146 147 147 /* this should fail */ 148 - bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); 148 + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0); 149 149 150 150 return 0; 151 151 } ··· 338 338 get_map_val_dynptr(&ptr); 339 339 340 340 /* this should fail */ 341 - bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0); 341 + bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0, 0); 342 342 343 343 return 0; 344 344 } ··· 377 377 memcpy((void *)&ptr + 8, &x, sizeof(x)); 378 378 379 379 /* this should fail */ 380 - bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); 380 + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0); 381 381 382 382 bpf_ringbuf_submit_dynptr(&ptr, 0); 383 383 ··· 473 473 get_map_val_dynptr(&ptr); 474 474 475 475 /* this should fail */ 476 - bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0); 476 + bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0, 0); 477 477 478 478 return 0; 479 479 }
+2 -2
tools/testing/selftests/bpf/progs/dynptr_success.c
··· 43 43 bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr); 44 44 45 45 /* Write data into the dynptr */ 46 - err = err ?: bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data)); 46 + err = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0); 47 47 48 48 /* Read the data that was written into the dynptr */ 49 - err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); 49 + err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0); 50 50 51 51 /* Ensure the data we read matches the data we wrote */ 52 52 for (i = 0; i < sizeof(read_data); i++) {