Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: fix write helpers with regards to non-linear parts

Fix the bpf_try_make_writable() helper and all call sites we have in BPF,
as it is currently defective with regards to skbs when the write_len spans
into non-linear parts, no matter if cloned or not.

There are multiple issues at once. First, using skb_store_bits() is not
correct since even if we have a cloned skb, page frags can still be shared.
To really make them private, we need to pull them in via __pskb_pull_tail()
first, which also gets us a private head via pskb_expand_head() implicitly.

This is for helpers like bpf_skb_store_bytes(), bpf_l3_csum_replace(),
bpf_l4_csum_replace(). Really, the only thing reasonable and working here
is to call skb_ensure_writable() before any write operation. Meaning, via
pskb_may_pull() it makes sure that parts we want to access are pulled in and
if not does so plus unclones the skb implicitly. If our write_len still fits
the headlen and we're cloned and our header of the clone is not writable,
then we need to make a private copy via pskb_expand_head(). skb_store_bits()
is a bit misleading and only safe to store into non-linear data in different
contexts such as 357b40a18b04 ("[IPV6]: IPV6_CHECKSUM socket option can
corrupt kernel memory").

For above BPF helper functions, it means after fixed bpf_try_make_writable(),
we've pulled in enough, so that we operate always based on skb->data. Thus,
the call to skb_header_pointer() and skb_store_bits() becomes superfluous.
In bpf_skb_store_bytes(), the len check is unnecessary too since it can
only pass in maximum of BPF stack size, so adding offset is guaranteed to
never overflow. Also bpf_l3/4_csum_replace() helpers must test for proper
offset alignment since they use __sum16 pointer for writing resulting csum.

The remaining helpers that change skb data not discussed here yet are
bpf_skb_vlan_push(), bpf_skb_vlan_pop() and bpf_skb_change_proto(). The
vlan helpers internally call either skb_ensure_writable() (pop case) or
skb_cow_head() (push case, for head expansion), respectively. Similarly,
bpf_skb_proto_xlat() takes care to not mangle page frags.

Fixes: 608cd71a9c7c ("tc: bpf: generalize pedit action")
Fixes: 91bc4822c3d6 ("tc: bpf: add checksum helpers")
Fixes: 3697649ff29e ("bpf: try harder on clones when writing into skb")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Daniel Borkmann and committed by
David S. Miller
0ed661d5 e8c2993a

+18 -52
+18 -52
net/core/filter.c
··· 1355 1355 { 1356 1356 int err; 1357 1357 1358 - if (!skb_cloned(skb)) 1359 - return 0; 1360 - if (skb_clone_writable(skb, write_len)) 1361 - return 0; 1362 - err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); 1363 - if (!err) 1364 - bpf_compute_data_end(skb); 1358 + err = skb_ensure_writable(skb, write_len); 1359 + bpf_compute_data_end(skb); 1360 + 1365 1361 return err; 1366 1362 } 1367 1363 ··· 1375 1379 1376 1380 static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) 1377 1381 { 1378 - struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); 1379 1382 struct sk_buff *skb = (struct sk_buff *) (long) r1; 1380 - int offset = (int) r2; 1383 + unsigned int offset = (unsigned int) r2; 1381 1384 void *from = (void *) (long) r3; 1382 1385 unsigned int len = (unsigned int) r4; 1383 1386 void *ptr; 1384 1387 1385 1388 if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH))) 1386 1389 return -EINVAL; 1387 - 1388 - /* bpf verifier guarantees that: 1389 - * 'from' pointer points to bpf program stack 1390 - * 'len' bytes of it were initialized 1391 - * 'len' > 0 1392 - * 'skb' is a valid pointer to 'struct sk_buff' 1393 - * 1394 - * so check for invalid 'offset' and too large 'len' 1395 - */ 1396 - if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff))) 1390 + if (unlikely(offset > 0xffff)) 1397 1391 return -EFAULT; 1398 1392 if (unlikely(bpf_try_make_writable(skb, offset + len))) 1399 1393 return -EFAULT; 1400 1394 1401 - ptr = skb_header_pointer(skb, offset, len, sp->buff); 1402 - if (unlikely(!ptr)) 1403 - return -EFAULT; 1404 - 1395 + ptr = skb->data + offset; 1405 1396 if (flags & BPF_F_RECOMPUTE_CSUM) 1406 1397 __skb_postpull_rcsum(skb, ptr, len, offset); 1407 1398 1408 1399 memcpy(ptr, from, len); 1409 - 1410 - if (ptr == sp->buff) 1411 - /* skb_store_bits cannot return -EFAULT here */ 1412 - skb_store_bits(skb, offset, ptr, len); 1413 1400 1414 1401 if (flags & BPF_F_RECOMPUTE_CSUM) 1415 1402 __skb_postpush_rcsum(skb, ptr, len, 
offset); ··· 1416 1437 static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) 1417 1438 { 1418 1439 const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1; 1419 - int offset = (int) r2; 1440 + unsigned int offset = (unsigned int) r2; 1420 1441 void *to = (void *)(unsigned long) r3; 1421 1442 unsigned int len = (unsigned int) r4; 1422 1443 void *ptr; 1423 1444 1424 - if (unlikely((u32) offset > 0xffff)) 1445 + if (unlikely(offset > 0xffff)) 1425 1446 goto err_clear; 1426 1447 1427 1448 ptr = skb_header_pointer(skb, offset, len, to); ··· 1449 1470 static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) 1450 1471 { 1451 1472 struct sk_buff *skb = (struct sk_buff *) (long) r1; 1452 - int offset = (int) r2; 1453 - __sum16 sum, *ptr; 1473 + unsigned int offset = (unsigned int) r2; 1474 + __sum16 *ptr; 1454 1475 1455 1476 if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK))) 1456 1477 return -EINVAL; 1457 - if (unlikely((u32) offset > 0xffff)) 1478 + if (unlikely(offset > 0xffff || offset & 1)) 1458 1479 return -EFAULT; 1459 - if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum)))) 1460 - return -EFAULT; 1461 - 1462 - ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); 1463 - if (unlikely(!ptr)) 1480 + if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr)))) 1464 1481 return -EFAULT; 1465 1482 1483 + ptr = (__sum16 *)(skb->data + offset); 1466 1484 switch (flags & BPF_F_HDR_FIELD_MASK) { 1467 1485 case 0: 1468 1486 if (unlikely(from != 0)) ··· 1476 1500 default: 1477 1501 return -EINVAL; 1478 1502 } 1479 - 1480 - if (ptr == &sum) 1481 - /* skb_store_bits guaranteed to not return -EFAULT here */ 1482 - skb_store_bits(skb, offset, ptr, sizeof(sum)); 1483 1503 1484 1504 return 0; 1485 1505 } ··· 1496 1524 struct sk_buff *skb = (struct sk_buff *) (long) r1; 1497 1525 bool is_pseudo = flags & BPF_F_PSEUDO_HDR; 1498 1526 bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; 1499 - int offset = (int) r2; 1500 - __sum16 
sum, *ptr; 1527 + unsigned int offset = (unsigned int) r2; 1528 + __sum16 *ptr; 1501 1529 1502 1530 if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR | 1503 1531 BPF_F_HDR_FIELD_MASK))) 1504 1532 return -EINVAL; 1505 - if (unlikely((u32) offset > 0xffff)) 1533 + if (unlikely(offset > 0xffff || offset & 1)) 1506 1534 return -EFAULT; 1507 - if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum)))) 1535 + if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr)))) 1508 1536 return -EFAULT; 1509 1537 1510 - ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); 1511 - if (unlikely(!ptr)) 1512 - return -EFAULT; 1538 + ptr = (__sum16 *)(skb->data + offset); 1513 1539 if (is_mmzero && !*ptr) 1514 1540 return 0; 1515 1541 ··· 1530 1560 1531 1561 if (is_mmzero && !*ptr) 1532 1562 *ptr = CSUM_MANGLED_0; 1533 - if (ptr == &sum) 1534 - /* skb_store_bits guaranteed to not return -EFAULT here */ 1535 - skb_store_bits(skb, offset, ptr, sizeof(sum)); 1536 - 1537 1563 return 0; 1538 1564 } 1539 1565