Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: compound page support in skb_seq_read

skb_seq_read iterates over an skb, returning pointer and length of
the next data range with each call.

It relies on kmap_atomic to access highmem pages when needed.

An skb frag may be backed by a compound page, but kmap_atomic maps
only a single page. There are not enough kmap slots to always map all
pages concurrently.

Instead, if kmap_atomic is needed, iterate over each page.

As this increases the number of calls, avoid this unless needed.
The necessary condition is captured in skb_frag_must_loop.

I tried to make the change as obvious as possible. It should be easy
to verify that nothing changes if skb_frag_must_loop returns false.

Tested:
On an x86 platform with
CONFIG_HIGHMEM=y
CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP=y
CONFIG_NETFILTER_XT_MATCH_STRING=y

Run
ip link set dev lo mtu 1500
iptables -A OUTPUT -m string --string 'badstring' --algo bm -j ACCEPT
dd if=/dev/urandom of=in bs=1M count=20
nc -l -p 8000 > /dev/null &
nc -w 1 -q 0 localhost 8000 < in

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Willem de Bruijn and committed by
Jakub Kicinski
97550f6f 29766bcf

+25 -6
+1
include/linux/skbuff.h
@@ -1203,6 +1203,7 @@
 	struct sk_buff	*root_skb;
 	struct sk_buff	*cur_skb;
 	__u8		*frag_data;
+	__u32		frag_off;
 };

 void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
+24 -6
net/core/skbuff.c
··· 3442 3442 st->root_skb = st->cur_skb = skb; 3443 3443 st->frag_idx = st->stepped_offset = 0; 3444 3444 st->frag_data = NULL; 3445 + st->frag_off = 0; 3445 3446 } 3446 3447 EXPORT_SYMBOL(skb_prepare_seq_read); 3447 3448 ··· 3497 3496 st->stepped_offset += skb_headlen(st->cur_skb); 3498 3497 3499 3498 while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { 3500 - frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; 3501 - block_limit = skb_frag_size(frag) + st->stepped_offset; 3499 + unsigned int pg_idx, pg_off, pg_sz; 3502 3500 3501 + frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; 3502 + 3503 + pg_idx = 0; 3504 + pg_off = skb_frag_off(frag); 3505 + pg_sz = skb_frag_size(frag); 3506 + 3507 + if (skb_frag_must_loop(skb_frag_page(frag))) { 3508 + pg_idx = (pg_off + st->frag_off) >> PAGE_SHIFT; 3509 + pg_off = offset_in_page(pg_off + st->frag_off); 3510 + pg_sz = min_t(unsigned int, pg_sz - st->frag_off, 3511 + PAGE_SIZE - pg_off); 3512 + } 3513 + 3514 + block_limit = pg_sz + st->stepped_offset; 3503 3515 if (abs_offset < block_limit) { 3504 3516 if (!st->frag_data) 3505 - st->frag_data = kmap_atomic(skb_frag_page(frag)); 3517 + st->frag_data = kmap_atomic(skb_frag_page(frag) + pg_idx); 3506 3518 3507 - *data = (u8 *) st->frag_data + skb_frag_off(frag) + 3519 + *data = (u8 *)st->frag_data + pg_off + 3508 3520 (abs_offset - st->stepped_offset); 3509 3521 3510 3522 return block_limit - abs_offset; ··· 3528 3514 st->frag_data = NULL; 3529 3515 } 3530 3516 3531 - st->frag_idx++; 3532 - st->stepped_offset += skb_frag_size(frag); 3517 + st->stepped_offset += pg_sz; 3518 + st->frag_off += pg_sz; 3519 + if (st->frag_off == skb_frag_size(frag)) { 3520 + st->frag_off = 0; 3521 + st->frag_idx++; 3522 + } 3533 3523 } 3534 3524 3535 3525 if (st->frag_data) {