Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: fix crash in build_skb()

When I added pfmemalloc support in build_skb(), I forgot that netlink
was using build_skb() with a vmalloc() area. build_skb() now calls
virt_to_head_page() on that address, which triggers the BUG below.

In this patch I introduce __build_skb() for netlink use;
build_skb() becomes a wrapper around it that handles both
skb->head_frag and skb->pfmemalloc.

This means netlink no longer has to hack skb->head_frag back to 0 after the call.

[ 1567.700067] kernel BUG at arch/x86/mm/physaddr.c:26!
[ 1567.700067] invalid opcode: 0000 [#1] PREEMPT SMP KASAN
[ 1567.700067] Dumping ftrace buffer:
[ 1567.700067] (ftrace buffer empty)
[ 1567.700067] Modules linked in:
[ 1567.700067] CPU: 9 PID: 16186 Comm: trinity-c182 Not tainted 4.0.0-next-20150424-sasha-00037-g4796e21 #2167
[ 1567.700067] task: ffff880127efb000 ti: ffff880246770000 task.ti: ffff880246770000
[ 1567.700067] RIP: __phys_addr (arch/x86/mm/physaddr.c:26 (discriminator 3))
[ 1567.700067] RSP: 0018:ffff8802467779d8 EFLAGS: 00010202
[ 1567.700067] RAX: 000041000ed8e000 RBX: ffffc9008ed8e000 RCX: 000000000000002c
[ 1567.700067] RDX: 0000000000000004 RSI: 0000000000000000 RDI: ffffffffb3fd6049
[ 1567.700067] RBP: ffff8802467779f8 R08: 0000000000000019 R09: ffff8801d0168000
[ 1567.700067] R10: ffff8801d01680c7 R11: ffffed003a02d019 R12: ffffc9000ed8e000
[ 1567.700067] R13: 0000000000000f40 R14: 0000000000001180 R15: ffffc9000ed8e000
[ 1567.700067] FS: 00007f2a7da3f700(0000) GS:ffff8801d1000000(0000) knlGS:0000000000000000
[ 1567.700067] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1567.700067] CR2: 0000000000738308 CR3: 000000022e329000 CR4: 00000000000007e0
[ 1567.700067] Stack:
[ 1567.700067] ffffc9000ed8e000 ffff8801d0168000 ffffc9000ed8e000 ffff8801d0168000
[ 1567.700067] ffff880246777a28 ffffffffad7c0a21 0000000000001080 ffff880246777c08
[ 1567.700067] ffff88060d302e68 ffff880246777b58 ffff880246777b88 ffffffffad9a6821
[ 1567.700067] Call Trace:
[ 1567.700067] build_skb (include/linux/mm.h:508 net/core/skbuff.c:316)
[ 1567.700067] netlink_sendmsg (net/netlink/af_netlink.c:1633 net/netlink/af_netlink.c:2329)
[ 1567.774369] ? sched_clock_cpu (kernel/sched/clock.c:311)
[ 1567.774369] ? netlink_unicast (net/netlink/af_netlink.c:2273)
[ 1567.774369] ? netlink_unicast (net/netlink/af_netlink.c:2273)
[ 1567.774369] sock_sendmsg (net/socket.c:614 net/socket.c:623)
[ 1567.774369] sock_write_iter (net/socket.c:823)
[ 1567.774369] ? sock_sendmsg (net/socket.c:806)
[ 1567.774369] __vfs_write (fs/read_write.c:479 fs/read_write.c:491)
[ 1567.774369] ? get_lock_stats (kernel/locking/lockdep.c:249)
[ 1567.774369] ? default_llseek (fs/read_write.c:487)
[ 1567.774369] ? vtime_account_user (kernel/sched/cputime.c:701)
[ 1567.774369] ? rw_verify_area (fs/read_write.c:406 (discriminator 4))
[ 1567.774369] vfs_write (fs/read_write.c:539)
[ 1567.774369] SyS_write (fs/read_write.c:586 fs/read_write.c:577)
[ 1567.774369] ? SyS_read (fs/read_write.c:577)
[ 1567.774369] ? __this_cpu_preempt_check (lib/smp_processor_id.c:63)
[ 1567.774369] ? trace_hardirqs_on_caller (kernel/locking/lockdep.c:2594 kernel/locking/lockdep.c:2636)
[ 1567.774369] ? trace_hardirqs_on_thunk (arch/x86/lib/thunk_64.S:42)
[ 1567.774369] system_call_fastpath (arch/x86/kernel/entry_64.S:261)

Fixes: 79930f5892e ("net: do not deplete pfmemalloc reserve")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Eric Dumazet and committed by
David S. Miller
2ea2f62c 20d96964

+25 -13
+1
include/linux/skbuff.h
··· 773 773 774 774 struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags, 775 775 int node); 776 + struct sk_buff *__build_skb(void *data, unsigned int frag_size); 776 777 struct sk_buff *build_skb(void *data, unsigned int frag_size); 777 778 static inline struct sk_buff *alloc_skb(unsigned int size, 778 779 gfp_t priority)
+22 -9
net/core/skbuff.c
··· 280 280 EXPORT_SYMBOL(__alloc_skb); 281 281 282 282 /** 283 - * build_skb - build a network buffer 283 + * __build_skb - build a network buffer 284 284 * @data: data buffer provided by caller 285 - * @frag_size: size of fragment, or 0 if head was kmalloced 285 + * @frag_size: size of data, or 0 if head was kmalloced 286 286 * 287 287 * Allocate a new &sk_buff. Caller provides space holding head and 288 288 * skb_shared_info. @data must have been allocated by kmalloc() only if 289 - * @frag_size is 0, otherwise data should come from the page allocator. 289 + * @frag_size is 0, otherwise data should come from the page allocator 290 + * or vmalloc() 290 291 * The return is the new skb buffer. 291 292 * On a failure the return is %NULL, and @data is not freed. 292 293 * Notes : ··· 298 297 * before giving packet to stack. 299 298 * RX rings only contains data buffers, not full skbs. 300 299 */ 301 - struct sk_buff *build_skb(void *data, unsigned int frag_size) 300 + struct sk_buff *__build_skb(void *data, unsigned int frag_size) 302 301 { 303 302 struct skb_shared_info *shinfo; 304 303 struct sk_buff *skb; ··· 312 311 313 312 memset(skb, 0, offsetof(struct sk_buff, tail)); 314 313 skb->truesize = SKB_TRUESIZE(size); 315 - if (frag_size) { 316 - skb->head_frag = 1; 317 - if (virt_to_head_page(data)->pfmemalloc) 318 - skb->pfmemalloc = 1; 319 - } 320 314 atomic_set(&skb->users, 1); 321 315 skb->head = data; 322 316 skb->data = data; ··· 326 330 atomic_set(&shinfo->dataref, 1); 327 331 kmemcheck_annotate_variable(shinfo->destructor_arg); 328 332 333 + return skb; 334 + } 335 + 336 + /* build_skb() is wrapper over __build_skb(), that specifically 337 + * takes care of skb->head and skb->pfmemalloc 338 + * This means that if @frag_size is not zero, then @data must be backed 339 + * by a page fragment, not kmalloc() or vmalloc() 340 + */ 341 + struct sk_buff *build_skb(void *data, unsigned int frag_size) 342 + { 343 + struct sk_buff *skb = __build_skb(data, frag_size); 
344 + 345 + if (skb && frag_size) { 346 + skb->head_frag = 1; 347 + if (virt_to_head_page(data)->pfmemalloc) 348 + skb->pfmemalloc = 1; 349 + } 329 350 return skb; 330 351 } 331 352 EXPORT_SYMBOL(build_skb);
+2 -4
net/netlink/af_netlink.c
··· 1629 1629 if (data == NULL) 1630 1630 return NULL; 1631 1631 1632 - skb = build_skb(data, size); 1632 + skb = __build_skb(data, size); 1633 1633 if (skb == NULL) 1634 1634 vfree(data); 1635 - else { 1636 - skb->head_frag = 0; 1635 + else 1637 1636 skb->destructor = netlink_skb_destructor; 1638 - } 1639 1637 1640 1638 return skb; 1641 1639 }