Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Merging net into net-next to help the bpf folks avoid
some really ugly merge conflicts.

Signed-off-by: David S. Miller <davem@davemloft.net>

+94 -18
+9 -1
Documentation/bpf/bpf_devel_QA.txt
··· 557 557 pulls in some header files containing file scope host assembly codes. 558 558 - You can add "-fno-jump-tables" to work around the switch table issue. 559 559 560 - Otherwise, you can use bpf target. 560 + Otherwise, you can use bpf target. Additionally, you _must_ use bpf target 561 + when: 562 + 563 + - Your program uses data structures with pointer or long / unsigned long 564 + types that interface with BPF helpers or context data structures. Access 565 + into these structures is verified by the BPF verifier and may result 566 + in verification failures if the native architecture is not aligned with 567 + the BPF architecture, e.g. 64-bit. An example of this is 568 + BPF_PROG_TYPE_SK_MSG require '-target bpf' 561 569 562 570 Happy BPF hacking!
+11 -1
arch/x86/net/bpf_jit_comp.c
··· 1027 1027 break; 1028 1028 1029 1029 case BPF_JMP | BPF_JA: 1030 - jmp_offset = addrs[i + insn->off] - addrs[i]; 1030 + if (insn->off == -1) 1031 + /* -1 jmp instructions will always jump 1032 + * backwards two bytes. Explicitly handling 1033 + * this case avoids wasting too many passes 1034 + * when there are long sequences of replaced 1035 + * dead code. 1036 + */ 1037 + jmp_offset = -2; 1038 + else 1039 + jmp_offset = addrs[i + insn->off] - addrs[i]; 1040 + 1031 1041 if (!jmp_offset) 1032 1042 /* optimize out nop jumps */ 1033 1043 break;
+10 -1
drivers/net/phy/phy_device.c
··· 535 535 536 536 /* Grab the bits from PHYIR1, and put them in the upper half */ 537 537 phy_reg = mdiobus_read(bus, addr, MII_PHYSID1); 538 - if (phy_reg < 0) 538 + if (phy_reg < 0) { 539 + /* if there is no device, return without an error so scanning 540 + * the bus works properly 541 + */ 542 + if (phy_reg == -EIO || phy_reg == -ENODEV) { 543 + *phy_id = 0xffffffff; 544 + return 0; 545 + } 546 + 539 547 return -EIO; 548 + } 540 549 541 550 *phy_id = (phy_reg & 0xffff) << 16; 542 551
+3 -1
include/linux/bpf.h
··· 33 33 void (*map_release)(struct bpf_map *map, struct file *map_file); 34 34 void (*map_free)(struct bpf_map *map); 35 35 int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); 36 + void (*map_release_uref)(struct bpf_map *map); 36 37 37 38 /* funcs callable from userspace and from eBPF programs */ 38 39 void *(*map_lookup_elem)(struct bpf_map *map, void *key); ··· 366 365 struct bpf_prog **_prog, *__prog; \ 367 366 struct bpf_prog_array *_array; \ 368 367 u32 _ret = 1; \ 368 + preempt_disable(); \ 369 369 rcu_read_lock(); \ 370 370 _array = rcu_dereference(array); \ 371 371 if (unlikely(check_non_null && !_array))\ ··· 378 376 } \ 379 377 _out: \ 380 378 rcu_read_unlock(); \ 379 + preempt_enable_no_resched(); \ 381 380 _ret; \ 382 381 }) 383 382 ··· 451 448 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, 452 449 void *key, void *value, u64 map_flags); 453 450 int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); 454 - void bpf_fd_array_map_clear(struct bpf_map *map); 455 451 int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, 456 452 void *key, void *value, u64 map_flags); 457 453 int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
+2 -1
kernel/bpf/arraymap.c
··· 526 526 } 527 527 528 528 /* decrement refcnt of all bpf_progs that are stored in this map */ 529 - void bpf_fd_array_map_clear(struct bpf_map *map) 529 + static void bpf_fd_array_map_clear(struct bpf_map *map) 530 530 { 531 531 struct bpf_array *array = container_of(map, struct bpf_array, map); 532 532 int i; ··· 545 545 .map_fd_get_ptr = prog_fd_array_get_ptr, 546 546 .map_fd_put_ptr = prog_fd_array_put_ptr, 547 547 .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, 548 + .map_release_uref = bpf_fd_array_map_clear, 548 549 }; 549 550 550 551 static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
+47 -4
kernel/bpf/sockmap.c
··· 43 43 #include <net/tcp.h> 44 44 #include <linux/ptr_ring.h> 45 45 #include <net/inet_common.h> 46 + #include <linux/sched/signal.h> 46 47 47 48 #define SOCK_CREATE_FLAG_MASK \ 48 49 (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) ··· 524 523 i = md->sg_start; 525 524 526 525 do { 527 - r->sg_data[i] = md->sg_data[i]; 528 - 529 526 size = (apply && apply_bytes < md->sg_data[i].length) ? 530 527 apply_bytes : md->sg_data[i].length; 531 528 ··· 534 535 } 535 536 536 537 sk_mem_charge(sk, size); 538 + r->sg_data[i] = md->sg_data[i]; 537 539 r->sg_data[i].length = size; 538 540 md->sg_data[i].length -= size; 539 541 md->sg_data[i].offset += size; ··· 732 732 return err; 733 733 } 734 734 735 + static int bpf_wait_data(struct sock *sk, 736 + struct smap_psock *psk, int flags, 737 + long timeo, int *err) 738 + { 739 + int rc; 740 + 741 + DEFINE_WAIT_FUNC(wait, woken_wake_function); 742 + 743 + add_wait_queue(sk_sleep(sk), &wait); 744 + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 745 + rc = sk_wait_event(sk, &timeo, 746 + !list_empty(&psk->ingress) || 747 + !skb_queue_empty(&sk->sk_receive_queue), 748 + &wait); 749 + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 750 + remove_wait_queue(sk_sleep(sk), &wait); 751 + 752 + return rc; 753 + } 754 + 735 755 static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, 736 756 int nonblock, int flags, int *addr_len) 737 757 { ··· 775 755 return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); 776 756 777 757 lock_sock(sk); 758 + bytes_ready: 778 759 while (copied != len) { 779 760 struct scatterlist *sg; 780 761 struct sk_msg_buff *md; ··· 828 807 consume_skb(md->skb); 829 808 kfree(md); 830 809 } 810 + } 811 + 812 + if (!copied) { 813 + long timeo; 814 + int data; 815 + int err = 0; 816 + 817 + timeo = sock_rcvtimeo(sk, nonblock); 818 + data = bpf_wait_data(sk, psock, flags, timeo, &err); 819 + 820 + if (data) { 821 + if (!skb_queue_empty(&sk->sk_receive_queue)) { 822 + release_sock(sk); 823 + smap_release_sock(psock, sk); 824 + copied = tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); 825 + return copied; 826 + } 827 + goto bytes_ready; 828 + } 829 + 830 + if (err) 831 + copied = err; 831 832 } 832 833 833 834 release_sock(sk); ··· 1874 1831 return err; 1875 1832 } 1876 1833 1877 - static void sock_map_release(struct bpf_map *map, struct file *map_file) 1834 + static void sock_map_release(struct bpf_map *map) 1878 1835 { 1879 1836 struct bpf_stab *stab = container_of(map, struct bpf_stab, map); 1880 1837 struct bpf_prog *orig; ··· 1898 1855 .map_get_next_key = sock_map_get_next_key, 1899 1856 .map_update_elem = sock_map_update_elem, 1900 1857 .map_delete_elem = sock_map_delete_elem, 1901 - .map_release = sock_map_release, 1858 + .map_release_uref = sock_map_release, 1902 1859 }; 1903 1860 1904 1861 BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
+2 -2
kernel/bpf/syscall.c
··· 260 260 static void bpf_map_put_uref(struct bpf_map *map) 261 261 { 262 262 if (atomic_dec_and_test(&map->usercnt)) { 263 - if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) 264 - bpf_fd_array_map_clear(map); 263 + if (map->ops->map_release_uref) 264 + map->ops->map_release_uref(map); 265 265 } 266 266 } 267 267
+1
net/core/filter.c
··· 3280 3280 skb_dst_set(skb, (struct dst_entry *) md); 3281 3281 3282 3282 info = &md->u.tun_info; 3283 + memset(info, 0, sizeof(*info)); 3283 3284 info->mode = IP_TUNNEL_INFO_TX; 3284 3285 3285 3286 info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
+2 -1
net/rds/ib_cm.c
··· 547 547 rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd, 548 548 ic->i_send_cq, ic->i_recv_cq); 549 549 550 - return ret; 550 + goto out; 551 551 552 552 sends_out: 553 553 vfree(ic->i_sends); ··· 572 572 ic->i_send_cq = NULL; 573 573 rds_ibdev_out: 574 574 rds_ib_remove_conn(rds_ibdev, conn); 575 + out: 575 576 rds_ib_dev_put(rds_ibdev); 576 577 577 578 return ret;
-4
net/smc/af_smc.c
··· 978 978 } 979 979 980 980 out: 981 - if (lsmc->clcsock) { 982 - sock_release(lsmc->clcsock); 983 - lsmc->clcsock = NULL; 984 - } 985 981 release_sock(lsk); 986 982 sock_put(&lsmc->sk); /* sock_hold in smc_listen */ 987 983 }
+5 -2
samples/sockmap/Makefile
··· 65 65 # asm/sysreg.h - inline assembly used by it is incompatible with llvm. 66 66 # But, there is no easy way to fix it, so just exclude it since it is 67 67 # useless for BPF samples. 68 + # 69 + # -target bpf option required with SK_MSG programs, this is to ensure 70 + # reading 'void *' data types for data and data_end are __u64 reads. 68 71 $(obj)/%.o: $(src)/%.c 69 72 $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \ 70 73 -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \ 71 74 -Wno-compare-distinct-pointer-types \ 72 75 -Wno-gnu-variable-sized-type-not-at-end \ 73 76 -Wno-address-of-packed-member -Wno-tautological-compare \ 74 - -Wno-unknown-warning-option \ 75 - -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ 77 + -Wno-unknown-warning-option -O2 -target bpf \ 78 + -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
+2
tools/bpf/Makefile
··· 76 76 $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ 77 77 78 78 $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c 79 + $(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c 80 + $(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c 79 81 80 82 clean: bpftool_clean 81 83 $(call QUIET_CLEAN, bpf-progs)