Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Alexei Starovoitov says:

====================
pull-request: bpf 2020-08-21

The following pull-request contains BPF updates for your *net* tree.

We've added 11 non-merge commits during the last 5 days, which contain
a total of 12 files changed, 78 insertions(+), 24 deletions(-).

The main changes are:

1) Three fixes in the BPF task iterator logic, from Yonghong.

2) Fix for compressed DWARF sections in vmlinux (resolve_btfids), from Jiri.

3) Fix for an XDP attach regression when no mode flag is specified, from Andrii.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+78 -24
+5 -5
include/uapi/linux/bpf.h
···
  *
  *		Also, note that **bpf_trace_printk**\ () is slow, and should
  *		only be used for debugging purposes. For this reason, a notice
- *		bloc (spanning several lines) is printed to kernel logs and
+ *		block (spanning several lines) is printed to kernel logs and
  *		states that the helper should not be used "for production use"
  *		the first time this helper is used (or more precisely, when
  *		**trace_printk**\ () buffers are allocated). For passing values
···
  *
  *		int ret;
  *		struct bpf_tunnel_key key = {};
- *
+ *
  *		ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
  *		if (ret < 0)
  *			return TC_ACT_SHOT;	// drop packet
- *
+ *
  *		if (key.remote_ipv4 != 0x0a000001)
  *			return TC_ACT_SHOT;	// drop packet
- *
+ *
  *		return TC_ACT_OK;		// accept packet
  *
  *		This interface can also be used with all encapsulation devices
···
  *	Description
  *		Retrieve the realm or the route, that is to say the
  *		**tclassid** field of the destination for the *skb*. The
- *		indentifier retrieved is a user-provided tag, similar to the
+ *		identifier retrieved is a user-provided tag, similar to the
  *		one used with the net_cls cgroup (see description for
  *		**bpf_get_cgroup_classid**\ () helper), but here this tag is
  *		held by a route (a destination entry), not by a task.
+14 -1
kernel/bpf/bpf_iter.c
···
 	iter_priv->done_stop = true;
 }
 
+/* maximum visited objects before bailing out */
+#define MAX_ITER_OBJECTS	1000000
+
 /* bpf_seq_read, a customized and simpler version for bpf iterator.
  * no_llseek is assumed for this file.
  * The following are differences from seq_read():
···
 {
 	struct seq_file *seq = file->private_data;
 	size_t n, offs, copied = 0;
-	int err = 0;
+	int err = 0, num_objs = 0;
 	void *p;
 
 	mutex_lock(&seq->lock);
···
 	while (1) {
 		loff_t pos = seq->index;
 
+		num_objs++;
 		offs = seq->count;
 		p = seq->op->next(seq, p, &seq->index);
 		if (pos == seq->index) {
···
 
 		if (seq->count >= size)
 			break;
+
+		if (num_objs >= MAX_ITER_OBJECTS) {
+			if (offs == 0) {
+				err = -EAGAIN;
+				seq->op->stop(seq, p);
+				goto done;
+			}
+			break;
+		}
 
 		err = seq->op->show(seq, p);
 		if (err > 0) {
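The -EAGAIN bail-out above means that user space reading an iterator file must be prepared to retry the read. Below is a minimal, hedged sketch of such a read loop in plain C; the pin path /sys/fs/bpf/task_iter is hypothetical and error handling is reduced to the essentials (bpftool's pids.c receives the equivalent fix further down).

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/sys/fs/bpf/task_iter", O_RDONLY); /* hypothetical pin path */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	for (;;) {
		n = read(fd, buf, sizeof(buf));
		if (n == 0)			/* iterator exhausted */
			break;
		if (n < 0) {
			if (errno == EAGAIN)	/* kernel bailed out after MAX_ITER_OBJECTS; just retry */
				continue;
			perror("read");
			break;
		}
		fwrite(buf, 1, n, stdout);	/* forward the BPF program's seq output */
	}
	close(fd);
	return 0;
}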
+4 -2
kernel/bpf/task_iter.c
···
 
 	rcu_read_lock();
 retry:
-	pid = idr_get_next(&ns->idr, tid);
+	pid = find_ge_pid(*tid, ns);
 	if (pid) {
+		*tid = pid_nr_ns(pid, ns);
 		task = get_pid_task(pid, PIDTYPE_PID);
 		if (!task) {
 			++*tid;
···
 		f = fcheck_files(curr_files, curr_fd);
 		if (!f)
 			continue;
+		if (!get_file_rcu(f))
+			continue;
 
 		/* set info->fd */
 		info->fd = curr_fd;
-		get_file(f);
 		rcu_read_unlock();
 		return f;
 	}
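For context, the iterator being fixed here is driven by a BPF program of type iter/task. A rough sketch of such a program, modelled on the bpf_iter selftests, follows; the "vmlinux.h" include stands in for whatever kernel type definitions the build provides, and the output format is arbitrary.

#include "vmlinux.h"		/* assumed source of kernel type definitions */
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
{
	struct seq_file *seq = ctx->meta->seq;
	struct task_struct *task = ctx->task;
	static const char fmt[] = "%d %s\n";
	unsigned long long args[2];

	if (!task)
		return 0;	/* end of iteration */

	args[0] = task->pid;
	args[1] = (unsigned long long)(long)task->comm;	/* %s expects a pointer */
	bpf_seq_printf(seq, fmt, sizeof(fmt), args, sizeof(args));
	return 0;
}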
+8 -6
net/core/dev.c
···
 	int flags;
 };
 
-static enum bpf_xdp_mode dev_xdp_mode(u32 flags)
+static enum bpf_xdp_mode dev_xdp_mode(struct net_device *dev, u32 flags)
 {
 	if (flags & XDP_FLAGS_HW_MODE)
 		return XDP_MODE_HW;
 	if (flags & XDP_FLAGS_DRV_MODE)
 		return XDP_MODE_DRV;
-	return XDP_MODE_SKB;
+	if (flags & XDP_FLAGS_SKB_MODE)
+		return XDP_MODE_SKB;
+	return dev->netdev_ops->ndo_bpf ? XDP_MODE_DRV : XDP_MODE_SKB;
 }
 
 static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode)
···
 		return -EINVAL;
 	}
 
-	mode = dev_xdp_mode(flags);
+	mode = dev_xdp_mode(dev, flags);
 	/* can't replace attached link */
 	if (dev_xdp_link(dev, mode)) {
 		NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link");
···
 
 	ASSERT_RTNL();
 
-	mode = dev_xdp_mode(link->flags);
+	mode = dev_xdp_mode(dev, link->flags);
 	if (dev_xdp_link(dev, mode) != link)
 		return -EINVAL;
 
···
 		goto out_unlock;
 	}
 
-	mode = dev_xdp_mode(xdp_link->flags);
+	mode = dev_xdp_mode(xdp_link->dev, xdp_link->flags);
 	bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode);
 	err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL,
 			      xdp_link->flags, new_prog);
···
 int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		      int fd, int expected_fd, u32 flags)
 {
-	enum bpf_xdp_mode mode = dev_xdp_mode(flags);
+	enum bpf_xdp_mode mode = dev_xdp_mode(dev, flags);
 	struct bpf_prog *new_prog = NULL, *old_prog = NULL;
 	int err;
 
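The regression fixed here affected programs attached without an explicit mode flag: with flags == 0 the kernel is again expected to pick native (driver) mode when the device implements ndo_bpf, and to fall back to generic (skb) mode otherwise. A hedged sketch of such an attach with the libbpf API of this era follows; ifindex and prog_fd are assumed to come from the caller.

#include <bpf/libbpf.h>
#include <linux/if_link.h>

/* Minimal sketch: attach prog_fd to ifindex, letting the kernel choose
 * the XDP mode, with an explicit generic-mode retry as a fallback.
 */
static int attach_xdp(int ifindex, int prog_fd)
{
	int err;

	/* no XDP_FLAGS_*_MODE bit set: kernel picks native or generic mode */
	err = bpf_set_link_xdp_fd(ifindex, prog_fd, 0);
	if (err)
		err = bpf_set_link_xdp_fd(ifindex, prog_fd, XDP_FLAGS_SKB_MODE);
	return err;
}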
+2
tools/bpf/bpftool/pids.c
···
 	while (true) {
 		ret = read(fd, buf, sizeof(buf));
 		if (ret < 0) {
+			if (errno == EAGAIN)
+				continue;
 			err = -errno;
 			p_err("failed to read PID iterator output: %d", err);
 			goto out;
+36
tools/bpf/resolve_btfids/main.c
···
 	return btf_id__add(root, id, false);
 }
 
+/*
+ * The data of compressed section should be aligned to 4
+ * (for 32bit) or 8 (for 64 bit) bytes. The binutils ld
+ * sets sh_addralign to 1, which makes libelf fail with
+ * misaligned section error during the update:
+ *    FAILED elf_update(WRITE): invalid section alignment
+ *
+ * While waiting for ld fix, we fix the compressed sections
+ * sh_addralign value manualy.
+ */
+static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh)
+{
+	int expected = gelf_getclass(elf) == ELFCLASS32 ? 4 : 8;
+
+	if (!(sh->sh_flags & SHF_COMPRESSED))
+		return 0;
+
+	if (sh->sh_addralign == expected)
+		return 0;
+
+	pr_debug2(" - fixing wrong alignment sh_addralign %u, expected %u\n",
+		  sh->sh_addralign, expected);
+
+	sh->sh_addralign = expected;
+
+	if (gelf_update_shdr(scn, sh) == 0) {
+		printf("FAILED cannot update section header: %s\n",
+		       elf_errmsg(-1));
+		return -1;
+	}
+	return 0;
+}
+
 static int elf_collect(struct object *obj)
 {
 	Elf_Scn *scn = NULL;
···
 			obj->efile.idlist_shndx = idx;
 			obj->efile.idlist_addr = sh.sh_addr;
 		}
+
+		if (compressed_section_fix(elf, scn, &sh))
+			return -1;
 	}
 
 	return 0;
+5 -5
tools/include/uapi/linux/bpf.h
···
  *
  *		Also, note that **bpf_trace_printk**\ () is slow, and should
  *		only be used for debugging purposes. For this reason, a notice
- *		bloc (spanning several lines) is printed to kernel logs and
+ *		block (spanning several lines) is printed to kernel logs and
  *		states that the helper should not be used "for production use"
  *		the first time this helper is used (or more precisely, when
  *		**trace_printk**\ () buffers are allocated). For passing values
···
  *
  *		int ret;
  *		struct bpf_tunnel_key key = {};
- *
+ *
  *		ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
  *		if (ret < 0)
  *			return TC_ACT_SHOT;	// drop packet
- *
+ *
  *		if (key.remote_ipv4 != 0x0a000001)
  *			return TC_ACT_SHOT;	// drop packet
- *
+ *
  *		return TC_ACT_OK;		// accept packet
  *
  *		This interface can also be used with all encapsulation devices
···
  *	Description
  *		Retrieve the realm or the route, that is to say the
  *		**tclassid** field of the destination for the *skb*. The
- *		indentifier retrieved is a user-provided tag, similar to the
+ *		identifier retrieved is a user-provided tag, similar to the
  *		one used with the net_cls cgroup (see description for
  *		**bpf_get_cgroup_classid**\ () helper), but here this tag is
  *		held by a route (a destination entry), not by a task.
+1 -1
tools/lib/bpf/btf_dump.c
···
 			btf_dump_printf(d, ": %d", m_sz);
 			off = m_off + m_sz;
 		} else {
-			m_sz = max(0LL, btf__resolve_size(d->btf, m->type));
+			m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type));
 			off = m_off + m_sz * 8;
 		}
 		btf_dump_printf(d, ";");
+1 -1
tools/lib/bpf/libbpf.c
···
 	data = elf_getdata(scn, NULL);
 	if (!scn || !data) {
 		pr_warn("failed to get Elf_Data from map section %d (%s)\n",
-			obj->efile.maps_shndx, MAPS_ELF_SEC);
+			obj->efile.btf_maps_shndx, MAPS_ELF_SEC);
 		return -EINVAL;
 	}
 
-1
tools/testing/selftests/bpf/.gitignore
···
 test_tag
 FEATURE-DUMP.libbpf
 fixdep
-test_align
 test_dev_cgroup
 /test_progs*
 test_tcpbpf_user
+1 -1
tools/testing/selftests/bpf/Makefile
···
 
 # Order correspond to 'make run_tests' order
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
-	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
+	test_verifier_log test_dev_cgroup test_tcpbpf_user \
 	test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
 	test_cgroup_storage \
 	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \
+1 -1
tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
···
 	log_buf = va_arg(args, char *);
 	if (!log_buf)
 		goto out;
-	if (strstr(log_buf, err_str) == 0)
+	if (err_str && strstr(log_buf, err_str) == 0)
 		found = true;
 out:
 	printf(format, log_buf);