Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2018-02-20

The following pull-request contains BPF updates for your *net* tree.

The main changes are:

1) Fix a memory leak in LPM trie's map_free() callback function, where
the trie structure itself was not freed since the initial implementation.
Also, a synchronize_rcu() was needed in order to wait for outstanding
programs accessing the trie to complete, from Yonghong.

2) Fix sock_map_alloc()'s error path in order to correctly propagate
the -EINVAL error in the case of overly large allocation requests. This
bug was just recently introduced when fixing close hooks via the ULP
layer, fix from Eric.

3) Do not use GFP_ATOMIC in __cpu_map_entry_alloc(). The reason is that
this will not work with the recent __ptr_ring_init_queue_alloc() conversion
to kvmalloc_array(), where in the case of a fallback to vmalloc() that GFP
flag is invalid, from Jason.

4) Fix two recent syzkaller warnings: i) fix bpf_prog_array_copy_to_user()
when a prog query with a big number of ids was performed, where we'd
otherwise trigger a warning from the allocator side, ii) fix a missing
mlock precharge on arraymaps, from Daniel.

5) Two fixes for bpftool in order to avoid breaking JSON output when used
in batch mode, from Quentin.

6) Move a pr_debug() in libbpf in order to avoid having an otherwise
uninitialized variable in bpf_program__reloc_text(), from Jeremy.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+36 -22
+16 -12
kernel/bpf/arraymap.c
··· 73 73 static struct bpf_map *array_map_alloc(union bpf_attr *attr) 74 74 { 75 75 bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; 76 - int numa_node = bpf_map_attr_numa_node(attr); 76 + int ret, numa_node = bpf_map_attr_numa_node(attr); 77 77 u32 elem_size, index_mask, max_entries; 78 78 bool unpriv = !capable(CAP_SYS_ADMIN); 79 + u64 cost, array_size, mask64; 79 80 struct bpf_array *array; 80 - u64 array_size, mask64; 81 81 82 82 elem_size = round_up(attr->value_size, 8); 83 83 ··· 109 109 array_size += (u64) max_entries * elem_size; 110 110 111 111 /* make sure there is no u32 overflow later in round_up() */ 112 - if (array_size >= U32_MAX - PAGE_SIZE) 112 + cost = array_size; 113 + if (cost >= U32_MAX - PAGE_SIZE) 113 114 return ERR_PTR(-ENOMEM); 115 + if (percpu) { 116 + cost += (u64)attr->max_entries * elem_size * num_possible_cpus(); 117 + if (cost >= U32_MAX - PAGE_SIZE) 118 + return ERR_PTR(-ENOMEM); 119 + } 120 + cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; 121 + 122 + ret = bpf_map_precharge_memlock(cost); 123 + if (ret < 0) 124 + return ERR_PTR(ret); 114 125 115 126 /* allocate all map elements and zero-initialize them */ 116 127 array = bpf_map_area_alloc(array_size, numa_node); ··· 132 121 133 122 /* copy mandatory map attributes */ 134 123 bpf_map_init_from_attr(&array->map, attr); 124 + array->map.pages = cost; 135 125 array->elem_size = elem_size; 136 126 137 - if (!percpu) 138 - goto out; 139 - 140 - array_size += (u64) attr->max_entries * elem_size * num_possible_cpus(); 141 - 142 - if (array_size >= U32_MAX - PAGE_SIZE || 143 - bpf_array_alloc_percpu(array)) { 127 + if (percpu && bpf_array_alloc_percpu(array)) { 144 128 bpf_map_area_free(array); 145 129 return ERR_PTR(-ENOMEM); 146 130 } 147 - out: 148 - array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT; 149 131 150 132 return &array->map; 151 133 }
+1 -1
kernel/bpf/core.c
··· 1590 1590 * so always copy 'cnt' prog_ids to the user. 1591 1591 * In a rare race the user will see zero prog_ids 1592 1592 */ 1593 - ids = kcalloc(cnt, sizeof(u32), GFP_USER); 1593 + ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN); 1594 1594 if (!ids) 1595 1595 return -ENOMEM; 1596 1596 rcu_read_lock();
+1 -1
kernel/bpf/cpumap.c
··· 334 334 static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu, 335 335 int map_id) 336 336 { 337 - gfp_t gfp = GFP_ATOMIC|__GFP_NOWARN; 337 + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; 338 338 struct bpf_cpu_map_entry *rcpu; 339 339 int numa, err; 340 340
+7 -4
kernel/bpf/lpm_trie.c
··· 555 555 struct lpm_trie_node __rcu **slot; 556 556 struct lpm_trie_node *node; 557 557 558 - raw_spin_lock(&trie->lock); 558 + /* Wait for outstanding programs to complete 559 + * update/lookup/delete/get_next_key and free the trie. 560 + */ 561 + synchronize_rcu(); 559 562 560 563 /* Always start at the root and walk down to a node that has no 561 564 * children. Then free that node, nullify its reference in the parent ··· 572 569 node = rcu_dereference_protected(*slot, 573 570 lockdep_is_held(&trie->lock)); 574 571 if (!node) 575 - goto unlock; 572 + goto out; 576 573 577 574 if (rcu_access_pointer(node->child[0])) { 578 575 slot = &node->child[0]; ··· 590 587 } 591 588 } 592 589 593 - unlock: 594 - raw_spin_unlock(&trie->lock); 590 + out: 591 + kfree(trie); 595 592 } 596 593 597 594 static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
+2 -1
kernel/bpf/sockmap.c
··· 521 521 static struct bpf_map *sock_map_alloc(union bpf_attr *attr) 522 522 { 523 523 struct bpf_stab *stab; 524 - int err = -EINVAL; 525 524 u64 cost; 525 + int err; 526 526 527 527 if (!capable(CAP_NET_ADMIN)) 528 528 return ERR_PTR(-EPERM); ··· 547 547 548 548 /* make sure page count doesn't overflow */ 549 549 cost = (u64) stab->map.max_entries * sizeof(struct sock *); 550 + err = -EINVAL; 550 551 if (cost >= U32_MAX - PAGE_SIZE) 551 552 goto free_stab; 552 553
+2
kernel/trace/bpf_trace.c
··· 872 872 return -EINVAL; 873 873 if (copy_from_user(&query, uquery, sizeof(query))) 874 874 return -EFAULT; 875 + if (query.ids_len > BPF_TRACE_MAX_PROGS) 876 + return -E2BIG; 875 877 876 878 mutex_lock(&bpf_event_mutex); 877 879 ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
+1 -1
tools/bpf/bpftool/main.c
··· 244 244 } 245 245 246 246 if (errno && errno != ENOENT) { 247 - perror("reading batch file failed"); 247 + p_err("reading batch file failed: %s", strerror(errno)); 248 248 err = -1; 249 249 } else { 250 250 p_info("processed %d lines", lines);
+3
tools/bpf/bpftool/prog.c
··· 774 774 n < 0 ? strerror(errno) : "short write"); 775 775 goto err_free; 776 776 } 777 + 778 + if (json_output) 779 + jsonw_null(json_wtr); 777 780 } else { 778 781 if (member_len == &info.jited_prog_len) { 779 782 const char *name = NULL;
+3 -2
tools/lib/bpf/libbpf.c
··· 1060 1060 prog->insns = new_insn; 1061 1061 prog->main_prog_cnt = prog->insns_cnt; 1062 1062 prog->insns_cnt = new_cnt; 1063 + pr_debug("added %zd insn from %s to prog %s\n", 1064 + text->insns_cnt, text->section_name, 1065 + prog->section_name); 1063 1066 } 1064 1067 insn = &prog->insns[relo->insn_idx]; 1065 1068 insn->imm += prog->main_prog_cnt - relo->insn_idx; 1066 - pr_debug("added %zd insn from %s to prog %s\n", 1067 - text->insns_cnt, text->section_name, prog->section_name); 1068 1069 return 0; 1069 1070 } 1070 1071