Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net_sched: destroy proto tp when all filters are gone

The kernel automatically creates a tp (with handle 0) for each
(kind, protocol, priority) tuple when we add a new filter,
but that tp is still left behind after we remove our own
filter, unless we omit the handle (which literally means
"all the filters under the tuple"). For example, this one
is left behind:

# tc filter show dev eth0
filter parent 8001: protocol arp pref 49152 basic

It is hard for user-space to clean these up on the kernel's
behalf, because filters like u32 are organized in a complex way.
So the kernel is responsible for removing a tp once all of its
filters are gone. Each type of filter stores its filters in its
own way, so each type has to provide its own way to check
whether all of its filters are gone.

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Cong Wang and committed by
David S. Miller
1e052be6 fc6c6c2b

+99 -23
+2 -2
include/net/sch_generic.h
··· 213 213 const struct tcf_proto *, 214 214 struct tcf_result *); 215 215 int (*init)(struct tcf_proto*); 216 - void (*destroy)(struct tcf_proto*); 216 + bool (*destroy)(struct tcf_proto*, bool); 217 217 218 218 unsigned long (*get)(struct tcf_proto*, u32 handle); 219 219 int (*change)(struct net *net, struct sk_buff *, ··· 399 399 const struct Qdisc_ops *ops, u32 parentid); 400 400 void __qdisc_calculate_pkt_len(struct sk_buff *skb, 401 401 const struct qdisc_size_table *stab); 402 - void tcf_destroy(struct tcf_proto *tp); 402 + bool tcf_destroy(struct tcf_proto *tp, bool force); 403 403 void tcf_destroy_chain(struct tcf_proto __rcu **fl); 404 404 405 405 /* Reset all TX qdiscs greater then index of a device. */
+10 -4
net/sched/cls_api.c
··· 286 286 RCU_INIT_POINTER(*back, next); 287 287 288 288 tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); 289 - tcf_destroy(tp); 289 + tcf_destroy(tp, true); 290 290 err = 0; 291 291 goto errout; 292 292 } ··· 301 301 err = -EEXIST; 302 302 if (n->nlmsg_flags & NLM_F_EXCL) { 303 303 if (tp_created) 304 - tcf_destroy(tp); 304 + tcf_destroy(tp, true); 305 305 goto errout; 306 306 } 307 307 break; 308 308 case RTM_DELTFILTER: 309 309 err = tp->ops->delete(tp, fh); 310 - if (err == 0) 310 + if (err == 0) { 311 311 tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); 312 + if (tcf_destroy(tp, false)) { 313 + struct tcf_proto *next = rtnl_dereference(tp->next); 314 + 315 + RCU_INIT_POINTER(*back, next); 316 + } 317 + } 312 318 goto errout; 313 319 case RTM_GETTFILTER: 314 320 err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); ··· 335 329 tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); 336 330 } else { 337 331 if (tp_created) 338 - tcf_destroy(tp); 332 + tcf_destroy(tp, true); 339 333 } 340 334 341 335 errout:
+5 -1
net/sched/cls_basic.c
··· 96 96 kfree(f); 97 97 } 98 98 99 - static void basic_destroy(struct tcf_proto *tp) 99 + static bool basic_destroy(struct tcf_proto *tp, bool force) 100 100 { 101 101 struct basic_head *head = rtnl_dereference(tp->root); 102 102 struct basic_filter *f, *n; 103 + 104 + if (!force && !list_empty(&head->flist)) 105 + return false; 103 106 104 107 list_for_each_entry_safe(f, n, &head->flist, link) { 105 108 list_del_rcu(&f->link); ··· 111 108 } 112 109 RCU_INIT_POINTER(tp->root, NULL); 113 110 kfree_rcu(head, rcu); 111 + return true; 114 112 } 115 113 116 114 static int basic_delete(struct tcf_proto *tp, unsigned long arg)
+5 -1
net/sched/cls_bpf.c
··· 137 137 return 0; 138 138 } 139 139 140 - static void cls_bpf_destroy(struct tcf_proto *tp) 140 + static bool cls_bpf_destroy(struct tcf_proto *tp, bool force) 141 141 { 142 142 struct cls_bpf_head *head = rtnl_dereference(tp->root); 143 143 struct cls_bpf_prog *prog, *tmp; 144 + 145 + if (!force && !list_empty(&head->plist)) 146 + return false; 144 147 145 148 list_for_each_entry_safe(prog, tmp, &head->plist, link) { 146 149 list_del_rcu(&prog->link); ··· 153 150 154 151 RCU_INIT_POINTER(tp->root, NULL); 155 152 kfree_rcu(head, rcu); 153 + return true; 156 154 } 157 155 158 156 static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
+5 -1
net/sched/cls_cgroup.c
··· 143 143 return err; 144 144 } 145 145 146 - static void cls_cgroup_destroy(struct tcf_proto *tp) 146 + static bool cls_cgroup_destroy(struct tcf_proto *tp, bool force) 147 147 { 148 148 struct cls_cgroup_head *head = rtnl_dereference(tp->root); 149 + 150 + if (!force) 151 + return false; 149 152 150 153 if (head) { 151 154 RCU_INIT_POINTER(tp->root, NULL); 152 155 call_rcu(&head->rcu, cls_cgroup_destroy_rcu); 153 156 } 157 + return true; 154 158 } 155 159 156 160 static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg)
+5 -1
net/sched/cls_flow.c
··· 557 557 return 0; 558 558 } 559 559 560 - static void flow_destroy(struct tcf_proto *tp) 560 + static bool flow_destroy(struct tcf_proto *tp, bool force) 561 561 { 562 562 struct flow_head *head = rtnl_dereference(tp->root); 563 563 struct flow_filter *f, *next; 564 + 565 + if (!force && !list_empty(&head->filters)) 566 + return false; 564 567 565 568 list_for_each_entry_safe(f, next, &head->filters, list) { 566 569 list_del_rcu(&f->list); ··· 571 568 } 572 569 RCU_INIT_POINTER(tp->root, NULL); 573 570 kfree_rcu(head, rcu); 571 + return true; 574 572 } 575 573 576 574 static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
+9 -2
net/sched/cls_fw.c
··· 133 133 kfree(f); 134 134 } 135 135 136 - static void fw_destroy(struct tcf_proto *tp) 136 + static bool fw_destroy(struct tcf_proto *tp, bool force) 137 137 { 138 138 struct fw_head *head = rtnl_dereference(tp->root); 139 139 struct fw_filter *f; 140 140 int h; 141 141 142 142 if (head == NULL) 143 - return; 143 + return true; 144 + 145 + if (!force) { 146 + for (h = 0; h < HTSIZE; h++) 147 + if (rcu_access_pointer(head->ht[h])) 148 + return false; 149 + } 144 150 145 151 for (h = 0; h < HTSIZE; h++) { 146 152 while ((f = rtnl_dereference(head->ht[h])) != NULL) { ··· 158 152 } 159 153 RCU_INIT_POINTER(tp->root, NULL); 160 154 kfree_rcu(head, rcu); 155 + return true; 161 156 } 162 157 163 158 static int fw_delete(struct tcf_proto *tp, unsigned long arg)
+10 -2
net/sched/cls_route.c
··· 277 277 kfree(f); 278 278 } 279 279 280 - static void route4_destroy(struct tcf_proto *tp) 280 + static bool route4_destroy(struct tcf_proto *tp, bool force) 281 281 { 282 282 struct route4_head *head = rtnl_dereference(tp->root); 283 283 int h1, h2; 284 284 285 285 if (head == NULL) 286 - return; 286 + return true; 287 + 288 + if (!force) { 289 + for (h1 = 0; h1 <= 256; h1++) { 290 + if (rcu_access_pointer(head->table[h1])) 291 + return false; 292 + } 293 + } 287 294 288 295 for (h1 = 0; h1 <= 256; h1++) { 289 296 struct route4_bucket *b; ··· 315 308 } 316 309 RCU_INIT_POINTER(tp->root, NULL); 317 310 kfree_rcu(head, rcu); 311 + return true; 318 312 } 319 313 320 314 static int route4_delete(struct tcf_proto *tp, unsigned long arg)
+10 -2
net/sched/cls_rsvp.h
··· 291 291 kfree_rcu(f, rcu); 292 292 } 293 293 294 - static void rsvp_destroy(struct tcf_proto *tp) 294 + static bool rsvp_destroy(struct tcf_proto *tp, bool force) 295 295 { 296 296 struct rsvp_head *data = rtnl_dereference(tp->root); 297 297 int h1, h2; 298 298 299 299 if (data == NULL) 300 - return; 300 + return true; 301 + 302 + if (!force) { 303 + for (h1 = 0; h1 < 256; h1++) { 304 + if (rcu_access_pointer(data->ht[h1])) 305 + return false; 306 + } 307 + } 301 308 302 309 RCU_INIT_POINTER(tp->root, NULL); 303 310 ··· 326 319 } 327 320 } 328 321 kfree_rcu(data, rcu); 322 + return true; 329 323 } 330 324 331 325 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
+5 -1
net/sched/cls_tcindex.c
··· 468 468 } 469 469 } 470 470 471 - static void tcindex_destroy(struct tcf_proto *tp) 471 + static bool tcindex_destroy(struct tcf_proto *tp, bool force) 472 472 { 473 473 struct tcindex_data *p = rtnl_dereference(tp->root); 474 474 struct tcf_walker walker; 475 + 476 + if (!force) 477 + return false; 475 478 476 479 pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p); 477 480 walker.count = 0; ··· 484 481 485 482 RCU_INIT_POINTER(tp->root, NULL); 486 483 call_rcu(&p->rcu, __tcindex_destroy); 484 + return true; 487 485 } 488 486 489 487
+24 -1
net/sched/cls_u32.c
··· 460 460 return -ENOENT; 461 461 } 462 462 463 - static void u32_destroy(struct tcf_proto *tp) 463 + static bool ht_empty(struct tc_u_hnode *ht) 464 + { 465 + unsigned int h; 466 + 467 + for (h = 0; h <= ht->divisor; h++) 468 + if (rcu_access_pointer(ht->ht[h])) 469 + return false; 470 + 471 + return true; 472 + } 473 + 474 + static bool u32_destroy(struct tcf_proto *tp, bool force) 464 475 { 465 476 struct tc_u_common *tp_c = tp->data; 466 477 struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); 467 478 468 479 WARN_ON(root_ht == NULL); 480 + 481 + if (!force) { 482 + if (root_ht) { 483 + if (root_ht->refcnt > 1) 484 + return false; 485 + if (root_ht->refcnt == 1) { 486 + if (!ht_empty(root_ht)) 487 + return false; 488 + } 489 + } 490 + } 469 491 470 492 if (root_ht && --root_ht->refcnt == 0) 471 493 u32_destroy_hnode(tp, root_ht); ··· 513 491 } 514 492 515 493 tp->data = NULL; 494 + return true; 516 495 } 517 496 518 497 static int u32_delete(struct tcf_proto *tp, unsigned long arg)
+9 -5
net/sched/sch_api.c
··· 1858 1858 } 1859 1859 EXPORT_SYMBOL(tc_classify); 1860 1860 1861 - void tcf_destroy(struct tcf_proto *tp) 1861 + bool tcf_destroy(struct tcf_proto *tp, bool force) 1862 1862 { 1863 - tp->ops->destroy(tp); 1864 - module_put(tp->ops->owner); 1865 - kfree_rcu(tp, rcu); 1863 + if (tp->ops->destroy(tp, force)) { 1864 + module_put(tp->ops->owner); 1865 + kfree_rcu(tp, rcu); 1866 + return true; 1867 + } 1868 + 1869 + return false; 1866 1870 } 1867 1871 1868 1872 void tcf_destroy_chain(struct tcf_proto __rcu **fl) ··· 1875 1871 1876 1872 while ((tp = rtnl_dereference(*fl)) != NULL) { 1877 1873 RCU_INIT_POINTER(*fl, tp->next); 1878 - tcf_destroy(tp); 1874 + tcf_destroy(tp, true); 1879 1875 } 1880 1876 } 1881 1877 EXPORT_SYMBOL(tcf_destroy_chain);