Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: sched: flower: refactor reoffload for concurrent access

Recent changes that introduced unlocked flower did not properly account for
the case when reoffload is initiated concurrently with filter updates. To fix
the issue, extend flower with 'hw_filters' list that is used to store
filters that don't have 'skip_hw' flag set. Filter is added to the list
when it is inserted to hardware and only removed from it after being
unoffloaded from all drivers that parent block is attached to. This ensures
that concurrent reoffload can still access a filter that is being deleted, and
prevents a race condition where the driver callback could be removed while the
filter is no longer accessible through idr but is still present in hardware.

Refactor fl_change() to respect new filter reference counter and to release
filter reference with __fl_put() in case of error, instead of directly
deallocating filter memory. This allows for concurrent access to filter
from fl_reoffload() and protects it with reference counting. Refactor
fl_reoffload() to iterate over hw_filters list instead of idr. Implement
fl_get_next_hw_filter() helper function that is used to iterate over
hw_filters list with reference counting and skips filters that are being
concurrently deleted.

Fixes: 92149190067d ("net: sched: flower: set unlocked flag for flower proto ops")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Vlad Buslov and committed by
David S. Miller
c049d56e a3ddd94f

+57 -22
+57 -22
net/sched/cls_flower.c
··· 90 90 struct rhashtable ht; 91 91 spinlock_t masks_lock; /* Protect masks list */ 92 92 struct list_head masks; 93 + struct list_head hw_filters; 93 94 struct rcu_work rwork; 94 95 struct idr handle_idr; 95 96 }; ··· 103 102 struct tcf_result res; 104 103 struct fl_flow_key key; 105 104 struct list_head list; 105 + struct list_head hw_list; 106 106 u32 handle; 107 107 u32 flags; 108 108 u32 in_hw_count; ··· 317 315 318 316 spin_lock_init(&head->masks_lock); 319 317 INIT_LIST_HEAD_RCU(&head->masks); 318 + INIT_LIST_HEAD(&head->hw_filters); 320 319 rcu_assign_pointer(tp->root, head); 321 320 idr_init(&head->handle_idr); 322 321 ··· 355 352 return true; 356 353 } 357 354 355 + static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp) 356 + { 357 + /* Flower classifier only changes root pointer during init and destroy. 358 + * Users must obtain reference to tcf_proto instance before calling its 359 + * API, so tp->root pointer is protected from concurrent call to 360 + * fl_destroy() by reference counting. 
361 + */ 362 + return rcu_dereference_raw(tp->root); 363 + } 364 + 358 365 static void __fl_destroy_filter(struct cls_fl_filter *f) 359 366 { 360 367 tcf_exts_destroy(&f->exts); ··· 395 382 396 383 tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); 397 384 spin_lock(&tp->lock); 385 + list_del_init(&f->hw_list); 398 386 tcf_block_offload_dec(block, &f->flags); 399 387 spin_unlock(&tp->lock); 400 388 ··· 407 393 struct cls_fl_filter *f, bool rtnl_held, 408 394 struct netlink_ext_ack *extack) 409 395 { 396 + struct cls_fl_head *head = fl_head_dereference(tp); 410 397 struct tc_cls_flower_offload cls_flower = {}; 411 398 struct tcf_block *block = tp->chain->block; 412 399 bool skip_sw = tc_skip_sw(f->flags); ··· 459 444 goto errout; 460 445 } 461 446 447 + spin_lock(&tp->lock); 448 + list_add(&f->hw_list, &head->hw_filters); 449 + spin_unlock(&tp->lock); 462 450 errout: 463 451 if (!rtnl_held) 464 452 rtnl_unlock(); ··· 493 475 rtnl_unlock(); 494 476 } 495 477 496 - static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp) 497 - { 498 - /* Flower classifier only changes root pointer during init and destroy. 499 - * Users must obtain reference to tcf_proto instance before calling its 500 - * API, so tp->root pointer is protected from concurrent call to 501 - * fl_destroy() by reference counting. 
502 - */ 503 - return rcu_dereference_raw(tp->root); 504 - } 505 - 506 478 static void __fl_put(struct cls_fl_filter *f) 507 479 { 508 480 if (!refcount_dec_and_test(&f->refcnt)) 509 481 return; 510 - 511 - WARN_ON(!f->deleted); 512 482 513 483 if (tcf_exts_get_net(&f->exts)) 514 484 tcf_queue_work(&f->rwork, fl_destroy_filter_work); ··· 1528 1522 err = -ENOBUFS; 1529 1523 goto errout_tb; 1530 1524 } 1525 + INIT_LIST_HEAD(&fnew->hw_list); 1531 1526 refcount_set(&fnew->refcnt, 1); 1532 1527 1533 1528 err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0); ··· 1576 1569 goto errout_hw; 1577 1570 } 1578 1571 1579 - refcount_inc(&fnew->refcnt); 1580 1572 if (fold) { 1581 1573 /* Fold filter was deleted concurrently. Retry lookup. */ 1582 1574 if (fold->deleted) { ··· 1597 1591 in_ht = true; 1598 1592 } 1599 1593 1594 + refcount_inc(&fnew->refcnt); 1600 1595 rhashtable_remove_fast(&fold->mask->ht, 1601 1596 &fold->ht_node, 1602 1597 fold->mask->filter_ht_params); ··· 1638 1631 if (err) 1639 1632 goto errout_hw; 1640 1633 1634 + refcount_inc(&fnew->refcnt); 1641 1635 fnew->handle = handle; 1642 1636 list_add_tail_rcu(&fnew->list, &fnew->mask->filters); 1643 1637 spin_unlock(&tp->lock); ··· 1650 1642 kfree(mask); 1651 1643 return 0; 1652 1644 1645 + errout_ht: 1646 + spin_lock(&tp->lock); 1653 1647 errout_hw: 1648 + fnew->deleted = true; 1654 1649 spin_unlock(&tp->lock); 1655 1650 if (!tc_skip_hw(fnew->flags)) 1656 1651 fl_hw_destroy_filter(tp, fnew, rtnl_held, NULL); 1657 - errout_ht: 1658 1652 if (in_ht) 1659 1653 rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node, 1660 1654 fnew->mask->filter_ht_params); 1661 1655 errout_mask: 1662 1656 fl_mask_put(head, fnew->mask); 1663 1657 errout: 1664 - tcf_exts_get_net(&fnew->exts); 1665 - tcf_queue_work(&fnew->rwork, fl_destroy_filter_work); 1658 + __fl_put(fnew); 1666 1659 errout_tb: 1667 1660 kfree(tb); 1668 1661 errout_mask_alloc: ··· 1708 1699 } 1709 1700 } 1710 1701 1702 + static struct cls_fl_filter * 1703 + 
fl_get_next_hw_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool add) 1704 + { 1705 + struct cls_fl_head *head = fl_head_dereference(tp); 1706 + 1707 + spin_lock(&tp->lock); 1708 + if (list_empty(&head->hw_filters)) { 1709 + spin_unlock(&tp->lock); 1710 + return NULL; 1711 + } 1712 + 1713 + if (!f) 1714 + f = list_entry(&head->hw_filters, struct cls_fl_filter, 1715 + hw_list); 1716 + list_for_each_entry_continue(f, &head->hw_filters, hw_list) { 1717 + if (!(add && f->deleted) && refcount_inc_not_zero(&f->refcnt)) { 1718 + spin_unlock(&tp->lock); 1719 + return f; 1720 + } 1721 + } 1722 + 1723 + spin_unlock(&tp->lock); 1724 + return NULL; 1725 + } 1726 + 1711 1727 static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, 1712 1728 void *cb_priv, struct netlink_ext_ack *extack) 1713 1729 { 1714 1730 struct tc_cls_flower_offload cls_flower = {}; 1715 1731 struct tcf_block *block = tp->chain->block; 1716 - unsigned long handle = 0; 1717 - struct cls_fl_filter *f; 1732 + struct cls_fl_filter *f = NULL; 1718 1733 int err; 1719 1734 1720 - while ((f = fl_get_next_filter(tp, &handle))) { 1721 - if (tc_skip_hw(f->flags)) 1722 - goto next_flow; 1735 + /* hw_filters list can only be changed by hw offload functions after 1736 + * obtaining rtnl lock. Make sure it is not changed while reoffload is 1737 + * iterating it. 1738 + */ 1739 + ASSERT_RTNL(); 1723 1740 1741 + while ((f = fl_get_next_hw_filter(tp, f, add))) { 1724 1742 cls_flower.rule = 1725 1743 flow_rule_alloc(tcf_exts_num_actions(&f->exts)); 1726 1744 if (!cls_flower.rule) { ··· 1793 1757 add); 1794 1758 spin_unlock(&tp->lock); 1795 1759 next_flow: 1796 - handle++; 1797 1760 __fl_put(f); 1798 1761 } 1799 1762