Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mptcp: add netlink event support

Allow userspace (mptcpd) to subscribe to mptcp genl multicast events.
This implementation reuses the same event API as the mptcp kernel fork
to ease integration of existing tools, e.g. mptcpd.

Supported events include:
1. start and close of an mptcp connection
2. start and close of subflows (joins)
3. announcement and withdrawal of addresses
4. subflow priority (backup/non-backup) change.

Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Florian Westphal and committed by David S. Miller
b911c97c 4d54cc32

+364 -7
+74
include/uapi/linux/mptcp.h
··· 36 36 /* netlink interface */ 37 37 #define MPTCP_PM_NAME "mptcp_pm" 38 38 #define MPTCP_PM_CMD_GRP_NAME "mptcp_pm_cmds" 39 + #define MPTCP_PM_EV_GRP_NAME "mptcp_pm_events" 39 40 #define MPTCP_PM_VER 0x1 40 41 41 42 /* ··· 104 103 __u64 mptcpi_snd_una; 105 104 __u64 mptcpi_rcv_nxt; 106 105 }; 106 + 107 + /* 108 + * MPTCP_EVENT_CREATED: token, family, saddr4 | saddr6, daddr4 | daddr6, 109 + * sport, dport 110 + * A new MPTCP connection has been created. It is the good time to allocate 111 + * memory and send ADD_ADDR if needed. Depending on the traffic-patterns 112 + * it can take a long time until the MPTCP_EVENT_ESTABLISHED is sent. 113 + * 114 + * MPTCP_EVENT_ESTABLISHED: token, family, saddr4 | saddr6, daddr4 | daddr6, 115 + * sport, dport 116 + * A MPTCP connection is established (can start new subflows). 117 + * 118 + * MPTCP_EVENT_CLOSED: token 119 + * A MPTCP connection has stopped. 120 + * 121 + * MPTCP_EVENT_ANNOUNCED: token, rem_id, family, daddr4 | daddr6 [, dport] 122 + * A new address has been announced by the peer. 123 + * 124 + * MPTCP_EVENT_REMOVED: token, rem_id 125 + * An address has been lost by the peer. 126 + * 127 + * MPTCP_EVENT_SUB_ESTABLISHED: token, family, saddr4 | saddr6, 128 + * daddr4 | daddr6, sport, dport, backup, 129 + * if_idx [, error] 130 + * A new subflow has been established. 'error' should not be set. 131 + * 132 + * MPTCP_EVENT_SUB_CLOSED: token, family, saddr4 | saddr6, daddr4 | daddr6, 133 + * sport, dport, backup, if_idx [, error] 134 + * A subflow has been closed. An error (copy of sk_err) could be set if an 135 + * error has been detected for this subflow. 136 + * 137 + * MPTCP_EVENT_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6, 138 + * sport, dport, backup, if_idx [, error] 139 + * The priority of a subflow has changed. 'error' should not be set. 
140 + */ 141 + enum mptcp_event_type { 142 + MPTCP_EVENT_UNSPEC = 0, 143 + MPTCP_EVENT_CREATED = 1, 144 + MPTCP_EVENT_ESTABLISHED = 2, 145 + MPTCP_EVENT_CLOSED = 3, 146 + 147 + MPTCP_EVENT_ANNOUNCED = 6, 148 + MPTCP_EVENT_REMOVED = 7, 149 + 150 + MPTCP_EVENT_SUB_ESTABLISHED = 10, 151 + MPTCP_EVENT_SUB_CLOSED = 11, 152 + 153 + MPTCP_EVENT_SUB_PRIORITY = 13, 154 + }; 155 + 156 + enum mptcp_event_attr { 157 + MPTCP_ATTR_UNSPEC = 0, 158 + 159 + MPTCP_ATTR_TOKEN, /* u32 */ 160 + MPTCP_ATTR_FAMILY, /* u16 */ 161 + MPTCP_ATTR_LOC_ID, /* u8 */ 162 + MPTCP_ATTR_REM_ID, /* u8 */ 163 + MPTCP_ATTR_SADDR4, /* be32 */ 164 + MPTCP_ATTR_SADDR6, /* struct in6_addr */ 165 + MPTCP_ATTR_DADDR4, /* be32 */ 166 + MPTCP_ATTR_DADDR6, /* struct in6_addr */ 167 + MPTCP_ATTR_SPORT, /* be16 */ 168 + MPTCP_ATTR_DPORT, /* be16 */ 169 + MPTCP_ATTR_BACKUP, /* u8 */ 170 + MPTCP_ATTR_ERROR, /* u8 */ 171 + MPTCP_ATTR_FLAGS, /* u16 */ 172 + MPTCP_ATTR_TIMEOUT, /* u32 */ 173 + MPTCP_ATTR_IF_IDX, /* s32 */ 174 + 175 + __MPTCP_ATTR_AFTER_LAST 176 + }; 177 + 178 + #define MPTCP_ATTR_MAX (__MPTCP_ATTR_AFTER_LAST - 1) 107 179 108 180 #endif /* _UAPI_MPTCP_H */
+15 -5
net/mptcp/pm.c
··· 75 75 pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side); 76 76 77 77 WRITE_ONCE(pm->server_side, server_side); 78 + mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC); 78 79 } 79 80 80 81 bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) ··· 123 122 void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp) 124 123 { 125 124 struct mptcp_pm_data *pm = &msk->pm; 125 + bool announce = false; 126 126 127 127 pr_debug("msk=%p", msk); 128 - 129 - /* try to avoid acquiring the lock below */ 130 - if (!READ_ONCE(pm->work_pending)) 131 - return; 132 128 133 129 spin_lock_bh(&pm->lock); 134 130 ··· 136 138 if (READ_ONCE(pm->work_pending) && 137 139 !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED))) 138 140 mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED); 139 - msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); 140 141 142 + if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0) 143 + announce = true; 144 + 145 + msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); 141 146 spin_unlock_bh(&pm->lock); 147 + 148 + if (announce) 149 + mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp); 142 150 } 143 151 144 152 void mptcp_pm_connection_closed(struct mptcp_sock *msk) ··· 183 179 pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id, 184 180 READ_ONCE(pm->accept_addr)); 185 181 182 + mptcp_event_addr_announced(msk, addr); 183 + 186 184 spin_lock_bh(&pm->lock); 187 185 188 186 if (!READ_ONCE(pm->accept_addr)) { ··· 211 205 212 206 pr_debug("msk=%p remote_id=%d", msk, rm_id); 213 207 208 + mptcp_event_addr_removed(msk, rm_id); 209 + 214 210 spin_lock_bh(&pm->lock); 215 211 mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED); 216 212 pm->rm_id = rm_id; ··· 225 217 226 218 pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup); 227 219 subflow->backup = bkup; 220 + 221 + mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC); 228 222 } 229 223 230 224 /* path manager 
helpers */
+260 -1
net/mptcp/pm_netlink.c
··· 860 860 WRITE_ONCE(pm->accept_subflow, subflows); 861 861 } 862 862 863 - #define MPTCP_PM_CMD_GRP_OFFSET 0 863 + #define MPTCP_PM_CMD_GRP_OFFSET 0 864 + #define MPTCP_PM_EV_GRP_OFFSET 1 864 865 865 866 static const struct genl_multicast_group mptcp_pm_mcgrps[] = { 866 867 [MPTCP_PM_CMD_GRP_OFFSET] = { .name = MPTCP_PM_CMD_GRP_NAME, }, 868 + [MPTCP_PM_EV_GRP_OFFSET] = { .name = MPTCP_PM_EV_GRP_NAME, 869 + .flags = GENL_UNS_ADMIN_PERM, 870 + }, 867 871 }; 868 872 869 873 static const struct nla_policy ··· 1484 1480 } 1485 1481 1486 1482 return 0; 1483 + } 1484 + 1485 + static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp) 1486 + { 1487 + genlmsg_multicast_netns(&mptcp_genl_family, net, 1488 + nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp); 1489 + } 1490 + 1491 + static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) 1492 + { 1493 + const struct inet_sock *issk = inet_sk(ssk); 1494 + const struct mptcp_subflow_context *sf; 1495 + 1496 + if (nla_put_u16(skb, MPTCP_ATTR_FAMILY, ssk->sk_family)) 1497 + return -EMSGSIZE; 1498 + 1499 + switch (ssk->sk_family) { 1500 + case AF_INET: 1501 + if (nla_put_in_addr(skb, MPTCP_ATTR_SADDR4, issk->inet_saddr)) 1502 + return -EMSGSIZE; 1503 + if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, issk->inet_daddr)) 1504 + return -EMSGSIZE; 1505 + break; 1506 + #if IS_ENABLED(CONFIG_MPTCP_IPV6) 1507 + case AF_INET6: { 1508 + const struct ipv6_pinfo *np = inet6_sk(ssk); 1509 + 1510 + if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &np->saddr)) 1511 + return -EMSGSIZE; 1512 + if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &ssk->sk_v6_daddr)) 1513 + return -EMSGSIZE; 1514 + break; 1515 + } 1516 + #endif 1517 + default: 1518 + WARN_ON_ONCE(1); 1519 + return -EMSGSIZE; 1520 + } 1521 + 1522 + if (nla_put_be16(skb, MPTCP_ATTR_SPORT, issk->inet_sport)) 1523 + return -EMSGSIZE; 1524 + if (nla_put_be16(skb, MPTCP_ATTR_DPORT, issk->inet_dport)) 1525 + return -EMSGSIZE; 1526 + 1527 + sf = mptcp_subflow_ctx(ssk); 
1528 + if (WARN_ON_ONCE(!sf)) 1529 + return -EINVAL; 1530 + 1531 + if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id)) 1532 + return -EMSGSIZE; 1533 + 1534 + if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id)) 1535 + return -EMSGSIZE; 1536 + 1537 + return 0; 1538 + } 1539 + 1540 + static int mptcp_event_put_token_and_ssk(struct sk_buff *skb, 1541 + const struct mptcp_sock *msk, 1542 + const struct sock *ssk) 1543 + { 1544 + const struct sock *sk = (const struct sock *)msk; 1545 + const struct mptcp_subflow_context *sf; 1546 + u8 sk_err; 1547 + 1548 + if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token)) 1549 + return -EMSGSIZE; 1550 + 1551 + if (mptcp_event_add_subflow(skb, ssk)) 1552 + return -EMSGSIZE; 1553 + 1554 + sf = mptcp_subflow_ctx(ssk); 1555 + if (WARN_ON_ONCE(!sf)) 1556 + return -EINVAL; 1557 + 1558 + if (nla_put_u8(skb, MPTCP_ATTR_BACKUP, sf->backup)) 1559 + return -EMSGSIZE; 1560 + 1561 + if (ssk->sk_bound_dev_if && 1562 + nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if)) 1563 + return -EMSGSIZE; 1564 + 1565 + sk_err = ssk->sk_err; 1566 + if (sk_err && sk->sk_state == TCP_ESTABLISHED && 1567 + nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err)) 1568 + return -EMSGSIZE; 1569 + 1570 + return 0; 1571 + } 1572 + 1573 + static int mptcp_event_sub_established(struct sk_buff *skb, 1574 + const struct mptcp_sock *msk, 1575 + const struct sock *ssk) 1576 + { 1577 + return mptcp_event_put_token_and_ssk(skb, msk, ssk); 1578 + } 1579 + 1580 + static int mptcp_event_sub_closed(struct sk_buff *skb, 1581 + const struct mptcp_sock *msk, 1582 + const struct sock *ssk) 1583 + { 1584 + if (mptcp_event_put_token_and_ssk(skb, msk, ssk)) 1585 + return -EMSGSIZE; 1586 + 1587 + return 0; 1588 + } 1589 + 1590 + static int mptcp_event_created(struct sk_buff *skb, 1591 + const struct mptcp_sock *msk, 1592 + const struct sock *ssk) 1593 + { 1594 + int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token); 1595 + 1596 + if (err) 1597 + return err; 1598 + 1599 + return 
mptcp_event_add_subflow(skb, ssk); 1600 + } 1601 + 1602 + void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id) 1603 + { 1604 + struct net *net = sock_net((const struct sock *)msk); 1605 + struct nlmsghdr *nlh; 1606 + struct sk_buff *skb; 1607 + 1608 + if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) 1609 + return; 1610 + 1611 + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); 1612 + if (!skb) 1613 + return; 1614 + 1615 + nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, MPTCP_EVENT_REMOVED); 1616 + if (!nlh) 1617 + goto nla_put_failure; 1618 + 1619 + if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token)) 1620 + goto nla_put_failure; 1621 + 1622 + if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, id)) 1623 + goto nla_put_failure; 1624 + 1625 + genlmsg_end(skb, nlh); 1626 + mptcp_nl_mcast_send(net, skb, GFP_ATOMIC); 1627 + return; 1628 + 1629 + nla_put_failure: 1630 + kfree_skb(skb); 1631 + } 1632 + 1633 + void mptcp_event_addr_announced(const struct mptcp_sock *msk, 1634 + const struct mptcp_addr_info *info) 1635 + { 1636 + struct net *net = sock_net((const struct sock *)msk); 1637 + struct nlmsghdr *nlh; 1638 + struct sk_buff *skb; 1639 + 1640 + if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) 1641 + return; 1642 + 1643 + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); 1644 + if (!skb) 1645 + return; 1646 + 1647 + nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, 1648 + MPTCP_EVENT_ANNOUNCED); 1649 + if (!nlh) 1650 + goto nla_put_failure; 1651 + 1652 + if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token)) 1653 + goto nla_put_failure; 1654 + 1655 + if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id)) 1656 + goto nla_put_failure; 1657 + 1658 + if (nla_put_be16(skb, MPTCP_ATTR_DPORT, info->port)) 1659 + goto nla_put_failure; 1660 + 1661 + switch (info->family) { 1662 + case AF_INET: 1663 + if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, info->addr.s_addr)) 1664 + goto nla_put_failure; 1665 + break; 1666 + #if 
IS_ENABLED(CONFIG_MPTCP_IPV6) 1667 + case AF_INET6: 1668 + if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &info->addr6)) 1669 + goto nla_put_failure; 1670 + break; 1671 + #endif 1672 + default: 1673 + WARN_ON_ONCE(1); 1674 + goto nla_put_failure; 1675 + } 1676 + 1677 + genlmsg_end(skb, nlh); 1678 + mptcp_nl_mcast_send(net, skb, GFP_ATOMIC); 1679 + return; 1680 + 1681 + nla_put_failure: 1682 + kfree_skb(skb); 1683 + } 1684 + 1685 + void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, 1686 + const struct sock *ssk, gfp_t gfp) 1687 + { 1688 + struct net *net = sock_net((const struct sock *)msk); 1689 + struct nlmsghdr *nlh; 1690 + struct sk_buff *skb; 1691 + 1692 + if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) 1693 + return; 1694 + 1695 + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); 1696 + if (!skb) 1697 + return; 1698 + 1699 + nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, type); 1700 + if (!nlh) 1701 + goto nla_put_failure; 1702 + 1703 + switch (type) { 1704 + case MPTCP_EVENT_UNSPEC: 1705 + WARN_ON_ONCE(1); 1706 + break; 1707 + case MPTCP_EVENT_CREATED: 1708 + case MPTCP_EVENT_ESTABLISHED: 1709 + if (mptcp_event_created(skb, msk, ssk) < 0) 1710 + goto nla_put_failure; 1711 + break; 1712 + case MPTCP_EVENT_CLOSED: 1713 + if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token) < 0) 1714 + goto nla_put_failure; 1715 + break; 1716 + case MPTCP_EVENT_ANNOUNCED: 1717 + case MPTCP_EVENT_REMOVED: 1718 + /* call mptcp_event_addr_announced()/removed instead */ 1719 + WARN_ON_ONCE(1); 1720 + break; 1721 + case MPTCP_EVENT_SUB_ESTABLISHED: 1722 + case MPTCP_EVENT_SUB_PRIORITY: 1723 + if (mptcp_event_sub_established(skb, msk, ssk) < 0) 1724 + goto nla_put_failure; 1725 + break; 1726 + case MPTCP_EVENT_SUB_CLOSED: 1727 + if (mptcp_event_sub_closed(skb, msk, ssk) < 0) 1728 + goto nla_put_failure; 1729 + break; 1730 + } 1731 + 1732 + genlmsg_end(skb, nlh); 1733 + mptcp_nl_mcast_send(net, skb, gfp); 1734 + return; 1735 + 1736 + 
nla_put_failure: 1737 + kfree_skb(skb); 1487 1738 } 1488 1739 1489 1740 static const struct genl_small_ops mptcp_pm_ops[] = {
+9 -1
net/mptcp/protocol.c
··· 2150 2150 void mptcp_close_ssk(struct sock *sk, struct sock *ssk, 2151 2151 struct mptcp_subflow_context *subflow) 2152 2152 { 2153 + if (sk->sk_state == TCP_ESTABLISHED) 2154 + mptcp_event(MPTCP_EVENT_SUB_CLOSED, mptcp_sk(sk), ssk, GFP_KERNEL); 2153 2155 __mptcp_close_ssk(sk, ssk, subflow); 2154 2156 } 2155 2157 ··· 2588 2586 release_sock(sk); 2589 2587 if (do_cancel_work) 2590 2588 mptcp_cancel_work(sk); 2589 + 2590 + if (mptcp_sk(sk)->token) 2591 + mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL); 2592 + 2591 2593 sock_put(sk); 2592 2594 } 2593 2595 ··· 3063 3057 return false; 3064 3058 3065 3059 if (!msk->pm.server_side) 3066 - return true; 3060 + goto out; 3067 3061 3068 3062 if (!mptcp_pm_allow_new_subflow(msk)) 3069 3063 return false; ··· 3090 3084 if (parent_sock && !ssk->sk_socket) 3091 3085 mptcp_sock_graft(ssk, parent_sock); 3092 3086 subflow->map_seq = READ_ONCE(msk->ack_seq); 3087 + out: 3088 + mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC); 3093 3089 return true; 3094 3090 } 3095 3091
+6
net/mptcp/protocol.h
··· 10 10 #include <linux/random.h> 11 11 #include <net/tcp.h> 12 12 #include <net/inet_connection_sock.h> 13 + #include <uapi/linux/mptcp.h> 13 14 14 15 #define MPTCP_SUPPORTED_VERSION 1 15 16 ··· 666 665 bool echo, bool port); 667 666 int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id); 668 667 int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id); 668 + 669 + void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, 670 + const struct sock *ssk, gfp_t gfp); 671 + void mptcp_event_addr_announced(const struct mptcp_sock *msk, const struct mptcp_addr_info *info); 672 + void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); 669 673 670 674 static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) 671 675 {