Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'mptcp-fixes'

Mat Martineau says:

====================
mptcp: Miscellaneous fixes

Here are five changes we've collected and tested in the mptcp-tree:

Patch 1 changes handling of the MPTCP-level snd_nxt value during the
recovery phase after a subflow link failure.

Patches 2 and 3 are some small refactoring changes to replace some
open-coded bits.

Patch 4 removes an unused field in struct mptcp_sock.

Patch 5 restarts the MPTCP retransmit timer when there is
not-yet-transmitted data to send and all previously sent data has been
acknowledged. This prevents some sending stalls.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+56 -35
+5 -10
net/mptcp/options.c
··· 748 748 /* can't send MP_PRIO with MPC, as they share the same option space: 749 749 * 'backup'. Also it makes no sense at all 750 750 */ 751 - if (!subflow->send_mp_prio || 752 - ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | 753 - OPTION_MPTCP_MPC_ACK) & opts->suboptions)) 751 + if (!subflow->send_mp_prio || (opts->suboptions & OPTIONS_MPTCP_MPC)) 754 752 return false; 755 753 756 754 /* account for the trailing 'nop' option */ ··· 1017 1019 old_snd_una = msk->snd_una; 1018 1020 new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64); 1019 1021 1020 - /* ACK for data not even sent yet and even above recovery bound? Ignore.*/ 1021 - if (unlikely(after64(new_snd_una, snd_nxt))) { 1022 - if (!msk->recovery || after64(new_snd_una, msk->recovery_snd_nxt)) 1023 - new_snd_una = old_snd_una; 1024 - } 1022 + /* ACK for data not even sent yet? Ignore.*/ 1023 + if (unlikely(after64(new_snd_una, snd_nxt))) 1024 + new_snd_una = old_snd_una; 1025 1025 1026 1026 new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd; 1027 1027 ··· 1325 1329 TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); 1326 1330 } 1327 1331 } 1328 - } else if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | 1329 - OPTION_MPTCP_MPC_ACK) & opts->suboptions) { 1332 + } else if (OPTIONS_MPTCP_MPC & opts->suboptions) { 1330 1333 u8 len, flag = MPTCP_CAP_HMAC_SHA256; 1331 1334 1332 1335 if (OPTION_MPTCP_MPC_SYN & opts->suboptions) {
+51 -24
net/mptcp/protocol.c
··· 956 956 { 957 957 struct mptcp_sock *msk = mptcp_sk(sk); 958 958 959 - #ifdef CONFIG_LOCKDEP 960 - WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock)); 961 - #endif 959 + lockdep_assert_held_once(&sk->sk_lock.slock); 962 960 963 961 if (!msk->wmem_reserved) 964 962 return; ··· 1105 1107 if (cleaned && tcp_under_memory_pressure(sk)) 1106 1108 __mptcp_mem_reclaim_partial(sk); 1107 1109 1108 - if (snd_una == READ_ONCE(msk->snd_nxt) && !msk->recovery) { 1110 + if (snd_una == READ_ONCE(msk->snd_nxt) && 1111 + snd_una == READ_ONCE(msk->write_seq)) { 1109 1112 if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) 1110 1113 mptcp_stop_timer(sk); 1111 1114 } else { ··· 1116 1117 1117 1118 static void __mptcp_clean_una_wakeup(struct sock *sk) 1118 1119 { 1119 - #ifdef CONFIG_LOCKDEP 1120 - WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock)); 1121 - #endif 1120 + lockdep_assert_held_once(&sk->sk_lock.slock); 1121 + 1122 1122 __mptcp_clean_una(sk); 1123 1123 mptcp_write_space(sk); 1124 1124 } ··· 1523 1525 release_sock(ssk); 1524 1526 } 1525 1527 1528 + static void mptcp_update_post_push(struct mptcp_sock *msk, 1529 + struct mptcp_data_frag *dfrag, 1530 + u32 sent) 1531 + { 1532 + u64 snd_nxt_new = dfrag->data_seq; 1533 + 1534 + dfrag->already_sent += sent; 1535 + 1536 + msk->snd_burst -= sent; 1537 + 1538 + snd_nxt_new += dfrag->already_sent; 1539 + 1540 + /* snd_nxt_new can be smaller than snd_nxt in case mptcp 1541 + * is recovering after a failover. In that event, this re-sends 1542 + * old segments. 1543 + * 1544 + * Thus compute snd_nxt_new candidate based on 1545 + * the dfrag->data_seq that was sent and the data 1546 + * that has been handed to the subflow for transmission 1547 + * and skip update in case it was old dfrag. 
1548 + */ 1549 + if (likely(after64(snd_nxt_new, msk->snd_nxt))) 1550 + msk->snd_nxt = snd_nxt_new; 1551 + } 1552 + 1553 + static void mptcp_check_and_set_pending(struct sock *sk) 1554 + { 1555 + if (mptcp_send_head(sk) && 1556 + !test_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) 1557 + set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); 1558 + } 1559 + 1526 1560 void __mptcp_push_pending(struct sock *sk, unsigned int flags) 1527 1561 { 1528 1562 struct sock *prev_ssk = NULL, *ssk = NULL; ··· 1598 1568 } 1599 1569 1600 1570 info.sent += ret; 1601 - dfrag->already_sent += ret; 1602 - msk->snd_nxt += ret; 1603 - msk->snd_burst -= ret; 1604 - msk->tx_pending_data -= ret; 1605 1571 copied += ret; 1606 1572 len -= ret; 1573 + 1574 + mptcp_update_post_push(msk, dfrag, ret); 1607 1575 } 1608 1576 WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); 1609 1577 } ··· 1654 1626 goto out; 1655 1627 1656 1628 info.sent += ret; 1657 - dfrag->already_sent += ret; 1658 - msk->snd_nxt += ret; 1659 - msk->snd_burst -= ret; 1660 - msk->tx_pending_data -= ret; 1661 1629 copied += ret; 1662 1630 len -= ret; 1663 1631 first = false; 1632 + 1633 + mptcp_update_post_push(msk, dfrag, ret); 1664 1634 } 1665 1635 WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); 1666 1636 } ··· 1768 1742 frag_truesize += psize; 1769 1743 pfrag->offset += frag_truesize; 1770 1744 WRITE_ONCE(msk->write_seq, msk->write_seq + psize); 1771 - msk->tx_pending_data += psize; 1772 1745 1773 1746 /* charge data on mptcp pending queue to the msk socket 1774 1747 * Note: we charge such data both to sk and ssk ··· 2255 2230 return false; 2256 2231 } 2257 2232 2258 - /* will accept ack for reijected data before re-sending them */ 2259 - if (!msk->recovery || after64(msk->snd_nxt, msk->recovery_snd_nxt)) 2260 - msk->recovery_snd_nxt = msk->snd_nxt; 2233 + msk->recovery_snd_nxt = msk->snd_nxt; 2261 2234 msk->recovery = true; 2262 2235 mptcp_data_unlock(sk); 2263 2236 2264 2237 msk->first_pending = rtx_head; 2265 - 
msk->tx_pending_data += msk->snd_nxt - rtx_head->data_seq; 2266 - msk->snd_nxt = rtx_head->data_seq; 2267 2238 msk->snd_burst = 0; 2268 2239 2269 2240 /* be sure to clear the "sent status" on all re-injected fragments */ ··· 2422 2401 int ret; 2423 2402 2424 2403 mptcp_clean_una_wakeup(sk); 2404 + 2405 + /* first check ssk: need to kick "stale" logic */ 2406 + ssk = mptcp_subflow_get_retrans(msk); 2425 2407 dfrag = mptcp_rtx_head(sk); 2426 2408 if (!dfrag) { 2427 2409 if (mptcp_data_fin_enabled(msk)) { ··· 2437 2413 goto reset_timer; 2438 2414 } 2439 2415 2440 - return; 2416 + if (!mptcp_send_head(sk)) 2417 + return; 2418 + 2419 + goto reset_timer; 2441 2420 } 2442 2421 2443 - ssk = mptcp_subflow_get_retrans(msk); 2444 2422 if (!ssk) 2445 2423 goto reset_timer; 2446 2424 ··· 2469 2443 release_sock(ssk); 2470 2444 2471 2445 reset_timer: 2446 + mptcp_check_and_set_pending(sk); 2447 + 2472 2448 if (!mptcp_timer_pending(sk)) 2473 2449 mptcp_reset_timer(sk); 2474 2450 } ··· 2537 2509 msk->first_pending = NULL; 2538 2510 msk->wmem_reserved = 0; 2539 2511 WRITE_ONCE(msk->rmem_released, 0); 2540 - msk->tx_pending_data = 0; 2541 2512 msk->timer_ival = TCP_RTO_MIN; 2542 2513 2543 2514 msk->first = NULL;
-1
net/mptcp/protocol.h
··· 254 254 struct sk_buff *ooo_last_skb; 255 255 struct rb_root out_of_order_queue; 256 256 struct sk_buff_head receive_queue; 257 - int tx_pending_data; 258 257 struct list_head conn_list; 259 258 struct list_head rtx_queue; 260 259 struct mptcp_data_frag *first_pending;