Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net/tls: support setting the maximum payload size

During a handshake, an endpoint may specify a maximum record size limit.
Currently, the kernel defaults to TLS_MAX_PAYLOAD_SIZE (16KB) for the
maximum record size, meaning that outgoing records from the kernel
can exceed a lower size negotiated during the handshake. In such a case,
the receiving TLS endpoint must send a fatal "record_overflow" alert [1], and
thus the record is discarded.

Upcoming Western Digital NVMe-TCP hardware controllers implement TLS
support. For these devices, supporting TLS record size negotiation is
necessary because the maximum TLS record size supported by the controller
is less than the default 16KB currently used by the kernel.

Currently, there is no way to inform the kernel of such a limit. This patch
adds support for a new setsockopt() option `TLS_TX_MAX_PAYLOAD_LEN` that
allows for setting the maximum plaintext fragment size. Once set, outgoing
records are no larger than the size specified. This option can be used to
specify the record size limit.

[1] https://www.rfc-editor.org/rfc/rfc8449

Signed-off-by: Wilfred Mallawa <wilfred.mallawa@wdc.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://patch.msgid.link/20251022001937.20155-1-wilfred.opensource@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Wilfred Mallawa and committed by
Jakub Kicinski
82cb5be6 bfe62db5

+91 -2
+20
Documentation/networking/tls.rst
··· 280 280 record it will be decrypted again into a kernel buffer without zero copy. 281 281 Such events are counted in the ``TlsDecryptRetry`` statistic. 282 282 283 + TLS_TX_MAX_PAYLOAD_LEN 284 + ~~~~~~~~~~~~~~~~~~~~~~ 285 + 286 + Specifies the maximum size of the plaintext payload for transmitted TLS records. 287 + 288 + When this option is set, the kernel enforces the specified limit on all outgoing 289 + TLS records. No plaintext fragment will exceed this size. This option can be used 290 + to implement the TLS Record Size Limit extension [1]. 291 + 292 + * For TLS 1.2, the value corresponds directly to the record size limit. 293 + * For TLS 1.3, the value should be set to record_size_limit - 1, since 294 + the record size limit includes one additional byte for the ContentType 295 + field. 296 + 297 + The valid range for this option is 64 to 16384 bytes for TLS 1.2, and 63 to 298 + 16384 bytes for TLS 1.3. The lower minimum for TLS 1.3 accounts for the 299 + extra byte used by the ContentType field. 300 + 301 + [1] https://datatracker.ietf.org/doc/html/rfc8449 302 + 283 303 Statistics 284 304 ========== 285 305
+3
include/net/tls.h
··· 53 53 54 54 /* Maximum data size carried in a TLS record */ 55 55 #define TLS_MAX_PAYLOAD_SIZE ((size_t)1 << 14) 56 + /* Minimum record size limit as per RFC8449 */ 57 + #define TLS_MIN_RECORD_SIZE_LIM ((size_t)1 << 6) 56 58 57 59 #define TLS_HEADER_SIZE 5 58 60 #define TLS_NONCE_OFFSET TLS_HEADER_SIZE ··· 228 226 u8 rx_conf:3; 229 227 u8 zerocopy_sendfile:1; 230 228 u8 rx_no_pad:1; 229 + u16 tx_max_payload_len; 231 230 232 231 int (*push_pending_record)(struct sock *sk, int flags); 233 232 void (*sk_write_space)(struct sock *sk);
+2
include/uapi/linux/tls.h
··· 41 41 #define TLS_RX 2 /* Set receive parameters */ 42 42 #define TLS_TX_ZEROCOPY_RO 3 /* TX zerocopy (only sendfile now) */ 43 43 #define TLS_RX_EXPECT_NO_PAD 4 /* Attempt opportunistic zero-copy */ 44 + #define TLS_TX_MAX_PAYLOAD_LEN 5 /* Maximum plaintext size */ 44 45 45 46 /* Supported versions */ 46 47 #define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) ··· 195 194 TLS_INFO_RXCONF, 196 195 TLS_INFO_ZC_RO_TX, 197 196 TLS_INFO_RX_NO_PAD, 197 + TLS_INFO_TX_MAX_PAYLOAD_LEN, 198 198 __TLS_INFO_MAX, 199 199 }; 200 200 #define TLS_INFO_MAX (__TLS_INFO_MAX - 1)
+1 -1
net/tls/tls_device.c
··· 462 462 /* TLS_HEADER_SIZE is not counted as part of the TLS record, and 463 463 * we need to leave room for an authentication tag. 464 464 */ 465 - max_open_record_len = TLS_MAX_PAYLOAD_SIZE + 465 + max_open_record_len = tls_ctx->tx_max_payload_len + 466 466 prot->prepend_size; 467 467 do { 468 468 rc = tls_do_allocation(sk, ctx, pfrag, prot->prepend_size);
+64
net/tls/tls_main.c
··· 541 541 return 0; 542 542 } 543 543 544 + static int do_tls_getsockopt_tx_payload_len(struct sock *sk, char __user *optval, 545 + int __user *optlen) 546 + { 547 + struct tls_context *ctx = tls_get_ctx(sk); 548 + u16 payload_len = ctx->tx_max_payload_len; 549 + int len; 550 + 551 + if (get_user(len, optlen)) 552 + return -EFAULT; 553 + 554 + if (len < sizeof(payload_len)) 555 + return -EINVAL; 556 + 557 + if (put_user(sizeof(payload_len), optlen)) 558 + return -EFAULT; 559 + 560 + if (copy_to_user(optval, &payload_len, sizeof(payload_len))) 561 + return -EFAULT; 562 + 563 + return 0; 564 + } 565 + 544 566 static int do_tls_getsockopt(struct sock *sk, int optname, 545 567 char __user *optval, int __user *optlen) 546 568 { ··· 581 559 break; 582 560 case TLS_RX_EXPECT_NO_PAD: 583 561 rc = do_tls_getsockopt_no_pad(sk, optval, optlen); 562 + break; 563 + case TLS_TX_MAX_PAYLOAD_LEN: 564 + rc = do_tls_getsockopt_tx_payload_len(sk, optval, optlen); 584 565 break; 585 566 default: 586 567 rc = -ENOPROTOOPT; ··· 834 809 return rc; 835 810 } 836 811 812 + static int do_tls_setsockopt_tx_payload_len(struct sock *sk, sockptr_t optval, 813 + unsigned int optlen) 814 + { 815 + struct tls_context *ctx = tls_get_ctx(sk); 816 + struct tls_sw_context_tx *sw_ctx = tls_sw_ctx_tx(ctx); 817 + u16 value; 818 + bool tls_13 = ctx->prot_info.version == TLS_1_3_VERSION; 819 + 820 + if (sw_ctx && sw_ctx->open_rec) 821 + return -EBUSY; 822 + 823 + if (sockptr_is_null(optval) || optlen != sizeof(value)) 824 + return -EINVAL; 825 + 826 + if (copy_from_sockptr(&value, optval, sizeof(value))) 827 + return -EFAULT; 828 + 829 + if (value < TLS_MIN_RECORD_SIZE_LIM - (tls_13 ? 
1 : 0) || 830 + value > TLS_MAX_PAYLOAD_SIZE) 831 + return -EINVAL; 832 + 833 + ctx->tx_max_payload_len = value; 834 + 835 + return 0; 836 + } 837 + 837 838 static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval, 838 839 unsigned int optlen) 839 840 { ··· 880 829 break; 881 830 case TLS_RX_EXPECT_NO_PAD: 882 831 rc = do_tls_setsockopt_no_pad(sk, optval, optlen); 832 + break; 833 + case TLS_TX_MAX_PAYLOAD_LEN: 834 + lock_sock(sk); 835 + rc = do_tls_setsockopt_tx_payload_len(sk, optval, optlen); 836 + release_sock(sk); 883 837 break; 884 838 default: 885 839 rc = -ENOPROTOOPT; ··· 1075 1019 1076 1020 ctx->tx_conf = TLS_BASE; 1077 1021 ctx->rx_conf = TLS_BASE; 1022 + ctx->tx_max_payload_len = TLS_MAX_PAYLOAD_SIZE; 1078 1023 update_sk_prot(sk, ctx); 1079 1024 out: 1080 1025 write_unlock_bh(&sk->sk_callback_lock); ··· 1165 1108 goto nla_failure; 1166 1109 } 1167 1110 1111 + err = nla_put_u16(skb, TLS_INFO_TX_MAX_PAYLOAD_LEN, 1112 + ctx->tx_max_payload_len); 1113 + 1114 + if (err) 1115 + goto nla_failure; 1116 + 1168 1117 rcu_read_unlock(); 1169 1118 nla_nest_end(skb, start); 1170 1119 return 0; ··· 1192 1129 nla_total_size(sizeof(u16)) + /* TLS_INFO_TXCONF */ 1193 1130 nla_total_size(0) + /* TLS_INFO_ZC_RO_TX */ 1194 1131 nla_total_size(0) + /* TLS_INFO_RX_NO_PAD */ 1132 + nla_total_size(sizeof(u16)) + /* TLS_INFO_TX_MAX_PAYLOAD_LEN */ 1195 1133 0; 1196 1134 1197 1135 return size;
+1 -1
net/tls/tls_sw.c
··· 1079 1079 orig_size = msg_pl->sg.size; 1080 1080 full_record = false; 1081 1081 try_to_copy = msg_data_left(msg); 1082 - record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size; 1082 + record_room = tls_ctx->tx_max_payload_len - msg_pl->sg.size; 1083 1083 if (try_to_copy >= record_room) { 1084 1084 try_to_copy = record_room; 1085 1085 full_record = true;