Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

SUNRPC: Add a TCP-with-TLS RPC transport class

Use the new TLS handshake API to enable the SunRPC client code
to request a TLS handshake. This implements support for RFC 9289,
only on TCP sockets.

Upper layers such as NFS use RPC-with-TLS to protect in-transit
traffic.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>

Authored by Chuck Lever and committed by Trond Myklebust
75eb6af7 dea034b9

+418
+1
include/linux/sunrpc/xprt.h
··· 200 200 XPRT_TRANSPORT_RDMA = 256, 201 201 XPRT_TRANSPORT_BC_RDMA = XPRT_TRANSPORT_RDMA | XPRT_TRANSPORT_BC, 202 202 XPRT_TRANSPORT_LOCAL = 257, 203 + XPRT_TRANSPORT_TCP_TLS = 258, 203 204 }; 204 205 205 206 struct rpc_sysfs_xprt;
+2
include/linux/sunrpc/xprtsock.h
··· 57 57 struct work_struct error_worker; 58 58 struct work_struct recv_worker; 59 59 struct mutex recv_mutex; 60 + struct completion handshake_done; 60 61 struct sockaddr_storage srcaddr; 61 62 unsigned short srcport; 62 63 int xprt_err; 64 + struct rpc_clnt *clnt; 63 65 64 66 /* 65 67 * UDP socket buffer size parameters
+44
include/trace/events/sunrpc.h
··· 1525 1525 ) 1526 1526 ); 1527 1527 1528 + /** 1529 + ** RPC-over-TLS tracepoints 1530 + **/ 1531 + 1532 + DECLARE_EVENT_CLASS(rpc_tls_class, 1533 + TP_PROTO( 1534 + const struct rpc_clnt *clnt, 1535 + const struct rpc_xprt *xprt 1536 + ), 1537 + 1538 + TP_ARGS(clnt, xprt), 1539 + 1540 + TP_STRUCT__entry( 1541 + __field(unsigned long, requested_policy) 1542 + __field(u32, version) 1543 + __string(servername, xprt->servername) 1544 + __string(progname, clnt->cl_program->name) 1545 + ), 1546 + 1547 + TP_fast_assign( 1548 + __entry->requested_policy = clnt->cl_xprtsec.policy; 1549 + __entry->version = clnt->cl_vers; 1550 + __assign_str(servername, xprt->servername); 1551 + __assign_str(progname, clnt->cl_program->name) 1552 + ), 1553 + 1554 + TP_printk("server=%s %sv%u requested_policy=%s", 1555 + __get_str(servername), __get_str(progname), __entry->version, 1556 + rpc_show_xprtsec_policy(__entry->requested_policy) 1557 + ) 1558 + ); 1559 + 1560 + #define DEFINE_RPC_TLS_EVENT(name) \ 1561 + DEFINE_EVENT(rpc_tls_class, rpc_tls_##name, \ 1562 + TP_PROTO( \ 1563 + const struct rpc_clnt *clnt, \ 1564 + const struct rpc_xprt *xprt \ 1565 + ), \ 1566 + TP_ARGS(clnt, xprt)) 1567 + 1568 + DEFINE_RPC_TLS_EVENT(unavailable); 1569 + DEFINE_RPC_TLS_EVENT(not_started); 1570 + 1571 + 1528 1572 /* Record an xdr_buf containing a fully-formed RPC message */ 1529 1573 DECLARE_EVENT_CLASS(svc_xdr_msg_class, 1530 1574 TP_PROTO(
+1
net/sunrpc/sysfs.c
··· 239 239 if (!xprt) 240 240 return 0; 241 241 if (!(xprt->xprt_class->ident == XPRT_TRANSPORT_TCP || 242 + xprt->xprt_class->ident == XPRT_TRANSPORT_TCP_TLS || 242 243 xprt->xprt_class->ident == XPRT_TRANSPORT_RDMA)) { 243 244 xprt_put(xprt); 244 245 return -EOPNOTSUPP;
+370
net/sunrpc/xprtsock.c
··· 48 48 #include <net/udp.h> 49 49 #include <net/tcp.h> 50 50 #include <net/tls.h> 51 + #include <net/handshake.h> 51 52 52 53 #include <linux/bvec.h> 53 54 #include <linux/highmem.h> ··· 99 98 static struct xprt_class xs_local_transport; 100 99 static struct xprt_class xs_udp_transport; 101 100 static struct xprt_class xs_tcp_transport; 101 + static struct xprt_class xs_tcp_tls_transport; 102 102 static struct xprt_class xs_bc_tcp_transport; 103 103 104 104 /* ··· 190 188 * holding port numbers when there is no RPC traffic. 191 189 */ 192 190 #define XS_IDLE_DISC_TO (5U * 60 * HZ) 191 + 192 + /* 193 + * TLS handshake timeout. 194 + */ 195 + #define XS_TLS_HANDSHAKE_TO (10U * HZ) 193 196 194 197 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 195 198 # undef RPC_DEBUG_DATA ··· 1249 1242 1250 1243 if (atomic_read(&transport->xprt.swapper)) 1251 1244 sk_clear_memalloc(sk); 1245 + 1246 + tls_handshake_cancel(sk); 1252 1247 1253 1248 kernel_sock_shutdown(sock, SHUT_RDWR); 1254 1249 ··· 2425 2416 current_restore_flags(pflags, PF_MEMALLOC); 2426 2417 } 2427 2418 2419 + /* 2420 + * Transfer the connected socket to @upper_transport, then mark that 2421 + * xprt CONNECTED. 2422 + */ 2423 + static int xs_tcp_tls_finish_connecting(struct rpc_xprt *lower_xprt, 2424 + struct sock_xprt *upper_transport) 2425 + { 2426 + struct sock_xprt *lower_transport = 2427 + container_of(lower_xprt, struct sock_xprt, xprt); 2428 + struct rpc_xprt *upper_xprt = &upper_transport->xprt; 2429 + 2430 + if (!upper_transport->inet) { 2431 + struct socket *sock = lower_transport->sock; 2432 + struct sock *sk = sock->sk; 2433 + 2434 + /* Avoid temporary address, they are bad for long-lived 2435 + * connections such as NFS mounts. 2436 + * RFC4941, section 3.6 suggests that: 2437 + * Individual applications, which have specific 2438 + * knowledge about the normal duration of connections, 2439 + * MAY override this as appropriate. 
2440 + */ 2441 + if (xs_addr(upper_xprt)->sa_family == PF_INET6) 2442 + ip6_sock_set_addr_preferences(sk, IPV6_PREFER_SRC_PUBLIC); 2443 + 2444 + xs_tcp_set_socket_timeouts(upper_xprt, sock); 2445 + tcp_sock_set_nodelay(sk); 2446 + 2447 + lock_sock(sk); 2448 + 2449 + /* @sk is already connected, so it now has the RPC callbacks. 2450 + * Reach into @lower_transport to save the original ones. 2451 + */ 2452 + upper_transport->old_data_ready = lower_transport->old_data_ready; 2453 + upper_transport->old_state_change = lower_transport->old_state_change; 2454 + upper_transport->old_write_space = lower_transport->old_write_space; 2455 + upper_transport->old_error_report = lower_transport->old_error_report; 2456 + sk->sk_user_data = upper_xprt; 2457 + 2458 + /* socket options */ 2459 + sock_reset_flag(sk, SOCK_LINGER); 2460 + 2461 + xprt_clear_connected(upper_xprt); 2462 + 2463 + upper_transport->sock = sock; 2464 + upper_transport->inet = sk; 2465 + upper_transport->file = lower_transport->file; 2466 + 2467 + release_sock(sk); 2468 + 2469 + /* Reset lower_transport before shutting down its clnt */ 2470 + mutex_lock(&lower_transport->recv_mutex); 2471 + lower_transport->inet = NULL; 2472 + lower_transport->sock = NULL; 2473 + lower_transport->file = NULL; 2474 + 2475 + xprt_clear_connected(lower_xprt); 2476 + xs_sock_reset_connection_flags(lower_xprt); 2477 + xs_stream_reset_connect(lower_transport); 2478 + mutex_unlock(&lower_transport->recv_mutex); 2479 + } 2480 + 2481 + if (!xprt_bound(upper_xprt)) 2482 + return -ENOTCONN; 2483 + 2484 + xs_set_memalloc(upper_xprt); 2485 + 2486 + if (!xprt_test_and_set_connected(upper_xprt)) { 2487 + upper_xprt->connect_cookie++; 2488 + clear_bit(XPRT_SOCK_CONNECTING, &upper_transport->sock_state); 2489 + xprt_clear_connecting(upper_xprt); 2490 + 2491 + upper_xprt->stat.connect_count++; 2492 + upper_xprt->stat.connect_time += (long)jiffies - 2493 + upper_xprt->stat.connect_start; 2494 + xs_run_error_worker(upper_transport, 
XPRT_SOCK_WAKE_PENDING); 2495 + } 2496 + return 0; 2497 + } 2498 + 2499 + /** 2500 + * xs_tls_handshake_done - TLS handshake completion handler 2501 + * @data: address of xprt to wake 2502 + * @status: status of handshake 2503 + * @peerid: serial number of key containing the remote's identity 2504 + * 2505 + */ 2506 + static void xs_tls_handshake_done(void *data, int status, key_serial_t peerid) 2507 + { 2508 + struct rpc_xprt *lower_xprt = data; 2509 + struct sock_xprt *lower_transport = 2510 + container_of(lower_xprt, struct sock_xprt, xprt); 2511 + 2512 + lower_transport->xprt_err = status ? -EACCES : 0; 2513 + complete(&lower_transport->handshake_done); 2514 + xprt_put(lower_xprt); 2515 + } 2516 + 2517 + static int xs_tls_handshake_sync(struct rpc_xprt *lower_xprt, struct xprtsec_parms *xprtsec) 2518 + { 2519 + struct sock_xprt *lower_transport = 2520 + container_of(lower_xprt, struct sock_xprt, xprt); 2521 + struct tls_handshake_args args = { 2522 + .ta_sock = lower_transport->sock, 2523 + .ta_done = xs_tls_handshake_done, 2524 + .ta_data = xprt_get(lower_xprt), 2525 + .ta_peername = lower_xprt->servername, 2526 + }; 2527 + struct sock *sk = lower_transport->inet; 2528 + int rc; 2529 + 2530 + init_completion(&lower_transport->handshake_done); 2531 + set_bit(XPRT_SOCK_IGNORE_RECV, &lower_transport->sock_state); 2532 + lower_transport->xprt_err = -ETIMEDOUT; 2533 + switch (xprtsec->policy) { 2534 + case RPC_XPRTSEC_TLS_ANON: 2535 + rc = tls_client_hello_anon(&args, GFP_KERNEL); 2536 + if (rc) 2537 + goto out_put_xprt; 2538 + break; 2539 + case RPC_XPRTSEC_TLS_X509: 2540 + args.ta_my_cert = xprtsec->cert_serial; 2541 + args.ta_my_privkey = xprtsec->privkey_serial; 2542 + rc = tls_client_hello_x509(&args, GFP_KERNEL); 2543 + if (rc) 2544 + goto out_put_xprt; 2545 + break; 2546 + default: 2547 + rc = -EACCES; 2548 + goto out_put_xprt; 2549 + } 2550 + 2551 + rc = wait_for_completion_interruptible_timeout(&lower_transport->handshake_done, 2552 + XS_TLS_HANDSHAKE_TO); 
2553 + if (rc <= 0) { 2554 + if (!tls_handshake_cancel(sk)) { 2555 + if (rc == 0) 2556 + rc = -ETIMEDOUT; 2557 + goto out_put_xprt; 2558 + } 2559 + } 2560 + 2561 + rc = lower_transport->xprt_err; 2562 + 2563 + out: 2564 + xs_stream_reset_connect(lower_transport); 2565 + clear_bit(XPRT_SOCK_IGNORE_RECV, &lower_transport->sock_state); 2566 + return rc; 2567 + 2568 + out_put_xprt: 2569 + xprt_put(lower_xprt); 2570 + goto out; 2571 + } 2572 + 2573 + /** 2574 + * xs_tcp_tls_setup_socket - establish a TLS session on a TCP socket 2575 + * @work: queued work item 2576 + * 2577 + * Invoked by a work queue tasklet. 2578 + * 2579 + * For RPC-with-TLS, there is a two-stage connection process. 2580 + * 2581 + * The "upper-layer xprt" is visible to the RPC consumer. Once it has 2582 + * been marked connected, the consumer knows that a TCP connection and 2583 + * a TLS session have been established. 2584 + * 2585 + * A "lower-layer xprt", created in this function, handles the mechanics 2586 + * of connecting the TCP socket, performing the RPC_AUTH_TLS probe, and 2587 + * then driving the TLS handshake. Once all that is complete, the upper 2588 + * layer xprt is marked connected. 
2589 + */ 2590 + static void xs_tcp_tls_setup_socket(struct work_struct *work) 2591 + { 2592 + struct sock_xprt *upper_transport = 2593 + container_of(work, struct sock_xprt, connect_worker.work); 2594 + struct rpc_clnt *upper_clnt = upper_transport->clnt; 2595 + struct rpc_xprt *upper_xprt = &upper_transport->xprt; 2596 + struct rpc_create_args args = { 2597 + .net = upper_xprt->xprt_net, 2598 + .protocol = upper_xprt->prot, 2599 + .address = (struct sockaddr *)&upper_xprt->addr, 2600 + .addrsize = upper_xprt->addrlen, 2601 + .timeout = upper_clnt->cl_timeout, 2602 + .servername = upper_xprt->servername, 2603 + .program = upper_clnt->cl_program, 2604 + .prognumber = upper_clnt->cl_prog, 2605 + .version = upper_clnt->cl_vers, 2606 + .authflavor = RPC_AUTH_TLS, 2607 + .cred = upper_clnt->cl_cred, 2608 + .xprtsec = { 2609 + .policy = RPC_XPRTSEC_NONE, 2610 + }, 2611 + }; 2612 + unsigned int pflags = current->flags; 2613 + struct rpc_clnt *lower_clnt; 2614 + struct rpc_xprt *lower_xprt; 2615 + int status; 2616 + 2617 + if (atomic_read(&upper_xprt->swapper)) 2618 + current->flags |= PF_MEMALLOC; 2619 + 2620 + xs_stream_start_connect(upper_transport); 2621 + 2622 + /* This implicitly sends an RPC_AUTH_TLS probe */ 2623 + lower_clnt = rpc_create(&args); 2624 + if (IS_ERR(lower_clnt)) { 2625 + trace_rpc_tls_unavailable(upper_clnt, upper_xprt); 2626 + clear_bit(XPRT_SOCK_CONNECTING, &upper_transport->sock_state); 2627 + xprt_clear_connecting(upper_xprt); 2628 + xprt_wake_pending_tasks(upper_xprt, PTR_ERR(lower_clnt)); 2629 + xs_run_error_worker(upper_transport, XPRT_SOCK_WAKE_PENDING); 2630 + goto out_unlock; 2631 + } 2632 + 2633 + /* RPC_AUTH_TLS probe was successful. Try a TLS handshake on 2634 + * the lower xprt. 
2635 + */ 2636 + rcu_read_lock(); 2637 + lower_xprt = rcu_dereference(lower_clnt->cl_xprt); 2638 + rcu_read_unlock(); 2639 + status = xs_tls_handshake_sync(lower_xprt, &upper_xprt->xprtsec); 2640 + if (status) { 2641 + trace_rpc_tls_not_started(upper_clnt, upper_xprt); 2642 + goto out_close; 2643 + } 2644 + 2645 + status = xs_tcp_tls_finish_connecting(lower_xprt, upper_transport); 2646 + if (status) 2647 + goto out_close; 2648 + 2649 + trace_rpc_socket_connect(upper_xprt, upper_transport->sock, 0); 2650 + if (!xprt_test_and_set_connected(upper_xprt)) { 2651 + upper_xprt->connect_cookie++; 2652 + clear_bit(XPRT_SOCK_CONNECTING, &upper_transport->sock_state); 2653 + xprt_clear_connecting(upper_xprt); 2654 + 2655 + upper_xprt->stat.connect_count++; 2656 + upper_xprt->stat.connect_time += (long)jiffies - 2657 + upper_xprt->stat.connect_start; 2658 + xs_run_error_worker(upper_transport, XPRT_SOCK_WAKE_PENDING); 2659 + } 2660 + rpc_shutdown_client(lower_clnt); 2661 + 2662 + out_unlock: 2663 + current_restore_flags(pflags, PF_MEMALLOC); 2664 + upper_transport->clnt = NULL; 2665 + xprt_unlock_connect(upper_xprt, upper_transport); 2666 + return; 2667 + 2668 + out_close: 2669 + rpc_shutdown_client(lower_clnt); 2670 + 2671 + /* xprt_force_disconnect() wakes tasks with a fixed tk_status code. 2672 + * Wake them first here to ensure they get our tk_status code. 
2673 + */ 2674 + xprt_wake_pending_tasks(upper_xprt, status); 2675 + xs_tcp_force_close(upper_xprt); 2676 + xprt_clear_connecting(upper_xprt); 2677 + goto out_unlock; 2678 + } 2679 + 2428 2680 /** 2429 2681 * xs_connect - connect a socket to a remote endpoint 2430 2682 * @xprt: pointer to transport structure ··· 2717 2447 } else 2718 2448 dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); 2719 2449 2450 + transport->clnt = task->tk_client; 2720 2451 queue_delayed_work(xprtiod_workqueue, 2721 2452 &transport->connect_worker, 2722 2453 delay); ··· 3372 3101 } 3373 3102 3374 3103 /** 3104 + * xs_setup_tcp_tls - Set up transport to use a TCP with TLS 3105 + * @args: rpc transport creation arguments 3106 + * 3107 + */ 3108 + static struct rpc_xprt *xs_setup_tcp_tls(struct xprt_create *args) 3109 + { 3110 + struct sockaddr *addr = args->dstaddr; 3111 + struct rpc_xprt *xprt; 3112 + struct sock_xprt *transport; 3113 + struct rpc_xprt *ret; 3114 + unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries; 3115 + 3116 + if (args->flags & XPRT_CREATE_INFINITE_SLOTS) 3117 + max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT; 3118 + 3119 + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, 3120 + max_slot_table_size); 3121 + if (IS_ERR(xprt)) 3122 + return xprt; 3123 + transport = container_of(xprt, struct sock_xprt, xprt); 3124 + 3125 + xprt->prot = IPPROTO_TCP; 3126 + xprt->xprt_class = &xs_tcp_transport; 3127 + xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; 3128 + 3129 + xprt->bind_timeout = XS_BIND_TO; 3130 + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; 3131 + xprt->idle_timeout = XS_IDLE_DISC_TO; 3132 + 3133 + xprt->ops = &xs_tcp_ops; 3134 + xprt->timeout = &xs_tcp_default_timeout; 3135 + 3136 + xprt->max_reconnect_timeout = xprt->timeout->to_maxval; 3137 + xprt->connect_timeout = xprt->timeout->to_initval * 3138 + (xprt->timeout->to_retries + 1); 3139 + 3140 + INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn); 3141 + 
INIT_WORK(&transport->error_worker, xs_error_handle); 3142 + 3143 + switch (args->xprtsec.policy) { 3144 + case RPC_XPRTSEC_TLS_ANON: 3145 + case RPC_XPRTSEC_TLS_X509: 3146 + xprt->xprtsec = args->xprtsec; 3147 + INIT_DELAYED_WORK(&transport->connect_worker, 3148 + xs_tcp_tls_setup_socket); 3149 + break; 3150 + default: 3151 + ret = ERR_PTR(-EACCES); 3152 + goto out_err; 3153 + } 3154 + 3155 + switch (addr->sa_family) { 3156 + case AF_INET: 3157 + if (((struct sockaddr_in *)addr)->sin_port != htons(0)) 3158 + xprt_set_bound(xprt); 3159 + 3160 + xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); 3161 + break; 3162 + case AF_INET6: 3163 + if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) 3164 + xprt_set_bound(xprt); 3165 + 3166 + xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); 3167 + break; 3168 + default: 3169 + ret = ERR_PTR(-EAFNOSUPPORT); 3170 + goto out_err; 3171 + } 3172 + 3173 + if (xprt_bound(xprt)) 3174 + dprintk("RPC: set up xprt to %s (port %s) via %s\n", 3175 + xprt->address_strings[RPC_DISPLAY_ADDR], 3176 + xprt->address_strings[RPC_DISPLAY_PORT], 3177 + xprt->address_strings[RPC_DISPLAY_PROTO]); 3178 + else 3179 + dprintk("RPC: set up xprt to %s (autobind) via %s\n", 3180 + xprt->address_strings[RPC_DISPLAY_ADDR], 3181 + xprt->address_strings[RPC_DISPLAY_PROTO]); 3182 + 3183 + if (try_module_get(THIS_MODULE)) 3184 + return xprt; 3185 + ret = ERR_PTR(-EINVAL); 3186 + out_err: 3187 + xs_xprt_free(xprt); 3188 + return ret; 3189 + } 3190 + 3191 + /** 3375 3192 * xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket 3376 3193 * @args: rpc transport creation arguments 3377 3194 * ··· 3568 3209 .netid = { "tcp", "tcp6", "" }, 3569 3210 }; 3570 3211 3212 + static struct xprt_class xs_tcp_tls_transport = { 3213 + .list = LIST_HEAD_INIT(xs_tcp_tls_transport.list), 3214 + .name = "tcp-with-tls", 3215 + .owner = THIS_MODULE, 3216 + .ident = XPRT_TRANSPORT_TCP_TLS, 3217 + .setup = xs_setup_tcp_tls, 3218 + .netid = { "tcp", 
"tcp6", "" }, 3219 + }; 3220 + 3571 3221 static struct xprt_class xs_bc_tcp_transport = { 3572 3222 .list = LIST_HEAD_INIT(xs_bc_tcp_transport.list), 3573 3223 .name = "tcp NFSv4.1 backchannel", ··· 3598 3230 xprt_register_transport(&xs_local_transport); 3599 3231 xprt_register_transport(&xs_udp_transport); 3600 3232 xprt_register_transport(&xs_tcp_transport); 3233 + xprt_register_transport(&xs_tcp_tls_transport); 3601 3234 xprt_register_transport(&xs_bc_tcp_transport); 3602 3235 3603 3236 return 0; ··· 3618 3249 xprt_unregister_transport(&xs_local_transport); 3619 3250 xprt_unregister_transport(&xs_udp_transport); 3620 3251 xprt_unregister_transport(&xs_tcp_transport); 3252 + xprt_unregister_transport(&xs_tcp_tls_transport); 3621 3253 xprt_unregister_transport(&xs_bc_tcp_transport); 3622 3254 } 3623 3255