Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net/smc: rebuild nonblocking connect

The recent poll change may lead to stalls for non-blocking connecting
SMC sockets, since sock_poll_wait is no longer performed on the
internal CLC socket, but on the outer SMC socket. kernel_connect() on
the internal CLC socket returns with -EINPROGRESS, but the wake up
logic does not work in all cases. If the internal CLC socket is still
in state TCP_SYN_SENT when polled, sock_poll_wait() from sock_poll()
does not sleep. It is supposed to sleep until the state of the internal
CLC socket switches to TCP_ESTABLISHED.

This problem triggered a redesign of the SMC nonblocking connect logic.
This patch introduces a connect worker covering all connect steps
followed by a wake-up of socket waiters. This makes it possible to get
rid of all delays and locks in smc_poll().

Fixes: c0129a061442 ("smc: convert to ->poll_mask")
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Ursula Braun and committed by
David S. Miller
24ac3a08 15ecbe94

+71 -28
+63 -28
net/smc/af_smc.c
··· 45 45 */ 46 46 47 47 static void smc_tcp_listen_work(struct work_struct *); 48 + static void smc_connect_work(struct work_struct *); 48 49 49 50 static void smc_set_keepalive(struct sock *sk, int val) 50 51 { ··· 123 122 goto out; 124 123 125 124 smc = smc_sk(sk); 125 + 126 + /* cleanup for a dangling non-blocking connect */ 127 + flush_work(&smc->connect_work); 128 + kfree(smc->connect_info); 129 + smc->connect_info = NULL; 130 + 126 131 if (sk->sk_state == SMC_LISTEN) 127 132 /* smc_close_non_accepted() is called and acquires 128 133 * sock lock for child sockets again ··· 193 186 sk->sk_protocol = protocol; 194 187 smc = smc_sk(sk); 195 188 INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); 189 + INIT_WORK(&smc->connect_work, smc_connect_work); 196 190 INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work); 197 191 INIT_LIST_HEAD(&smc->accept_q); 198 192 spin_lock_init(&smc->accept_q_lock); ··· 584 576 return 0; 585 577 } 586 578 579 + static void smc_connect_work(struct work_struct *work) 580 + { 581 + struct smc_sock *smc = container_of(work, struct smc_sock, 582 + connect_work); 583 + int rc; 584 + 585 + lock_sock(&smc->sk); 586 + rc = kernel_connect(smc->clcsock, &smc->connect_info->addr, 587 + smc->connect_info->alen, smc->connect_info->flags); 588 + if (smc->clcsock->sk->sk_err) { 589 + smc->sk.sk_err = smc->clcsock->sk->sk_err; 590 + goto out; 591 + } 592 + if (rc < 0) { 593 + smc->sk.sk_err = -rc; 594 + goto out; 595 + } 596 + 597 + rc = __smc_connect(smc); 598 + if (rc < 0) 599 + smc->sk.sk_err = -rc; 600 + 601 + out: 602 + smc->sk.sk_state_change(&smc->sk); 603 + kfree(smc->connect_info); 604 + smc->connect_info = NULL; 605 + release_sock(&smc->sk); 606 + } 607 + 587 608 static int smc_connect(struct socket *sock, struct sockaddr *addr, 588 609 int alen, int flags) 589 610 { ··· 642 605 643 606 smc_copy_sock_settings_to_clc(smc); 644 607 tcp_sk(smc->clcsock->sk)->syn_smc = 1; 645 - rc = kernel_connect(smc->clcsock, addr, alen, flags); 646 - if (rc) 
647 - goto out; 608 + if (flags & O_NONBLOCK) { 609 + if (smc->connect_info) { 610 + rc = -EALREADY; 611 + goto out; 612 + } 613 + smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL); 614 + if (!smc->connect_info) { 615 + rc = -ENOMEM; 616 + goto out; 617 + } 618 + smc->connect_info->alen = alen; 619 + smc->connect_info->flags = flags ^ O_NONBLOCK; 620 + memcpy(&smc->connect_info->addr, addr, alen); 621 + schedule_work(&smc->connect_work); 622 + rc = -EINPROGRESS; 623 + } else { 624 + rc = kernel_connect(smc->clcsock, addr, alen, flags); 625 + if (rc) 626 + goto out; 648 627 649 - rc = __smc_connect(smc); 650 - if (rc < 0) 651 - goto out; 652 - else 653 - rc = 0; /* success cases including fallback */ 628 + rc = __smc_connect(smc); 629 + if (rc < 0) 630 + goto out; 631 + else 632 + rc = 0; /* success cases including fallback */ 633 + } 654 634 655 635 out: 656 636 release_sock(sk); ··· 1332 1278 struct sock *sk = sock->sk; 1333 1279 __poll_t mask = 0; 1334 1280 struct smc_sock *smc; 1335 - int rc; 1336 1281 1337 1282 if (!sk) 1338 1283 return EPOLLNVAL; 1339 1284 1340 1285 smc = smc_sk(sock->sk); 1341 - sock_hold(sk); 1342 - lock_sock(sk); 1343 1286 if ((sk->sk_state == SMC_INIT) || smc->use_fallback) { 1344 1287 /* delegate to CLC child sock */ 1345 - release_sock(sk); 1346 1288 mask = smc->clcsock->ops->poll_mask(smc->clcsock, events); 1347 - lock_sock(sk); 1348 1289 sk->sk_err = smc->clcsock->sk->sk_err; 1349 - if (sk->sk_err) { 1290 + if (sk->sk_err) 1350 1291 mask |= EPOLLERR; 1351 - } else { 1352 - /* if non-blocking connect finished ... 
*/ 1353 - if (sk->sk_state == SMC_INIT && 1354 - mask & EPOLLOUT && 1355 - smc->clcsock->sk->sk_state != TCP_CLOSE) { 1356 - rc = __smc_connect(smc); 1357 - if (rc < 0) 1358 - mask |= EPOLLERR; 1359 - /* success cases including fallback */ 1360 - mask |= EPOLLOUT | EPOLLWRNORM; 1361 - } 1362 - } 1363 1292 } else { 1364 1293 if (sk->sk_err) 1365 1294 mask |= EPOLLERR; ··· 1371 1334 mask |= EPOLLPRI; 1372 1335 1373 1336 } 1374 - release_sock(sk); 1375 - sock_put(sk); 1376 1337 1377 1338 return mask; 1378 1339 }
+8
net/smc/smc.h
··· 187 187 struct work_struct close_work; /* peer sent some closing */ 188 188 }; 189 189 190 + struct smc_connect_info { 191 + int flags; 192 + int alen; 193 + struct sockaddr addr; 194 + }; 195 + 190 196 struct smc_sock { /* smc sock container */ 191 197 struct sock sk; 192 198 struct socket *clcsock; /* internal tcp socket */ 193 199 struct smc_connection conn; /* smc connection */ 194 200 struct smc_sock *listen_smc; /* listen parent */ 201 + struct smc_connect_info *connect_info; /* connect address & flags */ 202 + struct work_struct connect_work; /* handle non-blocking connect*/ 195 203 struct work_struct tcp_listen_work;/* handle tcp socket accepts */ 196 204 struct work_struct smc_listen_work;/* prepare new accept socket */ 197 205 struct list_head accept_q; /* sockets to be accepted */