Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tcp: add tcp_sock_set_nodelay

Add a helper to directly set the TCP_NODELAY sockopt from kernel space
without going through a fake uaccess. Clean up the callers to avoid
pointless wrappers now that this is a simple function call.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
Acked-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Christoph Hellwig and committed by David S. Miller.
12abc5ee db10538a

+49 -127
-7
drivers/block/drbd/drbd_int.h
··· 1570 1570 extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed); 1571 1571 extern int drbd_connected(struct drbd_peer_device *); 1572 1572 1573 - static inline void drbd_tcp_nodelay(struct socket *sock) 1574 - { 1575 - int val = 1; 1576 - (void) kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, 1577 - (char*)&val, sizeof(val)); 1578 - } 1579 - 1580 1573 static inline void drbd_tcp_quickack(struct socket *sock) 1581 1574 { 1582 1575 int val = 2;
+1 -1
drivers/block/drbd/drbd_main.c
··· 660 660 /* DRBD protocol "pings" are latency critical. 661 661 * This is supposed to trigger tcp_push_pending_frames() */ 662 662 if (!err && (cmd == P_PING || cmd == P_PING_ACK)) 663 - drbd_tcp_nodelay(sock->socket); 663 + tcp_sock_set_nodelay(sock->socket->sk); 664 664 665 665 return err; 666 666 }
+2 -2
drivers/block/drbd/drbd_receiver.c
··· 1051 1051 1052 1052 /* we don't want delays. 1053 1053 * we use TCP_CORK where appropriate, though */ 1054 - drbd_tcp_nodelay(sock.socket); 1055 - drbd_tcp_nodelay(msock.socket); 1054 + tcp_sock_set_nodelay(sock.socket->sk); 1055 + tcp_sock_set_nodelay(msock.socket->sk); 1056 1056 1057 1057 connection->data.socket = sock.socket; 1058 1058 connection->meta.socket = msock.socket;
+4 -20
drivers/infiniband/sw/siw/siw_cm.c
··· 947 947 siw_cep_get(new_cep); 948 948 new_s->sk->sk_user_data = new_cep; 949 949 950 - if (siw_tcp_nagle == false) { 951 - int val = 1; 952 - 953 - rv = kernel_setsockopt(new_s, SOL_TCP, TCP_NODELAY, 954 - (char *)&val, sizeof(val)); 955 - if (rv) { 956 - siw_dbg_cep(cep, "setsockopt NODELAY error: %d\n", rv); 957 - goto error; 958 - } 959 - } 950 + if (siw_tcp_nagle == false) 951 + tcp_sock_set_nodelay(new_s->sk); 960 952 new_cep->state = SIW_EPSTATE_AWAIT_MPAREQ; 961 953 962 954 rv = siw_cm_queue_work(new_cep, SIW_CM_WORK_MPATIMEOUT); ··· 1378 1386 siw_dbg_qp(qp, "kernel_bindconnect: error %d\n", rv); 1379 1387 goto error; 1380 1388 } 1381 - if (siw_tcp_nagle == false) { 1382 - int val = 1; 1383 - 1384 - rv = kernel_setsockopt(s, SOL_TCP, TCP_NODELAY, (char *)&val, 1385 - sizeof(val)); 1386 - if (rv) { 1387 - siw_dbg_qp(qp, "setsockopt NODELAY error: %d\n", rv); 1388 - goto error; 1389 - } 1390 - } 1389 + if (siw_tcp_nagle == false) 1390 + tcp_sock_set_nodelay(s->sk); 1391 1391 cep = siw_cep_alloc(sdev); 1392 1392 if (!cep) { 1393 1393 rv = -ENOMEM;
+1 -8
drivers/nvme/host/tcp.c
··· 1346 1346 } 1347 1347 1348 1348 /* Set TCP no delay */ 1349 - opt = 1; 1350 - ret = kernel_setsockopt(queue->sock, IPPROTO_TCP, 1351 - TCP_NODELAY, (char *)&opt, sizeof(opt)); 1352 - if (ret) { 1353 - dev_err(nctrl->device, 1354 - "failed to set TCP_NODELAY sock opt %d\n", ret); 1355 - goto err_sock; 1356 - } 1349 + tcp_sock_set_nodelay(queue->sock->sk); 1357 1350 1358 1351 /* 1359 1352 * Cleanup whatever is sitting in the TCP transmit queue on socket
+2 -10
drivers/nvme/target/tcp.c
··· 1580 1580 { 1581 1581 struct nvmet_tcp_port *port; 1582 1582 __kernel_sa_family_t af; 1583 - int opt, ret; 1583 + int ret; 1584 1584 1585 1585 port = kzalloc(sizeof(*port), GFP_KERNEL); 1586 1586 if (!port) ··· 1625 1625 port->data_ready = port->sock->sk->sk_data_ready; 1626 1626 port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready; 1627 1627 sock_set_reuseaddr(port->sock->sk); 1628 - 1629 - opt = 1; 1630 - ret = kernel_setsockopt(port->sock, IPPROTO_TCP, 1631 - TCP_NODELAY, (char *)&opt, sizeof(opt)); 1632 - if (ret) { 1633 - pr_err("failed to set TCP_NODELAY sock opt %d\n", ret); 1634 - goto err_sock; 1635 - } 1636 - 1628 + tcp_sock_set_nodelay(port->sock->sk); 1637 1629 if (so_priority > 0) 1638 1630 sock_set_priority(port->sock->sk, so_priority); 1639 1631
+3 -12
drivers/target/iscsi/iscsi_target_login.c
··· 897 897 /* 898 898 * Set SO_REUSEADDR, and disable Nagel Algorithm with TCP_NODELAY. 899 899 */ 900 - /* FIXME: Someone please explain why this is endian-safe */ 901 - opt = 1; 902 - if (np->np_network_transport == ISCSI_TCP) { 903 - ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, 904 - (char *)&opt, sizeof(opt)); 905 - if (ret < 0) { 906 - pr_err("kernel_setsockopt() for TCP_NODELAY" 907 - " failed: %d\n", ret); 908 - goto fail; 909 - } 910 - } 911 - 900 + if (np->np_network_transport == ISCSI_TCP) 901 + tcp_sock_set_nodelay(sock->sk); 912 902 sock_set_reuseaddr(sock->sk); 913 903 904 + opt = 1; 914 905 ret = kernel_setsockopt(sock, IPPROTO_IP, IP_FREEBIND, 915 906 (char *)&opt, sizeof(opt)); 916 907 if (ret < 0) {
+2 -8
fs/cifs/connect.c
··· 3929 3929 socket->sk->sk_rcvbuf = 140 * 1024; 3930 3930 } 3931 3931 3932 - if (server->tcp_nodelay) { 3933 - int val = 1; 3934 - rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY, 3935 - (char *)&val, sizeof(val)); 3936 - if (rc) 3937 - cifs_dbg(FYI, "set TCP_NODELAY socket option error %d\n", 3938 - rc); 3939 - } 3932 + if (server->tcp_nodelay) 3933 + tcp_sock_set_nodelay(socket->sk); 3940 3934 3941 3935 cifs_dbg(FYI, "sndbuf %d rcvbuf %d rcvtimeo 0x%lx\n", 3942 3936 socket->sk->sk_sndbuf,
+2 -6
fs/dlm/lowcomms.c
··· 1011 1011 struct sockaddr_storage saddr, src_addr; 1012 1012 int addr_len; 1013 1013 struct socket *sock = NULL; 1014 - int one = 1; 1015 1014 int result; 1016 1015 1017 1016 if (con->nodeid == 0) { ··· 1059 1060 log_print("connecting to %d", con->nodeid); 1060 1061 1061 1062 /* Turn off Nagle's algorithm */ 1062 - kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, 1063 - sizeof(one)); 1063 + tcp_sock_set_nodelay(sock->sk); 1064 1064 1065 1065 result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, 1066 1066 O_NONBLOCK); ··· 1101 1103 { 1102 1104 struct socket *sock = NULL; 1103 1105 int result = 0; 1104 - int one = 1; 1105 1106 int addr_len; 1106 1107 1107 1108 if (dlm_local_addr[0]->ss_family == AF_INET) ··· 1117 1120 } 1118 1121 1119 1122 /* Turn off Nagle's algorithm */ 1120 - kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, 1121 - sizeof(one)); 1123 + tcp_sock_set_nodelay(sock->sk); 1122 1124 1123 1125 sock_set_reuseaddr(sock->sk); 1124 1126
+2 -18
fs/ocfs2/cluster/tcp.c
··· 1441 1441 sc_put(sc); 1442 1442 } 1443 1443 1444 - static int o2net_set_nodelay(struct socket *sock) 1445 - { 1446 - int val = 1; 1447 - 1448 - return kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, 1449 - (void *)&val, sizeof(val)); 1450 - } 1451 - 1452 1444 static int o2net_set_usertimeout(struct socket *sock) 1453 1445 { 1454 1446 int user_timeout = O2NET_TCP_USER_TIMEOUT; ··· 1628 1636 goto out; 1629 1637 } 1630 1638 1631 - ret = o2net_set_nodelay(sc->sc_sock); 1632 - if (ret) { 1633 - mlog(ML_ERROR, "setting TCP_NODELAY failed with %d\n", ret); 1634 - goto out; 1635 - } 1639 + tcp_sock_set_nodelay(sc->sc_sock->sk); 1636 1640 1637 1641 ret = o2net_set_usertimeout(sock); 1638 1642 if (ret) { ··· 1820 1832 *more = 1; 1821 1833 new_sock->sk->sk_allocation = GFP_ATOMIC; 1822 1834 1823 - ret = o2net_set_nodelay(new_sock); 1824 - if (ret) { 1825 - mlog(ML_ERROR, "setting TCP_NODELAY failed with %d\n", ret); 1826 - goto out; 1827 - } 1835 + tcp_sock_set_nodelay(new_sock->sk); 1828 1836 1829 1837 ret = o2net_set_usertimeout(new_sock); 1830 1838 if (ret) {
+1
include/linux/tcp.h
··· 498 498 int shiftlen); 499 499 500 500 void tcp_sock_set_cork(struct sock *sk, bool on); 501 + void tcp_sock_set_nodelay(struct sock *sk); 501 502 502 503 #endif /* _LINUX_TCP_H */
+2 -9
net/ceph/messenger.c
··· 490 490 return ret; 491 491 } 492 492 493 - if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) { 494 - int optval = 1; 495 - 496 - ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, 497 - (char *)&optval, sizeof(optval)); 498 - if (ret) 499 - pr_err("kernel_setsockopt(TCP_NODELAY) failed: %d", 500 - ret); 501 - } 493 + if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) 494 + tcp_sock_set_nodelay(sock->sk); 502 495 503 496 con->sock = sock; 504 497 return 0;
+25 -14
net/ipv4/tcp.c
··· 2832 2832 } 2833 2833 EXPORT_SYMBOL(tcp_sock_set_cork); 2834 2834 2835 + /* TCP_NODELAY is weaker than TCP_CORK, so that this option on corked socket is 2836 + * remembered, but it is not activated until cork is cleared. 2837 + * 2838 + * However, when TCP_NODELAY is set we make an explicit push, which overrides 2839 + * even TCP_CORK for currently queued segments. 2840 + */ 2841 + static void __tcp_sock_set_nodelay(struct sock *sk, bool on) 2842 + { 2843 + if (on) { 2844 + tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH; 2845 + tcp_push_pending_frames(sk); 2846 + } else { 2847 + tcp_sk(sk)->nonagle &= ~TCP_NAGLE_OFF; 2848 + } 2849 + } 2850 + 2851 + void tcp_sock_set_nodelay(struct sock *sk) 2852 + { 2853 + lock_sock(sk); 2854 + __tcp_sock_set_nodelay(sk, true); 2855 + release_sock(sk); 2856 + } 2857 + EXPORT_SYMBOL(tcp_sock_set_nodelay); 2858 + 2835 2859 /* 2836 2860 * Socket option code for TCP. 2837 2861 */ ··· 2953 2929 break; 2954 2930 2955 2931 case TCP_NODELAY: 2956 - if (val) { 2957 - /* TCP_NODELAY is weaker than TCP_CORK, so that 2958 - * this option on corked socket is remembered, but 2959 - * it is not activated until cork is cleared. 2960 - * 2961 - * However, when TCP_NODELAY is set we make 2962 - * an explicit push, which overrides even TCP_CORK 2963 - * for currently queued segments. 2964 - */ 2965 - tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH; 2966 - tcp_push_pending_frames(sk); 2967 - } else { 2968 - tp->nonagle &= ~TCP_NAGLE_OFF; 2969 - } 2932 + __tcp_sock_set_nodelay(sk, val); 2970 2933 break; 2971 2934 2972 2935 case TCP_THIN_LINEAR_TIMEOUTS:
+1 -10
net/rds/tcp.c
··· 89 89 { } 90 90 }; 91 91 92 - /* doing it this way avoids calling tcp_sk() */ 93 - void rds_tcp_nonagle(struct socket *sock) 94 - { 95 - int val = 1; 96 - 97 - kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (void *)&val, 98 - sizeof(val)); 99 - } 100 - 101 92 u32 rds_tcp_write_seq(struct rds_tcp_connection *tc) 102 93 { 103 94 /* seq# of the last byte of data in tcp send buffer */ ··· 493 502 struct net *net = sock_net(sk); 494 503 struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); 495 504 496 - rds_tcp_nonagle(sock); 505 + tcp_sock_set_nodelay(sock->sk); 497 506 lock_sock(sk); 498 507 if (rtn->sndbuf_size > 0) { 499 508 sk->sk_sndbuf = rtn->sndbuf_size;
-1
net/rds/tcp.h
··· 50 50 51 51 /* tcp.c */ 52 52 void rds_tcp_tune(struct socket *sock); 53 - void rds_tcp_nonagle(struct socket *sock); 54 53 void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp); 55 54 void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); 56 55 void rds_tcp_restore_callbacks(struct socket *sock,
+1 -1
net/rds/tcp_listen.c
··· 288 288 } 289 289 290 290 sock->sk->sk_reuse = SK_CAN_REUSE; 291 - rds_tcp_nonagle(sock); 291 + tcp_sock_set_nodelay(sock->sk); 292 292 293 293 write_lock_bh(&sock->sk->sk_callback_lock); 294 294 sock->sk->sk_user_data = sock->sk->sk_data_ready;