Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tcp: fix recv with flags MSG_WAITALL | MSG_PEEK

Currently, tcp_recvmsg enters a busy loop in sk_wait_data if called
with flags = MSG_WAITALL | MSG_PEEK.

sk_wait_data waits until sk_receive_queue is not empty. In this case the
receive queue is already non-empty but does not contain any skb that we
can use, so the wait condition is immediately true and we never sleep.

Add a "last skb seen on receive queue" argument to sk_wait_data, so
that it sleeps until the receive queue has new skbs.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=99461
Link: https://sourceware.org/bugzilla/show_bug.cgi?id=18493
Link: https://bugzilla.redhat.com/show_bug.cgi?id=1205258
Reported-by: Enrico Scholz <rh-bugzilla@ensc.de>
Reported-by: Dan Searle <dan@censornet.com>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Sabrina Dubroca and committed by
David S. Miller
dfbafc99 3d3af885

+14 -10
+1 -1
include/net/sock.h
··· 902 902 void sk_set_memalloc(struct sock *sk); 903 903 void sk_clear_memalloc(struct sock *sk); 904 904 905 - int sk_wait_data(struct sock *sk, long *timeo); 905 + int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb); 906 906 907 907 struct request_sock_ops; 908 908 struct timewait_sock_ops;
+3 -2
net/core/sock.c
··· 1967 1967 * sk_wait_data - wait for data to arrive at sk_receive_queue 1968 1968 * @sk: sock to wait on 1969 1969 * @timeo: for how long 1970 + * @skb: last skb seen on sk_receive_queue 1970 1971 * 1971 1972 * Now socket state including sk->sk_err is changed only under lock, 1972 1973 * hence we may omit checks after joining wait queue. 1973 1974 * We check receive queue before schedule() only as optimization; 1974 1975 * it is very likely that release_sock() added new data. 1975 1976 */ 1976 - int sk_wait_data(struct sock *sk, long *timeo) 1977 + int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb) 1977 1978 { 1978 1979 int rc; 1979 1980 DEFINE_WAIT(wait); 1980 1981 1981 1982 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1982 1983 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1983 - rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); 1984 + rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb); 1984 1985 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1985 1986 finish_wait(sk_sleep(sk), &wait); 1986 1987 return rc;
+1 -1
net/dccp/proto.c
··· 886 886 break; 887 887 } 888 888 889 - sk_wait_data(sk, &timeo); 889 + sk_wait_data(sk, &timeo, NULL); 890 890 continue; 891 891 found_ok_skb: 892 892 if (len > skb->len)
+7 -4
net/ipv4/tcp.c
··· 780 780 ret = -EAGAIN; 781 781 break; 782 782 } 783 - sk_wait_data(sk, &timeo); 783 + sk_wait_data(sk, &timeo, NULL); 784 784 if (signal_pending(current)) { 785 785 ret = sock_intr_errno(timeo); 786 786 break; ··· 1575 1575 int target; /* Read at least this many bytes */ 1576 1576 long timeo; 1577 1577 struct task_struct *user_recv = NULL; 1578 - struct sk_buff *skb; 1578 + struct sk_buff *skb, *last; 1579 1579 u32 urg_hole = 0; 1580 1580 1581 1581 if (unlikely(flags & MSG_ERRQUEUE)) ··· 1635 1635 1636 1636 /* Next get a buffer. */ 1637 1637 1638 + last = skb_peek_tail(&sk->sk_receive_queue); 1638 1639 skb_queue_walk(&sk->sk_receive_queue, skb) { 1640 + last = skb; 1639 1641 /* Now that we have two receive queues this 1640 1642 * shouldn't happen. 1641 1643 */ ··· 1756 1754 /* Do not sleep, just process backlog. */ 1757 1755 release_sock(sk); 1758 1756 lock_sock(sk); 1759 - } else 1760 - sk_wait_data(sk, &timeo); 1757 + } else { 1758 + sk_wait_data(sk, &timeo, last); 1759 + } 1761 1760 1762 1761 if (user_recv) { 1763 1762 int chunk;
+2 -2
net/llc/af_llc.c
··· 613 613 if (signal_pending(current)) 614 614 break; 615 615 rc = 0; 616 - if (sk_wait_data(sk, &timeo)) 616 + if (sk_wait_data(sk, &timeo, NULL)) 617 617 break; 618 618 } 619 619 return rc; ··· 802 802 release_sock(sk); 803 803 lock_sock(sk); 804 804 } else 805 - sk_wait_data(sk, &timeo); 805 + sk_wait_data(sk, &timeo, NULL); 806 806 807 807 if ((flags & MSG_PEEK) && peek_seq != llc->copied_seq) { 808 808 net_dbg_ratelimited("LLC(%s:%d): Application bug, race in MSG_PEEK\n",