Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: Generalize socket rx gap / receive queue overflow cmsg

Create a new socket level option to report number of queue overflows

Recently I augmented the AF_PACKET protocol to report the number of frames lost
on the socket receive queue between any two enqueued frames. This value was
exported via a SOL_PACKET level cmsg. After I completed that work it was
requested that this feature be generalized so that any datagram oriented socket
could make use of this option. As such I've created this patch. It creates a
new SOL_SOCKET level option called SO_RXQ_OVFL, which when enabled exports a
SOL_SOCKET level cmsg that reports the number of times the sk_receive_queue
overflowed between any two given frames. It also augments the AF_PACKET
protocol to take advantage of this new feature (as it previously did not touch
sk->sk_drops, which this patch uses to record the overflow count). Tested
successfully by me.

Notes:

1) Unlike my previous patch, this patch simply records the sk_drops value, which
is not a number of drops between packets, but rather a total number of drops.
Deltas must be computed in user space.

2) While this patch currently works with datagram oriented protocols, it will
also be accepted by non-datagram oriented protocols. I'm not sure if that's
agreeable to everyone, but my argument in favor of doing so is that, for those
protocols which aren't applicable to this option, sk_drops will always be zero,
and reporting no drops on a receive queue that isn't used for those
non-participating protocols seems reasonable to me. This also saves us having
to code in a per-protocol opt in mechanism.

3) This applies cleanly to net-next assuming that commit
977750076d98c7ff6cbda51858bb5a5894a9d9ab (my af packet cmsg patch) is reverted.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Neil Horman and committed by
David S. Miller
3b885787 d5e63bde

+88 -21
+2
arch/alpha/include/asm/socket.h
··· 67 67 #define SO_TIMESTAMPING 37 68 68 #define SCM_TIMESTAMPING SO_TIMESTAMPING 69 69 70 + #define SO_RXQ_OVFL 40 71 + 70 72 /* O_NONBLOCK clashes with the bits used for socket types. Therefore we 71 73 * have to define SOCK_NONBLOCK to a different value here. 72 74 */
+2
arch/arm/include/asm/socket.h
··· 60 60 #define SO_PROTOCOL 38 61 61 #define SO_DOMAIN 39 62 62 63 + #define SO_RXQ_OVFL 40 64 + 63 65 #endif /* _ASM_SOCKET_H */
+2
arch/avr32/include/asm/socket.h
··· 60 60 #define SO_PROTOCOL 38 61 61 #define SO_DOMAIN 39 62 62 63 + #define SO_RXQ_OVFL 40 64 + 63 65 #endif /* __ASM_AVR32_SOCKET_H */
+2
arch/cris/include/asm/socket.h
··· 62 62 #define SO_PROTOCOL 38 63 63 #define SO_DOMAIN 39 64 64 65 + #define SO_RXQ_OVFL 40 66 + 65 67 #endif /* _ASM_SOCKET_H */ 66 68 67 69
+2
arch/frv/include/asm/socket.h
··· 60 60 #define SO_PROTOCOL 38 61 61 #define SO_DOMAIN 39 62 62 63 + #define SO_RXQ_OVFL 40 64 + 63 65 #endif /* _ASM_SOCKET_H */ 64 66
+2
arch/h8300/include/asm/socket.h
··· 60 60 #define SO_PROTOCOL 38 61 61 #define SO_DOMAIN 39 62 62 63 + #define SO_RXQ_OVFL 40 64 + 63 65 #endif /* _ASM_SOCKET_H */
+2
arch/ia64/include/asm/socket.h
··· 69 69 #define SO_PROTOCOL 38 70 70 #define SO_DOMAIN 39 71 71 72 + #define SO_RXQ_OVFL 40 73 + 72 74 #endif /* _ASM_IA64_SOCKET_H */
+2
arch/m32r/include/asm/socket.h
··· 60 60 #define SO_PROTOCOL 38 61 61 #define SO_DOMAIN 39 62 62 63 + #define SO_RXQ_OVFL 40 64 + 63 65 #endif /* _ASM_M32R_SOCKET_H */
+2
arch/m68k/include/asm/socket.h
··· 60 60 #define SO_PROTOCOL 38 61 61 #define SO_DOMAIN 39 62 62 63 + #define SO_RXQ_OVFL 40 64 + 63 65 #endif /* _ASM_SOCKET_H */
+2
arch/mips/include/asm/socket.h
··· 80 80 #define SO_TIMESTAMPING 37 81 81 #define SCM_TIMESTAMPING SO_TIMESTAMPING 82 82 83 + #define SO_RXQ_OVFL 40 84 + 83 85 #ifdef __KERNEL__ 84 86 85 87 /** sock_type - Socket types
+2
arch/mn10300/include/asm/socket.h
··· 60 60 #define SO_PROTOCOL 38 61 61 #define SO_DOMAIN 39 62 62 63 + #define SO_RXQ_OVFL 40 64 + 63 65 #endif /* _ASM_SOCKET_H */
+2
arch/parisc/include/asm/socket.h
··· 59 59 #define SO_TIMESTAMPING 0x4020 60 60 #define SCM_TIMESTAMPING SO_TIMESTAMPING 61 61 62 + #define SO_RXQ_OVFL 0x4021 63 + 62 64 /* O_NONBLOCK clashes with the bits used for socket types. Therefore we 63 65 * have to define SOCK_NONBLOCK to a different value here. 64 66 */
+2
arch/powerpc/include/asm/socket.h
··· 67 67 #define SO_PROTOCOL 38 68 68 #define SO_DOMAIN 39 69 69 70 + #define SO_RXQ_OVFL 40 71 + 70 72 #endif /* _ASM_POWERPC_SOCKET_H */
+2
arch/s390/include/asm/socket.h
··· 68 68 #define SO_PROTOCOL 38 69 69 #define SO_DOMAIN 39 70 70 71 + #define SO_RXQ_OVFL 40 72 + 71 73 #endif /* _ASM_SOCKET_H */
+2
arch/sparc/include/asm/socket.h
··· 56 56 #define SO_TIMESTAMPING 0x0023 57 57 #define SCM_TIMESTAMPING SO_TIMESTAMPING 58 58 59 + #define SO_RXQ_OVFL 0x0024 60 + 59 61 /* Security levels - as per NRL IPv6 - don't actually do anything */ 60 62 #define SO_SECURITY_AUTHENTICATION 0x5001 61 63 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
+2
arch/xtensa/include/asm/socket.h
··· 71 71 #define SO_PROTOCOL 38 72 72 #define SO_DOMAIN 39 73 73 74 + #define SO_RXQ_OVFL 40 75 + 74 76 #endif /* _XTENSA_SOCKET_H */
+1
include/asm-generic/socket.h
··· 63 63 #define SO_PROTOCOL 38 64 64 #define SO_DOMAIN 39 65 65 66 + #define SO_RXQ_OVFL 40 66 67 #endif /* __ASM_GENERIC_SOCKET_H */
+4 -2
include/linux/skbuff.h
··· 389 389 #ifdef CONFIG_NETWORK_SECMARK 390 390 __u32 secmark; 391 391 #endif 392 - 393 - __u32 mark; 392 + union { 393 + __u32 mark; 394 + __u32 dropcount; 395 + }; 394 396 395 397 __u16 vlan_tci; 396 398
+3
include/net/sock.h
··· 505 505 SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */ 506 506 SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */ 507 507 SOCK_FASYNC, /* fasync() active */ 508 + SOCK_RXQ_OVFL, 508 509 }; 509 510 510 511 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) ··· 1493 1492 else 1494 1493 sk->sk_stamp = kt; 1495 1494 } 1495 + 1496 + extern void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); 1496 1497 1497 1498 /** 1498 1499 * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
+1 -1
net/atm/common.c
··· 496 496 error = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 497 497 if (error) 498 498 return error; 499 - sock_recv_timestamp(msg, sk, skb); 499 + sock_recv_ts_and_drops(msg, sk, skb); 500 500 pr_debug("RcvM %d -= %d\n", atomic_read(&sk->sk_rmem_alloc), skb->truesize); 501 501 atm_return(vcc, skb->truesize); 502 502 skb_free_datagram(sk, skb);
+1 -1
net/bluetooth/af_bluetooth.c
··· 257 257 skb_reset_transport_header(skb); 258 258 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 259 259 if (err == 0) 260 - sock_recv_timestamp(msg, sk, skb); 260 + sock_recv_ts_and_drops(msg, sk, skb); 261 261 262 262 skb_free_datagram(sk, skb); 263 263
+1 -1
net/bluetooth/rfcomm/sock.c
··· 703 703 copied += chunk; 704 704 size -= chunk; 705 705 706 - sock_recv_timestamp(msg, sk, skb); 706 + sock_recv_ts_and_drops(msg, sk, skb); 707 707 708 708 if (!(flags & MSG_PEEK)) { 709 709 atomic_sub(chunk, &sk->sk_rmem_alloc);
+1 -1
net/can/bcm.c
··· 1534 1534 return err; 1535 1535 } 1536 1536 1537 - sock_recv_timestamp(msg, sk, skb); 1537 + sock_recv_ts_and_drops(msg, sk, skb); 1538 1538 1539 1539 if (msg->msg_name) { 1540 1540 msg->msg_namelen = sizeof(struct sockaddr_can);
+1 -1
net/can/raw.c
··· 702 702 return err; 703 703 } 704 704 705 - sock_recv_timestamp(msg, sk, skb); 705 + sock_recv_ts_and_drops(msg, sk, skb); 706 706 707 707 if (msg->msg_name) { 708 708 msg->msg_namelen = sizeof(struct sockaddr_can);
+16 -1
net/core/sock.c
··· 276 276 { 277 277 int err = 0; 278 278 int skb_len; 279 + unsigned long flags; 280 + struct sk_buff_head *list = &sk->sk_receive_queue; 279 281 280 282 /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces 281 283 number of warnings when compiling with -W --ANK ··· 307 305 */ 308 306 skb_len = skb->len; 309 307 310 - skb_queue_tail(&sk->sk_receive_queue, skb); 308 + spin_lock_irqsave(&list->lock, flags); 309 + skb->dropcount = atomic_read(&sk->sk_drops); 310 + __skb_queue_tail(list, skb); 311 + spin_unlock_irqrestore(&list->lock, flags); 311 312 312 313 if (!sock_flag(sk, SOCK_DEAD)) 313 314 sk->sk_data_ready(sk, skb_len); ··· 707 702 708 703 /* We implement the SO_SNDLOWAT etc to 709 704 not be settable (1003.1g 5.3) */ 705 + case SO_RXQ_OVFL: 706 + if (valbool) 707 + sock_set_flag(sk, SOCK_RXQ_OVFL); 708 + else 709 + sock_reset_flag(sk, SOCK_RXQ_OVFL); 710 + break; 710 711 default: 711 712 ret = -ENOPROTOOPT; 712 713 break; ··· 910 899 911 900 case SO_MARK: 912 901 v.val = sk->sk_mark; 902 + break; 903 + 904 + case SO_RXQ_OVFL: 905 + v.val = !!sock_flag(sk, SOCK_RXQ_OVFL); 913 906 break; 914 907 915 908 default:
+1 -1
net/ieee802154/dgram.c
··· 303 303 if (err) 304 304 goto done; 305 305 306 - sock_recv_timestamp(msg, sk, skb); 306 + sock_recv_ts_and_drops(msg, sk, skb); 307 307 308 308 if (flags & MSG_TRUNC) 309 309 copied = skb->len;
+1 -1
net/ieee802154/raw.c
··· 191 191 if (err) 192 192 goto done; 193 193 194 - sock_recv_timestamp(msg, sk, skb); 194 + sock_recv_ts_and_drops(msg, sk, skb); 195 195 196 196 if (flags & MSG_TRUNC) 197 197 copied = skb->len;
+1 -1
net/ipv4/raw.c
··· 682 682 if (err) 683 683 goto done; 684 684 685 - sock_recv_timestamp(msg, sk, skb); 685 + sock_recv_ts_and_drops(msg, sk, skb); 686 686 687 687 /* Copy the address. */ 688 688 if (sin) {
+1 -1
net/ipv4/udp.c
··· 955 955 UDP_INC_STATS_USER(sock_net(sk), 956 956 UDP_MIB_INDATAGRAMS, is_udplite); 957 957 958 - sock_recv_timestamp(msg, sk, skb); 958 + sock_recv_ts_and_drops(msg, sk, skb); 959 959 960 960 /* Copy the address. */ 961 961 if (sin) {
+1 -1
net/ipv6/raw.c
··· 497 497 sin6->sin6_scope_id = IP6CB(skb)->iif; 498 498 } 499 499 500 - sock_recv_timestamp(msg, sk, skb); 500 + sock_recv_ts_and_drops(msg, sk, skb); 501 501 502 502 if (np->rxopt.all) 503 503 datagram_recv_ctl(sk, msg, skb);
+1 -1
net/ipv6/udp.c
··· 252 252 UDP_MIB_INDATAGRAMS, is_udplite); 253 253 } 254 254 255 - sock_recv_timestamp(msg, sk, skb); 255 + sock_recv_ts_and_drops(msg, sk, skb); 256 256 257 257 /* Copy the address. */ 258 258 if (msg->msg_name) {
+1 -1
net/key/af_key.c
··· 3606 3606 if (err) 3607 3607 goto out_free; 3608 3608 3609 - sock_recv_timestamp(msg, sk, skb); 3609 + sock_recv_ts_and_drops(msg, sk, skb); 3610 3610 3611 3611 err = (flags & MSG_TRUNC) ? skb->len : copied; 3612 3612
+3 -4
net/packet/af_packet.c
··· 627 627 628 628 spin_lock(&sk->sk_receive_queue.lock); 629 629 po->stats.tp_packets++; 630 + skb->dropcount = atomic_read(&sk->sk_drops); 630 631 __skb_queue_tail(&sk->sk_receive_queue, skb); 631 632 spin_unlock(&sk->sk_receive_queue.lock); 632 633 sk->sk_data_ready(sk, skb->len); 633 634 return 0; 634 635 635 636 drop_n_acct: 636 - spin_lock(&sk->sk_receive_queue.lock); 637 - po->stats.tp_drops++; 638 - spin_unlock(&sk->sk_receive_queue.lock); 637 + po->stats.tp_drops = atomic_inc_return(&sk->sk_drops); 639 638 640 639 drop_n_restore: 641 640 if (skb_head != skb->data && skb_shared(skb)) { ··· 1477 1478 if (err) 1478 1479 goto out_free; 1479 1480 1480 - sock_recv_timestamp(msg, sk, skb); 1481 + sock_recv_ts_and_drops(msg, sk, skb); 1481 1482 1482 1483 if (msg->msg_name) 1483 1484 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
+1 -1
net/rxrpc/ar-recvmsg.c
··· 146 146 memcpy(msg->msg_name, 147 147 &call->conn->trans->peer->srx, 148 148 sizeof(call->conn->trans->peer->srx)); 149 - sock_recv_timestamp(msg, &rx->sk, skb); 149 + sock_recv_ts_and_drops(msg, &rx->sk, skb); 150 150 } 151 151 152 152 /* receive the message */
+1 -1
net/sctp/socket.c
··· 1958 1958 if (err) 1959 1959 goto out_free; 1960 1960 1961 - sock_recv_timestamp(msg, sk, skb); 1961 + sock_recv_ts_and_drops(msg, sk, skb); 1962 1962 if (sctp_ulpevent_is_notification(event)) { 1963 1963 msg->msg_flags |= MSG_NOTIFICATION; 1964 1964 sp->pf->event_msgname(event, msg->msg_name, addr_len);
+15
net/socket.c
··· 668 668 669 669 EXPORT_SYMBOL_GPL(__sock_recv_timestamp); 670 670 671 + inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) 672 + { 673 + if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount) 674 + put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, 675 + sizeof(__u32), &skb->dropcount); 676 + } 677 + 678 + void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, 679 + struct sk_buff *skb) 680 + { 681 + sock_recv_timestamp(msg, sk, skb); 682 + sock_recv_drops(msg, sk, skb); 683 + } 684 + EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops); 685 + 671 686 static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, 672 687 struct msghdr *msg, size_t size, int flags) 673 688 {