Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: introduce SO_MAX_PACING_RATE

As mentioned in commit afe4fd062416b ("pkt_sched: fq: Fair Queue packet
scheduler"), this patch adds a new socket option.

SO_MAX_PACING_RATE offers the application the ability to cap the
rate computed by the transport layer. The value is in bytes per second.

u32 val = 1000000;
setsockopt(sockfd, SOL_SOCKET, SO_MAX_PACING_RATE, &val, sizeof(val));

To be effectively paced, a flow must use the FQ packet scheduler.

Note that a packet scheduler takes the headers into account in its
computations. The effective payload rate depends on the MSS and on
retransmits, if any.

I chose to make this pacing rate a SOL_SOCKET option instead of a
TCP one because this can be used by other protocols.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Steinar H. Gunderson <sesse@google.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Eric Dumazet and committed by
David S. Miller
62748f32 4aa0a03f

+45 -2
+3 -1
arch/alpha/include/uapi/asm/socket.h
··· 81 81 82 82 #define SO_SELECT_ERR_QUEUE 45 83 83 84 - #define SO_BUSY_POLL 46 84 + #define SO_BUSY_POLL 46 85 + 86 + #define SO_MAX_PACING_RATE 47 85 87 86 88 #endif /* _UAPI_ASM_SOCKET_H */
+2
arch/avr32/include/uapi/asm/socket.h
··· 76 76 77 77 #define SO_BUSY_POLL 46 78 78 79 + #define SO_MAX_PACING_RATE 47 80 + 79 81 #endif /* __ASM_AVR32_SOCKET_H */
+2
arch/cris/include/uapi/asm/socket.h
··· 78 78 79 79 #define SO_BUSY_POLL 46 80 80 81 + #define SO_MAX_PACING_RATE 47 82 + 81 83 #endif /* _ASM_SOCKET_H */ 82 84 83 85
+2
arch/frv/include/uapi/asm/socket.h
··· 76 76 77 77 #define SO_BUSY_POLL 46 78 78 79 + #define SO_MAX_PACING_RATE 47 80 + 79 81 #endif /* _ASM_SOCKET_H */ 80 82
+2
arch/h8300/include/uapi/asm/socket.h
··· 76 76 77 77 #define SO_BUSY_POLL 46 78 78 79 + #define SO_MAX_PACING_RATE 47 80 + 79 81 #endif /* _ASM_SOCKET_H */
+2
arch/ia64/include/uapi/asm/socket.h
··· 85 85 86 86 #define SO_BUSY_POLL 46 87 87 88 + #define SO_MAX_PACING_RATE 47 89 + 88 90 #endif /* _ASM_IA64_SOCKET_H */
+2
arch/m32r/include/uapi/asm/socket.h
··· 76 76 77 77 #define SO_BUSY_POLL 46 78 78 79 + #define SO_MAX_PACING_RATE 47 80 + 79 81 #endif /* _ASM_M32R_SOCKET_H */
+2
arch/mips/include/uapi/asm/socket.h
··· 94 94 95 95 #define SO_BUSY_POLL 46 96 96 97 + #define SO_MAX_PACING_RATE 47 98 + 97 99 #endif /* _UAPI_ASM_SOCKET_H */
+2
arch/mn10300/include/uapi/asm/socket.h
··· 76 76 77 77 #define SO_BUSY_POLL 46 78 78 79 + #define SO_MAX_PACING_RATE 47 80 + 79 81 #endif /* _ASM_SOCKET_H */
+2
arch/parisc/include/uapi/asm/socket.h
··· 75 75 76 76 #define SO_BUSY_POLL 0x4027 77 77 78 + #define SO_MAX_PACING_RATE 0x4048 79 + 78 80 /* O_NONBLOCK clashes with the bits used for socket types. Therefore we 79 81 * have to define SOCK_NONBLOCK to a different value here. 80 82 */
+2
arch/powerpc/include/uapi/asm/socket.h
··· 83 83 84 84 #define SO_BUSY_POLL 46 85 85 86 + #define SO_MAX_PACING_RATE 47 87 + 86 88 #endif /* _ASM_POWERPC_SOCKET_H */
+2
arch/s390/include/uapi/asm/socket.h
··· 82 82 83 83 #define SO_BUSY_POLL 46 84 84 85 + #define SO_MAX_PACING_RATE 47 86 + 85 87 #endif /* _ASM_SOCKET_H */
+2
arch/sparc/include/uapi/asm/socket.h
··· 72 72 73 73 #define SO_BUSY_POLL 0x0030 74 74 75 + #define SO_MAX_PACING_RATE 0x0031 76 + 75 77 /* Security levels - as per NRL IPv6 - don't actually do anything */ 76 78 #define SO_SECURITY_AUTHENTICATION 0x5001 77 79 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
+2
arch/xtensa/include/uapi/asm/socket.h
··· 87 87 88 88 #define SO_BUSY_POLL 46 89 89 90 + #define SO_MAX_PACING_RATE 47 91 + 90 92 #endif /* _XTENSA_SOCKET_H */
+1
include/net/sock.h
··· 363 363 int sk_wmem_queued; 364 364 gfp_t sk_allocation; 365 365 u32 sk_pacing_rate; /* bytes per second */ 366 + u32 sk_max_pacing_rate; 366 367 netdev_features_t sk_route_caps; 367 368 netdev_features_t sk_route_nocaps; 368 369 int sk_gso_type;
+2
include/uapi/asm-generic/socket.h
··· 78 78 79 79 #define SO_BUSY_POLL 46 80 80 81 + #define SO_MAX_PACING_RATE 47 82 + 81 83 #endif /* __ASM_GENERIC_SOCKET_H */
+12
net/core/sock.c
··· 914 914 } 915 915 break; 916 916 #endif 917 + 918 + case SO_MAX_PACING_RATE: 919 + sk->sk_max_pacing_rate = val; 920 + sk->sk_pacing_rate = min(sk->sk_pacing_rate, 921 + sk->sk_max_pacing_rate); 922 + break; 923 + 917 924 default: 918 925 ret = -ENOPROTOOPT; 919 926 break; ··· 1183 1176 v.val = sk->sk_ll_usec; 1184 1177 break; 1185 1178 #endif 1179 + 1180 + case SO_MAX_PACING_RATE: 1181 + v.val = sk->sk_max_pacing_rate; 1182 + break; 1186 1183 1187 1184 default: 1188 1185 return -ENOPROTOOPT; ··· 2330 2319 sk->sk_ll_usec = sysctl_net_busy_read; 2331 2320 #endif 2332 2321 2322 + sk->sk_max_pacing_rate = ~0U; 2333 2323 /* 2334 2324 * Before updating sk_refcnt, we must commit prior changes to memory 2335 2325 * (Documentation/RCU/rculist_nulls.txt for details)
+1 -1
net/ipv4/tcp_input.c
··· 735 735 if (tp->srtt > 8 + 2) 736 736 do_div(rate, tp->srtt); 737 737 738 - sk->sk_pacing_rate = min_t(u64, rate, ~0U); 738 + sk->sk_pacing_rate = min_t(u64, rate, sk->sk_max_pacing_rate); 739 739 } 740 740 741 741 /* Calculate rto without backoff. This is the second half of Van Jacobson's