Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

SUNRPC: Revert 241b1f419f0e ("SUNRPC: Remove xdr_buf_trim()")

I've noticed that when krb5i or krb5p security is in use,
retransmitted requests are missing the server's duplicate reply
cache. The computed checksum on the retransmitted request does not
match the cached checksum, resulting in the server performing the
retransmitted request again instead of returning the cached reply.

The assumptions made when removing xdr_buf_trim() were not correct.
In the send paths, the upper layer has already set the segment
lengths correctly, and shorting the buffer's content is simply a
matter of reducing buf->len.

xdr_buf_trim() is the right answer in the receive/unwrap path on
both the client and the server. The buffer segment lengths have to
be shortened one-by-one.

On the server side in particular, head.iov_len needs to be updated
correctly to enable nfsd_cache_csum() to work correctly. The simple
buf->len computation doesn't do that, and that results in
checksumming stale data in the buffer.

The problem isn't noticed until there's significant instability of
the RPC transport. At that point, the reliability of retransmit
detection on the server becomes crucial.

Fixes: 241b1f419f0e ("SUNRPC: Remove xdr_buf_trim()")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>

+46 -5
+1
include/linux/sunrpc/xdr.h
··· 184 184 extern void xdr_shift_buf(struct xdr_buf *, size_t); 185 185 extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); 186 186 extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); 187 + extern void xdr_buf_trim(struct xdr_buf *, unsigned int); 187 188 extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); 188 189 extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); 189 190
+3 -4
net/sunrpc/auth_gss/gss_krb5_wrap.c
··· 580 580 */ 581 581 movelen = min_t(unsigned int, buf->head[0].iov_len, len); 582 582 movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip; 583 - if (offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen > 584 - buf->head[0].iov_len) 585 - return GSS_S_FAILURE; 583 + BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen > 584 + buf->head[0].iov_len); 586 585 memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen); 587 586 buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip; 588 587 buf->len = len - GSS_KRB5_TOK_HDR_LEN + headskip; 589 588 590 589 /* Trim off the trailing "extra count" and checksum blob */ 591 - buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip; 590 + xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip); 592 591 593 592 *align = XDR_QUADLEN(GSS_KRB5_TOK_HDR_LEN + headskip); 594 593 *slack = *align + XDR_QUADLEN(ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
+1 -1
net/sunrpc/auth_gss/svcauth_gss.c
··· 906 906 if (svc_getnl(&buf->head[0]) != seq) 907 907 goto out; 908 908 /* trim off the mic and padding at the end before returning */ 909 - buf->len -= 4 + round_up_to_quad(mic.len); 909 + xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4); 910 910 stat = 0; 911 911 out: 912 912 kfree(mic.data);
+41
net/sunrpc/xdr.c
··· 1150 1150 } 1151 1151 EXPORT_SYMBOL_GPL(xdr_buf_subsegment); 1152 1152 1153 + /** 1154 + * xdr_buf_trim - lop at most "len" bytes off the end of "buf" 1155 + * @buf: buf to be trimmed 1156 + * @len: number of bytes to reduce "buf" by 1157 + * 1158 + * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note 1159 + * that it's possible that we'll trim less than that amount if the xdr_buf is 1160 + * too small, or if (for instance) it's all in the head and the parser has 1161 + * already read too far into it. 1162 + */ 1163 + void xdr_buf_trim(struct xdr_buf *buf, unsigned int len) 1164 + { 1165 + size_t cur; 1166 + unsigned int trim = len; 1167 + 1168 + if (buf->tail[0].iov_len) { 1169 + cur = min_t(size_t, buf->tail[0].iov_len, trim); 1170 + buf->tail[0].iov_len -= cur; 1171 + trim -= cur; 1172 + if (!trim) 1173 + goto fix_len; 1174 + } 1175 + 1176 + if (buf->page_len) { 1177 + cur = min_t(unsigned int, buf->page_len, trim); 1178 + buf->page_len -= cur; 1179 + trim -= cur; 1180 + if (!trim) 1181 + goto fix_len; 1182 + } 1183 + 1184 + if (buf->head[0].iov_len) { 1185 + cur = min_t(size_t, buf->head[0].iov_len, trim); 1186 + buf->head[0].iov_len -= cur; 1187 + trim -= cur; 1188 + } 1189 + fix_len: 1190 + buf->len -= (len - trim); 1191 + } 1192 + EXPORT_SYMBOL_GPL(xdr_buf_trim); 1193 + 1153 1194 static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) 1154 1195 { 1155 1196 unsigned int this_len;