xprtrdma: Harden chunk list encoding against send buffer overflow

While marshaling chunk lists, which are variable-length XDR objects,
check for XDR buffer overflow at every step. Measurements show no
significant changes in CPU utilization.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

Authored by Chuck Lever and committed by Anna Schumaker
(commit 39f4cd9e, parent 7a80f3f0)

+144 -88
net/sunrpc/xprtrdma/rpc_rdma.c
··· 273 return -EIO; 274 } 275 276 - static inline __be32 * 277 xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw) 278 { 279 *iptr++ = cpu_to_be32(mw->mw_handle); 280 *iptr++ = cpu_to_be32(mw->mw_length); 281 - return xdr_encode_hyper(iptr, mw->mw_offset); 282 } 283 284 - /* XDR-encode the Read list. Supports encoding a list of read 285 * segments that belong to a single read chunk. 286 * 287 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64): ··· 345 * N elements, position P (same P for all chunks of same arg!): 346 * 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0 347 * 348 - * Returns a pointer to the XDR word in the RDMA header following 349 - * the end of the Read list, or an error pointer. 350 */ 351 - static __be32 * 352 - rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, 353 - struct rpcrdma_req *req, struct rpc_rqst *rqst, 354 - __be32 *iptr, enum rpcrdma_chunktype rtype) 355 { 356 struct rpcrdma_mr_seg *seg; 357 struct rpcrdma_mw *mw; 358 unsigned int pos; 359 int n, nsegs; 360 - 361 - if (rtype == rpcrdma_noch) { 362 - *iptr++ = xdr_zero; /* item not present */ 363 - return iptr; 364 - } 365 366 pos = rqst->rq_snd_buf.head[0].iov_len; 367 if (rtype == rpcrdma_areadch) ··· 367 nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos, 368 rtype, seg); 369 if (nsegs < 0) 370 - return ERR_PTR(nsegs); 371 372 do { 373 n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 374 false, &mw); 375 if (n < 0) 376 - return ERR_PTR(n); 377 rpcrdma_push_mw(mw, &req->rl_registered); 378 379 - *iptr++ = xdr_one; /* item present */ 380 - 381 - /* All read segments in this chunk 382 - * have the same "position". 
383 - */ 384 - *iptr++ = cpu_to_be32(pos); 385 - iptr = xdr_encode_rdma_segment(iptr, mw); 386 387 dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n", 388 rqst->rq_task->tk_pid, __func__, pos, ··· 389 nsegs -= n; 390 } while (nsegs); 391 392 - /* Finish Read list */ 393 - *iptr++ = xdr_zero; /* Next item not present */ 394 - return iptr; 395 } 396 397 - /* XDR-encode the Write list. Supports encoding a list containing 398 - * one array of plain segments that belong to a single write chunk. 399 * 400 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64): 401 * ··· 402 * N elements: 403 * 1 - N - HLOO - HLOO - ... - HLOO - 0 404 * 405 - * Returns a pointer to the XDR word in the RDMA header following 406 - * the end of the Write list, or an error pointer. 407 */ 408 - static __be32 * 409 rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, 410 - struct rpc_rqst *rqst, __be32 *iptr, 411 - enum rpcrdma_chunktype wtype) 412 { 413 struct rpcrdma_mr_seg *seg; 414 struct rpcrdma_mw *mw; 415 int n, nsegs, nchunks; 416 __be32 *segcount; 417 - 418 - if (wtype != rpcrdma_writech) { 419 - *iptr++ = xdr_zero; /* no Write list present */ 420 - return iptr; 421 - } 422 423 seg = req->rl_segments; 424 nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 425 rqst->rq_rcv_buf.head[0].iov_len, 426 wtype, seg); 427 if (nsegs < 0) 428 - return ERR_PTR(nsegs); 429 430 - *iptr++ = xdr_one; /* Write list present */ 431 - segcount = iptr++; /* save location of segment count */ 432 433 nchunks = 0; 434 do { 435 n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 436 true, &mw); 437 if (n < 0) 438 - return ERR_PTR(n); 439 rpcrdma_push_mw(mw, &req->rl_registered); 440 441 - iptr = xdr_encode_rdma_segment(iptr, mw); 442 443 dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n", 444 rqst->rq_task->tk_pid, __func__, ··· 457 /* Update count of segments in this Write chunk */ 458 *segcount = cpu_to_be32(nchunks); 459 460 - /* Finish Write list */ 
461 - *iptr++ = xdr_zero; /* Next item not present */ 462 - return iptr; 463 } 464 465 - /* XDR-encode the Reply chunk. Supports encoding an array of plain 466 - * segments that belong to a single write (reply) chunk. 467 * 468 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64): 469 * ··· 469 * N elements: 470 * 1 - N - HLOO - HLOO - ... - HLOO 471 * 472 - * Returns a pointer to the XDR word in the RDMA header following 473 - * the end of the Reply chunk, or an error pointer. 474 */ 475 - static __be32 * 476 - rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, 477 - struct rpcrdma_req *req, struct rpc_rqst *rqst, 478 - __be32 *iptr, enum rpcrdma_chunktype wtype) 479 { 480 struct rpcrdma_mr_seg *seg; 481 struct rpcrdma_mw *mw; 482 int n, nsegs, nchunks; 483 __be32 *segcount; 484 485 - if (wtype != rpcrdma_replych) { 486 - *iptr++ = xdr_zero; /* no Reply chunk present */ 487 - return iptr; 488 - } 489 - 490 seg = req->rl_segments; 491 nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg); 492 if (nsegs < 0) 493 - return ERR_PTR(nsegs); 494 495 - *iptr++ = xdr_one; /* Reply chunk present */ 496 - segcount = iptr++; /* save location of segment count */ 497 498 nchunks = 0; 499 do { 500 n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 501 true, &mw); 502 if (n < 0) 503 - return ERR_PTR(n); 504 rpcrdma_push_mw(mw, &req->rl_registered); 505 506 - iptr = xdr_encode_rdma_segment(iptr, mw); 507 508 dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n", 509 rqst->rq_task->tk_pid, __func__, ··· 520 /* Update count of segments in the Reply chunk */ 521 *segcount = cpu_to_be32(nchunks); 522 523 - return iptr; 524 } 525 526 /* Prepare the RPC-over-RDMA header SGE. 
··· 722 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 723 struct xdr_stream *xdr = &req->rl_stream; 724 enum rpcrdma_chunktype rtype, wtype; 725 - struct rpcrdma_msg *headerp; 726 bool ddp_allowed; 727 - ssize_t hdrlen; 728 - __be32 *iptr; 729 __be32 *p; 730 731 #if defined(CONFIG_SUNRPC_BACKCHANNEL) 732 if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)) 733 return rpcrdma_bc_marshal_reply(rqst); 734 #endif 735 736 - headerp = rdmab_to_msg(req->rl_rdmabuf); 737 rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); 738 xdr_init_encode(xdr, &req->rl_hdrbuf, 739 req->rl_rdmabuf->rg_base); 740 741 /* Fixed header fields */ 742 - iptr = ERR_PTR(-EMSGSIZE); 743 p = xdr_reserve_space(xdr, 4 * sizeof(*p)); 744 if (!p) 745 goto out_err; ··· 818 * send a Call message with a Position Zero Read chunk and a 819 * regular Read chunk at the same time. 820 */ 821 - iptr = headerp->rm_body.rm_chunks; 822 - iptr = rpcrdma_encode_read_list(r_xprt, req, rqst, iptr, rtype); 823 - if (IS_ERR(iptr)) 824 goto out_err; 825 - iptr = rpcrdma_encode_write_list(r_xprt, req, rqst, iptr, wtype); 826 - if (IS_ERR(iptr)) 827 - goto out_err; 828 - iptr = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, iptr, wtype); 829 - if (IS_ERR(iptr)) 830 - goto out_err; 831 - hdrlen = (unsigned char *)iptr - (unsigned char *)headerp; 832 833 - dprintk("RPC: %5u %s: %s/%s: hdrlen %zd\n", 834 rqst->rq_task->tk_pid, __func__, 835 transfertypes[rtype], transfertypes[wtype], 836 - hdrlen); 837 838 - if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen, 839 &rqst->rq_snd_buf, rtype)) { 840 - iptr = ERR_PTR(-EIO); 841 goto out_err; 842 } 843 return 0; 844 845 out_err: 846 - if (PTR_ERR(iptr) != -ENOBUFS) { 847 - pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n", 848 - PTR_ERR(iptr)); 849 r_xprt->rx_stats.failed_marshal_count++; 850 } 851 - return PTR_ERR(iptr); 852 } 853 854 /**
··· 273 return -EIO; 274 } 275 276 + static inline int 277 + encode_item_present(struct xdr_stream *xdr) 278 + { 279 + __be32 *p; 280 + 281 + p = xdr_reserve_space(xdr, sizeof(*p)); 282 + if (unlikely(!p)) 283 + return -EMSGSIZE; 284 + 285 + *p = xdr_one; 286 + return 0; 287 + } 288 + 289 + static inline int 290 + encode_item_not_present(struct xdr_stream *xdr) 291 + { 292 + __be32 *p; 293 + 294 + p = xdr_reserve_space(xdr, sizeof(*p)); 295 + if (unlikely(!p)) 296 + return -EMSGSIZE; 297 + 298 + *p = xdr_zero; 299 + return 0; 300 + } 301 + 302 + static void 303 xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw) 304 { 305 *iptr++ = cpu_to_be32(mw->mw_handle); 306 *iptr++ = cpu_to_be32(mw->mw_length); 307 + xdr_encode_hyper(iptr, mw->mw_offset); 308 } 309 310 + static int 311 + encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw) 312 + { 313 + __be32 *p; 314 + 315 + p = xdr_reserve_space(xdr, 4 * sizeof(*p)); 316 + if (unlikely(!p)) 317 + return -EMSGSIZE; 318 + 319 + xdr_encode_rdma_segment(p, mw); 320 + return 0; 321 + } 322 + 323 + static int 324 + encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw, 325 + u32 position) 326 + { 327 + __be32 *p; 328 + 329 + p = xdr_reserve_space(xdr, 6 * sizeof(*p)); 330 + if (unlikely(!p)) 331 + return -EMSGSIZE; 332 + 333 + *p++ = xdr_one; /* Item present */ 334 + *p++ = cpu_to_be32(position); 335 + xdr_encode_rdma_segment(p, mw); 336 + return 0; 337 + } 338 + 339 + /* Register and XDR encode the Read list. Supports encoding a list of read 340 * segments that belong to a single read chunk. 341 * 342 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64): ··· 290 * N elements, position P (same P for all chunks of same arg!): 291 * 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0 292 * 293 + * Returns zero on success, or a negative errno if a failure occurred. 294 + * @xdr is advanced to the next position in the stream. 
295 + * 296 + * Only a single @pos value is currently supported. 297 */ 298 + static noinline int 299 + rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, 300 + struct rpc_rqst *rqst, enum rpcrdma_chunktype rtype) 301 { 302 + struct xdr_stream *xdr = &req->rl_stream; 303 struct rpcrdma_mr_seg *seg; 304 struct rpcrdma_mw *mw; 305 unsigned int pos; 306 int n, nsegs; 307 308 pos = rqst->rq_snd_buf.head[0].iov_len; 309 if (rtype == rpcrdma_areadch) ··· 315 nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos, 316 rtype, seg); 317 if (nsegs < 0) 318 + return nsegs; 319 320 do { 321 n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 322 false, &mw); 323 if (n < 0) 324 + return n; 325 rpcrdma_push_mw(mw, &req->rl_registered); 326 327 + if (encode_read_segment(xdr, mw, pos) < 0) 328 + return -EMSGSIZE; 329 330 dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n", 331 rqst->rq_task->tk_pid, __func__, pos, ··· 342 nsegs -= n; 343 } while (nsegs); 344 345 + return 0; 346 } 347 348 + /* Register and XDR encode the Write list. Supports encoding a list 349 + * containing one array of plain segments that belong to a single 350 + * write chunk. 351 * 352 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64): 353 * ··· 356 * N elements: 357 * 1 - N - HLOO - HLOO - ... - HLOO - 0 358 * 359 + * Returns zero on success, or a negative errno if a failure occurred. 360 + * @xdr is advanced to the next position in the stream. 361 + * 362 + * Only a single Write chunk is currently supported. 
363 */ 364 + static noinline int 365 rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, 366 + struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype) 367 { 368 + struct xdr_stream *xdr = &req->rl_stream; 369 struct rpcrdma_mr_seg *seg; 370 struct rpcrdma_mw *mw; 371 int n, nsegs, nchunks; 372 __be32 *segcount; 373 374 seg = req->rl_segments; 375 nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 376 rqst->rq_rcv_buf.head[0].iov_len, 377 wtype, seg); 378 if (nsegs < 0) 379 + return nsegs; 380 381 + if (encode_item_present(xdr) < 0) 382 + return -EMSGSIZE; 383 + segcount = xdr_reserve_space(xdr, sizeof(*segcount)); 384 + if (unlikely(!segcount)) 385 + return -EMSGSIZE; 386 + /* Actual value encoded below */ 387 388 nchunks = 0; 389 do { 390 n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 391 true, &mw); 392 if (n < 0) 393 + return n; 394 rpcrdma_push_mw(mw, &req->rl_registered); 395 396 + if (encode_rdma_segment(xdr, mw) < 0) 397 + return -EMSGSIZE; 398 399 dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n", 400 rqst->rq_task->tk_pid, __func__, ··· 409 /* Update count of segments in this Write chunk */ 410 *segcount = cpu_to_be32(nchunks); 411 412 + return 0; 413 } 414 415 + /* Register and XDR encode the Reply chunk. Supports encoding an array 416 + * of plain segments that belong to a single write (reply) chunk. 417 * 418 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64): 419 * ··· 423 * N elements: 424 * 1 - N - HLOO - HLOO - ... - HLOO 425 * 426 + * Returns zero on success, or a negative errno if a failure occurred. 427 + * @xdr is advanced to the next position in the stream. 
428 */ 429 + static noinline int 430 + rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, 431 + struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype) 432 { 433 + struct xdr_stream *xdr = &req->rl_stream; 434 struct rpcrdma_mr_seg *seg; 435 struct rpcrdma_mw *mw; 436 int n, nsegs, nchunks; 437 __be32 *segcount; 438 439 seg = req->rl_segments; 440 nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg); 441 if (nsegs < 0) 442 + return nsegs; 443 444 + if (encode_item_present(xdr) < 0) 445 + return -EMSGSIZE; 446 + segcount = xdr_reserve_space(xdr, sizeof(*segcount)); 447 + if (unlikely(!segcount)) 448 + return -EMSGSIZE; 449 + /* Actual value encoded below */ 450 451 nchunks = 0; 452 do { 453 n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 454 true, &mw); 455 if (n < 0) 456 + return n; 457 rpcrdma_push_mw(mw, &req->rl_registered); 458 459 + if (encode_rdma_segment(xdr, mw) < 0) 460 + return -EMSGSIZE; 461 462 dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n", 463 rqst->rq_task->tk_pid, __func__, ··· 474 /* Update count of segments in the Reply chunk */ 475 *segcount = cpu_to_be32(nchunks); 476 477 + return 0; 478 } 479 480 /* Prepare the RPC-over-RDMA header SGE. ··· 676 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 677 struct xdr_stream *xdr = &req->rl_stream; 678 enum rpcrdma_chunktype rtype, wtype; 679 bool ddp_allowed; 680 __be32 *p; 681 + int ret; 682 683 #if defined(CONFIG_SUNRPC_BACKCHANNEL) 684 if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)) 685 return rpcrdma_bc_marshal_reply(rqst); 686 #endif 687 688 rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); 689 xdr_init_encode(xdr, &req->rl_hdrbuf, 690 req->rl_rdmabuf->rg_base); 691 692 /* Fixed header fields */ 693 + ret = -EMSGSIZE; 694 p = xdr_reserve_space(xdr, 4 * sizeof(*p)); 695 if (!p) 696 goto out_err; ··· 775 * send a Call message with a Position Zero Read chunk and a 776 * regular Read chunk at the same time. 
777 */ 778 + if (rtype != rpcrdma_noch) { 779 + ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype); 780 + if (ret) 781 + goto out_err; 782 + } 783 + ret = encode_item_not_present(xdr); 784 + if (ret) 785 goto out_err; 786 787 + if (wtype == rpcrdma_writech) { 788 + ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype); 789 + if (ret) 790 + goto out_err; 791 + } 792 + ret = encode_item_not_present(xdr); 793 + if (ret) 794 + goto out_err; 795 + 796 + if (wtype != rpcrdma_replych) 797 + ret = encode_item_not_present(xdr); 798 + else 799 + ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype); 800 + if (ret) 801 + goto out_err; 802 + 803 + dprintk("RPC: %5u %s: %s/%s: hdrlen %u rpclen\n", 804 rqst->rq_task->tk_pid, __func__, 805 transfertypes[rtype], transfertypes[wtype], 806 + xdr_stream_pos(xdr)); 807 808 + if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, 809 + xdr_stream_pos(xdr), 810 &rqst->rq_snd_buf, rtype)) { 811 + ret = -EIO; 812 goto out_err; 813 } 814 return 0; 815 816 out_err: 817 + if (ret != -ENOBUFS) { 818 + pr_err("rpcrdma: header marshaling failed (%d)\n", ret); 819 r_xprt->rx_stats.failed_marshal_count++; 820 } 821 + return ret; 822 } 823 824 /**