xprtrdma: Harden chunk list encoding against send buffer overflow

While marshaling chunk lists, which are variable-length XDR objects,
check for XDR buffer overflow at every step. Measurements show no
significant change in CPU utilization.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
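
The hardening pattern is the same in every encoder touched by this patch:
reserve space in the xdr_stream before writing each XDR word, and fail with
-EMSGSIZE when xdr_reserve_space() reports the send buffer is exhausted,
rather than writing past its end. A minimal sketch of that pattern, lifted
from one of the helpers added in the diff below:

static inline int
encode_item_present(struct xdr_stream *xdr)
{
        __be32 *p;

        /* Ask the XDR stream for one word of send buffer space;
         * a NULL return means the buffer would overflow.
         */
        p = xdr_reserve_space(xdr, sizeof(*p));
        if (unlikely(!p))
                return -EMSGSIZE;

        *p = xdr_one;           /* discriminator: item present */
        return 0;
}

Callers check the int return value and propagate the error, so a chunk list
that no longer fits in the reserved header buffer fails the marshal step
cleanly instead of corrupting adjacent memory.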

Authored by Chuck Lever, committed by Anna Schumaker
39f4cd9e 7a80f3f0

+144 -88
net/sunrpc/xprtrdma/rpc_rdma.c
···
                 return -EIO;
         }
 
-static inline __be32 *
+static inline int
+encode_item_present(struct xdr_stream *xdr)
+{
+        __be32 *p;
+
+        p = xdr_reserve_space(xdr, sizeof(*p));
+        if (unlikely(!p))
+                return -EMSGSIZE;
+
+        *p = xdr_one;
+        return 0;
+}
+
+static inline int
+encode_item_not_present(struct xdr_stream *xdr)
+{
+        __be32 *p;
+
+        p = xdr_reserve_space(xdr, sizeof(*p));
+        if (unlikely(!p))
+                return -EMSGSIZE;
+
+        *p = xdr_zero;
+        return 0;
+}
+
+static void
 xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw)
 {
         *iptr++ = cpu_to_be32(mw->mw_handle);
         *iptr++ = cpu_to_be32(mw->mw_length);
-        return xdr_encode_hyper(iptr, mw->mw_offset);
+        xdr_encode_hyper(iptr, mw->mw_offset);
 }
 
-/* XDR-encode the Read list. Supports encoding a list of read
+static int
+encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
+{
+        __be32 *p;
+
+        p = xdr_reserve_space(xdr, 4 * sizeof(*p));
+        if (unlikely(!p))
+                return -EMSGSIZE;
+
+        xdr_encode_rdma_segment(p, mw);
+        return 0;
+}
+
+static int
+encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
+                    u32 position)
+{
+        __be32 *p;
+
+        p = xdr_reserve_space(xdr, 6 * sizeof(*p));
+        if (unlikely(!p))
+                return -EMSGSIZE;
+
+        *p++ = xdr_one;                 /* Item present */
+        *p++ = cpu_to_be32(position);
+        xdr_encode_rdma_segment(p, mw);
+        return 0;
+}
+
+/* Register and XDR encode the Read list. Supports encoding a list of read
  * segments that belong to a single read chunk.
  *
  * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
···
  * N elements, position P (same P for all chunks of same arg!):
  *  1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
  *
- * Returns a pointer to the XDR word in the RDMA header following
- * the end of the Read list, or an error pointer.
+ * Returns zero on success, or a negative errno if a failure occurred.
+ * @xdr is advanced to the next position in the stream.
+ *
+ * Only a single @pos value is currently supported.
  */
-static __be32 *
-rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
-                         struct rpcrdma_req *req, struct rpc_rqst *rqst,
-                         __be32 *iptr, enum rpcrdma_chunktype rtype)
+static noinline int
+rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+                         struct rpc_rqst *rqst, enum rpcrdma_chunktype rtype)
 {
+        struct xdr_stream *xdr = &req->rl_stream;
         struct rpcrdma_mr_seg *seg;
         struct rpcrdma_mw *mw;
         unsigned int pos;
         int n, nsegs;
-
-        if (rtype == rpcrdma_noch) {
-                *iptr++ = xdr_zero;        /* item not present */
-                return iptr;
-        }
 
         pos = rqst->rq_snd_buf.head[0].iov_len;
         if (rtype == rpcrdma_areadch)
···
         nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
                                      rtype, seg);
         if (nsegs < 0)
-                return ERR_PTR(nsegs);
+                return nsegs;
 
         do {
                 n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
                                                  false, &mw);
                 if (n < 0)
-                        return ERR_PTR(n);
+                        return n;
                 rpcrdma_push_mw(mw, &req->rl_registered);
 
-                *iptr++ = xdr_one;        /* item present */
-
-                /* All read segments in this chunk
-                 * have the same "position".
-                 */
-                *iptr++ = cpu_to_be32(pos);
-                iptr = xdr_encode_rdma_segment(iptr, mw);
+                if (encode_read_segment(xdr, mw, pos) < 0)
+                        return -EMSGSIZE;
 
                 dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n",
                         rqst->rq_task->tk_pid, __func__, pos,
···
                 nsegs -= n;
         } while (nsegs);
 
-        /* Finish Read list */
-        *iptr++ = xdr_zero;        /* Next item not present */
-        return iptr;
+        return 0;
 }
 
-/* XDR-encode the Write list. Supports encoding a list containing
- * one array of plain segments that belong to a single write chunk.
+/* Register and XDR encode the Write list. Supports encoding a list
+ * containing one array of plain segments that belong to a single
+ * write chunk.
  *
  * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
  *
···
  * N elements:
  *  1 - N - HLOO - HLOO - ... - HLOO - 0
  *
- * Returns a pointer to the XDR word in the RDMA header following
- * the end of the Write list, or an error pointer.
+ * Returns zero on success, or a negative errno if a failure occurred.
+ * @xdr is advanced to the next position in the stream.
+ *
+ * Only a single Write chunk is currently supported.
  */
-static __be32 *
+static noinline int
 rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-                          struct rpc_rqst *rqst, __be32 *iptr,
-                          enum rpcrdma_chunktype wtype)
+                          struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype)
 {
+        struct xdr_stream *xdr = &req->rl_stream;
         struct rpcrdma_mr_seg *seg;
         struct rpcrdma_mw *mw;
         int n, nsegs, nchunks;
         __be32 *segcount;
-
-        if (wtype != rpcrdma_writech) {
-                *iptr++ = xdr_zero;        /* no Write list present */
-                return iptr;
-        }
 
         seg = req->rl_segments;
         nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
                                      rqst->rq_rcv_buf.head[0].iov_len,
                                      wtype, seg);
         if (nsegs < 0)
-                return ERR_PTR(nsegs);
+                return nsegs;
 
-        *iptr++ = xdr_one;        /* Write list present */
-        segcount = iptr++;        /* save location of segment count */
+        if (encode_item_present(xdr) < 0)
+                return -EMSGSIZE;
+        segcount = xdr_reserve_space(xdr, sizeof(*segcount));
+        if (unlikely(!segcount))
+                return -EMSGSIZE;
+        /* Actual value encoded below */
 
         nchunks = 0;
         do {
                 n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
                                                  true, &mw);
                 if (n < 0)
-                        return ERR_PTR(n);
+                        return n;
                 rpcrdma_push_mw(mw, &req->rl_registered);
 
-                iptr = xdr_encode_rdma_segment(iptr, mw);
+                if (encode_rdma_segment(xdr, mw) < 0)
+                        return -EMSGSIZE;
 
                 dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n",
                         rqst->rq_task->tk_pid, __func__,
···
         /* Update count of segments in this Write chunk */
         *segcount = cpu_to_be32(nchunks);
 
-        /* Finish Write list */
-        *iptr++ = xdr_zero;        /* Next item not present */
-        return iptr;
+        return 0;
 }
 
-/* XDR-encode the Reply chunk. Supports encoding an array of plain
- * segments that belong to a single write (reply) chunk.
+/* Register and XDR encode the Reply chunk. Supports encoding an array
+ * of plain segments that belong to a single write (reply) chunk.
  *
  * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
  *
···
  * N elements:
  *  1 - N - HLOO - HLOO - ... - HLOO
  *
- * Returns a pointer to the XDR word in the RDMA header following
- * the end of the Reply chunk, or an error pointer.
+ * Returns zero on success, or a negative errno if a failure occurred.
+ * @xdr is advanced to the next position in the stream.
  */
-static __be32 *
-rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
-                           struct rpcrdma_req *req, struct rpc_rqst *rqst,
-                           __be32 *iptr, enum rpcrdma_chunktype wtype)
+static noinline int
+rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+                           struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype)
 {
+        struct xdr_stream *xdr = &req->rl_stream;
         struct rpcrdma_mr_seg *seg;
         struct rpcrdma_mw *mw;
         int n, nsegs, nchunks;
         __be32 *segcount;
 
-        if (wtype != rpcrdma_replych) {
-                *iptr++ = xdr_zero;        /* no Reply chunk present */
-                return iptr;
-        }
-
         seg = req->rl_segments;
         nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
         if (nsegs < 0)
-                return ERR_PTR(nsegs);
+                return nsegs;
 
-        *iptr++ = xdr_one;        /* Reply chunk present */
-        segcount = iptr++;        /* save location of segment count */
+        if (encode_item_present(xdr) < 0)
+                return -EMSGSIZE;
+        segcount = xdr_reserve_space(xdr, sizeof(*segcount));
+        if (unlikely(!segcount))
+                return -EMSGSIZE;
+        /* Actual value encoded below */
 
         nchunks = 0;
         do {
                 n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
                                                  true, &mw);
                 if (n < 0)
-                        return ERR_PTR(n);
+                        return n;
                 rpcrdma_push_mw(mw, &req->rl_registered);
 
-                iptr = xdr_encode_rdma_segment(iptr, mw);
+                if (encode_rdma_segment(xdr, mw) < 0)
+                        return -EMSGSIZE;
 
                 dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n",
                         rqst->rq_task->tk_pid, __func__,
···
         /* Update count of segments in the Reply chunk */
         *segcount = cpu_to_be32(nchunks);
 
-        return iptr;
+        return 0;
 }
 
 /* Prepare the RPC-over-RDMA header SGE.
···
         struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
         struct xdr_stream *xdr = &req->rl_stream;
         enum rpcrdma_chunktype rtype, wtype;
-        struct rpcrdma_msg *headerp;
         bool ddp_allowed;
-        ssize_t hdrlen;
-        __be32 *iptr;
         __be32 *p;
+        int ret;
 
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
         if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state))
                 return rpcrdma_bc_marshal_reply(rqst);
 #endif
 
-        headerp = rdmab_to_msg(req->rl_rdmabuf);
         rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
         xdr_init_encode(xdr, &req->rl_hdrbuf,
                         req->rl_rdmabuf->rg_base);
 
         /* Fixed header fields */
-        iptr = ERR_PTR(-EMSGSIZE);
+        ret = -EMSGSIZE;
         p = xdr_reserve_space(xdr, 4 * sizeof(*p));
         if (!p)
                 goto out_err;
···
          * send a Call message with a Position Zero Read chunk and a
          * regular Read chunk at the same time.
          */
-        iptr = headerp->rm_body.rm_chunks;
-        iptr = rpcrdma_encode_read_list(r_xprt, req, rqst, iptr, rtype);
-        if (IS_ERR(iptr))
+        if (rtype != rpcrdma_noch) {
+                ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype);
+                if (ret)
+                        goto out_err;
+        }
+        ret = encode_item_not_present(xdr);
+        if (ret)
                 goto out_err;
-        iptr = rpcrdma_encode_write_list(r_xprt, req, rqst, iptr, wtype);
-        if (IS_ERR(iptr))
-                goto out_err;
-        iptr = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, iptr, wtype);
-        if (IS_ERR(iptr))
-                goto out_err;
-        hdrlen = (unsigned char *)iptr - (unsigned char *)headerp;
 
-        dprintk("RPC: %5u %s: %s/%s: hdrlen %zd\n",
+        if (wtype == rpcrdma_writech) {
+                ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype);
+                if (ret)
+                        goto out_err;
+        }
+        ret = encode_item_not_present(xdr);
+        if (ret)
+                goto out_err;
+
+        if (wtype != rpcrdma_replych)
+                ret = encode_item_not_present(xdr);
+        else
+                ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype);
+        if (ret)
+                goto out_err;
+
+        dprintk("RPC: %5u %s: %s/%s: hdrlen %u rpclen\n",
                 rqst->rq_task->tk_pid, __func__,
                 transfertypes[rtype], transfertypes[wtype],
-                hdrlen);
+                xdr_stream_pos(xdr));
 
-        if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen,
+        if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req,
+                                       xdr_stream_pos(xdr),
                                        &rqst->rq_snd_buf, rtype)) {
-                iptr = ERR_PTR(-EIO);
+                ret = -EIO;
                 goto out_err;
         }
         return 0;
 
 out_err:
-        if (PTR_ERR(iptr) != -ENOBUFS) {
-                pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n",
-                       PTR_ERR(iptr));
+        if (ret != -ENOBUFS) {
+                pr_err("rpcrdma: header marshaling failed (%d)\n", ret);
                 r_xprt->rx_stats.failed_marshal_count++;
         }
-        return PTR_ERR(iptr);
+        return ret;
 }
 
 /**