Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Revert "Merge branch 'loop-aio-nowait' into for-6.19/block"

This reverts commit f43fdeb9a368a5ff56b088b46edc245bd4b52cde, reversing
changes made to 2c6d792d4b7676e2b340df05425330452fee1f40.

There are concerns that doing inline submits can cause excessive
stack usage, particularly when going back into the filesystem. Revert
the loop dio nowait change for now.

Link: https://lore.kernel.org/linux-block/aSP3SG_KaROJTBHx@infradead.org/
Signed-off-by: Jens Axboe <axboe@kernel.dk>

+40 -195
drivers/block/loop.c
··· 68 68 struct rb_root worker_tree; 69 69 struct timer_list timer; 70 70 bool sysfs_inited; 71 - unsigned lo_nr_blocking_writes; 72 71 73 72 struct request_queue *lo_queue; 74 73 struct blk_mq_tag_set tag_set; ··· 89 90 90 91 #define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ) 91 92 #define LOOP_DEFAULT_HW_Q_DEPTH 128 92 - 93 - static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd); 94 93 95 94 static DEFINE_IDR(loop_index_idr); 96 95 static DEFINE_MUTEX(loop_ctl_mutex); ··· 321 324 322 325 if (!atomic_dec_and_test(&cmd->ref)) 323 326 return; 324 - 325 - /* -EAGAIN could be returned from bdev's ->ki_complete */ 326 - if (cmd->ret == -EAGAIN) { 327 - struct loop_device *lo = rq->q->queuedata; 328 - 329 - loop_queue_work(lo, cmd); 330 - return; 331 - } 332 - 333 327 kfree(cmd->bvec); 334 328 cmd->bvec = NULL; 335 329 if (req_op(rq) == REQ_OP_WRITE) ··· 337 349 lo_rw_aio_do_completion(cmd); 338 350 } 339 351 340 - static inline unsigned lo_cmd_nr_bvec(struct loop_cmd *cmd) 352 + static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, 353 + loff_t pos, int rw) 341 354 { 342 - struct request *rq = blk_mq_rq_from_pdu(cmd); 355 + struct iov_iter iter; 343 356 struct req_iterator rq_iter; 357 + struct bio_vec *bvec; 358 + struct request *rq = blk_mq_rq_from_pdu(cmd); 359 + struct bio *bio = rq->bio; 360 + struct file *file = lo->lo_backing_file; 344 361 struct bio_vec tmp; 362 + unsigned int offset; 345 363 int nr_bvec = 0; 364 + int ret; 346 365 347 366 rq_for_each_bvec(tmp, rq, rq_iter) 348 367 nr_bvec++; 349 368 350 - return nr_bvec; 351 - } 352 - 353 - static int lo_rw_aio_prep(struct loop_device *lo, struct loop_cmd *cmd, 354 - unsigned nr_bvec, loff_t pos) 355 - { 356 - struct request *rq = blk_mq_rq_from_pdu(cmd); 357 - 358 369 if (rq->bio != rq->biotail) { 359 - struct req_iterator rq_iter; 360 - struct bio_vec *bvec; 361 - struct bio_vec tmp; 362 370 363 371 bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), 364 372 GFP_NOIO); ··· 372 388 
*bvec = tmp; 373 389 bvec++; 374 390 } 391 + bvec = cmd->bvec; 392 + offset = 0; 375 393 } else { 376 - cmd->bvec = NULL; 394 + /* 395 + * Same here, this bio may be started from the middle of the 396 + * 'bvec' because of bio splitting, so offset from the bvec 397 + * must be passed to iov iterator 398 + */ 399 + offset = bio->bi_iter.bi_bvec_done; 400 + bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); 377 401 } 402 + atomic_set(&cmd->ref, 2); 403 + 404 + iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq)); 405 + iter.iov_offset = offset; 378 406 379 407 cmd->iocb.ki_pos = pos; 380 - cmd->iocb.ki_filp = lo->lo_backing_file; 408 + cmd->iocb.ki_filp = file; 381 409 cmd->iocb.ki_ioprio = req_get_ioprio(rq); 382 410 if (cmd->use_aio) { 383 411 cmd->iocb.ki_complete = lo_rw_aio_complete; ··· 398 402 cmd->iocb.ki_complete = NULL; 399 403 cmd->iocb.ki_flags = 0; 400 404 } 401 - return 0; 402 - } 403 - 404 - static int lo_submit_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, 405 - int nr_bvec, int rw) 406 - { 407 - struct request *rq = blk_mq_rq_from_pdu(cmd); 408 - struct file *file = lo->lo_backing_file; 409 - struct iov_iter iter; 410 - int ret; 411 - 412 - if (cmd->bvec) { 413 - iov_iter_bvec(&iter, rw, cmd->bvec, nr_bvec, blk_rq_bytes(rq)); 414 - iter.iov_offset = 0; 415 - } else { 416 - struct bio *bio = rq->bio; 417 - struct bio_vec *bvec = __bvec_iter_bvec(bio->bi_io_vec, 418 - bio->bi_iter); 419 - 420 - /* 421 - * Same here, this bio may be started from the middle of the 422 - * 'bvec' because of bio splitting, so offset from the bvec 423 - * must be passed to iov iterator 424 - */ 425 - iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq)); 426 - iter.iov_offset = bio->bi_iter.bi_bvec_done; 427 - } 428 - atomic_set(&cmd->ref, 2); 429 - 430 405 431 406 if (rw == ITER_SOURCE) { 432 407 kiocb_start_write(&cmd->iocb); ··· 406 439 ret = file->f_op->read_iter(&cmd->iocb, &iter); 407 440 408 441 lo_rw_aio_do_completion(cmd); 409 - return ret; 410 
- } 411 442 412 - static bool lo_backfile_support_nowait(const struct loop_device *lo) 413 - { 414 - return lo->lo_backing_file->f_mode & FMODE_NOWAIT; 415 - } 416 - 417 - static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, 418 - loff_t pos, int rw) 419 - { 420 - int nr_bvec = lo_cmd_nr_bvec(cmd); 421 - int ret; 422 - 423 - /* prepared already if we have tried nowait */ 424 - if (!cmd->use_aio || !lo_backfile_support_nowait(lo)) { 425 - ret = lo_rw_aio_prep(lo, cmd, nr_bvec, pos); 426 - if (unlikely(ret)) 427 - goto fail; 428 - } 429 - 430 - cmd->iocb.ki_flags &= ~IOCB_NOWAIT; 431 - ret = lo_submit_rw_aio(lo, cmd, nr_bvec, rw); 432 - fail: 433 443 if (ret != -EIOCBQUEUED) 434 444 lo_rw_aio_complete(&cmd->iocb, ret); 435 445 return -EIOCBQUEUED; 436 - } 437 - 438 - static inline bool lo_aio_try_nowait(struct loop_device *lo, 439 - struct loop_cmd *cmd) 440 - { 441 - struct file *file = lo->lo_backing_file; 442 - struct inode *inode = file->f_mapping->host; 443 - struct request *rq = blk_mq_rq_from_pdu(cmd); 444 - 445 - /* NOWAIT works fine for backing block device */ 446 - if (S_ISBLK(inode->i_mode)) 447 - return true; 448 - 449 - /* 450 - * NOWAIT is supposed to be fine for READ without contending with 451 - * blocking WRITE 452 - */ 453 - if (req_op(rq) == REQ_OP_READ) 454 - return true; 455 - 456 - /* 457 - * If there is any queued non-NOWAIT async WRITE , don't try new 458 - * NOWAIT WRITE for avoiding contention 459 - * 460 - * Here we focus on handling stable FS block mapping via NOWAIT 461 - */ 462 - return READ_ONCE(lo->lo_nr_blocking_writes) == 0; 463 - } 464 - 465 - static int lo_rw_aio_nowait(struct loop_device *lo, struct loop_cmd *cmd, 466 - int rw) 467 - { 468 - struct request *rq = blk_mq_rq_from_pdu(cmd); 469 - loff_t pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset; 470 - int nr_bvec = lo_cmd_nr_bvec(cmd); 471 - int ret = lo_rw_aio_prep(lo, cmd, nr_bvec, pos); 472 - 473 - if (unlikely(ret)) 474 - goto fail; 475 - 476 - if 
(!lo_aio_try_nowait(lo, cmd)) 477 - return -EAGAIN; 478 - 479 - cmd->iocb.ki_flags |= IOCB_NOWAIT; 480 - ret = lo_submit_rw_aio(lo, cmd, nr_bvec, rw); 481 - fail: 482 - if (ret != -EIOCBQUEUED && ret != -EAGAIN) 483 - lo_rw_aio_complete(&cmd->iocb, ret); 484 - return ret; 485 446 } 486 447 487 448 static int do_req_filebacked(struct loop_device *lo, struct request *rq) ··· 706 811 return sysfs_emit(buf, "%s\n", dio ? "1" : "0"); 707 812 } 708 813 709 - static ssize_t loop_attr_nr_blocking_writes_show(struct loop_device *lo, 710 - char *buf) 711 - { 712 - return sysfs_emit(buf, "%u\n", lo->lo_nr_blocking_writes); 713 - } 714 - 715 814 LOOP_ATTR_RO(backing_file); 716 815 LOOP_ATTR_RO(offset); 717 816 LOOP_ATTR_RO(sizelimit); 718 817 LOOP_ATTR_RO(autoclear); 719 818 LOOP_ATTR_RO(partscan); 720 819 LOOP_ATTR_RO(dio); 721 - LOOP_ATTR_RO(nr_blocking_writes); 722 820 723 821 static struct attribute *loop_attrs[] = { 724 822 &loop_attr_backing_file.attr, ··· 720 832 &loop_attr_autoclear.attr, 721 833 &loop_attr_partscan.attr, 722 834 &loop_attr_dio.attr, 723 - &loop_attr_nr_blocking_writes.attr, 724 835 NULL, 725 836 }; 726 837 ··· 795 908 } 796 909 #endif 797 910 798 - static inline void loop_inc_blocking_writes(struct loop_device *lo, 799 - struct loop_cmd *cmd) 800 - { 801 - lockdep_assert_held(&lo->lo_work_lock); 802 - 803 - if (req_op(blk_mq_rq_from_pdu(cmd)) == REQ_OP_WRITE) 804 - lo->lo_nr_blocking_writes += 1; 805 - } 806 - 807 - static inline void loop_dec_blocking_writes(struct loop_device *lo, 808 - struct loop_cmd *cmd) 809 - { 810 - lockdep_assert_held(&lo->lo_work_lock); 811 - 812 - if (req_op(blk_mq_rq_from_pdu(cmd)) == REQ_OP_WRITE) 813 - lo->lo_nr_blocking_writes -= 1; 814 - } 815 - 816 911 static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd) 817 912 { 818 - struct request __maybe_unused *rq = blk_mq_rq_from_pdu(cmd); 819 913 struct rb_node **node, *parent = NULL; 820 914 struct loop_worker *cur_worker, *worker = NULL; 821 915 struct 
work_struct *work; 822 916 struct list_head *cmd_list; 823 - 824 - /* always use the first bio's css */ 825 - cmd->blkcg_css = NULL; 826 - cmd->memcg_css = NULL; 827 - #ifdef CONFIG_BLK_CGROUP 828 - if (rq->bio) { 829 - cmd->blkcg_css = bio_blkcg_css(rq->bio); 830 - #ifdef CONFIG_MEMCG 831 - if (cmd->blkcg_css) { 832 - cmd->memcg_css = 833 - cgroup_get_e_css(cmd->blkcg_css->cgroup, 834 - &memory_cgrp_subsys); 835 - } 836 - #endif 837 - } 838 - #endif 839 917 840 918 spin_lock_irq(&lo->lo_work_lock); 841 919 ··· 860 1008 work = &lo->rootcg_work; 861 1009 cmd_list = &lo->rootcg_cmd_list; 862 1010 } 863 - if (cmd->use_aio) 864 - loop_inc_blocking_writes(lo, cmd); 865 1011 list_add_tail(&cmd->list_entry, cmd_list); 866 1012 queue_work(lo->workqueue, work); 867 1013 spin_unlock_irq(&lo->lo_work_lock); ··· 1856 2006 struct request *rq = bd->rq; 1857 2007 struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); 1858 2008 struct loop_device *lo = rq->q->queuedata; 1859 - int rw = 0; 1860 2009 1861 2010 blk_mq_start_request(rq); 1862 2011 ··· 1868 2019 case REQ_OP_WRITE_ZEROES: 1869 2020 cmd->use_aio = false; 1870 2021 break; 1871 - case REQ_OP_READ: 1872 - rw = ITER_DEST; 1873 - cmd->use_aio = lo->lo_flags & LO_FLAGS_DIRECT_IO; 1874 - break; 1875 - case REQ_OP_WRITE: 1876 - rw = ITER_SOURCE; 1877 - cmd->use_aio = lo->lo_flags & LO_FLAGS_DIRECT_IO; 1878 - break; 1879 2022 default: 1880 - return BLK_STS_IOERR; 2023 + cmd->use_aio = lo->lo_flags & LO_FLAGS_DIRECT_IO; 2024 + break; 1881 2025 } 1882 2026 1883 - /* try NOWAIT if the backing file supports the mode */ 1884 - if (cmd->use_aio && lo_backfile_support_nowait(lo)) { 1885 - int res = lo_rw_aio_nowait(lo, cmd, rw); 1886 - 1887 - if (res != -EAGAIN && res != -EOPNOTSUPP) 1888 - return BLK_STS_OK; 1889 - /* fallback to workqueue for handling aio */ 2027 + /* always use the first bio's css */ 2028 + cmd->blkcg_css = NULL; 2029 + cmd->memcg_css = NULL; 2030 + #ifdef CONFIG_BLK_CGROUP 2031 + if (rq->bio) { 2032 + cmd->blkcg_css = 
bio_blkcg_css(rq->bio); 2033 + #ifdef CONFIG_MEMCG 2034 + if (cmd->blkcg_css) { 2035 + cmd->memcg_css = 2036 + cgroup_get_e_css(cmd->blkcg_css->cgroup, 2037 + &memory_cgrp_subsys); 2038 + } 2039 + #endif 1890 2040 } 1891 - 2041 + #endif 1892 2042 loop_queue_work(lo, cmd); 1893 2043 1894 2044 return BLK_STS_OK; ··· 1963 2115 cond_resched(); 1964 2116 1965 2117 spin_lock_irq(&lo->lo_work_lock); 1966 - if (cmd->use_aio) 1967 - loop_dec_blocking_writes(lo, cmd); 1968 2118 } 1969 2119 1970 2120 /* ··· 2041 2195 lo->tag_set.queue_depth = hw_queue_depth; 2042 2196 lo->tag_set.numa_node = NUMA_NO_NODE; 2043 2197 lo->tag_set.cmd_size = sizeof(struct loop_cmd); 2044 - lo->tag_set.flags = BLK_MQ_F_STACKING | BLK_MQ_F_NO_SCHED_BY_DEFAULT | 2045 - BLK_MQ_F_BLOCKING; 2198 + lo->tag_set.flags = BLK_MQ_F_STACKING | BLK_MQ_F_NO_SCHED_BY_DEFAULT; 2046 2199 lo->tag_set.driver_data = lo; 2047 2200 2048 2201 err = blk_mq_alloc_tag_set(&lo->tag_set);