Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

blk-throttle: Split the service queue

This patch splits throtl_service_queue->nr_queued into "nr_queued_bps" and
"nr_queued_iops", allowing separate accounting of BPS and IOPS queued bios.
This prepares for future changes that need to check whether the BPS or IOPS
queues are empty.

To facilitate updating the number of IOs in the BPS and IOPS queues, the
addition logic is moved from throtl_add_bio_tg() to
throtl_qnode_add_bio(), and similarly, the removal logic is moved from
tg_dispatch_one_bio() to throtl_pop_queued().

Also introduce sq_queued() to calculate the total of nr_queued_bps and
nr_queued_iops for a service queue.

Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
Reviewed-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Zizhi Wo <wozizhi@huaweicloud.com>
Link: https://lore.kernel.org/r/20250506020935.655574-7-wozizhi@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Zizhi Wo and committed by
Jens Axboe
28ad83b7 f2c4902b

+49 -30
+47 -29
block/blk-throttle.c
··· 152 152 * throtl_qnode_add_bio - add a bio to a throtl_qnode and activate it 153 153 * @bio: bio being added 154 154 * @qn: qnode to add bio to 155 - * @queued: the service_queue->queued[] list @qn belongs to 155 + * @sq: the service_queue @qn belongs to 156 156 * 157 - * Add @bio to @qn and put @qn on @queued if it's not already on. 157 + * Add @bio to @qn and put @qn on @sq->queued if it's not already on. 158 158 * @qn->tg's reference count is bumped when @qn is activated. See the 159 159 * comment on top of throtl_qnode definition for details. 160 160 */ 161 161 static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn, 162 - struct list_head *queued) 162 + struct throtl_service_queue *sq) 163 163 { 164 - if (bio_flagged(bio, BIO_TG_BPS_THROTTLED)) 164 + bool rw = bio_data_dir(bio); 165 + 166 + if (bio_flagged(bio, BIO_TG_BPS_THROTTLED)) { 165 167 bio_list_add(&qn->bios_iops, bio); 166 - else 168 + sq->nr_queued_iops[rw]++; 169 + } else { 167 170 bio_list_add(&qn->bios_bps, bio); 171 + sq->nr_queued_bps[rw]++; 172 + } 168 173 169 174 if (list_empty(&qn->node)) { 170 - list_add_tail(&qn->node, queued); 175 + list_add_tail(&qn->node, &sq->queued[rw]); 171 176 blkg_get(tg_to_blkg(qn->tg)); 172 177 } 173 178 } ··· 203 198 204 199 /** 205 200 * throtl_pop_queued - pop the first bio form a qnode list 206 - * @queued: the qnode list to pop a bio from 201 + * @sq: the service_queue to pop a bio from 207 202 * @tg_to_put: optional out argument for throtl_grp to put 203 + * @rw: read/write 208 204 * 209 - * Pop the first bio from the qnode list @queued. Note that we firstly focus on 210 - * the iops list because bios are ultimately dispatched from it. After popping, 211 - * the first qnode is removed from @queued if empty or moved to the end of 212 - * @queued so that the popping order is round-robin. 205 + * Pop the first bio from the qnode list @sq->queued. 
Note that we firstly 206 + * focus on the iops list because bios are ultimately dispatched from it. 207 + * After popping, the first qnode is removed from @sq->queued if empty or moved 208 + * to the end of @sq->queued so that the popping order is round-robin. 213 209 * 214 210 * When the first qnode is removed, its associated throtl_grp should be put 215 211 * too. If @tg_to_put is NULL, this function automatically puts it; 216 212 * otherwise, *@tg_to_put is set to the throtl_grp to put and the caller is 217 213 * responsible for putting it. 218 214 */ 219 - static struct bio *throtl_pop_queued(struct list_head *queued, 220 - struct throtl_grp **tg_to_put) 215 + static struct bio *throtl_pop_queued(struct throtl_service_queue *sq, 216 + struct throtl_grp **tg_to_put, bool rw) 221 217 { 218 + struct list_head *queued = &sq->queued[rw]; 222 219 struct throtl_qnode *qn; 223 220 struct bio *bio; 224 221 ··· 229 222 230 223 qn = list_first_entry(queued, struct throtl_qnode, node); 231 224 bio = bio_list_pop(&qn->bios_iops); 232 - if (!bio) 225 + if (bio) { 226 + sq->nr_queued_iops[rw]--; 227 + } else { 233 228 bio = bio_list_pop(&qn->bios_bps); 229 + if (bio) 230 + sq->nr_queued_bps[rw]--; 231 + } 234 232 WARN_ON_ONCE(!bio); 235 233 236 234 if (bio_list_empty(&qn->bios_bps) && bio_list_empty(&qn->bios_iops)) { ··· 565 553 return true; 566 554 } 567 555 556 + static unsigned int sq_queued(struct throtl_service_queue *sq, int type) 557 + { 558 + return sq->nr_queued_bps[type] + sq->nr_queued_iops[type]; 559 + } 560 + 568 561 static unsigned int calculate_io_allowed(u32 iops_limit, 569 562 unsigned long jiffy_elapsed) 570 563 { ··· 717 700 * of subsequent bios. The same handling applies when the previous BPS/IOPS 718 701 * limit was set to max. 
719 702 */ 720 - if (tg->service_queue.nr_queued[rw] == 0) { 703 + if (sq_queued(&tg->service_queue, rw) == 0) { 721 704 tg->bytes_disp[rw] = 0; 722 705 tg->io_disp[rw] = 0; 723 706 return; ··· 844 827 */ 845 828 static void tg_update_slice(struct throtl_grp *tg, bool rw) 846 829 { 847 - if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw])) 830 + if (throtl_slice_used(tg, rw) && 831 + sq_queued(&tg->service_queue, rw) == 0) 848 832 throtl_start_new_slice(tg, rw, true); 849 833 else 850 834 throtl_extend_slice(tg, rw, jiffies + tg->td->throtl_slice); ··· 901 883 * this function with a different bio if there are other bios 902 884 * queued. 903 885 */ 904 - BUG_ON(tg->service_queue.nr_queued[rw] && 886 + BUG_ON(sq_queued(&tg->service_queue, rw) && 905 887 bio != throtl_peek_queued(&tg->service_queue.queued[rw])); 906 888 907 889 wait = tg_dispatch_bps_time(tg, bio); ··· 941 923 * dispatched. Mark that @tg was empty. This is automatically 942 924 * cleared on the next tg_update_disptime(). 943 925 */ 944 - if (!sq->nr_queued[rw]) 926 + if (sq_queued(sq, rw) == 0) 945 927 tg->flags |= THROTL_TG_WAS_EMPTY; 946 928 947 - throtl_qnode_add_bio(bio, qn, &sq->queued[rw]); 929 + throtl_qnode_add_bio(bio, qn, sq); 948 930 949 - sq->nr_queued[rw]++; 950 931 throtl_enqueue_tg(tg); 951 932 } 952 933 ··· 999 982 * getting released prematurely. Remember the tg to put and put it 1000 983 * after @bio is transferred to @parent_sq. 
1001 984 */ 1002 - bio = throtl_pop_queued(&sq->queued[rw], &tg_to_put); 1003 - sq->nr_queued[rw]--; 985 + bio = throtl_pop_queued(sq, &tg_to_put, rw); 1004 986 1005 987 throtl_charge_iops_bio(tg, bio); 1006 988 ··· 1016 1000 } else { 1017 1001 bio_set_flag(bio, BIO_BPS_THROTTLED); 1018 1002 throtl_qnode_add_bio(bio, &tg->qnode_on_parent[rw], 1019 - &parent_sq->queued[rw]); 1003 + parent_sq); 1020 1004 BUG_ON(tg->td->nr_queued[rw] <= 0); 1021 1005 tg->td->nr_queued[rw]--; 1022 1006 } ··· 1081 1065 nr_disp += throtl_dispatch_tg(tg); 1082 1066 1083 1067 sq = &tg->service_queue; 1084 - if (sq->nr_queued[READ] || sq->nr_queued[WRITE]) 1068 + if (sq_queued(sq, READ) || sq_queued(sq, WRITE)) 1085 1069 tg_update_disptime(tg); 1086 1070 else 1087 1071 throtl_dequeue_tg(tg); ··· 1134 1118 dispatched = false; 1135 1119 1136 1120 while (true) { 1121 + unsigned int bio_cnt_r = sq_queued(sq, READ); 1122 + unsigned int bio_cnt_w = sq_queued(sq, WRITE); 1123 + 1137 1124 throtl_log(sq, "dispatch nr_queued=%u read=%u write=%u", 1138 - sq->nr_queued[READ] + sq->nr_queued[WRITE], 1139 - sq->nr_queued[READ], sq->nr_queued[WRITE]); 1125 + bio_cnt_r + bio_cnt_w, bio_cnt_r, bio_cnt_w); 1140 1126 1141 1127 ret = throtl_select_dispatch(sq); 1142 1128 if (ret) { ··· 1200 1182 1201 1183 spin_lock_irq(&q->queue_lock); 1202 1184 for (rw = READ; rw <= WRITE; rw++) 1203 - while ((bio = throtl_pop_queued(&td_sq->queued[rw], NULL))) 1185 + while ((bio = throtl_pop_queued(td_sq, NULL, rw))) 1204 1186 bio_list_add(&bio_list_on_stack, bio); 1205 1187 spin_unlock_irq(&q->queue_lock); 1206 1188 ··· 1706 1688 static bool tg_within_limit(struct throtl_grp *tg, struct bio *bio, bool rw) 1707 1689 { 1708 1690 /* throtl is FIFO - if bios are already queued, should queue */ 1709 - if (tg->service_queue.nr_queued[rw]) 1691 + if (sq_queued(&tg->service_queue, rw)) 1710 1692 return false; 1711 1693 1712 1694 return tg_dispatch_time(tg, bio) == 0; ··· 1780 1762 tg->bytes_disp[rw], bio->bi_iter.bi_size, 1781 1763 
tg_bps_limit(tg, rw), 1782 1764 tg->io_disp[rw], tg_iops_limit(tg, rw), 1783 - sq->nr_queued[READ], sq->nr_queued[WRITE]); 1765 + sq_queued(sq, READ), sq_queued(sq, WRITE)); 1784 1766 1785 1767 td->nr_queued[rw]++; 1786 1768 throtl_add_bio_tg(bio, qn, tg);
+2 -1
block/blk-throttle.h
··· 42 42 * children throtl_grp's. 43 43 */ 44 44 struct list_head queued[2]; /* throtl_qnode [READ/WRITE] */ 45 - unsigned int nr_queued[2]; /* number of queued bios */ 45 + unsigned int nr_queued_bps[2]; /* number of queued bps bios */ 46 + unsigned int nr_queued_iops[2]; /* number of queued iops bios */ 46 47 47 48 /* 48 49 * RB tree of active children throtl_grp's, which are sorted by