Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

blk-throttle: Some cleanups and race fixes in limit update code

When throttle group limits are updated through cgroups, a thread is
woken up to process these updates. While reviewing that code, Oleg noted
that a couple of race conditions existed in the code, and he also suggested
that the code could be simplified.

This patch fixes the races and simplifies the code based on Oleg's suggestions:

- Use xchg().
- Introduced a common function throtl_update_blkio_group_common()
which is shared now by all iops/bps update functions.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>

Fixed a merge issue, throtl_schedule_delayed_work() takes throtl_data
as the argument now, not the queue.

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>

authored by

Vivek Goyal and committed by
Jens Axboe
de701c74 231d704b

+41 -57
+41 -57
block/blk-throttle.c
··· 102 102 /* Work for dispatching throttled bios */ 103 103 struct delayed_work throtl_work; 104 104 105 - atomic_t limits_changed; 105 + bool limits_changed; 106 106 }; 107 107 108 108 enum tg_state_flags { ··· 201 201 RB_CLEAR_NODE(&tg->rb_node); 202 202 bio_list_init(&tg->bio_lists[0]); 203 203 bio_list_init(&tg->bio_lists[1]); 204 + td->limits_changed = false; 204 205 205 206 /* 206 207 * Take the initial reference that will be released on destroy ··· 738 737 struct throtl_grp *tg; 739 738 struct hlist_node *pos, *n; 740 739 741 - if (!atomic_read(&td->limits_changed)) 740 + if (!td->limits_changed) 742 741 return; 743 742 744 - throtl_log(td, "limit changed =%d", atomic_read(&td->limits_changed)); 743 + xchg(&td->limits_changed, false); 745 744 746 - /* 747 - * Make sure updates from throtl_update_blkio_group_read_bps() group 748 - * of functions to tg->limits_changed are visible. We do not 749 - * want update td->limits_changed to be visible but update to 750 - * tg->limits_changed not being visible yet on this cpu. Hence 751 - * the read barrier. 
752 - */ 753 - smp_rmb(); 745 + throtl_log(td, "limits changed"); 754 746 755 747 hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { 756 - if (throtl_tg_on_rr(tg) && tg->limits_changed) { 757 - throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu" 758 - " riops=%u wiops=%u", tg->bps[READ], 759 - tg->bps[WRITE], tg->iops[READ], 760 - tg->iops[WRITE]); 761 - tg_update_disptime(td, tg); 762 - tg->limits_changed = false; 763 - } 764 - } 748 + if (!tg->limits_changed) 749 + continue; 765 750 766 - smp_mb__before_atomic_dec(); 767 - atomic_dec(&td->limits_changed); 768 - smp_mb__after_atomic_dec(); 751 + if (!xchg(&tg->limits_changed, false)) 752 + continue; 753 + 754 + throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu" 755 + " riops=%u wiops=%u", tg->bps[READ], tg->bps[WRITE], 756 + tg->iops[READ], tg->iops[WRITE]); 757 + 758 + if (throtl_tg_on_rr(tg)) 759 + tg_update_disptime(td, tg); 760 + } 769 761 } 770 762 771 763 /* Dispatch throttled bios. Should be called without queue lock held. 
*/ ··· 892 898 spin_unlock_irqrestore(td->queue->queue_lock, flags); 893 899 } 894 900 901 + static void throtl_update_blkio_group_common(struct throtl_data *td, 902 + struct throtl_grp *tg) 903 + { 904 + xchg(&tg->limits_changed, true); 905 + xchg(&td->limits_changed, true); 906 + /* Schedule a work now to process the limit change */ 907 + throtl_schedule_delayed_work(td, 0); 908 + } 909 + 895 910 /* 896 911 * For all update functions, key should be a valid pointer because these 897 912 * update functions are called under blkcg_lock, that means, blkg is ··· 914 911 struct blkio_group *blkg, u64 read_bps) 915 912 { 916 913 struct throtl_data *td = key; 914 + struct throtl_grp *tg = tg_of_blkg(blkg); 917 915 918 - tg_of_blkg(blkg)->bps[READ] = read_bps; 919 - /* Make sure read_bps is updated before setting limits_changed */ 920 - smp_wmb(); 921 - tg_of_blkg(blkg)->limits_changed = true; 922 - 923 - /* Make sure tg->limits_changed is updated before td->limits_changed */ 924 - smp_mb__before_atomic_inc(); 925 - atomic_inc(&td->limits_changed); 926 - smp_mb__after_atomic_inc(); 927 - 928 - /* Schedule a work now to process the limit change */ 929 - throtl_schedule_delayed_work(td, 0); 916 + tg->bps[READ] = read_bps; 917 + throtl_update_blkio_group_common(td, tg); 930 918 } 931 919 932 920 static void throtl_update_blkio_group_write_bps(void *key, 933 921 struct blkio_group *blkg, u64 write_bps) 934 922 { 935 923 struct throtl_data *td = key; 924 + struct throtl_grp *tg = tg_of_blkg(blkg); 936 925 937 - tg_of_blkg(blkg)->bps[WRITE] = write_bps; 938 - smp_wmb(); 939 - tg_of_blkg(blkg)->limits_changed = true; 940 - smp_mb__before_atomic_inc(); 941 - atomic_inc(&td->limits_changed); 942 - smp_mb__after_atomic_inc(); 943 - throtl_schedule_delayed_work(td, 0); 926 + tg->bps[WRITE] = write_bps; 927 + throtl_update_blkio_group_common(td, tg); 944 928 } 945 929 946 930 static void throtl_update_blkio_group_read_iops(void *key, 947 931 struct blkio_group *blkg, unsigned int 
read_iops) 948 932 { 949 933 struct throtl_data *td = key; 934 + struct throtl_grp *tg = tg_of_blkg(blkg); 950 935 951 - tg_of_blkg(blkg)->iops[READ] = read_iops; 952 - smp_wmb(); 953 - tg_of_blkg(blkg)->limits_changed = true; 954 - smp_mb__before_atomic_inc(); 955 - atomic_inc(&td->limits_changed); 956 - smp_mb__after_atomic_inc(); 957 - throtl_schedule_delayed_work(td, 0); 936 + tg->iops[READ] = read_iops; 937 + throtl_update_blkio_group_common(td, tg); 958 938 } 959 939 960 940 static void throtl_update_blkio_group_write_iops(void *key, 961 941 struct blkio_group *blkg, unsigned int write_iops) 962 942 { 963 943 struct throtl_data *td = key; 944 + struct throtl_grp *tg = tg_of_blkg(blkg); 964 945 965 - tg_of_blkg(blkg)->iops[WRITE] = write_iops; 966 - smp_wmb(); 967 - tg_of_blkg(blkg)->limits_changed = true; 968 - smp_mb__before_atomic_inc(); 969 - atomic_inc(&td->limits_changed); 970 - smp_mb__after_atomic_inc(); 971 - throtl_schedule_delayed_work(td, 0); 946 + tg->iops[WRITE] = write_iops; 947 + throtl_update_blkio_group_common(td, tg); 972 948 } 973 949 974 950 static void throtl_shutdown_wq(struct request_queue *q) ··· 994 1012 */ 995 1013 update_disptime = false; 996 1014 goto queue_bio; 1015 + 997 1016 } 998 1017 999 1018 /* Bio is with-in rate limit of group */ ··· 1035 1052 1036 1053 INIT_HLIST_HEAD(&td->tg_list); 1037 1054 td->tg_service_tree = THROTL_RB_ROOT; 1038 - atomic_set(&td->limits_changed, 0); 1055 + td->limits_changed = false; 1039 1056 1040 1057 /* Init root group */ 1041 1058 tg = &td->root_tg; ··· 1047 1064 /* Practically unlimited BW */ 1048 1065 tg->bps[0] = tg->bps[1] = -1; 1049 1066 tg->iops[0] = tg->iops[1] = -1; 1067 + td->limits_changed = false; 1050 1068 1051 1069 /* 1052 1070 * Set root group reference to 2. One reference will be dropped when