[PATCH] lockdep: fix blkdev_open() warning

On Wed, 2006-08-09 at 07:57 +0200, Rolf Eike Beer wrote:
> =============================================
> [ INFO: possible recursive locking detected ]
> ---------------------------------------------
> parted/7929 is trying to acquire lock:
> (&bdev->bd_mutex){--..}, at: [<c105eb8d>] __blkdev_put+0x1e/0x13c
>
> but task is already holding lock:
> (&bdev->bd_mutex){--..}, at: [<c105eec6>] do_open+0x72/0x3a8
>
> other info that might help us debug this:
> 1 lock held by parted/7929:
> #0: (&bdev->bd_mutex){--..}, at: [<c105eec6>] do_open+0x72/0x3a8
> stack backtrace:
> [<c1003aad>] show_trace_log_lvl+0x58/0x15b
> [<c100495f>] show_trace+0xd/0x10
> [<c1004979>] dump_stack+0x17/0x1a
> [<c102dee5>] __lock_acquire+0x753/0x99c
> [<c102e3b0>] lock_acquire+0x4a/0x6a
> [<c1204501>] mutex_lock_nested+0xc8/0x20c
> [<c105eb8d>] __blkdev_put+0x1e/0x13c
> [<c105ecc4>] blkdev_put+0xa/0xc
> [<c105f18a>] do_open+0x336/0x3a8
> [<c105f21b>] blkdev_open+0x1f/0x4c
> [<c1057b40>] __dentry_open+0xc7/0x1aa
> [<c1057c91>] nameidata_to_filp+0x1c/0x2e
> [<c1057cd1>] do_filp_open+0x2e/0x35
> [<c1057dd7>] do_sys_open+0x38/0x68
> [<c1057e33>] sys_open+0x16/0x18
> [<c1002845>] sysenter_past_esp+0x56/0x8d

OK, I'm having a look here; its all new to me so bear with me.

blkdev_open() calls
do_open(bdev, ...,BD_MUTEX_NORMAL) and takes
mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_NORMAL)

then something fails, and we're thrown to:

out_first: where
if (bdev != bdev->bd_contains)
blkdev_put(bdev->bd_contains) which is
__blkdev_put(bdev->bd_contains, BD_MUTEX_NORMAL) which does
mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_NORMAL) <--- lockdep trigger

When going to out_first, dbev->bd_contains is either bdev or whole, and
since we take the branch it must be whole. So it seems to me the
following patch would be the right one:

[akpm@osdl.org: compile fix]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by Peter Zijlstra and committed by Linus Torvalds 6946bd63 7334bb4a

+56 -58
+56 -58
fs/block_dev.c
··· 884 884 } 885 885 EXPORT_SYMBOL(bd_set_size); 886 886 887 + static int __blkdev_put(struct block_device *bdev, unsigned int subclass) 888 + { 889 + int ret = 0; 890 + struct inode *bd_inode = bdev->bd_inode; 891 + struct gendisk *disk = bdev->bd_disk; 892 + 893 + mutex_lock_nested(&bdev->bd_mutex, subclass); 894 + lock_kernel(); 895 + if (!--bdev->bd_openers) { 896 + sync_blockdev(bdev); 897 + kill_bdev(bdev); 898 + } 899 + if (bdev->bd_contains == bdev) { 900 + if (disk->fops->release) 901 + ret = disk->fops->release(bd_inode, NULL); 902 + } else { 903 + mutex_lock_nested(&bdev->bd_contains->bd_mutex, 904 + subclass + 1); 905 + bdev->bd_contains->bd_part_count--; 906 + mutex_unlock(&bdev->bd_contains->bd_mutex); 907 + } 908 + if (!bdev->bd_openers) { 909 + struct module *owner = disk->fops->owner; 910 + 911 + put_disk(disk); 912 + module_put(owner); 913 + 914 + if (bdev->bd_contains != bdev) { 915 + kobject_put(&bdev->bd_part->kobj); 916 + bdev->bd_part = NULL; 917 + } 918 + bdev->bd_disk = NULL; 919 + bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 920 + if (bdev != bdev->bd_contains) 921 + __blkdev_put(bdev->bd_contains, subclass + 1); 922 + bdev->bd_contains = NULL; 923 + } 924 + unlock_kernel(); 925 + mutex_unlock(&bdev->bd_mutex); 926 + bdput(bdev); 927 + return ret; 928 + } 929 + 930 + int blkdev_put(struct block_device *bdev) 931 + { 932 + return __blkdev_put(bdev, BD_MUTEX_NORMAL); 933 + } 934 + EXPORT_SYMBOL(blkdev_put); 935 + 936 + int blkdev_put_partition(struct block_device *bdev) 937 + { 938 + return __blkdev_put(bdev, BD_MUTEX_PARTITION); 939 + } 940 + EXPORT_SYMBOL(blkdev_put_partition); 941 + 887 942 static int 888 943 blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags); 889 944 ··· 1035 980 bdev->bd_disk = NULL; 1036 981 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1037 982 if (bdev != bdev->bd_contains) 1038 - blkdev_put(bdev->bd_contains); 983 + __blkdev_put(bdev->bd_contains, BD_MUTEX_WHOLE); 1039 984 bdev->bd_contains = NULL; 1040 985 put_disk(disk); 1041 986 module_put(owner); ··· 1133 1078 blkdev_put(bdev); 1134 1079 return res; 1135 1080 } 1136 - 1137 - static int __blkdev_put(struct block_device *bdev, unsigned int subclass) 1138 - { 1139 - int ret = 0; 1140 - struct inode *bd_inode = bdev->bd_inode; 1141 - struct gendisk *disk = bdev->bd_disk; 1142 - 1143 - mutex_lock_nested(&bdev->bd_mutex, subclass); 1144 - lock_kernel(); 1145 - if (!--bdev->bd_openers) { 1146 - sync_blockdev(bdev); 1147 - kill_bdev(bdev); 1148 - } 1149 - if (bdev->bd_contains == bdev) { 1150 - if (disk->fops->release) 1151 - ret = disk->fops->release(bd_inode, NULL); 1152 - } else { 1153 - mutex_lock_nested(&bdev->bd_contains->bd_mutex, 1154 - subclass + 1); 1155 - bdev->bd_contains->bd_part_count--; 1156 - mutex_unlock(&bdev->bd_contains->bd_mutex); 1157 - } 1158 - if (!bdev->bd_openers) { 1159 - struct module *owner = disk->fops->owner; 1160 - 1161 - put_disk(disk); 1162 - module_put(owner); 1163 - 1164 - if (bdev->bd_contains != bdev) { 1165 - kobject_put(&bdev->bd_part->kobj); 1166 - bdev->bd_part = NULL; 1167 - } 1168 - bdev->bd_disk = NULL; 1169 - bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1170 - if (bdev != bdev->bd_contains) 1171 - __blkdev_put(bdev->bd_contains, subclass + 1); 1172 - bdev->bd_contains = NULL; 1173 - } 1174 - unlock_kernel(); 1175 - mutex_unlock(&bdev->bd_mutex); 1176 - bdput(bdev); 1177 - return ret; 1178 - } 1179 - 1180 - int blkdev_put(struct block_device *bdev) 1181 - { 1182 - return __blkdev_put(bdev, BD_MUTEX_NORMAL); 1183 - } 1184 - 1185 - EXPORT_SYMBOL(blkdev_put); 1186 - 1187 - int blkdev_put_partition(struct block_device *bdev) 1188 - { 1189 - return __blkdev_put(bdev, BD_MUTEX_PARTITION); 1190 - } 1191 - 1192 - EXPORT_SYMBOL(blkdev_put_partition); 1193 1081 1194 1082 static int blkdev_close(struct inode * inode, struct file * filp) 1195 1083 {