mm/swapfile.c: move inode_lock out of claim_swapfile

claim_swapfile() currently keeps the inode locked when it succeeds, or
when the file is already a swapfile (returning -EBUSY). On all other
error paths it leaves the inode unlocked.
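
For reference, here is a condensed excerpt of the pre-patch tail of
claim_swapfile(), taken from the lines removed in the diff below; all
earlier failure paths return before the inode is ever locked:

	/* ... earlier failure paths return with the inode unlocked ... */

	inode_lock(inode);
	if (IS_SWAPFILE(inode))
		return -EBUSY;	/* returns with the inode still locked */

	return 0;		/* success also returns with the inode locked */
}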

This inconsistency between the lock state and the return value is
confusing, and it actually causes a bad unlock balance, shown below, in
the "bad_swap" section of __do_sys_swapon().

This commit fixes the issue by moving the inode_lock() call and the
IS_SWAPFILE check out of claim_swapfile() and into __do_sys_swapon().
The inode is unlocked in the new "bad_swap_unlock_inode" section, which
guarantees that the inode is already unlocked when "bad_swap" is
reached. Error handling code that runs after the inode has been locked
therefore now jumps to "bad_swap_unlock_inode" instead of "bad_swap".
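
As a condensed sketch of the resulting flow in __do_sys_swapon() (see
the full diff below), the lock is now taken at the call site and every
later failure unwinds through the new label:

	error = claim_swapfile(p, inode);
	if (unlikely(error))
		goto bad_swap;		/* inode not locked yet */

	inode_lock(inode);
	if (IS_SWAPFILE(inode)) {
		error = -EBUSY;
		goto bad_swap_unlock_inode;
	}
	/* ... all later error paths use bad_swap_unlock_inode ... */

bad_swap_unlock_inode:
	inode_unlock(inode);
bad_swap:
	/* common cleanup; the inode is guaranteed unlocked here */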

=====================================
WARNING: bad unlock balance detected!
5.5.0-rc7+ #176 Not tainted
-------------------------------------
swapon/4294 is trying to release lock (&sb->s_type->i_mutex_key) at: __do_sys_swapon+0x94b/0x3550
but there are no more locks to release!

other info that might help us debug this:
no locks held by swapon/4294.

stack backtrace:
CPU: 5 PID: 4294 Comm: swapon Not tainted 5.5.0-rc7-BTRFS-ZNS+ #176
Hardware name: ASUS All Series/H87-PRO, BIOS 2102 07/29/2014
Call Trace:
dump_stack+0xa1/0xea
print_unlock_imbalance_bug.cold+0x114/0x123
lock_release+0x562/0xed0
up_write+0x2d/0x490
__do_sys_swapon+0x94b/0x3550
__x64_sys_swapon+0x54/0x80
do_syscall_64+0xa4/0x4b0
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x7f15da0a0dc7

Fixes: 1638045c3677 ("mm: set S_SWAPFILE on blockdev swap devices")
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Tested-by: Qais Yousef <qais.yousef@arm.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: <stable@vger.kernel.org>
Link: http://lkml.kernel.org/r/20200206090132.154869-1-naohiro.aota@wdc.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>


 mm/swapfile.c | 41 ++++++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

--- a/mm/swapfile.c
+++ b/mm/swapfile.c
···
 		p->bdev = inode->i_sb->s_bdev;
 	}
 
-	inode_lock(inode);
-	if (IS_SWAPFILE(inode))
-		return -EBUSY;
-
 	return 0;
 }
···
 	mapping = swap_file->f_mapping;
 	inode = mapping->host;
 
-	/* will take i_rwsem; */
 	error = claim_swapfile(p, inode);
 	if (unlikely(error))
 		goto bad_swap;
+
+	inode_lock(inode);
+	if (IS_SWAPFILE(inode)) {
+		error = -EBUSY;
+		goto bad_swap_unlock_inode;
+	}
 
 	/*
 	 * Read the swap header.
 	 */
 	if (!mapping->a_ops->readpage) {
 		error = -EINVAL;
-		goto bad_swap;
+		goto bad_swap_unlock_inode;
 	}
 	page = read_mapping_page(mapping, 0, swap_file);
 	if (IS_ERR(page)) {
 		error = PTR_ERR(page);
-		goto bad_swap;
+		goto bad_swap_unlock_inode;
 	}
 	swap_header = kmap(page);
 
 	maxpages = read_swap_header(p, swap_header, inode);
 	if (unlikely(!maxpages)) {
 		error = -EINVAL;
-		goto bad_swap;
+		goto bad_swap_unlock_inode;
 	}
 
 	/* OK, set up the swap map and apply the bad block list */
 	swap_map = vzalloc(maxpages);
 	if (!swap_map) {
 		error = -ENOMEM;
-		goto bad_swap;
+		goto bad_swap_unlock_inode;
 	}
 
 	if (bdi_cap_stable_pages_required(inode_to_bdi(inode)))
···
 					GFP_KERNEL);
 		if (!cluster_info) {
 			error = -ENOMEM;
-			goto bad_swap;
+			goto bad_swap_unlock_inode;
 		}
 
 		for (ci = 0; ci < nr_cluster; ci++)
···
 		p->percpu_cluster = alloc_percpu(struct percpu_cluster);
 		if (!p->percpu_cluster) {
 			error = -ENOMEM;
-			goto bad_swap;
+			goto bad_swap_unlock_inode;
 		}
 		for_each_possible_cpu(cpu) {
 			struct percpu_cluster *cluster;
···
 
 	error = swap_cgroup_swapon(p->type, maxpages);
 	if (error)
-		goto bad_swap;
+		goto bad_swap_unlock_inode;
 
 	nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map,
 		cluster_info, maxpages, &span);
 	if (unlikely(nr_extents < 0)) {
 		error = nr_extents;
-		goto bad_swap;
+		goto bad_swap_unlock_inode;
 	}
 	/* frontswap enabled? set up bit-per-page map for frontswap */
 	if (IS_ENABLED(CONFIG_FRONTSWAP))
···
 
 	error = init_swap_address_space(p->type, maxpages);
 	if (error)
-		goto bad_swap;
+		goto bad_swap_unlock_inode;
 
 	/*
 	 * Flush any pending IO and dirty mappings before we start using this
···
 	error = inode_drain_writes(inode);
 	if (error) {
 		inode->i_flags &= ~S_SWAPFILE;
-		goto bad_swap;
+		goto bad_swap_unlock_inode;
 	}
 
 	mutex_lock(&swapon_mutex);
···
 
 	error = 0;
 	goto out;
+bad_swap_unlock_inode:
+	inode_unlock(inode);
 bad_swap:
 	free_percpu(p->percpu_cluster);
 	p->percpu_cluster = NULL;
···
 		set_blocksize(p->bdev, p->old_block_size);
 		blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
 	}
+	inode = NULL;
 	destroy_swap_extents(p);
 	swap_cgroup_swapoff(p->type);
 	spin_lock(&swap_lock);
···
 	kvfree(frontswap_map);
 	if (inced_nr_rotate_swap)
 		atomic_dec(&nr_rotate_swap);
-	if (swap_file) {
-		if (inode) {
-			inode_unlock(inode);
-			inode = NULL;
-		}
+	if (swap_file)
 		filp_close(swap_file, NULL);
-	}
 out:
 	if (page && !IS_ERR(page)) {
 		kunmap(page);