ceph: preallocate flock state without locks held

When the lock_kernel() turns into lock_flocks() and a spinlock, we won't
be able to do allocations with the lock held. Preallocate space without
the lock, and retry if the lock state changes out from underneath us.

Signed-off-by: Greg Farnum <gregf@hq.newdream.net>
Signed-off-by: Sage Weil <sage@newdream.net>

Authored by Greg Farnum; committed by Sage Weil.
Commit hashes: fca4451a ac0b74d8

+42 -13
+15 -2
fs/ceph/locks.c
···
  * Encode the flock and fcntl locks for the given inode into the pagelist.
  * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
  * sequential flock locks.
- * Must be called with BLK already held, and the lock numbers should have
- * been gathered under the same lock holding window.
+ * Must be called with lock_flocks() already held.
+ * If we encounter more of a specific lock type than expected,
+ * we return the value 1.
  */
 int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist,
 		      int num_fcntl_locks, int num_flock_locks)
···
 	struct file_lock *lock;
 	struct ceph_filelock cephlock;
 	int err = 0;
+	int seen_fcntl = 0;
+	int seen_flock = 0;
 
 	dout("encoding %d flock and %d fcntl locks", num_flock_locks,
 	     num_fcntl_locks);
···
 		goto fail;
 	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
 		if (lock->fl_flags & FL_POSIX) {
+			++seen_fcntl;
+			if (seen_fcntl > num_fcntl_locks) {
+				err = -ENOSPC;
+				goto fail;
+			}
 			err = lock_to_ceph_filelock(lock, &cephlock);
 			if (err)
 				goto fail;
···
 		goto fail;
 	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
 		if (lock->fl_flags & FL_FLOCK) {
+			++seen_flock;
+			if (seen_flock > num_flock_locks) {
+				err = -ENOSPC;
+				goto fail;
+			}
 			err = lock_to_ceph_filelock(lock, &cephlock);
 			if (err)
 				goto fail;
+27 -11
fs/ceph/mds_client.c
···
 
 	if (recon_state->flock) {
 		int num_fcntl_locks, num_flock_locks;
+		struct ceph_pagelist_cursor trunc_point;
 
-		lock_kernel();
-		ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
-		rec.v2.flock_len = (2*sizeof(u32) +
-				    (num_fcntl_locks+num_flock_locks) *
-				    sizeof(struct ceph_filelock));
+		ceph_pagelist_set_cursor(pagelist, &trunc_point);
+		do {
+			lock_kernel();
+			ceph_count_locks(inode, &num_fcntl_locks,
+					 &num_flock_locks);
+			rec.v2.flock_len = (2*sizeof(u32) +
+					    (num_fcntl_locks+num_flock_locks) *
+					    sizeof(struct ceph_filelock));
+			unlock_kernel();
 
-		err = ceph_pagelist_append(pagelist, &rec, reclen);
-		if (!err)
-			err = ceph_encode_locks(inode, pagelist,
-						num_fcntl_locks,
-						num_flock_locks);
-		unlock_kernel();
+			/* pre-alloc pagelist */
+			ceph_pagelist_truncate(pagelist, &trunc_point);
+			err = ceph_pagelist_append(pagelist, &rec, reclen);
+			if (!err)
+				err = ceph_pagelist_reserve(pagelist,
+							    rec.v2.flock_len);
+
+			/* encode locks */
+			if (!err) {
+				lock_kernel();
+				err = ceph_encode_locks(inode,
+							pagelist,
+							num_fcntl_locks,
+							num_flock_locks);
+				unlock_kernel();
+			}
+		} while (err == -ENOSPC);
 	} else {
 		err = ceph_pagelist_append(pagelist, &rec, reclen);
 	}