Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

Pull ceph fixes from Sage Weil:
"There is a pair of fixes for double-frees in the recent bundle for
3.10, a couple of fixes for long-standing bugs (a sleep-while-atomic bug
and an endianness bug), and a fix for a locking bug that can be triggered
when OSDs are going down"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
rbd: fix cleanup in rbd_add()
rbd: don't destroy ceph_opts in rbd_add()
ceph: ceph_pagelist_append might sleep while atomic
ceph: add cpu_to_le32() calls when encoding a reconnect capability
libceph: must hold mutex for reset_changed_osds()

+107 -73
+18 -15
drivers/block/rbd.c
··· 519 519 }; 520 520 521 521 /* 522 - * Initialize an rbd client instance. 523 - * We own *ceph_opts. 522 + * Initialize an rbd client instance. Success or not, this function 523 + * consumes ceph_opts. 524 524 */ 525 525 static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) 526 526 { ··· 675 675 676 676 /* 677 677 * Get a ceph client with specific addr and configuration, if one does 678 - * not exist create it. 678 + * not exist create it. Either way, ceph_opts is consumed by this 679 + * function. 679 680 */ 680 681 static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts) 681 682 { ··· 4698 4697 return ret; 4699 4698 } 4700 4699 4701 - /* Undo whatever state changes are made by v1 or v2 image probe */ 4702 - 4700 + /* 4701 + * Undo whatever state changes are made by v1 or v2 header info 4702 + * call. 4703 + */ 4703 4704 static void rbd_dev_unprobe(struct rbd_device *rbd_dev) 4704 4705 { 4705 4706 struct rbd_image_header *header; ··· 4905 4902 int tmp; 4906 4903 4907 4904 /* 4908 - * Get the id from the image id object. If it's not a 4909 - * format 2 image, we'll get ENOENT back, and we'll assume 4910 - * it's a format 1 image. 4905 + * Get the id from the image id object. Unless there's an 4906 + * error, rbd_dev->spec->image_id will be filled in with 4907 + * a dynamically-allocated string, and rbd_dev->image_format 4908 + * will be set to either 1 or 2. 
4911 4909 */ 4912 4910 ret = rbd_dev_image_id(rbd_dev); 4913 4911 if (ret) ··· 4996 4992 rc = PTR_ERR(rbdc); 4997 4993 goto err_out_args; 4998 4994 } 4999 - ceph_opts = NULL; /* rbd_dev client now owns this */ 5000 4995 5001 4996 /* pick the pool */ 5002 4997 osdc = &rbdc->client->osdc; ··· 5030 5027 rbd_dev->mapping.read_only = read_only; 5031 5028 5032 5029 rc = rbd_dev_device_setup(rbd_dev); 5033 - if (!rc) 5034 - return count; 5030 + if (rc) { 5031 + rbd_dev_image_release(rbd_dev); 5032 + goto err_out_module; 5033 + } 5035 5034 5036 - rbd_dev_image_release(rbd_dev); 5035 + return count; 5036 + 5037 5037 err_out_rbd_dev: 5038 5038 rbd_dev_destroy(rbd_dev); 5039 5039 err_out_client: 5040 5040 rbd_put_client(rbdc); 5041 5041 err_out_args: 5042 - if (ceph_opts) 5043 - ceph_destroy_options(ceph_opts); 5044 - kfree(rbd_opts); 5045 5042 rbd_spec_put(spec); 5046 5043 err_out_module: 5047 5044 module_put(THIS_MODULE);
+47 -26
fs/ceph/locks.c
··· 191 191 } 192 192 193 193 /** 194 - * Encode the flock and fcntl locks for the given inode into the pagelist. 195 - * Format is: #fcntl locks, sequential fcntl locks, #flock locks, 196 - * sequential flock locks. 197 - * Must be called with lock_flocks() already held. 198 - * If we encounter more of a specific lock type than expected, 199 - * we return the value 1. 194 + * Encode the flock and fcntl locks for the given inode into the ceph_filelock 195 + * array. Must be called with lock_flocks() already held. 196 + * If we encounter more of a specific lock type than expected, return -ENOSPC. 200 197 */ 201 - int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, 202 - int num_fcntl_locks, int num_flock_locks) 198 + int ceph_encode_locks_to_buffer(struct inode *inode, 199 + struct ceph_filelock *flocks, 200 + int num_fcntl_locks, int num_flock_locks) 203 201 { 204 202 struct file_lock *lock; 205 - struct ceph_filelock cephlock; 206 203 int err = 0; 207 204 int seen_fcntl = 0; 208 205 int seen_flock = 0; 206 + int l = 0; 209 207 210 208 dout("encoding %d flock and %d fcntl locks", num_flock_locks, 211 209 num_fcntl_locks); 212 - err = ceph_pagelist_append(pagelist, &num_fcntl_locks, sizeof(u32)); 213 - if (err) 214 - goto fail; 210 + 215 211 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 216 212 if (lock->fl_flags & FL_POSIX) { 217 213 ++seen_fcntl; ··· 215 219 err = -ENOSPC; 216 220 goto fail; 217 221 } 218 - err = lock_to_ceph_filelock(lock, &cephlock); 222 + err = lock_to_ceph_filelock(lock, &flocks[l]); 219 223 if (err) 220 224 goto fail; 221 - err = ceph_pagelist_append(pagelist, &cephlock, 222 - sizeof(struct ceph_filelock)); 225 + ++l; 223 226 } 224 - if (err) 225 - goto fail; 226 227 } 227 - 228 - err = ceph_pagelist_append(pagelist, &num_flock_locks, sizeof(u32)); 229 - if (err) 230 - goto fail; 231 228 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 232 229 if (lock->fl_flags & FL_FLOCK) { 233 230 
++seen_flock; ··· 228 239 err = -ENOSPC; 229 240 goto fail; 230 241 } 231 - err = lock_to_ceph_filelock(lock, &cephlock); 242 + err = lock_to_ceph_filelock(lock, &flocks[l]); 232 243 if (err) 233 244 goto fail; 234 - err = ceph_pagelist_append(pagelist, &cephlock, 235 - sizeof(struct ceph_filelock)); 245 + ++l; 236 246 } 237 - if (err) 238 - goto fail; 239 247 } 240 248 fail: 249 + return err; 250 + } 251 + 252 + /** 253 + * Copy the encoded flock and fcntl locks into the pagelist. 254 + * Format is: #fcntl locks, sequential fcntl locks, #flock locks, 255 + * sequential flock locks. 256 + * Returns zero on success. 257 + */ 258 + int ceph_locks_to_pagelist(struct ceph_filelock *flocks, 259 + struct ceph_pagelist *pagelist, 260 + int num_fcntl_locks, int num_flock_locks) 261 + { 262 + int err = 0; 263 + __le32 nlocks; 264 + 265 + nlocks = cpu_to_le32(num_fcntl_locks); 266 + err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); 267 + if (err) 268 + goto out_fail; 269 + 270 + err = ceph_pagelist_append(pagelist, flocks, 271 + num_fcntl_locks * sizeof(*flocks)); 272 + if (err) 273 + goto out_fail; 274 + 275 + nlocks = cpu_to_le32(num_flock_locks); 276 + err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); 277 + if (err) 278 + goto out_fail; 279 + 280 + err = ceph_pagelist_append(pagelist, 281 + &flocks[num_fcntl_locks], 282 + num_flock_locks * sizeof(*flocks)); 283 + out_fail: 241 284 return err; 242 285 } 243 286
+34 -29
fs/ceph/mds_client.c
··· 2478 2478 2479 2479 if (recon_state->flock) { 2480 2480 int num_fcntl_locks, num_flock_locks; 2481 - struct ceph_pagelist_cursor trunc_point; 2481 + struct ceph_filelock *flocks; 2482 2482 2483 - ceph_pagelist_set_cursor(pagelist, &trunc_point); 2484 - do { 2485 - lock_flocks(); 2486 - ceph_count_locks(inode, &num_fcntl_locks, 2487 - &num_flock_locks); 2488 - rec.v2.flock_len = (2*sizeof(u32) + 2489 - (num_fcntl_locks+num_flock_locks) * 2490 - sizeof(struct ceph_filelock)); 2491 - unlock_flocks(); 2492 - 2493 - /* pre-alloc pagelist */ 2494 - ceph_pagelist_truncate(pagelist, &trunc_point); 2495 - err = ceph_pagelist_append(pagelist, &rec, reclen); 2496 - if (!err) 2497 - err = ceph_pagelist_reserve(pagelist, 2498 - rec.v2.flock_len); 2499 - 2500 - /* encode locks */ 2501 - if (!err) { 2502 - lock_flocks(); 2503 - err = ceph_encode_locks(inode, 2504 - pagelist, 2505 - num_fcntl_locks, 2506 - num_flock_locks); 2507 - unlock_flocks(); 2508 - } 2509 - } while (err == -ENOSPC); 2483 + encode_again: 2484 + lock_flocks(); 2485 + ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); 2486 + unlock_flocks(); 2487 + flocks = kmalloc((num_fcntl_locks+num_flock_locks) * 2488 + sizeof(struct ceph_filelock), GFP_NOFS); 2489 + if (!flocks) { 2490 + err = -ENOMEM; 2491 + goto out_free; 2492 + } 2493 + lock_flocks(); 2494 + err = ceph_encode_locks_to_buffer(inode, flocks, 2495 + num_fcntl_locks, 2496 + num_flock_locks); 2497 + unlock_flocks(); 2498 + if (err) { 2499 + kfree(flocks); 2500 + if (err == -ENOSPC) 2501 + goto encode_again; 2502 + goto out_free; 2503 + } 2504 + /* 2505 + * number of encoded locks is stable, so copy to pagelist 2506 + */ 2507 + rec.v2.flock_len = cpu_to_le32(2*sizeof(u32) + 2508 + (num_fcntl_locks+num_flock_locks) * 2509 + sizeof(struct ceph_filelock)); 2510 + err = ceph_pagelist_append(pagelist, &rec, reclen); 2511 + if (!err) 2512 + err = ceph_locks_to_pagelist(flocks, pagelist, 2513 + num_fcntl_locks, 2514 + num_flock_locks); 2515 + 
kfree(flocks); 2510 2516 } else { 2511 2517 err = ceph_pagelist_append(pagelist, &rec, reclen); 2512 2518 } 2513 - 2514 2519 out_free: 2515 2520 kfree(path); 2516 2521 out_dput:
+7 -2
fs/ceph/super.h
··· 822 822 extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); 823 823 extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); 824 824 extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num); 825 - extern int ceph_encode_locks(struct inode *i, struct ceph_pagelist *p, 826 - int p_locks, int f_locks); 825 + extern int ceph_encode_locks_to_buffer(struct inode *inode, 826 + struct ceph_filelock *flocks, 827 + int num_fcntl_locks, 828 + int num_flock_locks); 829 + extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks, 830 + struct ceph_pagelist *pagelist, 831 + int num_fcntl_locks, int num_flock_locks); 827 832 extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c); 828 833 829 834 /* debugfs.c */
+1 -1
net/ceph/osd_client.c
··· 1675 1675 __register_request(osdc, req); 1676 1676 __unregister_linger_request(osdc, req); 1677 1677 } 1678 + reset_changed_osds(osdc); 1678 1679 mutex_unlock(&osdc->request_mutex); 1679 1680 1680 1681 if (needmap) { 1681 1682 dout("%d requests for down osds, need new map\n", needmap); 1682 1683 ceph_monc_request_next_osdmap(&osdc->client->monc); 1683 1684 } 1684 - reset_changed_osds(osdc); 1685 1685 } 1686 1686 1687 1687