Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'ceph-for-4.17-rc2' of git://github.com/ceph/ceph-client

Pull ceph fixes from Ilya Dryomov:
"A couple of follow-up patches for -rc1 changes in rbd, support for a
timeout on waiting for the acquisition of the exclusive lock, and a fix
for uninitialized memory access in CephFS, marked for stable"

* tag 'ceph-for-4.17-rc2' of git://github.com/ceph/ceph-client:
rbd: notrim map option
rbd: adjust queue limits for "fancy" striping
rbd: avoid Wreturn-type warnings
ceph: always update atime/mtime/ctime for new inode
rbd: support timeout in rbd_wait_state_locked()
rbd: refactor rbd_wait_state_locked()

+76 -35
+69 -32
drivers/block/rbd.c
··· 732 732 */ 733 733 enum { 734 734 Opt_queue_depth, 735 + Opt_lock_timeout, 735 736 Opt_last_int, 736 737 /* int args above */ 737 738 Opt_last_string, ··· 741 740 Opt_read_write, 742 741 Opt_lock_on_read, 743 742 Opt_exclusive, 743 + Opt_notrim, 744 744 Opt_err 745 745 }; 746 746 747 747 static match_table_t rbd_opts_tokens = { 748 748 {Opt_queue_depth, "queue_depth=%d"}, 749 + {Opt_lock_timeout, "lock_timeout=%d"}, 749 750 /* int args above */ 750 751 /* string args above */ 751 752 {Opt_read_only, "read_only"}, ··· 756 753 {Opt_read_write, "rw"}, /* Alternate spelling */ 757 754 {Opt_lock_on_read, "lock_on_read"}, 758 755 {Opt_exclusive, "exclusive"}, 756 + {Opt_notrim, "notrim"}, 759 757 {Opt_err, NULL} 760 758 }; 761 759 762 760 struct rbd_options { 763 761 int queue_depth; 762 + unsigned long lock_timeout; 764 763 bool read_only; 765 764 bool lock_on_read; 766 765 bool exclusive; 766 + bool trim; 767 767 }; 768 768 769 769 #define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ 770 + #define RBD_LOCK_TIMEOUT_DEFAULT 0 /* no timeout */ 770 771 #define RBD_READ_ONLY_DEFAULT false 771 772 #define RBD_LOCK_ON_READ_DEFAULT false 772 773 #define RBD_EXCLUSIVE_DEFAULT false 774 + #define RBD_TRIM_DEFAULT true 773 775 774 776 static int parse_rbd_opts_token(char *c, void *private) 775 777 { ··· 804 796 } 805 797 rbd_opts->queue_depth = intval; 806 798 break; 799 + case Opt_lock_timeout: 800 + /* 0 is "wait forever" (i.e. 
infinite timeout) */ 801 + if (intval < 0 || intval > INT_MAX / 1000) { 802 + pr_err("lock_timeout out of range\n"); 803 + return -EINVAL; 804 + } 805 + rbd_opts->lock_timeout = msecs_to_jiffies(intval * 1000); 806 + break; 807 807 case Opt_read_only: 808 808 rbd_opts->read_only = true; 809 809 break; ··· 823 807 break; 824 808 case Opt_exclusive: 825 809 rbd_opts->exclusive = true; 810 + break; 811 + case Opt_notrim: 812 + rbd_opts->trim = false; 826 813 break; 827 814 default: 828 815 /* libceph prints "bad option" msg */ ··· 1411 1392 case OBJ_OP_DISCARD: 1412 1393 return true; 1413 1394 default: 1414 - rbd_assert(0); 1395 + BUG(); 1415 1396 } 1416 1397 } 1417 1398 ··· 2485 2466 } 2486 2467 return false; 2487 2468 default: 2488 - rbd_assert(0); 2469 + BUG(); 2489 2470 } 2490 2471 } 2491 2472 ··· 2513 2494 } 2514 2495 return false; 2515 2496 default: 2516 - rbd_assert(0); 2497 + BUG(); 2517 2498 } 2518 2499 } 2519 2500 ··· 3552 3533 /* 3553 3534 * lock_rwsem must be held for read 3554 3535 */ 3555 - static void rbd_wait_state_locked(struct rbd_device *rbd_dev) 3536 + static int rbd_wait_state_locked(struct rbd_device *rbd_dev, bool may_acquire) 3556 3537 { 3557 3538 DEFINE_WAIT(wait); 3539 + unsigned long timeout; 3540 + int ret = 0; 3541 + 3542 + if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) 3543 + return -EBLACKLISTED; 3544 + 3545 + if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) 3546 + return 0; 3547 + 3548 + if (!may_acquire) { 3549 + rbd_warn(rbd_dev, "exclusive lock required"); 3550 + return -EROFS; 3551 + } 3558 3552 3559 3553 do { 3560 3554 /* ··· 3579 3547 prepare_to_wait_exclusive(&rbd_dev->lock_waitq, &wait, 3580 3548 TASK_UNINTERRUPTIBLE); 3581 3549 up_read(&rbd_dev->lock_rwsem); 3582 - schedule(); 3550 + timeout = schedule_timeout(ceph_timeout_jiffies( 3551 + rbd_dev->opts->lock_timeout)); 3583 3552 down_read(&rbd_dev->lock_rwsem); 3584 - } while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED && 3585 - !test_bit(RBD_DEV_FLAG_BLACKLISTED, 
&rbd_dev->flags)); 3553 + if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { 3554 + ret = -EBLACKLISTED; 3555 + break; 3556 + } 3557 + if (!timeout) { 3558 + rbd_warn(rbd_dev, "timed out waiting for lock"); 3559 + ret = -ETIMEDOUT; 3560 + break; 3561 + } 3562 + } while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED); 3586 3563 3587 3564 finish_wait(&rbd_dev->lock_waitq, &wait); 3565 + return ret; 3588 3566 } 3589 3567 3590 3568 static void rbd_queue_workfn(struct work_struct *work) ··· 3680 3638 (op_type != OBJ_OP_READ || rbd_dev->opts->lock_on_read); 3681 3639 if (must_be_locked) { 3682 3640 down_read(&rbd_dev->lock_rwsem); 3683 - if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED && 3684 - !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { 3685 - if (rbd_dev->opts->exclusive) { 3686 - rbd_warn(rbd_dev, "exclusive lock required"); 3687 - result = -EROFS; 3688 - goto err_unlock; 3689 - } 3690 - rbd_wait_state_locked(rbd_dev); 3691 - } 3692 - if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { 3693 - result = -EBLACKLISTED; 3641 + result = rbd_wait_state_locked(rbd_dev, 3642 + !rbd_dev->opts->exclusive); 3643 + if (result) 3694 3644 goto err_unlock; 3695 - } 3696 3645 } 3697 3646 3698 3647 img_request = rbd_img_request_create(rbd_dev, op_type, snapc); ··· 3935 3902 { 3936 3903 struct gendisk *disk; 3937 3904 struct request_queue *q; 3938 - u64 segment_size; 3905 + unsigned int objset_bytes = 3906 + rbd_dev->layout.object_size * rbd_dev->layout.stripe_count; 3939 3907 int err; 3940 3908 3941 3909 /* create gendisk info */ ··· 3976 3942 blk_queue_flag_set(QUEUE_FLAG_NONROT, q); 3977 3943 /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ 3978 3944 3979 - /* set io sizes to object size */ 3980 - segment_size = rbd_obj_bytes(&rbd_dev->header); 3981 - blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); 3945 + blk_queue_max_hw_sectors(q, objset_bytes >> SECTOR_SHIFT); 3982 3946 q->limits.max_sectors = queue_max_hw_sectors(q); 3983 3947 
blk_queue_max_segments(q, USHRT_MAX); 3984 3948 blk_queue_max_segment_size(q, UINT_MAX); 3985 - blk_queue_io_min(q, segment_size); 3986 - blk_queue_io_opt(q, segment_size); 3949 + blk_queue_io_min(q, objset_bytes); 3950 + blk_queue_io_opt(q, objset_bytes); 3987 3951 3988 - /* enable the discard support */ 3989 - blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); 3990 - q->limits.discard_granularity = segment_size; 3991 - blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE); 3992 - blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE); 3952 + if (rbd_dev->opts->trim) { 3953 + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); 3954 + q->limits.discard_granularity = objset_bytes; 3955 + blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT); 3956 + blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT); 3957 + } 3993 3958 3994 3959 if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) 3995 3960 q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; ··· 5212 5179 5213 5180 rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; 5214 5181 rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT; 5182 + rbd_opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT; 5215 5183 rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT; 5216 5184 rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT; 5185 + rbd_opts->trim = RBD_TRIM_DEFAULT; 5217 5186 5218 5187 copts = ceph_parse_options(options, mon_addrs, 5219 5188 mon_addrs + mon_addrs_size - 1, ··· 5251 5216 5252 5217 static int rbd_add_acquire_lock(struct rbd_device *rbd_dev) 5253 5218 { 5219 + int ret; 5220 + 5254 5221 if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) { 5255 5222 rbd_warn(rbd_dev, "exclusive-lock feature is not enabled"); 5256 5223 return -EINVAL; ··· 5260 5223 5261 5224 /* FIXME: "rbd map --exclusive" should be in interruptible */ 5262 5225 down_read(&rbd_dev->lock_rwsem); 5263 - rbd_wait_state_locked(rbd_dev); 5226 + ret = rbd_wait_state_locked(rbd_dev, true); 5264 5227 up_read(&rbd_dev->lock_rwsem); 5265 
- if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { 5228 + if (ret) { 5266 5229 rbd_warn(rbd_dev, "failed to acquire exclusive lock"); 5267 5230 return -EROFS; 5268 5231 }
+7 -3
fs/ceph/inode.c
··· 669 669 CEPH_CAP_FILE_BUFFER| 670 670 CEPH_CAP_AUTH_EXCL| 671 671 CEPH_CAP_XATTR_EXCL)) { 672 - if (timespec_compare(ctime, &inode->i_ctime) > 0) { 672 + if (ci->i_version == 0 || 673 + timespec_compare(ctime, &inode->i_ctime) > 0) { 673 674 dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", 674 675 inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, 675 676 ctime->tv_sec, ctime->tv_nsec); 676 677 inode->i_ctime = *ctime; 677 678 } 678 - if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { 679 + if (ci->i_version == 0 || 680 + ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { 679 681 /* the MDS did a utimes() */ 680 682 dout("mtime %ld.%09ld -> %ld.%09ld " 681 683 "tw %d -> %d\n", ··· 797 795 new_issued = ~issued & le32_to_cpu(info->cap.caps); 798 796 799 797 /* update inode */ 800 - ci->i_version = le64_to_cpu(info->version); 801 798 inode->i_rdev = le32_to_cpu(info->rdev); 802 799 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; 803 800 ··· 868 867 ceph_forget_all_cached_acls(inode); 869 868 xattr_blob = NULL; 870 869 } 870 + 871 + /* finally update i_version */ 872 + ci->i_version = le64_to_cpu(info->version); 871 873 872 874 inode->i_mapping->a_ops = &ceph_aops; 873 875