Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

block: add an API for Persistent Reservations

This commit adds a driver API and ioctls for controlling Persistent
Reservations generically at the block layer. Persistent
Reservations are supported by SCSI and NVMe and allow controlling who gets
access to a device in a shared storage setup.

Note that we add a pr_ops structure to struct block_device_operations
instead of adding the members directly to avoid bloating all instances
of devices that will never support Persistent Reservations.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>

authored by

Christoph Hellwig and committed by
Jens Axboe
bbd3e064 d8e4bb81

+290
+119
Documentation/block/pr.txt
··· 1 + 2 + Block layer support for Persistent Reservations 3 + =============================================== 4 + 5 + The Linux kernel supports a user space interface for simplified 6 + Persistent Reservations which map to block devices that support 7 + these (like SCSI). Persistent Reservations allow restricting 8 + access to block devices to specific initiators in a shared storage 9 + setup. 10 + 11 + This document gives a general overview of the supported ioctl commands. 12 + For a more detailed reference please refer to the SCSI Primary 13 + Commands standard, specifically the section on Reservations and the 14 + "PERSISTENT RESERVE IN" and "PERSISTENT RESERVE OUT" commands. 15 + 16 + All implementations are expected to ensure the reservations survive 17 + a power loss and cover all connections in a multi path environment. 18 + These behaviors are optional in SPC but will be automatically applied 19 + by Linux. 20 + 21 + 22 + The following types of reservations are supported: 23 + -------------------------------------------------- 24 + 25 + - PR_WRITE_EXCLUSIVE 26 + 27 + Only the initiator that owns the reservation can write to the 28 + device. Any initiator can read from the device. 29 + 30 + - PR_EXCLUSIVE_ACCESS 31 + 32 + Only the initiator that owns the reservation can access the 33 + device. 34 + 35 + - PR_WRITE_EXCLUSIVE_REG_ONLY 36 + 37 + Only initiators with a registered key can write to the device, 38 + Any initiator can read from the device. 39 + 40 + - PR_EXCLUSIVE_ACCESS_REG_ONLY 41 + 42 + Only initiators with a registered key can access the device. 43 + 44 + - PR_WRITE_EXCLUSIVE_ALL_REGS 45 + 46 + Only initiators with a registered key can write to the device, 47 + Any initiator can read from the device. 48 + All initiators with a registered key are considered reservation 49 + holders. 50 + Please reference the SPC spec on the meaning of a reservation 51 + holder if you want to use this type. 
52 + 53 + - PR_EXCLUSIVE_ACCESS_ALL_REGS 54 + 55 + Only initiators with a registered key can access the device. 56 + All initiators with a registered key are considered reservation 57 + holders. 58 + Please reference the SPC spec on the meaning of a reservation 59 + holder if you want to use this type. 60 + 61 + 62 + The following ioctl are supported: 63 + ---------------------------------- 64 + 65 + 1. IOC_PR_REGISTER 66 + 67 + This ioctl command registers a new reservation if the new_key argument 68 + is non-null. If no existing reservation exists old_key must be zero, 69 + if an existing reservation should be replaced old_key must contain 70 + the old reservation key. 71 + 72 + If the new_key argument is 0 it unregisters the existing reservation passed 73 + in old_key. 74 + 75 + 76 + 2. IOC_PR_RESERVE 77 + 78 + This ioctl command reserves the device and thus restricts access for other 79 + devices based on the type argument. The key argument must be the existing 80 + reservation key for the device as acquired by the IOC_PR_REGISTER, 81 + IOC_PR_REGISTER_IGNORE, IOC_PR_PREEMPT or IOC_PR_PREEMPT_ABORT commands. 82 + 83 + 84 + 3. IOC_PR_RELEASE 85 + 86 + This ioctl command releases the reservation specified by key and flags 87 + and thus removes any access restriction implied by it. 88 + 89 + 90 + 4. IOC_PR_PREEMPT 91 + 92 + This ioctl command releases the existing reservation referred to by 93 + old_key and replaces it with a new reservation of type for the 94 + reservation key new_key. 95 + 96 + 97 + 5. IOC_PR_PREEMPT_ABORT 98 + 99 + This ioctl command works like IOC_PR_PREEMPT except that it also aborts 100 + any outstanding command sent over a connection identified by old_key. 101 + 102 + 6. IOC_PR_CLEAR 103 + 104 + This ioctl command unregisters both key and any other reservation key 105 + registered with the device and drops any existing reservation. 106 + 107 + 108 + Flags 109 + ----- 110 + 111 + All the ioctls have a flag field. 
Currently only one flag is supported: 112 + 113 + - PR_FL_IGNORE_KEY 114 + 115 + Ignore the existing reservation key. This is commonly supported for 116 + IOC_PR_REGISTER, and some implementations may support the flag for 117 + IOC_PR_RESERVE. 118 + 119 + For all unknown flags the kernel will return -EOPNOTSUPP.
+103
block/ioctl.c
··· 7 7 #include <linux/backing-dev.h> 8 8 #include <linux/fs.h> 9 9 #include <linux/blktrace_api.h> 10 + #include <linux/pr.h> 10 11 #include <asm/uaccess.h> 11 12 12 13 static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg) ··· 296 295 */ 297 296 EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); 298 297 298 + static int blkdev_pr_register(struct block_device *bdev, 299 + struct pr_registration __user *arg) 300 + { 301 + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 302 + struct pr_registration reg; 303 + 304 + if (!capable(CAP_SYS_ADMIN)) 305 + return -EPERM; 306 + if (!ops || !ops->pr_register) 307 + return -EOPNOTSUPP; 308 + if (copy_from_user(&reg, arg, sizeof(reg))) 309 + return -EFAULT; 310 + 311 + if (reg.flags & ~PR_FL_IGNORE_KEY) 312 + return -EOPNOTSUPP; 313 + return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); 314 + } 315 + 316 + static int blkdev_pr_reserve(struct block_device *bdev, 317 + struct pr_reservation __user *arg) 318 + { 319 + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 320 + struct pr_reservation rsv; 321 + 322 + if (!capable(CAP_SYS_ADMIN)) 323 + return -EPERM; 324 + if (!ops || !ops->pr_reserve) 325 + return -EOPNOTSUPP; 326 + if (copy_from_user(&rsv, arg, sizeof(rsv))) 327 + return -EFAULT; 328 + 329 + if (rsv.flags & ~PR_FL_IGNORE_KEY) 330 + return -EOPNOTSUPP; 331 + return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags); 332 + } 333 + 334 + static int blkdev_pr_release(struct block_device *bdev, 335 + struct pr_reservation __user *arg) 336 + { 337 + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 338 + struct pr_reservation rsv; 339 + 340 + if (!capable(CAP_SYS_ADMIN)) 341 + return -EPERM; 342 + if (!ops || !ops->pr_release) 343 + return -EOPNOTSUPP; 344 + if (copy_from_user(&rsv, arg, sizeof(rsv))) 345 + return -EFAULT; 346 + 347 + if (rsv.flags) 348 + return -EOPNOTSUPP; 349 + return ops->pr_release(bdev, rsv.key, rsv.type); 350 + } 351 + 352 + static int 
blkdev_pr_preempt(struct block_device *bdev, 353 + struct pr_preempt __user *arg, bool abort) 354 + { 355 + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 356 + struct pr_preempt p; 357 + 358 + if (!capable(CAP_SYS_ADMIN)) 359 + return -EPERM; 360 + if (!ops || !ops->pr_preempt) 361 + return -EOPNOTSUPP; 362 + if (copy_from_user(&p, arg, sizeof(p))) 363 + return -EFAULT; 364 + 365 + if (p.flags) 366 + return -EOPNOTSUPP; 367 + return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort); 368 + } 369 + 370 + static int blkdev_pr_clear(struct block_device *bdev, 371 + struct pr_clear __user *arg) 372 + { 373 + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 374 + struct pr_clear c; 375 + 376 + if (!capable(CAP_SYS_ADMIN)) 377 + return -EPERM; 378 + if (!ops || !ops->pr_clear) 379 + return -EOPNOTSUPP; 380 + if (copy_from_user(&c, arg, sizeof(c))) 381 + return -EFAULT; 382 + 383 + if (c.flags) 384 + return -EOPNOTSUPP; 385 + return ops->pr_clear(bdev, c.key); 386 + } 387 + 299 388 /* 300 389 * Is it an unrecognized ioctl? The correct returns are either 301 390 * ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a ··· 568 477 case BLKTRACESETUP: 569 478 case BLKTRACETEARDOWN: 570 479 return blk_trace_ioctl(bdev, cmd, argp); 480 + case IOC_PR_REGISTER: 481 + return blkdev_pr_register(bdev, argp); 482 + case IOC_PR_RESERVE: 483 + return blkdev_pr_reserve(bdev, argp); 484 + case IOC_PR_RELEASE: 485 + return blkdev_pr_release(bdev, argp); 486 + case IOC_PR_PREEMPT: 487 + return blkdev_pr_preempt(bdev, argp, false); 488 + case IOC_PR_PREEMPT_ABORT: 489 + return blkdev_pr_preempt(bdev, argp, true); 490 + case IOC_PR_CLEAR: 491 + return blkdev_pr_clear(bdev, argp); 571 492 default: 572 493 return __blkdev_driver_ioctl(bdev, mode, cmd, arg); 573 494 }
+2
include/linux/blkdev.h
··· 35 35 struct bsg_job; 36 36 struct blkcg_gq; 37 37 struct blk_flush_queue; 38 + struct pr_ops; 38 39 39 40 #define BLKDEV_MIN_RQ 4 40 41 #define BLKDEV_MAX_RQ 128 /* Default maximum */ ··· 1634 1633 /* this callback is with swap_lock and sometimes page table lock held */ 1635 1634 void (*swap_slot_free_notify) (struct block_device *, unsigned long); 1636 1635 struct module *owner; 1636 + const struct pr_ops *pr_ops; 1637 1637 }; 1638 1638 1639 1639 extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
+18
include/linux/pr.h
··· 1 + #ifndef LINUX_PR_H 2 + #define LINUX_PR_H 3 + 4 + #include <uapi/linux/pr.h> 5 + 6 + struct pr_ops { 7 + int (*pr_register)(struct block_device *bdev, u64 old_key, u64 new_key, 8 + u32 flags); 9 + int (*pr_reserve)(struct block_device *bdev, u64 key, 10 + enum pr_type type, u32 flags); 11 + int (*pr_release)(struct block_device *bdev, u64 key, 12 + enum pr_type type); 13 + int (*pr_preempt)(struct block_device *bdev, u64 old_key, u64 new_key, 14 + enum pr_type type, bool abort); 15 + int (*pr_clear)(struct block_device *bdev, u64 key); 16 + }; 17 + 18 + #endif /* LINUX_PR_H */
+48
include/uapi/linux/pr.h
··· 1 + #ifndef _UAPI_PR_H 2 + #define _UAPI_PR_H 3 + 4 + enum pr_type { 5 + PR_WRITE_EXCLUSIVE = 1, 6 + PR_EXCLUSIVE_ACCESS = 2, 7 + PR_WRITE_EXCLUSIVE_REG_ONLY = 3, 8 + PR_EXCLUSIVE_ACCESS_REG_ONLY = 4, 9 + PR_WRITE_EXCLUSIVE_ALL_REGS = 5, 10 + PR_EXCLUSIVE_ACCESS_ALL_REGS = 6, 11 + }; 12 + 13 + struct pr_reservation { 14 + __u64 key; 15 + __u32 type; 16 + __u32 flags; 17 + }; 18 + 19 + struct pr_registration { 20 + __u64 old_key; 21 + __u64 new_key; 22 + __u32 flags; 23 + __u32 __pad; 24 + }; 25 + 26 + struct pr_preempt { 27 + __u64 old_key; 28 + __u64 new_key; 29 + __u32 type; 30 + __u32 flags; 31 + }; 32 + 33 + struct pr_clear { 34 + __u64 key; 35 + __u32 flags; 36 + __u32 __pad; 37 + }; 38 + 39 + #define PR_FL_IGNORE_KEY (1 << 0) /* ignore existing key */ 40 + 41 + #define IOC_PR_REGISTER _IOW('p', 200, struct pr_registration) 42 + #define IOC_PR_RESERVE _IOW('p', 201, struct pr_reservation) 43 + #define IOC_PR_RELEASE _IOW('p', 202, struct pr_reservation) 44 + #define IOC_PR_PREEMPT _IOW('p', 203, struct pr_preempt) 45 + #define IOC_PR_PREEMPT_ABORT _IOW('p', 204, struct pr_preempt) 46 + #define IOC_PR_CLEAR _IOW('p', 205, struct pr_clear) 47 + 48 + #endif /* _UAPI_PR_H */