Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nfsd-4.6-1' of git://linux-nfs.org/~bfields/linux

Pull more nfsd updates from Bruce Fields:
"Apologies for the previous request, which omitted the top 8 commits
from my for-next branch (including the SCSI layout commits). Thanks
to Trond for spotting my error!"

This actually includes the new layout types, so here's that part of
the pull message repeated:

"Support for a new pnfs layout type from Christoph Hellwig. The new
layout type is a variant of the block layout which uses SCSI features
to offer improved fencing and device identification.

Note this pull request also includes the client side of SCSI layout,
with Trond's permission"

* tag 'nfsd-4.6-1' of git://linux-nfs.org/~bfields/linux:
nfsd: use short read as well as i_size to set eof
nfsd: better layoutupdate bounds-checking
nfsd: block and scsi layout drivers need to depend on CONFIG_BLOCK
nfsd: add SCSI layout support
nfsd: move some blocklayout code
nfsd: add a new config option for the block layout driver
nfs/blocklayout: add SCSI layout support
nfs4.h: add SCSI layout definitions

+724 -93
+23
Documentation/filesystems/nfs/pnfs-scsi-server.txt
··· 1 + 2 + pNFS SCSI layout server user guide 3 + ================================== 4 + 5 + This document describes support for pNFS SCSI layouts in the Linux NFS server. 6 + With pNFS SCSI layouts, the NFS server acts as Metadata Server (MDS) for pNFS, 7 + which in addition to handling all the metadata access to the NFS export, 8 + also hands out layouts to the clients so that they can directly access the 9 + underlying SCSI LUNs that are shared with the client. 10 + 11 + To use pNFS SCSI layouts with the Linux NFS server, the exported file 12 + system needs to support the pNFS SCSI layouts (currently just XFS), and the 13 + file system must sit on a SCSI LUN that is accessible to the clients in 14 + addition to the MDS. As of now the file system needs to sit directly on the 15 + exported LUN, striping or concatenation of LUNs on the MDS and clients 16 + is not supported yet. 17 + 18 + On a server built with CONFIG_NFSD_SCSILAYOUT, the pNFS SCSI volume support is 19 + automatically enabled if the file system is exported using the "pnfs" 20 + option and the underlying SCSI device supports persistent reservations. 21 + On the client make sure the kernel has the CONFIG_PNFS_BLOCK option 22 + enabled, and the file system is mounted using the NFSv4.1 protocol 23 + version (mount -o vers=4.1).
+53 -8
fs/nfs/blocklayout/blocklayout.c
··· 446 446 kfree(bl); 447 447 } 448 448 449 - static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode, 450 - gfp_t gfp_flags) 449 + static struct pnfs_layout_hdr *__bl_alloc_layout_hdr(struct inode *inode, 450 + gfp_t gfp_flags, bool is_scsi_layout) 451 451 { 452 452 struct pnfs_block_layout *bl; 453 453 ··· 460 460 bl->bl_ext_ro = RB_ROOT; 461 461 spin_lock_init(&bl->bl_ext_lock); 462 462 463 + bl->bl_scsi_layout = is_scsi_layout; 463 464 return &bl->bl_layout; 465 + } 466 + 467 + static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode, 468 + gfp_t gfp_flags) 469 + { 470 + return __bl_alloc_layout_hdr(inode, gfp_flags, false); 471 + } 472 + 473 + static struct pnfs_layout_hdr *sl_alloc_layout_hdr(struct inode *inode, 474 + gfp_t gfp_flags) 475 + { 476 + return __bl_alloc_layout_hdr(inode, gfp_flags, true); 464 477 } 465 478 466 479 static void bl_free_lseg(struct pnfs_layout_segment *lseg) ··· 902 889 .sync = pnfs_generic_sync, 903 890 }; 904 891 892 + static struct pnfs_layoutdriver_type scsilayout_type = { 893 + .id = LAYOUT_SCSI, 894 + .name = "LAYOUT_SCSI", 895 + .owner = THIS_MODULE, 896 + .flags = PNFS_LAYOUTRET_ON_SETATTR | 897 + PNFS_READ_WHOLE_PAGE, 898 + .read_pagelist = bl_read_pagelist, 899 + .write_pagelist = bl_write_pagelist, 900 + .alloc_layout_hdr = sl_alloc_layout_hdr, 901 + .free_layout_hdr = bl_free_layout_hdr, 902 + .alloc_lseg = bl_alloc_lseg, 903 + .free_lseg = bl_free_lseg, 904 + .return_range = bl_return_range, 905 + .prepare_layoutcommit = bl_prepare_layoutcommit, 906 + .cleanup_layoutcommit = bl_cleanup_layoutcommit, 907 + .set_layoutdriver = bl_set_layoutdriver, 908 + .alloc_deviceid_node = bl_alloc_deviceid_node, 909 + .free_deviceid_node = bl_free_deviceid_node, 910 + .pg_read_ops = &bl_pg_read_ops, 911 + .pg_write_ops = &bl_pg_write_ops, 912 + .sync = pnfs_generic_sync, 913 + }; 914 + 915 + 905 916 static int __init nfs4blocklayout_init(void) 906 917 { 907 918 int ret; 908 919 909 920 dprintk("%s: NFSv4 
Block Layout Driver Registering...\n", __func__); 910 921 911 - ret = pnfs_register_layoutdriver(&blocklayout_type); 912 - if (ret) 913 - goto out; 914 922 ret = bl_init_pipefs(); 915 923 if (ret) 916 - goto out_unregister; 924 + goto out; 925 + 926 + ret = pnfs_register_layoutdriver(&blocklayout_type); 927 + if (ret) 928 + goto out_cleanup_pipe; 929 + 930 + ret = pnfs_register_layoutdriver(&scsilayout_type); 931 + if (ret) 932 + goto out_unregister_block; 917 933 return 0; 918 934 919 - out_unregister: 935 + out_unregister_block: 920 936 pnfs_unregister_layoutdriver(&blocklayout_type); 937 + out_cleanup_pipe: 938 + bl_cleanup_pipefs(); 921 939 out: 922 940 return ret; 923 941 } ··· 958 914 dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", 959 915 __func__); 960 916 961 - bl_cleanup_pipefs(); 917 + pnfs_unregister_layoutdriver(&scsilayout_type); 962 918 pnfs_unregister_layoutdriver(&blocklayout_type); 919 + bl_cleanup_pipefs(); 963 920 } 964 921 965 922 MODULE_ALIAS("nfs-layouttype4-3");
+12 -2
fs/nfs/blocklayout/blocklayout.h
··· 55 55 */ 56 56 #define PNFS_BLOCK_UUID_LEN 128 57 57 58 - 59 58 struct pnfs_block_volume { 60 59 enum pnfs_block_volume_type type; 61 60 union { ··· 81 82 u32 volumes_count; 82 83 u32 volumes[PNFS_BLOCK_MAX_DEVICES]; 83 84 } stripe; 85 + struct { 86 + enum scsi_code_set code_set; 87 + enum scsi_designator_type designator_type; 88 + int designator_len; 89 + u8 designator[256]; 90 + u64 pr_key; 91 + } scsi; 84 92 }; 85 93 }; 86 94 ··· 111 105 112 106 struct block_device *bdev; 113 107 u64 disk_offset; 108 + 109 + u64 pr_key; 110 + bool pr_registered; 114 111 115 112 bool (*map)(struct pnfs_block_dev *dev, u64 offset, 116 113 struct pnfs_block_dev_map *map); ··· 140 131 struct rb_root bl_ext_rw; 141 132 struct rb_root bl_ext_ro; 142 133 spinlock_t bl_ext_lock; /* Protects list manipulation */ 134 + bool bl_scsi_layout; 143 135 }; 144 136 145 137 static inline struct pnfs_block_layout * ··· 192 182 dev_t bl_resolve_deviceid(struct nfs_server *server, 193 183 struct pnfs_block_volume *b, gfp_t gfp_mask); 194 184 int __init bl_init_pipefs(void); 195 - void __exit bl_cleanup_pipefs(void); 185 + void bl_cleanup_pipefs(void); 196 186 197 187 #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
+143 -1
fs/nfs/blocklayout/dev.c
··· 1 1 /* 2 - * Copyright (c) 2014 Christoph Hellwig. 2 + * Copyright (c) 2014-2016 Christoph Hellwig. 3 3 */ 4 4 #include <linux/sunrpc/svc.h> 5 5 #include <linux/blkdev.h> 6 6 #include <linux/nfs4.h> 7 7 #include <linux/nfs_fs.h> 8 8 #include <linux/nfs_xdr.h> 9 + #include <linux/pr.h> 9 10 10 11 #include "blocklayout.h" 11 12 ··· 22 21 bl_free_device(&dev->children[i]); 23 22 kfree(dev->children); 24 23 } else { 24 + if (dev->pr_registered) { 25 + const struct pr_ops *ops = 26 + dev->bdev->bd_disk->fops->pr_ops; 27 + int error; 28 + 29 + error = ops->pr_register(dev->bdev, dev->pr_key, 0, 30 + false); 31 + if (error) 32 + pr_err("failed to unregister PR key.\n"); 33 + } 34 + 25 35 if (dev->bdev) 26 36 blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE); 27 37 } ··· 124 112 return -EIO; 125 113 for (i = 0; i < b->stripe.volumes_count; i++) 126 114 b->stripe.volumes[i] = be32_to_cpup(p++); 115 + break; 116 + case PNFS_BLOCK_VOLUME_SCSI: 117 + p = xdr_inline_decode(xdr, 4 + 4 + 4); 118 + if (!p) 119 + return -EIO; 120 + b->scsi.code_set = be32_to_cpup(p++); 121 + b->scsi.designator_type = be32_to_cpup(p++); 122 + b->scsi.designator_len = be32_to_cpup(p++); 123 + p = xdr_inline_decode(xdr, b->scsi.designator_len); 124 + if (!p) 125 + return -EIO; 126 + if (b->scsi.designator_len > 256) 127 + return -EIO; 128 + memcpy(&b->scsi.designator, p, b->scsi.designator_len); 129 + p = xdr_inline_decode(xdr, 8); 130 + if (!p) 131 + return -EIO; 132 + p = xdr_decode_hyper(p, &b->scsi.pr_key); 127 133 break; 128 134 default: 129 135 dprintk("unknown volume type!\n"); ··· 246 216 return 0; 247 217 } 248 218 219 + static bool 220 + bl_validate_designator(struct pnfs_block_volume *v) 221 + { 222 + switch (v->scsi.designator_type) { 223 + case PS_DESIGNATOR_EUI64: 224 + if (v->scsi.code_set != PS_CODE_SET_BINARY) 225 + return false; 226 + 227 + if (v->scsi.designator_len != 8 && 228 + v->scsi.designator_len != 10 && 229 + v->scsi.designator_len != 16) 230 + return false; 231 + 232 + 
return true; 233 + case PS_DESIGNATOR_NAA: 234 + if (v->scsi.code_set != PS_CODE_SET_BINARY) 235 + return false; 236 + 237 + if (v->scsi.designator_len != 8 && 238 + v->scsi.designator_len != 16) 239 + return false; 240 + 241 + return true; 242 + case PS_DESIGNATOR_T10: 243 + case PS_DESIGNATOR_NAME: 244 + pr_err("pNFS: unsupported designator " 245 + "(code set %d, type %d, len %d.\n", 246 + v->scsi.code_set, 247 + v->scsi.designator_type, 248 + v->scsi.designator_len); 249 + return false; 250 + default: 251 + pr_err("pNFS: invalid designator " 252 + "(code set %d, type %d, len %d.\n", 253 + v->scsi.code_set, 254 + v->scsi.designator_type, 255 + v->scsi.designator_len); 256 + return false; 257 + } 258 + } 259 + 260 + static int 261 + bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, 262 + struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 263 + { 264 + struct pnfs_block_volume *v = &volumes[idx]; 265 + const struct pr_ops *ops; 266 + const char *devname; 267 + int error; 268 + 269 + if (!bl_validate_designator(v)) 270 + return -EINVAL; 271 + 272 + switch (v->scsi.designator_len) { 273 + case 8: 274 + devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%8phN", 275 + v->scsi.designator); 276 + break; 277 + case 12: 278 + devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%12phN", 279 + v->scsi.designator); 280 + break; 281 + case 16: 282 + devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%16phN", 283 + v->scsi.designator); 284 + break; 285 + default: 286 + return -EINVAL; 287 + } 288 + 289 + d->bdev = blkdev_get_by_path(devname, FMODE_READ, NULL); 290 + if (IS_ERR(d->bdev)) { 291 + pr_warn("pNFS: failed to open device %s (%ld)\n", 292 + devname, PTR_ERR(d->bdev)); 293 + kfree(devname); 294 + return PTR_ERR(d->bdev); 295 + } 296 + 297 + kfree(devname); 298 + 299 + d->len = i_size_read(d->bdev->bd_inode); 300 + d->map = bl_map_simple; 301 + d->pr_key = v->scsi.pr_key; 302 + 303 + pr_info("pNFS: using block device %s (reservation 
key 0x%llx)\n", 304 + d->bdev->bd_disk->disk_name, d->pr_key); 305 + 306 + ops = d->bdev->bd_disk->fops->pr_ops; 307 + if (!ops) { 308 + pr_err("pNFS: block device %s does not support reservations.", 309 + d->bdev->bd_disk->disk_name); 310 + error = -EINVAL; 311 + goto out_blkdev_put; 312 + } 313 + 314 + error = ops->pr_register(d->bdev, 0, d->pr_key, true); 315 + if (error) { 316 + pr_err("pNFS: failed to register key for block device %s.", 317 + d->bdev->bd_disk->disk_name); 318 + goto out_blkdev_put; 319 + } 320 + 321 + d->pr_registered = true; 322 + return 0; 323 + 324 + out_blkdev_put: 325 + blkdev_put(d->bdev, FMODE_READ); 326 + return error; 327 + } 328 + 249 329 static int 250 330 bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d, 251 331 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) ··· 443 303 return bl_parse_concat(server, d, volumes, idx, gfp_mask); 444 304 case PNFS_BLOCK_VOLUME_STRIPE: 445 305 return bl_parse_stripe(server, d, volumes, idx, gfp_mask); 306 + case PNFS_BLOCK_VOLUME_SCSI: 307 + return bl_parse_scsi(server, d, volumes, idx, gfp_mask); 446 308 default: 447 309 dprintk("unsupported volume type: %d\n", volumes[idx].type); 448 310 return -EIO;
+30 -14
fs/nfs/blocklayout/extent_tree.c
··· 1 1 /* 2 - * Copyright (c) 2014 Christoph Hellwig. 2 + * Copyright (c) 2014-2016 Christoph Hellwig. 3 3 */ 4 4 5 5 #include <linux/vmalloc.h> ··· 462 462 return err; 463 463 } 464 464 465 - static size_t ext_tree_layoutupdate_size(size_t count) 465 + static size_t ext_tree_layoutupdate_size(struct pnfs_block_layout *bl, size_t count) 466 466 { 467 - return sizeof(__be32) /* number of entries */ + 468 - PNFS_BLOCK_EXTENT_SIZE * count; 467 + if (bl->bl_scsi_layout) 468 + return sizeof(__be32) + PNFS_SCSI_RANGE_SIZE * count; 469 + else 470 + return sizeof(__be32) + PNFS_BLOCK_EXTENT_SIZE * count; 469 471 } 470 472 471 473 static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg, ··· 485 483 } 486 484 } 487 485 486 + static __be32 *encode_block_extent(struct pnfs_block_extent *be, __be32 *p) 487 + { 488 + p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data, 489 + NFS4_DEVICEID4_SIZE); 490 + p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT); 491 + p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT); 492 + p = xdr_encode_hyper(p, 0LL); 493 + *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA); 494 + return p; 495 + } 496 + 497 + static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p) 498 + { 499 + p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT); 500 + return xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT); 501 + } 502 + 488 503 static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, 489 504 size_t buffer_size, size_t *count) 490 505 { ··· 515 496 continue; 516 497 517 498 (*count)++; 518 - if (ext_tree_layoutupdate_size(*count) > buffer_size) { 499 + if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) { 519 500 /* keep counting.. 
*/ 520 501 ret = -ENOSPC; 521 502 continue; 522 503 } 523 504 524 - p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data, 525 - NFS4_DEVICEID4_SIZE); 526 - p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT); 527 - p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT); 528 - p = xdr_encode_hyper(p, 0LL); 529 - *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA); 530 - 505 + if (bl->bl_scsi_layout) 506 + p = encode_scsi_range(be, p); 507 + else 508 + p = encode_block_extent(be, p); 531 509 be->be_tag = EXTENT_COMMITTING; 532 510 } 533 511 spin_unlock(&bl->bl_ext_lock); ··· 553 537 if (unlikely(ret)) { 554 538 ext_tree_free_commitdata(arg, buffer_size); 555 539 556 - buffer_size = ext_tree_layoutupdate_size(count); 540 + buffer_size = ext_tree_layoutupdate_size(bl, count); 557 541 count = 0; 558 542 559 543 arg->layoutupdate_pages = ··· 572 556 } 573 557 574 558 *start_p = cpu_to_be32(count); 575 - arg->layoutupdate_len = ext_tree_layoutupdate_size(count); 559 + arg->layoutupdate_len = ext_tree_layoutupdate_size(bl, count); 576 560 577 561 if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) { 578 562 void *p = start_p, *end = p + arg->layoutupdate_len;
+1 -1
fs/nfs/blocklayout/rpc_pipefs.c
··· 281 281 return ret; 282 282 } 283 283 284 - void __exit bl_cleanup_pipefs(void) 284 + void bl_cleanup_pipefs(void) 285 285 { 286 286 rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); 287 287 unregister_pernet_subsys(&nfs4blocklayout_net_ops);
+23 -5
fs/nfsd/Kconfig
··· 84 84 If unsure, say N. 85 85 86 86 config NFSD_PNFS 87 - bool "NFSv4.1 server support for Parallel NFS (pNFS)" 88 - depends on NFSD_V4 87 + bool 88 + 89 + config NFSD_BLOCKLAYOUT 90 + bool "NFSv4.1 server support for pNFS block layouts" 91 + depends on NFSD_V4 && BLOCK 92 + select NFSD_PNFS 89 93 help 90 - This option enables support for the parallel NFS features of the 91 - minor version 1 of the NFSv4 protocol (RFC5661) in the kernel's NFS 92 - server. 94 + This option enables support for exporting pNFS block layouts 95 + in the kernel's NFS server. The pNFS block layout enables NFS 96 + clients to directly perform I/O to block devices accessible to both 97 + the server and the clients. See RFC 5663 for more details. 98 + 99 + If unsure, say N. 100 + 101 + config NFSD_SCSILAYOUT 102 + bool "NFSv4.1 server support for pNFS SCSI layouts" 103 + depends on NFSD_V4 && BLOCK 104 + select NFSD_PNFS 105 + help 106 + This option enables support for exporting pNFS SCSI layouts 107 + in the kernel's NFS server. The pNFS SCSI layout enables NFS 108 + clients to directly perform I/O to SCSI devices accessible to both 109 + the server and the clients. See draft-ietf-nfsv4-scsi-layout for 110 + more details. 93 111 94 112 If unsure, say N. 95 113
+3 -1
fs/nfsd/Makefile
··· 17 17 nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o 18 18 nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ 19 19 nfs4acl.o nfs4callback.o nfs4recover.o 20 - nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o blocklayout.o blocklayoutxdr.o 20 + nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o 21 + nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o 22 + nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o
+257 -41
fs/nfsd/blocklayout.c
··· 1 1 /* 2 - * Copyright (c) 2014 Christoph Hellwig. 2 + * Copyright (c) 2014-2016 Christoph Hellwig. 3 3 */ 4 4 #include <linux/exportfs.h> 5 5 #include <linux/genhd.h> 6 6 #include <linux/slab.h> 7 + #include <linux/pr.h> 7 8 8 9 #include <linux/nfsd/debug.h> 10 + #include <scsi/scsi_proto.h> 11 + #include <scsi/scsi_common.h> 9 12 10 13 #include "blocklayoutxdr.h" 11 14 #include "pnfs.h" 12 15 13 16 #define NFSDDBG_FACILITY NFSDDBG_PNFS 14 17 15 - 16 - static int 17 - nfsd4_block_get_device_info_simple(struct super_block *sb, 18 - struct nfsd4_getdeviceinfo *gdp) 19 - { 20 - struct pnfs_block_deviceaddr *dev; 21 - struct pnfs_block_volume *b; 22 - 23 - dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) + 24 - sizeof(struct pnfs_block_volume), GFP_KERNEL); 25 - if (!dev) 26 - return -ENOMEM; 27 - gdp->gd_device = dev; 28 - 29 - dev->nr_volumes = 1; 30 - b = &dev->volumes[0]; 31 - 32 - b->type = PNFS_BLOCK_VOLUME_SIMPLE; 33 - b->simple.sig_len = PNFS_BLOCK_UUID_LEN; 34 - return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len, 35 - &b->simple.offset); 36 - } 37 - 38 - static __be32 39 - nfsd4_block_proc_getdeviceinfo(struct super_block *sb, 40 - struct nfsd4_getdeviceinfo *gdp) 41 - { 42 - if (sb->s_bdev != sb->s_bdev->bd_contains) 43 - return nfserr_inval; 44 - return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp)); 45 - } 46 18 47 19 static __be32 48 20 nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, ··· 113 141 } 114 142 115 143 static __be32 116 - nfsd4_block_proc_layoutcommit(struct inode *inode, 117 - struct nfsd4_layoutcommit *lcp) 144 + nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp, 145 + struct iomap *iomaps, int nr_iomaps) 118 146 { 119 147 loff_t new_size = lcp->lc_last_wr + 1; 120 148 struct iattr iattr = { .ia_valid = 0 }; 121 - struct iomap *iomaps; 122 - int nr_iomaps; 123 149 int error; 124 - 125 - nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout, 126 - 
lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits); 127 - if (nr_iomaps < 0) 128 - return nfserrno(nr_iomaps); 129 150 130 151 if (lcp->lc_mtime.tv_nsec == UTIME_NOW || 131 152 timespec_compare(&lcp->lc_mtime, &inode->i_mtime) < 0) ··· 135 170 nr_iomaps, &iattr); 136 171 kfree(iomaps); 137 172 return nfserrno(error); 173 + } 174 + 175 + #ifdef CONFIG_NFSD_BLOCKLAYOUT 176 + static int 177 + nfsd4_block_get_device_info_simple(struct super_block *sb, 178 + struct nfsd4_getdeviceinfo *gdp) 179 + { 180 + struct pnfs_block_deviceaddr *dev; 181 + struct pnfs_block_volume *b; 182 + 183 + dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) + 184 + sizeof(struct pnfs_block_volume), GFP_KERNEL); 185 + if (!dev) 186 + return -ENOMEM; 187 + gdp->gd_device = dev; 188 + 189 + dev->nr_volumes = 1; 190 + b = &dev->volumes[0]; 191 + 192 + b->type = PNFS_BLOCK_VOLUME_SIMPLE; 193 + b->simple.sig_len = PNFS_BLOCK_UUID_LEN; 194 + return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len, 195 + &b->simple.offset); 196 + } 197 + 198 + static __be32 199 + nfsd4_block_proc_getdeviceinfo(struct super_block *sb, 200 + struct nfs4_client *clp, 201 + struct nfsd4_getdeviceinfo *gdp) 202 + { 203 + if (sb->s_bdev != sb->s_bdev->bd_contains) 204 + return nfserr_inval; 205 + return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp)); 206 + } 207 + 208 + static __be32 209 + nfsd4_block_proc_layoutcommit(struct inode *inode, 210 + struct nfsd4_layoutcommit *lcp) 211 + { 212 + struct iomap *iomaps; 213 + int nr_iomaps; 214 + 215 + nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout, 216 + lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits); 217 + if (nr_iomaps < 0) 218 + return nfserrno(nr_iomaps); 219 + 220 + return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps); 138 221 } 139 222 140 223 const struct nfsd4_layout_ops bl_layout_ops = { ··· 203 190 .encode_layoutget = nfsd4_block_encode_layoutget, 204 191 .proc_layoutcommit = nfsd4_block_proc_layoutcommit, 205 192 }; 193 + 
#endif /* CONFIG_NFSD_BLOCKLAYOUT */ 194 + 195 + #ifdef CONFIG_NFSD_SCSILAYOUT 196 + static int nfsd4_scsi_identify_device(struct block_device *bdev, 197 + struct pnfs_block_volume *b) 198 + { 199 + struct request_queue *q = bdev->bd_disk->queue; 200 + struct request *rq; 201 + size_t bufflen = 252, len, id_len; 202 + u8 *buf, *d, type, assoc; 203 + int error; 204 + 205 + buf = kzalloc(bufflen, GFP_KERNEL); 206 + if (!buf) 207 + return -ENOMEM; 208 + 209 + rq = blk_get_request(q, READ, GFP_KERNEL); 210 + if (IS_ERR(rq)) { 211 + error = -ENOMEM; 212 + goto out_free_buf; 213 + } 214 + blk_rq_set_block_pc(rq); 215 + 216 + error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL); 217 + if (error) 218 + goto out_put_request; 219 + 220 + rq->cmd[0] = INQUIRY; 221 + rq->cmd[1] = 1; 222 + rq->cmd[2] = 0x83; 223 + rq->cmd[3] = bufflen >> 8; 224 + rq->cmd[4] = bufflen & 0xff; 225 + rq->cmd_len = COMMAND_SIZE(INQUIRY); 226 + 227 + error = blk_execute_rq(rq->q, NULL, rq, 1); 228 + if (error) { 229 + pr_err("pNFS: INQUIRY 0x83 failed with: %x\n", 230 + rq->errors); 231 + goto out_put_request; 232 + } 233 + 234 + len = (buf[2] << 8) + buf[3] + 4; 235 + if (len > bufflen) { 236 + pr_err("pNFS: INQUIRY 0x83 response invalid (len = %zd)\n", 237 + len); 238 + goto out_put_request; 239 + } 240 + 241 + d = buf + 4; 242 + for (d = buf + 4; d < buf + len; d += id_len + 4) { 243 + id_len = d[3]; 244 + type = d[1] & 0xf; 245 + assoc = (d[1] >> 4) & 0x3; 246 + 247 + /* 248 + * We only care about a EUI-64 and NAA designator types 249 + * with LU association. 250 + */ 251 + if (assoc != 0x00) 252 + continue; 253 + if (type != 0x02 && type != 0x03) 254 + continue; 255 + if (id_len != 8 && id_len != 12 && id_len != 16) 256 + continue; 257 + 258 + b->scsi.code_set = PS_CODE_SET_BINARY; 259 + b->scsi.designator_type = type == 0x02 ? 
260 + PS_DESIGNATOR_EUI64 : PS_DESIGNATOR_NAA; 261 + b->scsi.designator_len = id_len; 262 + memcpy(b->scsi.designator, d + 4, id_len); 263 + 264 + /* 265 + * If we found a 8 or 12 byte descriptor continue on to 266 + * see if a 16 byte one is available. If we find a 267 + * 16 byte descriptor we're done. 268 + */ 269 + if (id_len == 16) 270 + break; 271 + } 272 + 273 + out_put_request: 274 + blk_put_request(rq); 275 + out_free_buf: 276 + kfree(buf); 277 + return error; 278 + } 279 + 280 + #define NFSD_MDS_PR_KEY 0x0100000000000000 281 + 282 + /* 283 + * We use the client ID as a unique key for the reservations. 284 + * This allows us to easily fence a client when recalls fail. 285 + */ 286 + static u64 nfsd4_scsi_pr_key(struct nfs4_client *clp) 287 + { 288 + return ((u64)clp->cl_clientid.cl_boot << 32) | clp->cl_clientid.cl_id; 289 + } 290 + 291 + static int 292 + nfsd4_block_get_device_info_scsi(struct super_block *sb, 293 + struct nfs4_client *clp, 294 + struct nfsd4_getdeviceinfo *gdp) 295 + { 296 + struct pnfs_block_deviceaddr *dev; 297 + struct pnfs_block_volume *b; 298 + const struct pr_ops *ops; 299 + int error; 300 + 301 + dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) + 302 + sizeof(struct pnfs_block_volume), GFP_KERNEL); 303 + if (!dev) 304 + return -ENOMEM; 305 + gdp->gd_device = dev; 306 + 307 + dev->nr_volumes = 1; 308 + b = &dev->volumes[0]; 309 + 310 + b->type = PNFS_BLOCK_VOLUME_SCSI; 311 + b->scsi.pr_key = nfsd4_scsi_pr_key(clp); 312 + 313 + error = nfsd4_scsi_identify_device(sb->s_bdev, b); 314 + if (error) 315 + return error; 316 + 317 + ops = sb->s_bdev->bd_disk->fops->pr_ops; 318 + if (!ops) { 319 + pr_err("pNFS: device %s does not support PRs.\n", 320 + sb->s_id); 321 + return -EINVAL; 322 + } 323 + 324 + error = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true); 325 + if (error) { 326 + pr_err("pNFS: failed to register key for device %s.\n", 327 + sb->s_id); 328 + return -EINVAL; 329 + } 330 + 331 + error = 
ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY, 332 + PR_EXCLUSIVE_ACCESS_REG_ONLY, 0); 333 + if (error) { 334 + pr_err("pNFS: failed to reserve device %s.\n", 335 + sb->s_id); 336 + return -EINVAL; 337 + } 338 + 339 + return 0; 340 + } 341 + 342 + static __be32 343 + nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb, 344 + struct nfs4_client *clp, 345 + struct nfsd4_getdeviceinfo *gdp) 346 + { 347 + if (sb->s_bdev != sb->s_bdev->bd_contains) 348 + return nfserr_inval; 349 + return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp)); 350 + } 351 + static __be32 352 + nfsd4_scsi_proc_layoutcommit(struct inode *inode, 353 + struct nfsd4_layoutcommit *lcp) 354 + { 355 + struct iomap *iomaps; 356 + int nr_iomaps; 357 + 358 + nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout, 359 + lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits); 360 + if (nr_iomaps < 0) 361 + return nfserrno(nr_iomaps); 362 + 363 + return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps); 364 + } 365 + 366 + static void 367 + nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls) 368 + { 369 + struct nfs4_client *clp = ls->ls_stid.sc_client; 370 + struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev; 371 + 372 + bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY, 373 + nfsd4_scsi_pr_key(clp), 0, true); 374 + } 375 + 376 + const struct nfsd4_layout_ops scsi_layout_ops = { 377 + /* 378 + * Pretend that we send notification to the client. This is a blatant 379 + * lie to force recent Linux clients to cache our device IDs. 380 + * We rarely ever change the device ID, so the harm of leaking deviceids 381 + * for a while isn't too bad. Unfortunately RFC5661 is a complete mess 382 + * in this regard, but I filed errata 4119 for this a while ago, and 383 + * hopefully the Linux client will eventually start caching deviceids 384 + * without this again. 
385 + */ 386 + .notify_types = 387 + NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE, 388 + .proc_getdeviceinfo = nfsd4_scsi_proc_getdeviceinfo, 389 + .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo, 390 + .proc_layoutget = nfsd4_block_proc_layoutget, 391 + .encode_layoutget = nfsd4_block_encode_layoutget, 392 + .proc_layoutcommit = nfsd4_scsi_proc_layoutcommit, 393 + .fence_client = nfsd4_scsi_fence_client, 394 + }; 395 + #endif /* CONFIG_NFSD_SCSILAYOUT */
+72 -5
fs/nfsd/blocklayoutxdr.c
··· 1 1 /* 2 - * Copyright (c) 2014 Christoph Hellwig. 2 + * Copyright (c) 2014-2016 Christoph Hellwig. 3 3 */ 4 4 #include <linux/sunrpc/svc.h> 5 5 #include <linux/exportfs.h> ··· 53 53 p = xdr_encode_hyper(p, b->simple.offset); 54 54 p = xdr_encode_opaque(p, b->simple.sig, b->simple.sig_len); 55 55 break; 56 + case PNFS_BLOCK_VOLUME_SCSI: 57 + len = 4 + 4 + 4 + 4 + b->scsi.designator_len + 8; 58 + p = xdr_reserve_space(xdr, len); 59 + if (!p) 60 + return -ETOOSMALL; 61 + 62 + *p++ = cpu_to_be32(b->type); 63 + *p++ = cpu_to_be32(b->scsi.code_set); 64 + *p++ = cpu_to_be32(b->scsi.designator_type); 65 + p = xdr_encode_opaque(p, b->scsi.designator, b->scsi.designator_len); 66 + p = xdr_encode_hyper(p, b->scsi.pr_key); 67 + break; 56 68 default: 57 69 return -ENOTSUPP; 58 70 } ··· 105 93 u32 block_size) 106 94 { 107 95 struct iomap *iomaps; 108 - u32 nr_iomaps, expected, i; 96 + u32 nr_iomaps, i; 109 97 110 98 if (len < sizeof(u32)) { 111 99 dprintk("%s: extent array too small: %u\n", __func__, len); 112 100 return -EINVAL; 113 101 } 102 + len -= sizeof(u32); 103 + if (len % PNFS_BLOCK_EXTENT_SIZE) { 104 + dprintk("%s: extent array invalid: %u\n", __func__, len); 105 + return -EINVAL; 106 + } 114 107 115 108 nr_iomaps = be32_to_cpup(p++); 116 - expected = sizeof(__be32) + nr_iomaps * PNFS_BLOCK_EXTENT_SIZE; 117 - if (len != expected) { 109 + if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) { 118 110 dprintk("%s: extent array size mismatch: %u/%u\n", 119 - __func__, len, expected); 111 + __func__, len, nr_iomaps); 120 112 return -EINVAL; 121 113 } 122 114 ··· 163 147 164 148 iomaps[i].offset = bex.foff; 165 149 iomaps[i].length = bex.len; 150 + } 151 + 152 + *iomapp = iomaps; 153 + return nr_iomaps; 154 + fail: 155 + kfree(iomaps); 156 + return -EINVAL; 157 + } 158 + 159 + int 160 + nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, 161 + u32 block_size) 162 + { 163 + struct iomap *iomaps; 164 + u32 nr_iomaps, expected, i; 165 + 166 + if (len < 
sizeof(u32)) { 167 + dprintk("%s: extent array too small: %u\n", __func__, len); 168 + return -EINVAL; 169 + } 170 + 171 + nr_iomaps = be32_to_cpup(p++); 172 + expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE; 173 + if (len != expected) { 174 + dprintk("%s: extent array size mismatch: %u/%u\n", 175 + __func__, len, expected); 176 + return -EINVAL; 177 + } 178 + 179 + iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL); 180 + if (!iomaps) { 181 + dprintk("%s: failed to allocate extent array\n", __func__); 182 + return -ENOMEM; 183 + } 184 + 185 + for (i = 0; i < nr_iomaps; i++) { 186 + u64 val; 187 + 188 + p = xdr_decode_hyper(p, &val); 189 + if (val & (block_size - 1)) { 190 + dprintk("%s: unaligned offset 0x%llx\n", __func__, val); 191 + goto fail; 192 + } 193 + iomaps[i].offset = val; 194 + 195 + p = xdr_decode_hyper(p, &val); 196 + if (val & (block_size - 1)) { 197 + dprintk("%s: unaligned length 0x%llx\n", __func__, val); 198 + goto fail; 199 + } 200 + iomaps[i].length = val; 166 201 } 167 202 168 203 *iomapp = iomaps;
+14
fs/nfsd/blocklayoutxdr.h
··· 15 15 enum pnfs_block_extent_state es; 16 16 }; 17 17 18 + struct pnfs_block_range { 19 + u64 foff; 20 + u64 len; 21 + }; 22 + 18 23 /* 19 24 * Random upper cap for the uuid length to avoid unbounded allocation. 20 25 * Not actually limited by the protocol. ··· 34 29 u32 sig_len; 35 30 u8 sig[PNFS_BLOCK_UUID_LEN]; 36 31 } simple; 32 + struct { 33 + enum scsi_code_set code_set; 34 + enum scsi_designator_type designator_type; 35 + int designator_len; 36 + u8 designator[256]; 37 + u64 pr_key; 38 + } scsi; 37 39 }; 38 40 }; 39 41 ··· 54 42 __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr, 55 43 struct nfsd4_layoutget *lgp); 56 44 int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, 45 + u32 block_size); 46 + int nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, 57 47 u32 block_size); 58 48 59 49 #endif /* _NFSD_BLOCKLAYOUTXDR_H */
+4 -3
fs/nfsd/nfs3proc.c
··· 147 147 { 148 148 __be32 nfserr; 149 149 u32 max_blocksize = svc_max_payload(rqstp); 150 + unsigned long cnt = min(argp->count, max_blocksize); 150 151 151 152 dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n", 152 153 SVCFH_fmt(&argp->fh), ··· 158 157 * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) 159 158 * + 1 (xdr opaque byte count) = 26 160 159 */ 161 - resp->count = min(argp->count, max_blocksize); 160 + resp->count = cnt; 162 161 svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); 163 162 164 163 fh_copy(&resp->fh, &argp->fh); ··· 168 167 &resp->count); 169 168 if (nfserr == 0) { 170 169 struct inode *inode = d_inode(resp->fh.fh_dentry); 171 - 172 - resp->eof = (argp->offset + resp->count) >= inode->i_size; 170 + resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset, 171 + inode->i_size); 173 172 } 174 173 175 174 RETURN_STATUS(nfserr);
+28 -3
fs/nfsd/nfs4layouts.c
··· 1 1 /* 2 2 * Copyright (c) 2014 Christoph Hellwig. 3 3 */ 4 + #include <linux/blkdev.h> 4 5 #include <linux/kmod.h> 5 6 #include <linux/file.h> 6 7 #include <linux/jhash.h> ··· 27 26 static const struct lock_manager_operations nfsd4_layouts_lm_ops; 28 27 29 28 const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = { 29 + #ifdef CONFIG_NFSD_BLOCKLAYOUT 30 30 [LAYOUT_BLOCK_VOLUME] = &bl_layout_ops, 31 + #endif 32 + #ifdef CONFIG_NFSD_SCSILAYOUT 33 + [LAYOUT_SCSI] = &scsi_layout_ops, 34 + #endif 31 35 }; 32 36 33 37 /* pNFS device ID to export fsid mapping */ ··· 127 121 if (!(exp->ex_flags & NFSEXP_PNFS)) 128 122 return; 129 123 124 + /* 125 + * Check if the file system supports exporting a block-like layout. 126 + * If the block device supports reservations prefer the SCSI layout, 127 + * otherwise advertise the block layout. 128 + */ 129 + #ifdef CONFIG_NFSD_BLOCKLAYOUT 130 130 if (sb->s_export_op->get_uuid && 131 131 sb->s_export_op->map_blocks && 132 132 sb->s_export_op->commit_blocks) 133 133 exp->ex_layout_type = LAYOUT_BLOCK_VOLUME; 134 + #endif 135 + #ifdef CONFIG_NFSD_SCSILAYOUT 136 + /* overwrite block layout selection if needed */ 137 + if (sb->s_export_op->map_blocks && 138 + sb->s_export_op->commit_blocks && 139 + sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops) 140 + exp->ex_layout_type = LAYOUT_SCSI; 141 + #endif 134 142 } 135 143 136 144 static void ··· 610 590 611 591 rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str)); 612 592 613 - trace_layout_recall_fail(&ls->ls_stid.sc_stateid); 614 - 615 593 printk(KERN_WARNING 616 594 "nfsd: client %s failed to respond to layout recall. " 617 595 " Fencing..\n", addr_str); ··· 644 626 container_of(cb, struct nfs4_layout_stateid, ls_recall); 645 627 struct nfsd_net *nn; 646 628 ktime_t now, cutoff; 629 + const struct nfsd4_layout_ops *ops; 647 630 LIST_HEAD(reaplist); 648 631 649 632 ··· 680 661 /* 681 662 * Unknown error or non-responding client, we'll need to fence. 
682 663 */ 683 - nfsd4_cb_layout_fail(ls); 664 + trace_layout_recall_fail(&ls->ls_stid.sc_stateid); 665 + 666 + ops = nfsd4_layout_ops[ls->ls_layout_type]; 667 + if (ops->fence_client) 668 + ops->fence_client(ls); 669 + else 670 + nfsd4_cb_layout_fail(ls); 684 671 return -1; 685 672 } 686 673 }
+4 -2
fs/nfsd/nfs4proc.c
··· 1268 1268 goto out; 1269 1269 1270 1270 nfserr = nfs_ok; 1271 - if (gdp->gd_maxcount != 0) 1272 - nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp); 1271 + if (gdp->gd_maxcount != 0) { 1272 + nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, 1273 + cstate->session->se_client, gdp); 1274 + } 1273 1275 1274 1276 gdp->gd_notify_types &= ops->notify_types; 1275 1277 out:
+7 -4
fs/nfsd/nfs4xdr.c
··· 3365 3365 struct xdr_stream *xdr = &resp->xdr; 3366 3366 struct xdr_buf *buf = xdr->buf; 3367 3367 u32 eof; 3368 + long len; 3368 3369 int space_left; 3369 3370 __be32 nfserr; 3370 3371 __be32 *p = xdr->p - 2; ··· 3374 3373 if (xdr->end - xdr->p < 1) 3375 3374 return nfserr_resource; 3376 3375 3376 + len = maxcount; 3377 3377 nfserr = nfsd_splice_read(read->rd_rqstp, file, 3378 3378 read->rd_offset, &maxcount); 3379 3379 if (nfserr) { ··· 3387 3385 return nfserr; 3388 3386 } 3389 3387 3390 - eof = (read->rd_offset + maxcount >= 3391 - d_inode(read->rd_fhp->fh_dentry)->i_size); 3388 + eof = nfsd_eof_on_read(len, maxcount, read->rd_offset, 3389 + d_inode(read->rd_fhp->fh_dentry)->i_size); 3392 3390 3393 3391 *(p++) = htonl(eof); 3394 3392 *(p++) = htonl(maxcount); ··· 3458 3456 } 3459 3457 read->rd_vlen = v; 3460 3458 3459 + len = maxcount; 3461 3460 nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec, 3462 3461 read->rd_vlen, &maxcount); 3463 3462 if (nfserr) 3464 3463 return nfserr; 3465 3464 xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3)); 3466 3465 3467 - eof = (read->rd_offset + maxcount >= 3468 - d_inode(read->rd_fhp->fh_dentry)->i_size); 3466 + eof = nfsd_eof_on_read(len, maxcount, read->rd_offset, 3467 + d_inode(read->rd_fhp->fh_dentry)->i_size); 3469 3468 3470 3469 tmp = htonl(eof); 3471 3470 write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
+8
fs/nfsd/pnfs.h
··· 21 21 u32 notify_types; 22 22 23 23 __be32 (*proc_getdeviceinfo)(struct super_block *sb, 24 + struct nfs4_client *clp, 24 25 struct nfsd4_getdeviceinfo *gdevp); 25 26 __be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr, 26 27 struct nfsd4_getdeviceinfo *gdevp); ··· 33 32 34 33 __be32 (*proc_layoutcommit)(struct inode *inode, 35 34 struct nfsd4_layoutcommit *lcp); 35 + 36 + void (*fence_client)(struct nfs4_layout_stateid *ls); 36 37 }; 37 38 38 39 extern const struct nfsd4_layout_ops *nfsd4_layout_ops[]; 40 + #ifdef CONFIG_NFSD_BLOCKLAYOUT 39 41 extern const struct nfsd4_layout_ops bl_layout_ops; 42 + #endif 43 + #ifdef CONFIG_NFSD_SCSILAYOUT 44 + extern const struct nfsd4_layout_ops scsi_layout_ops; 45 + #endif 40 46 41 47 __be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp, 42 48 struct nfsd4_compound_state *cstate, stateid_t *stateid,
+19
fs/nfsd/vfs.h
··· 139 139 || createmode == NFS4_CREATE_EXCLUSIVE4_1; 140 140 } 141 141 142 + static inline bool nfsd_eof_on_read(long requested, long read, 143 + loff_t offset, loff_t size) 144 + { 145 + /* We assume a short read means eof: */ 146 + if (requested > read) 147 + return true; 148 + /* 149 + * A non-short read might also reach end of file. The spec 150 + * still requires us to set eof in that case. 151 + * 152 + * Further operations may have modified the file size since 153 + * the read, so the following check is not atomic with the read. 154 + * We've only seen that cause a problem for a client in the case 155 + * where the read returned a count of 0 without setting eof. 156 + * That case was fixed by the addition of the above check. 157 + */ 158 + return (offset + read >= size); 159 + } 160 + 142 161 #endif /* LINUX_NFSD_VFS_H */
+2 -1
fs/xfs/Makefile
··· 121 121 xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o 122 122 xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o 123 123 xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o 124 - xfs-$(CONFIG_NFSD_PNFS) += xfs_pnfs.o 124 + xfs-$(CONFIG_NFSD_BLOCKLAYOUT) += xfs_pnfs.o 125 + xfs-$(CONFIG_NFSD_SCSILAYOUT) += xfs_pnfs.o
+1 -1
fs/xfs/xfs_export.c
··· 246 246 .fh_to_parent = xfs_fs_fh_to_parent, 247 247 .get_parent = xfs_fs_get_parent, 248 248 .commit_metadata = xfs_fs_nfs_commit_metadata, 249 - #ifdef CONFIG_NFSD_PNFS 249 + #ifdef CONFIG_NFSD_BLOCKLAYOUT 250 250 .get_uuid = xfs_fs_get_uuid, 251 251 .map_blocks = xfs_fs_map_blocks, 252 252 .commit_blocks = xfs_fs_commit_blocks,
+1 -1
fs/xfs/xfs_pnfs.h
··· 1 1 #ifndef _XFS_PNFS_H 2 2 #define _XFS_PNFS_H 1 3 3 4 - #ifdef CONFIG_NFSD_PNFS 4 + #if defined(CONFIG_NFSD_BLOCKLAYOUT) || defined(CONFIG_NFSD_SCSILAYOUT) 5 5 int xfs_fs_get_uuid(struct super_block *sb, u8 *buf, u32 *len, u64 *offset); 6 6 int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length, 7 7 struct iomap *iomap, bool write, u32 *device_generation);
+19
include/linux/nfs4.h
··· 529 529 LAYOUT_OSD2_OBJECTS = 2, 530 530 LAYOUT_BLOCK_VOLUME = 3, 531 531 LAYOUT_FLEX_FILES = 4, 532 + LAYOUT_SCSI = 5, 532 533 LAYOUT_TYPE_MAX 533 534 }; 534 535 ··· 556 555 PNFS_BLOCK_VOLUME_SLICE = 1, 557 556 PNFS_BLOCK_VOLUME_CONCAT = 2, 558 557 PNFS_BLOCK_VOLUME_STRIPE = 3, 558 + PNFS_BLOCK_VOLUME_SCSI = 4, 559 559 }; 560 560 561 561 enum pnfs_block_extent_state { ··· 569 567 /* on the wire size of a block layout extent */ 570 568 #define PNFS_BLOCK_EXTENT_SIZE \ 571 569 (7 * sizeof(__be32) + NFS4_DEVICEID4_SIZE) 570 + 571 + /* on the wire size of a scsi commit range */ 572 + #define PNFS_SCSI_RANGE_SIZE \ 573 + (4 * sizeof(__be32)) 574 + 575 + enum scsi_code_set { 576 + PS_CODE_SET_BINARY = 1, 577 + PS_CODE_SET_ASCII = 2, 578 + PS_CODE_SET_UTF8 = 3 579 + }; 580 + 581 + enum scsi_designator_type { 582 + PS_DESIGNATOR_T10 = 1, 583 + PS_DESIGNATOR_EUI64 = 2, 584 + PS_DESIGNATOR_NAA = 3, 585 + PS_DESIGNATOR_NAME = 8 586 + }; 572 587 573 588 #define NFL4_UFLG_MASK 0x0000003F 574 589 #define NFL4_UFLG_DENSE 0x00000001