Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

pnfs/blocklayout: in-kernel GETDEVICEINFO XDR parsing

This patch moves parsing of the GETDEVICEINFO XDR to kernel space, as well
as the management of complex devices. The reason for that is we might have
multiple outstanding complex devices after a NOTIFY_DEVICEID4_CHANGE, which
device mapper or md can't handle as they claim devices exclusively.

But as it turns out simple striping / concatenation is fairly trivial to
implement anyway, so we make our life simpler by reducing the reliance
on blkmapd. For now we still use blkmapd by feeding it synthetic SIMPLE
device XDR to translate device signatures to device numbers, but in the
long run I have plans to eliminate it entirely.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>

authored by

Christoph Hellwig and committed by
Trond Myklebust
5c83746a 871760ce

+529 -147
+1 -1
fs/nfs/blocklayout/Makefile
··· 3 3 # 4 4 obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o 5 5 6 - blocklayoutdriver-y += blocklayout.o extent_tree.o rpc_pipefs.o 6 + blocklayoutdriver-y += blocklayout.o dev.o extent_tree.o rpc_pipefs.o
+60 -32
fs/nfs/blocklayout/blocklayout.c
··· 114 114 return NULL; 115 115 } 116 116 117 - static struct bio *bl_alloc_init_bio(int npg, sector_t isect, 118 - struct pnfs_block_extent *be, 119 - void (*end_io)(struct bio *, int err), 120 - struct parallel_io *par) 117 + static struct bio * 118 + bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector, 119 + void (*end_io)(struct bio *, int err), struct parallel_io *par) 121 120 { 122 - struct pnfs_block_dev *dev = 123 - container_of(be->be_device, struct pnfs_block_dev, d_node); 124 121 struct bio *bio; 125 122 126 123 npg = min(npg, BIO_MAX_PAGES); ··· 128 131 } 129 132 130 133 if (bio) { 131 - bio->bi_iter.bi_sector = isect - be->be_f_offset + 132 - be->be_v_offset; 133 - bio->bi_bdev = dev->d_bdev; 134 + bio->bi_iter.bi_sector = disk_sector; 135 + bio->bi_bdev = bdev; 134 136 bio->bi_end_io = end_io; 135 137 bio->bi_private = par; 136 138 } 137 139 return bio; 138 140 } 139 141 140 - static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw, 141 - sector_t isect, struct page *page, 142 - struct pnfs_block_extent *be, 143 - void (*end_io)(struct bio *, int err), 144 - struct parallel_io *par, 145 - unsigned int offset, int len) 142 + static struct bio * 143 + do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, 144 + struct page *page, struct pnfs_block_dev_map *map, 145 + struct pnfs_block_extent *be, 146 + void (*end_io)(struct bio *, int err), 147 + struct parallel_io *par, unsigned int offset, int *len) 146 148 { 147 - isect = isect + (offset >> SECTOR_SHIFT); 149 + struct pnfs_block_dev *dev = 150 + container_of(be->be_device, struct pnfs_block_dev, node); 151 + u64 disk_addr, end; 152 + 148 153 dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, 149 - npg, rw, (unsigned long long)isect, offset, len); 154 + npg, rw, (unsigned long long)isect, offset, *len); 155 + 156 + /* translate to device offset */ 157 + isect += be->be_v_offset; 158 + isect -= be->be_f_offset; 159 + 160 + /* 
translate to physical disk offset */ 161 + disk_addr = (u64)isect << SECTOR_SHIFT; 162 + if (disk_addr < map->start || disk_addr >= map->start + map->len) { 163 + if (!dev->map(dev, disk_addr, map)) 164 + return ERR_PTR(-EIO); 165 + bio = bl_submit_bio(rw, bio); 166 + } 167 + disk_addr += map->disk_offset; 168 + disk_addr -= map->start; 169 + 170 + /* limit length to what the device mapping allows */ 171 + end = disk_addr + *len; 172 + if (end >= map->start + map->len) 173 + *len = map->start + map->len - disk_addr; 174 + 150 175 retry: 151 176 if (!bio) { 152 - bio = bl_alloc_init_bio(npg, isect, be, end_io, par); 177 + bio = bl_alloc_init_bio(npg, map->bdev, 178 + disk_addr >> SECTOR_SHIFT, end_io, par); 153 179 if (!bio) 154 180 return ERR_PTR(-ENOMEM); 155 181 } 156 - if (bio_add_page(bio, page, len, offset) < len) { 182 + if (bio_add_page(bio, page, *len, offset) < *len) { 157 183 bio = bl_submit_bio(rw, bio); 158 184 goto retry; 159 185 } ··· 223 203 bl_read_pagelist(struct nfs_pgio_header *header) 224 204 { 225 205 struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg); 206 + struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 }; 226 207 struct bio *bio = NULL; 227 208 struct pnfs_block_extent be; 228 209 sector_t isect, extent_length = 0; ··· 269 248 pg_len = PAGE_CACHE_SIZE - pg_offset; 270 249 else 271 250 pg_len = bytes_left; 272 - 273 - f_offset += pg_len; 274 - bytes_left -= pg_len; 275 - isect += (pg_offset >> SECTOR_SHIFT); 276 - extent_length -= (pg_offset >> SECTOR_SHIFT); 277 251 } else { 278 252 BUG_ON(pg_offset != 0); 279 253 pg_len = PAGE_CACHE_SIZE; 280 254 } 255 + 256 + isect += (pg_offset >> SECTOR_SHIFT); 257 + extent_length -= (pg_offset >> SECTOR_SHIFT); 281 258 282 259 if (is_hole(&be)) { 283 260 bio = bl_submit_bio(READ, bio); 284 261 /* Fill hole w/ zeroes w/o accessing device */ 285 262 dprintk("%s Zeroing page for hole\n", __func__); 286 263 zero_user_segment(pages[i], pg_offset, pg_len); 264 + 265 + /* invalidate map */ 
266 + map.start = NFS4_MAX_UINT64; 287 267 } else { 288 268 bio = do_add_page_to_bio(bio, 289 269 header->page_array.npages - i, 290 270 READ, 291 - isect, pages[i], &be, 271 + isect, pages[i], &map, &be, 292 272 bl_end_io_read, par, 293 - pg_offset, pg_len); 273 + pg_offset, &pg_len); 294 274 if (IS_ERR(bio)) { 295 275 header->pnfs_error = PTR_ERR(bio); 296 276 bio = NULL; ··· 300 278 } 301 279 isect += (pg_len >> SECTOR_SHIFT); 302 280 extent_length -= (pg_len >> SECTOR_SHIFT); 281 + f_offset += pg_len; 282 + bytes_left -= pg_len; 303 283 } 304 284 if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { 305 285 header->res.eof = 1; ··· 370 346 bl_write_pagelist(struct nfs_pgio_header *header, int sync) 371 347 { 372 348 struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg); 349 + struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 }; 373 350 struct bio *bio = NULL; 374 351 struct pnfs_block_extent be; 375 352 sector_t isect, extent_length = 0; ··· 379 354 size_t count = header->args.count; 380 355 struct page **pages = header->args.pages; 381 356 int pg_index = pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; 357 + unsigned int pg_len; 382 358 struct blk_plug plug; 383 359 int i; 384 360 ··· 413 387 extent_length = be.be_length - (isect - be.be_f_offset); 414 388 } 415 389 390 + pg_len = PAGE_CACHE_SIZE; 416 391 bio = do_add_page_to_bio(bio, header->page_array.npages - i, 417 - WRITE, isect, pages[i], &be, 392 + WRITE, isect, pages[i], &map, &be, 418 393 bl_end_io_write, par, 419 - 0, PAGE_CACHE_SIZE); 394 + 0, &pg_len); 420 395 if (IS_ERR(bio)) { 421 396 header->pnfs_error = PTR_ERR(bio); 422 397 bio = NULL; 423 398 goto out; 424 399 } 425 - offset += PAGE_CACHE_SIZE; 426 - count -= PAGE_CACHE_SIZE; 427 - isect += PAGE_CACHE_SECTORS; 428 - extent_length -= PAGE_CACHE_SECTORS; 400 + 401 + offset += pg_len; 402 + count -= pg_len; 403 + isect += (pg_len >> SECTOR_SHIFT); 404 + extent_length -= (pg_len >> SECTOR_SHIFT); 429 405 } 430 406 431 407 
header->res.count = header->args.count;
+77 -6
fs/nfs/blocklayout/blocklayout.h
··· 44 44 #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) 45 45 #define SECTOR_SIZE (1 << SECTOR_SHIFT) 46 46 47 + struct pnfs_block_dev; 48 + 49 + enum pnfs_block_volume_type { 50 + PNFS_BLOCK_VOLUME_SIMPLE = 0, 51 + PNFS_BLOCK_VOLUME_SLICE = 1, 52 + PNFS_BLOCK_VOLUME_CONCAT = 2, 53 + PNFS_BLOCK_VOLUME_STRIPE = 3, 54 + }; 55 + 56 + #define PNFS_BLOCK_MAX_UUIDS 4 57 + #define PNFS_BLOCK_MAX_DEVICES 64 58 + 59 + /* 60 + * Random upper cap for the uuid length to avoid unbounded allocation. 61 + * Not actually limited by the protocol. 62 + */ 63 + #define PNFS_BLOCK_UUID_LEN 128 64 + 65 + 66 + struct pnfs_block_volume { 67 + enum pnfs_block_volume_type type; 68 + union { 69 + struct { 70 + int len; 71 + int nr_sigs; 72 + struct { 73 + u64 offset; 74 + u32 sig_len; 75 + u8 sig[PNFS_BLOCK_UUID_LEN]; 76 + } sigs[PNFS_BLOCK_MAX_UUIDS]; 77 + } simple; 78 + struct { 79 + u64 start; 80 + u64 len; 81 + u32 volume; 82 + } slice; 83 + struct { 84 + u32 volumes_count; 85 + u32 volumes[PNFS_BLOCK_MAX_DEVICES]; 86 + } concat; 87 + struct { 88 + u64 chunk_size; 89 + u32 volumes_count; 90 + u32 volumes[PNFS_BLOCK_MAX_DEVICES]; 91 + } stripe; 92 + }; 93 + }; 94 + 95 + struct pnfs_block_dev_map { 96 + sector_t start; 97 + sector_t len; 98 + 99 + sector_t disk_offset; 100 + struct block_device *bdev; 101 + }; 102 + 47 103 struct pnfs_block_dev { 48 - struct nfs4_deviceid_node d_node; 49 - struct block_device *d_bdev; 104 + struct nfs4_deviceid_node node; 105 + 106 + u64 start; 107 + u64 len; 108 + 109 + u32 nr_children; 110 + struct pnfs_block_dev *children; 111 + u64 chunk_size; 112 + 113 + struct block_device *bdev; 114 + u64 disk_offset; 115 + 116 + bool (*map)(struct pnfs_block_dev *dev, u64 offset, 117 + struct pnfs_block_dev_map *map); 50 118 }; 51 119 52 120 enum exstate4 { ··· 178 110 #define BL_DEVICE_REQUEST_PROC 0x1 /* User level process succeeds */ 179 111 #define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ 180 112 113 + /* dev.c */ 114 + 
struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server, 115 + struct pnfs_device *pdev, gfp_t gfp_mask); 116 + void bl_free_deviceid_node(struct nfs4_deviceid_node *d); 117 + 181 118 /* extent_tree.c */ 182 119 int ext_tree_insert(struct pnfs_block_layout *bl, 183 120 struct pnfs_block_extent *new); ··· 196 123 void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status); 197 124 198 125 /* rpc_pipefs.c */ 199 - struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server, 200 - struct pnfs_device *pdev, gfp_t gfp_mask); 201 - void bl_free_deviceid_node(struct nfs4_deviceid_node *d); 202 - 126 + dev_t bl_resolve_deviceid(struct nfs_server *server, 127 + struct pnfs_block_volume *b, gfp_t gfp_mask); 203 128 int __init bl_init_pipefs(void); 204 129 void __exit bl_cleanup_pipefs(void); 205 130
+360
fs/nfs/blocklayout/dev.c
··· 1 + /* 2 + * Copyright (c) 2014 Christoph Hellwig. 3 + */ 4 + #include <linux/sunrpc/svc.h> 5 + #include <linux/blkdev.h> 6 + #include <linux/nfs4.h> 7 + #include <linux/nfs_fs.h> 8 + #include <linux/nfs_xdr.h> 9 + 10 + #include "blocklayout.h" 11 + 12 + #define NFSDBG_FACILITY NFSDBG_PNFS_LD 13 + 14 + static void 15 + bl_free_device(struct pnfs_block_dev *dev) 16 + { 17 + if (dev->nr_children) { 18 + int i; 19 + 20 + for (i = 0; i < dev->nr_children; i++) 21 + bl_free_device(&dev->children[i]); 22 + kfree(dev->children); 23 + } else { 24 + if (dev->bdev) 25 + blkdev_put(dev->bdev, FMODE_READ); 26 + } 27 + } 28 + 29 + void 30 + bl_free_deviceid_node(struct nfs4_deviceid_node *d) 31 + { 32 + struct pnfs_block_dev *dev = 33 + container_of(d, struct pnfs_block_dev, node); 34 + 35 + bl_free_device(dev); 36 + kfree(dev); 37 + } 38 + 39 + static int 40 + nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) 41 + { 42 + __be32 *p; 43 + int i; 44 + 45 + p = xdr_inline_decode(xdr, 4); 46 + if (!p) 47 + return -EIO; 48 + b->type = be32_to_cpup(p++); 49 + 50 + switch (b->type) { 51 + case PNFS_BLOCK_VOLUME_SIMPLE: 52 + p = xdr_inline_decode(xdr, 4); 53 + if (!p) 54 + return -EIO; 55 + b->simple.nr_sigs = be32_to_cpup(p++); 56 + if (!b->simple.nr_sigs) { 57 + dprintk("no signature\n"); 58 + return -EIO; 59 + } 60 + 61 + b->simple.len = 4 + 4; 62 + for (i = 0; i < b->simple.nr_sigs; i++) { 63 + p = xdr_inline_decode(xdr, 8 + 4); 64 + if (!p) 65 + return -EIO; 66 + p = xdr_decode_hyper(p, &b->simple.sigs[i].offset); 67 + b->simple.sigs[i].sig_len = be32_to_cpup(p++); 68 + 69 + p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len); 70 + if (!p) 71 + return -EIO; 72 + memcpy(&b->simple.sigs[i].sig, p, 73 + b->simple.sigs[i].sig_len); 74 + 75 + b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len; 76 + } 77 + break; 78 + case PNFS_BLOCK_VOLUME_SLICE: 79 + p = xdr_inline_decode(xdr, 8 + 8 + 4); 80 + if (!p) 81 + return -EIO; 82 + p = xdr_decode_hyper(p, 
&b->slice.start); 83 + p = xdr_decode_hyper(p, &b->slice.len); 84 + b->slice.volume = be32_to_cpup(p++); 85 + break; 86 + case PNFS_BLOCK_VOLUME_CONCAT: 87 + p = xdr_inline_decode(xdr, 4); 88 + if (!p) 89 + return -EIO; 90 + b->concat.volumes_count = be32_to_cpup(p++); 91 + 92 + p = xdr_inline_decode(xdr, b->concat.volumes_count * 4); 93 + if (!p) 94 + return -EIO; 95 + for (i = 0; i < b->concat.volumes_count; i++) 96 + b->concat.volumes[i] = be32_to_cpup(p++); 97 + break; 98 + case PNFS_BLOCK_VOLUME_STRIPE: 99 + p = xdr_inline_decode(xdr, 8 + 4); 100 + if (!p) 101 + return -EIO; 102 + p = xdr_decode_hyper(p, &b->stripe.chunk_size); 103 + b->stripe.volumes_count = be32_to_cpup(p++); 104 + 105 + p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4); 106 + if (!p) 107 + return -EIO; 108 + for (i = 0; i < b->stripe.volumes_count; i++) 109 + b->stripe.volumes[i] = be32_to_cpup(p++); 110 + break; 111 + default: 112 + dprintk("unknown volume type!\n"); 113 + return -EIO; 114 + } 115 + 116 + return 0; 117 + } 118 + 119 + static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset, 120 + struct pnfs_block_dev_map *map) 121 + { 122 + map->start = dev->start; 123 + map->len = dev->len; 124 + map->disk_offset = dev->disk_offset; 125 + map->bdev = dev->bdev; 126 + return true; 127 + } 128 + 129 + static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset, 130 + struct pnfs_block_dev_map *map) 131 + { 132 + int i; 133 + 134 + for (i = 0; i < dev->nr_children; i++) { 135 + struct pnfs_block_dev *child = &dev->children[i]; 136 + 137 + if (child->start > offset || 138 + child->start + child->len <= offset) 139 + continue; 140 + 141 + child->map(child, offset - child->start, map); 142 + return true; 143 + } 144 + 145 + dprintk("%s: ran off loop!\n", __func__); 146 + return false; 147 + } 148 + 149 + static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, 150 + struct pnfs_block_dev_map *map) 151 + { 152 + struct pnfs_block_dev *child; 153 + u64 chunk = 
(offset / dev->chunk_size); 154 + int chunk_idx = chunk % dev->nr_children; 155 + u64 disk_offset; 156 + 157 + if (chunk_idx > dev->nr_children) { 158 + dprintk("%s: invalid chunk idx %d (%lld/%lld)\n", 159 + __func__, chunk_idx, offset, dev->chunk_size); 160 + /* error, should not happen */ 161 + return false; 162 + } 163 + 164 + /* truncate offset to the beginning of the stripe */ 165 + offset = chunk * dev->chunk_size; 166 + 167 + /* disk offset of the stripe */ 168 + disk_offset = offset / dev->nr_children; 169 + 170 + child = &dev->children[chunk_idx]; 171 + child->map(child, disk_offset, map); 172 + 173 + map->start += offset; 174 + map->disk_offset += disk_offset; 175 + map->len = dev->chunk_size; 176 + return true; 177 + } 178 + 179 + static int 180 + bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, 181 + struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask); 182 + 183 + 184 + static int 185 + bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d, 186 + struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 187 + { 188 + struct pnfs_block_volume *v = &volumes[idx]; 189 + dev_t dev; 190 + 191 + dev = bl_resolve_deviceid(server, v, gfp_mask); 192 + if (!dev) 193 + return -EIO; 194 + 195 + d->bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL); 196 + if (IS_ERR(d->bdev)) { 197 + printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n", 198 + MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev)); 199 + return PTR_ERR(d->bdev); 200 + } 201 + 202 + 203 + d->len = i_size_read(d->bdev->bd_inode); 204 + d->map = bl_map_simple; 205 + 206 + printk(KERN_INFO "pNFS: using block device %s\n", 207 + d->bdev->bd_disk->disk_name); 208 + return 0; 209 + } 210 + 211 + static int 212 + bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d, 213 + struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 214 + { 215 + struct pnfs_block_volume *v = &volumes[idx]; 216 + int ret; 217 + 218 + ret = bl_parse_deviceid(server, d, 
volumes, v->slice.volume, gfp_mask); 219 + if (ret) 220 + return ret; 221 + 222 + d->disk_offset = v->slice.start; 223 + d->len = v->slice.len; 224 + return 0; 225 + } 226 + 227 + static int 228 + bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d, 229 + struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 230 + { 231 + struct pnfs_block_volume *v = &volumes[idx]; 232 + u64 len = 0; 233 + int ret, i; 234 + 235 + d->children = kcalloc(v->concat.volumes_count, 236 + sizeof(struct pnfs_block_dev), GFP_KERNEL); 237 + if (!d->children) 238 + return -ENOMEM; 239 + 240 + for (i = 0; i < v->concat.volumes_count; i++) { 241 + ret = bl_parse_deviceid(server, &d->children[i], 242 + volumes, v->concat.volumes[i], gfp_mask); 243 + if (ret) 244 + return ret; 245 + 246 + d->nr_children++; 247 + d->children[i].start += len; 248 + len += d->children[i].len; 249 + } 250 + 251 + d->len = len; 252 + d->map = bl_map_concat; 253 + return 0; 254 + } 255 + 256 + static int 257 + bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d, 258 + struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 259 + { 260 + struct pnfs_block_volume *v = &volumes[idx]; 261 + u64 len = 0; 262 + int ret, i; 263 + 264 + d->children = kcalloc(v->stripe.volumes_count, 265 + sizeof(struct pnfs_block_dev), GFP_KERNEL); 266 + if (!d->children) 267 + return -ENOMEM; 268 + 269 + for (i = 0; i < v->stripe.volumes_count; i++) { 270 + ret = bl_parse_deviceid(server, &d->children[i], 271 + volumes, v->stripe.volumes[i], gfp_mask); 272 + if (ret) 273 + return ret; 274 + 275 + d->nr_children++; 276 + len += d->children[i].len; 277 + } 278 + 279 + d->len = len; 280 + d->chunk_size = v->stripe.chunk_size; 281 + d->map = bl_map_stripe; 282 + return 0; 283 + } 284 + 285 + static int 286 + bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, 287 + struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 288 + { 289 + switch (volumes[idx].type) { 290 + case 
PNFS_BLOCK_VOLUME_SIMPLE: 291 + return bl_parse_simple(server, d, volumes, idx, gfp_mask); 292 + case PNFS_BLOCK_VOLUME_SLICE: 293 + return bl_parse_slice(server, d, volumes, idx, gfp_mask); 294 + case PNFS_BLOCK_VOLUME_CONCAT: 295 + return bl_parse_concat(server, d, volumes, idx, gfp_mask); 296 + case PNFS_BLOCK_VOLUME_STRIPE: 297 + return bl_parse_stripe(server, d, volumes, idx, gfp_mask); 298 + default: 299 + dprintk("unsupported volume type: %d\n", volumes[idx].type); 300 + return -EIO; 301 + } 302 + } 303 + 304 + struct nfs4_deviceid_node * 305 + bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, 306 + gfp_t gfp_mask) 307 + { 308 + struct nfs4_deviceid_node *node = NULL; 309 + struct pnfs_block_volume *volumes; 310 + struct pnfs_block_dev *top; 311 + struct xdr_stream xdr; 312 + struct xdr_buf buf; 313 + struct page *scratch; 314 + int nr_volumes, ret, i; 315 + __be32 *p; 316 + 317 + scratch = alloc_page(gfp_mask); 318 + if (!scratch) 319 + goto out; 320 + 321 + xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen); 322 + xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE); 323 + 324 + p = xdr_inline_decode(&xdr, sizeof(__be32)); 325 + if (!p) 326 + goto out_free_scratch; 327 + nr_volumes = be32_to_cpup(p++); 328 + 329 + volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume), 330 + gfp_mask); 331 + if (!volumes) 332 + goto out_free_scratch; 333 + 334 + for (i = 0; i < nr_volumes; i++) { 335 + ret = nfs4_block_decode_volume(&xdr, &volumes[i]); 336 + if (ret < 0) 337 + goto out_free_volumes; 338 + } 339 + 340 + top = kzalloc(sizeof(*top), gfp_mask); 341 + if (!top) 342 + goto out_free_volumes; 343 + 344 + ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask); 345 + if (ret) { 346 + bl_free_device(top); 347 + kfree(top); 348 + goto out_free_volumes; 349 + } 350 + 351 + node = &top->node; 352 + nfs4_init_deviceid_node(node, server, &pdev->dev_id); 353 + 354 + out_free_volumes: 355 + 
kfree(volumes); 356 + out_free_scratch: 357 + __free_page(scratch); 358 + out: 359 + return node; 360 + }
+31 -108
fs/nfs/blocklayout/rpc_pipefs.c
··· 34 34 35 35 #define NFSDBG_FACILITY NFSDBG_PNFS_LD 36 36 37 - static void bl_dm_remove(struct net *net, dev_t dev) 37 + static void 38 + nfs4_encode_simple(__be32 *p, struct pnfs_block_volume *b) 38 39 { 39 - struct bl_pipe_msg bl_pipe_msg; 40 - struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; 41 - struct bl_dev_msg bl_umount_request; 42 - struct bl_msg_hdr bl_msg = { 43 - .type = BL_DEVICE_UMOUNT, 44 - .totallen = sizeof(bl_umount_request), 45 - }; 46 - uint8_t *dataptr; 47 - DECLARE_WAITQUEUE(wq, current); 48 - struct nfs_net *nn = net_generic(net, nfs_net_id); 40 + int i; 49 41 50 - dprintk("Entering %s\n", __func__); 51 - 52 - bl_pipe_msg.bl_wq = &nn->bl_wq; 53 - memset(msg, 0, sizeof(*msg)); 54 - msg->len = sizeof(bl_msg) + bl_msg.totallen; 55 - msg->data = kzalloc(msg->len, GFP_NOFS); 56 - if (!msg->data) 57 - goto out; 58 - 59 - memset(&bl_umount_request, 0, sizeof(bl_umount_request)); 60 - bl_umount_request.major = MAJOR(dev); 61 - bl_umount_request.minor = MINOR(dev); 62 - 63 - memcpy(msg->data, &bl_msg, sizeof(bl_msg)); 64 - dataptr = (uint8_t *) msg->data; 65 - memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); 66 - 67 - add_wait_queue(&nn->bl_wq, &wq); 68 - if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) { 69 - remove_wait_queue(&nn->bl_wq, &wq); 70 - goto out; 42 + *p++ = cpu_to_be32(1); 43 + *p++ = cpu_to_be32(b->type); 44 + *p++ = cpu_to_be32(b->simple.nr_sigs); 45 + for (i = 0; i < b->simple.nr_sigs; i++) { 46 + p = xdr_encode_hyper(p, b->simple.sigs[i].offset); 47 + p = xdr_encode_opaque(p, b->simple.sigs[i].sig, 48 + b->simple.sigs[i].sig_len); 71 49 } 72 - 73 - set_current_state(TASK_UNINTERRUPTIBLE); 74 - schedule(); 75 - __set_current_state(TASK_RUNNING); 76 - remove_wait_queue(&nn->bl_wq, &wq); 77 - 78 - out: 79 - kfree(msg->data); 80 50 } 81 51 82 - /* 83 - * Decodes pnfs_block_deviceaddr4 which is XDR encoded in dev->dev_addr_buf. 
84 - */ 85 - struct nfs4_deviceid_node * 86 - bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *dev, 52 + dev_t 53 + bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, 87 54 gfp_t gfp_mask) 88 55 { 89 - struct pnfs_block_dev *rv; 90 - struct block_device *bd; 91 - struct bl_pipe_msg bl_pipe_msg; 92 - struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; 93 - struct bl_msg_hdr bl_msg = { 94 - .type = BL_DEVICE_MOUNT, 95 - .totallen = dev->mincount, 96 - }; 97 - uint8_t *dataptr; 98 - DECLARE_WAITQUEUE(wq, current); 99 - int offset, len, i, rc; 100 56 struct net *net = server->nfs_client->cl_net; 101 57 struct nfs_net *nn = net_generic(net, nfs_net_id); 102 58 struct bl_dev_msg *reply = &nn->bl_mount_reply; 59 + struct bl_pipe_msg bl_pipe_msg; 60 + struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; 61 + struct bl_msg_hdr *bl_msg; 62 + DECLARE_WAITQUEUE(wq, current); 63 + dev_t dev = 0; 64 + int rc; 103 65 104 66 dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); 105 - dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, 106 - dev->mincount); 107 67 108 68 bl_pipe_msg.bl_wq = &nn->bl_wq; 69 + 70 + b->simple.len += 4; /* single volume */ 71 + if (b->simple.len > PAGE_SIZE) 72 + return -EIO; 73 + 109 74 memset(msg, 0, sizeof(*msg)); 110 - msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, gfp_mask); 75 + msg->len = sizeof(*bl_msg) + b->simple.len; 76 + msg->data = kzalloc(msg->len, gfp_mask); 111 77 if (!msg->data) 112 78 goto out; 113 79 114 - memcpy(msg->data, &bl_msg, sizeof(bl_msg)); 115 - dataptr = (uint8_t *) msg->data; 116 - len = dev->mincount; 117 - offset = sizeof(bl_msg); 118 - for (i = 0; len > 0; i++) { 119 - memcpy(&dataptr[offset], page_address(dev->pages[i]), 120 - len < PAGE_CACHE_SIZE ? 
len : PAGE_CACHE_SIZE); 121 - len -= PAGE_CACHE_SIZE; 122 - offset += PAGE_CACHE_SIZE; 123 - } 124 - msg->len = sizeof(bl_msg) + dev->mincount; 80 + bl_msg = msg->data; 81 + bl_msg->type = BL_DEVICE_MOUNT, 82 + bl_msg->totallen = b->simple.len; 83 + nfs4_encode_simple(msg->data + sizeof(*bl_msg), b); 125 84 126 85 dprintk("%s CALLING USERSPACE DAEMON\n", __func__); 127 86 add_wait_queue(&nn->bl_wq, &wq); ··· 101 142 goto out; 102 143 } 103 144 104 - bd = blkdev_get_by_dev(MKDEV(reply->major, reply->minor), 105 - FMODE_READ, NULL); 106 - if (IS_ERR(bd)) { 107 - printk(KERN_WARNING "%s failed to open device %d:%d (%ld)\n", 108 - __func__, reply->major, reply->minor, 109 - PTR_ERR(bd)); 110 - goto out; 111 - } 112 - 113 - rv = kzalloc(sizeof(*rv), gfp_mask); 114 - if (!rv) 115 - goto out; 116 - 117 - nfs4_init_deviceid_node(&rv->d_node, server, &dev->dev_id); 118 - rv->d_bdev = bd; 119 - 120 - dprintk("%s Created device %s with bd_block_size %u\n", 121 - __func__, 122 - bd->bd_disk->disk_name, 123 - bd->bd_block_size); 124 - 125 - kfree(msg->data); 126 - return &rv->d_node; 127 - 145 + dev = MKDEV(reply->major, reply->minor); 128 146 out: 129 147 kfree(msg->data); 130 - return NULL; 131 - } 132 - 133 - void 134 - bl_free_deviceid_node(struct nfs4_deviceid_node *d) 135 - { 136 - struct pnfs_block_dev *dev = 137 - container_of(d, struct pnfs_block_dev, d_node); 138 - struct net *net = d->nfs_client->cl_net; 139 - 140 - blkdev_put(dev->d_bdev, FMODE_READ); 141 - bl_dm_remove(net, dev->d_bdev->bd_dev); 142 - 143 - kfree(dev); 148 + return dev; 144 149 } 145 150 146 151 static ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,