Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v4.2-rc2 384 lines 9.7 kB view raw
1/* 2 * NVDIMM Block Window Driver 3 * Copyright (c) 2014, Intel Corporation. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms and conditions of the GNU General Public License, 7 * version 2, as published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12 * more details. 13 */ 14 15#include <linux/blkdev.h> 16#include <linux/fs.h> 17#include <linux/genhd.h> 18#include <linux/module.h> 19#include <linux/moduleparam.h> 20#include <linux/nd.h> 21#include <linux/sizes.h> 22#include "nd.h" 23 24struct nd_blk_device { 25 struct request_queue *queue; 26 struct gendisk *disk; 27 struct nd_namespace_blk *nsblk; 28 struct nd_blk_region *ndbr; 29 size_t disk_size; 30 u32 sector_size; 31 u32 internal_lbasize; 32}; 33 34static int nd_blk_major; 35 36static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev) 37{ 38 return blk_dev->nsblk->lbasize - blk_dev->sector_size; 39} 40 41static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk, 42 resource_size_t ns_offset, unsigned int len) 43{ 44 int i; 45 46 for (i = 0; i < nsblk->num_resources; i++) { 47 if (ns_offset < resource_size(nsblk->res[i])) { 48 if (ns_offset + len > resource_size(nsblk->res[i])) { 49 dev_WARN_ONCE(&nsblk->common.dev, 1, 50 "illegal request\n"); 51 return SIZE_MAX; 52 } 53 return nsblk->res[i]->start + ns_offset; 54 } 55 ns_offset -= resource_size(nsblk->res[i]); 56 } 57 58 dev_WARN_ONCE(&nsblk->common.dev, 1, "request out of range\n"); 59 return SIZE_MAX; 60} 61 62#ifdef CONFIG_BLK_DEV_INTEGRITY 63static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, 64 struct bio_integrity_payload *bip, u64 lba, 65 int rw) 66{ 67 unsigned int len = nd_blk_meta_size(blk_dev); 68 resource_size_t dev_offset, ns_offset; 69 struct nd_namespace_blk *nsblk; 70 struct nd_blk_region *ndbr; 71 int err = 0; 72 73 nsblk = blk_dev->nsblk; 74 ndbr = blk_dev->ndbr; 75 ns_offset = lba * blk_dev->internal_lbasize + blk_dev->sector_size; 76 dev_offset = to_dev_offset(nsblk, ns_offset, len); 77 if (dev_offset == SIZE_MAX) 78 return -EIO; 79 80 while (len) { 81 unsigned int cur_len; 82 struct bio_vec bv; 83 void *iobuf; 84 85 bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); 86 /* 87 * The 'bv' obtained from bvec_iter_bvec has its .bv_len and 88 * .bv_offset already adjusted for iter->bi_bvec_done, and we 89 * can use those directly 90 */ 91 92 cur_len = min(len, bv.bv_len); 93 iobuf = kmap_atomic(bv.bv_page); 94 err = ndbr->do_io(ndbr, dev_offset, iobuf + bv.bv_offset, 95 cur_len, rw); 96 kunmap_atomic(iobuf); 97 if (err) 98 return err; 99 100 len -= cur_len; 101 dev_offset += cur_len; 102 bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len); 103 } 104 105 return err; 106} 107 108#else /* CONFIG_BLK_DEV_INTEGRITY */ 109static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, 110 struct bio_integrity_payload *bip, u64 lba, 111 int rw) 112{ 113 return 0; 114} 115#endif 116 117static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, 118 struct bio_integrity_payload *bip, struct page *page, 119 unsigned int len, unsigned int off, int rw, 120 sector_t sector) 121{ 122 struct nd_blk_region *ndbr = blk_dev->ndbr; 123 resource_size_t dev_offset, ns_offset; 124 int err = 0; 125 void *iobuf; 126 u64 lba; 127 128 while (len) { 129 unsigned int cur_len; 130 131 /* 132 * If we don't have an integrity payload, we don't have to 133 * split the bvec into sectors, as this would cause unnecessary 134 * Block Window setup/move steps. the do_io routine is capable 135 * of handling len <= PAGE_SIZE. 136 */ 137 cur_len = bip ? min(len, blk_dev->sector_size) : len; 138 139 lba = div_u64(sector << SECTOR_SHIFT, blk_dev->sector_size); 140 ns_offset = lba * blk_dev->internal_lbasize; 141 dev_offset = to_dev_offset(blk_dev->nsblk, ns_offset, cur_len); 142 if (dev_offset == SIZE_MAX) 143 return -EIO; 144 145 iobuf = kmap_atomic(page); 146 err = ndbr->do_io(ndbr, dev_offset, iobuf + off, cur_len, rw); 147 kunmap_atomic(iobuf); 148 if (err) 149 return err; 150 151 if (bip) { 152 err = nd_blk_rw_integrity(blk_dev, bip, lba, rw); 153 if (err) 154 return err; 155 } 156 len -= cur_len; 157 off += cur_len; 158 sector += blk_dev->sector_size >> SECTOR_SHIFT; 159 } 160 161 return err; 162} 163 164static void nd_blk_make_request(struct request_queue *q, struct bio *bio) 165{ 166 struct block_device *bdev = bio->bi_bdev; 167 struct gendisk *disk = bdev->bd_disk; 168 struct bio_integrity_payload *bip; 169 struct nd_blk_device *blk_dev; 170 struct bvec_iter iter; 171 unsigned long start; 172 struct bio_vec bvec; 173 int err = 0, rw; 174 bool do_acct; 175 176 /* 177 * bio_integrity_enabled also checks if the bio already has an 178 * integrity payload attached. If it does, we *don't* do a 179 * bio_integrity_prep here - the payload has been generated by 180 * another kernel subsystem, and we just pass it through. 181 */ 182 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { 183 err = -EIO; 184 goto out; 185 } 186 187 bip = bio_integrity(bio); 188 blk_dev = disk->private_data; 189 rw = bio_data_dir(bio); 190 do_acct = nd_iostat_start(bio, &start); 191 bio_for_each_segment(bvec, bio, iter) { 192 unsigned int len = bvec.bv_len; 193 194 BUG_ON(len > PAGE_SIZE); 195 err = nd_blk_do_bvec(blk_dev, bip, bvec.bv_page, len, 196 bvec.bv_offset, rw, iter.bi_sector); 197 if (err) { 198 dev_info(&blk_dev->nsblk->common.dev, 199 "io error in %s sector %lld, len %d,\n", 200 (rw == READ) ? "READ" : "WRITE", 201 (unsigned long long) iter.bi_sector, len); 202 break; 203 } 204 } 205 if (do_acct) 206 nd_iostat_end(bio, start); 207 208 out: 209 bio_endio(bio, err); 210} 211 212static int nd_blk_rw_bytes(struct nd_namespace_common *ndns, 213 resource_size_t offset, void *iobuf, size_t n, int rw) 214{ 215 struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim); 216 struct nd_namespace_blk *nsblk = blk_dev->nsblk; 217 struct nd_blk_region *ndbr = blk_dev->ndbr; 218 resource_size_t dev_offset; 219 220 dev_offset = to_dev_offset(nsblk, offset, n); 221 222 if (unlikely(offset + n > blk_dev->disk_size)) { 223 dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); 224 return -EFAULT; 225 } 226 227 if (dev_offset == SIZE_MAX) 228 return -EIO; 229 230 return ndbr->do_io(ndbr, dev_offset, iobuf, n, rw); 231} 232 233static const struct block_device_operations nd_blk_fops = { 234 .owner = THIS_MODULE, 235 .revalidate_disk = nvdimm_revalidate_disk, 236}; 237 238static int nd_blk_attach_disk(struct nd_namespace_common *ndns, 239 struct nd_blk_device *blk_dev) 240{ 241 resource_size_t available_disk_size; 242 struct gendisk *disk; 243 u64 internal_nlba; 244 245 internal_nlba = div_u64(blk_dev->disk_size, blk_dev->internal_lbasize); 246 available_disk_size = internal_nlba * blk_dev->sector_size; 247 248 blk_dev->queue = blk_alloc_queue(GFP_KERNEL); 249 if (!blk_dev->queue) 250 return -ENOMEM; 251 252 blk_queue_make_request(blk_dev->queue, nd_blk_make_request); 253 blk_queue_max_hw_sectors(blk_dev->queue, UINT_MAX); 254 blk_queue_bounce_limit(blk_dev->queue, BLK_BOUNCE_ANY); 255 blk_queue_logical_block_size(blk_dev->queue, blk_dev->sector_size); 256 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, blk_dev->queue); 257 258 disk = blk_dev->disk = alloc_disk(0); 259 if (!disk) { 260 blk_cleanup_queue(blk_dev->queue); 261 return -ENOMEM; 262 } 263 264 disk->driverfs_dev = &ndns->dev; 265 disk->major = nd_blk_major; 266 disk->first_minor = 0; 267 disk->fops = &nd_blk_fops; 268 disk->private_data = blk_dev; 269 disk->queue = blk_dev->queue; 270 disk->flags = GENHD_FL_EXT_DEVT; 271 nvdimm_namespace_disk_name(ndns, disk->disk_name); 272 set_capacity(disk, 0); 273 add_disk(disk); 274 275 if (nd_blk_meta_size(blk_dev)) { 276 int rc = nd_integrity_init(disk, nd_blk_meta_size(blk_dev)); 277 278 if (rc) { 279 del_gendisk(disk); 280 put_disk(disk); 281 blk_cleanup_queue(blk_dev->queue); 282 return rc; 283 } 284 } 285 286 set_capacity(disk, available_disk_size >> SECTOR_SHIFT); 287 revalidate_disk(disk); 288 return 0; 289} 290 291static int nd_blk_probe(struct device *dev) 292{ 293 struct nd_namespace_common *ndns; 294 struct nd_namespace_blk *nsblk; 295 struct nd_blk_device *blk_dev; 296 int rc; 297 298 ndns = nvdimm_namespace_common_probe(dev); 299 if (IS_ERR(ndns)) 300 return PTR_ERR(ndns); 301 302 blk_dev = kzalloc(sizeof(*blk_dev), GFP_KERNEL); 303 if (!blk_dev) 304 return -ENOMEM; 305 306 nsblk = to_nd_namespace_blk(&ndns->dev); 307 blk_dev->disk_size = nvdimm_namespace_capacity(ndns); 308 blk_dev->ndbr = to_nd_blk_region(dev->parent); 309 blk_dev->nsblk = to_nd_namespace_blk(&ndns->dev); 310 blk_dev->internal_lbasize = roundup(nsblk->lbasize, 311 INT_LBASIZE_ALIGNMENT); 312 blk_dev->sector_size = ((nsblk->lbasize >= 4096) ? 4096 : 512); 313 dev_set_drvdata(dev, blk_dev); 314 315 ndns->rw_bytes = nd_blk_rw_bytes; 316 if (is_nd_btt(dev)) 317 rc = nvdimm_namespace_attach_btt(ndns); 318 else if (nd_btt_probe(ndns, blk_dev) == 0) { 319 /* we'll come back as btt-blk */ 320 rc = -ENXIO; 321 } else 322 rc = nd_blk_attach_disk(ndns, blk_dev); 323 if (rc) 324 kfree(blk_dev); 325 return rc; 326} 327 328static void nd_blk_detach_disk(struct nd_blk_device *blk_dev) 329{ 330 del_gendisk(blk_dev->disk); 331 put_disk(blk_dev->disk); 332 blk_cleanup_queue(blk_dev->queue); 333} 334 335static int nd_blk_remove(struct device *dev) 336{ 337 struct nd_blk_device *blk_dev = dev_get_drvdata(dev); 338 339 if (is_nd_btt(dev)) 340 nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); 341 else 342 nd_blk_detach_disk(blk_dev); 343 kfree(blk_dev); 344 345 return 0; 346} 347 348static struct nd_device_driver nd_blk_driver = { 349 .probe = nd_blk_probe, 350 .remove = nd_blk_remove, 351 .drv = { 352 .name = "nd_blk", 353 }, 354 .type = ND_DRIVER_NAMESPACE_BLK, 355}; 356 357static int __init nd_blk_init(void) 358{ 359 int rc; 360 361 rc = register_blkdev(0, "nd_blk"); 362 if (rc < 0) 363 return rc; 364 365 nd_blk_major = rc; 366 rc = nd_driver_register(&nd_blk_driver); 367 368 if (rc < 0) 369 unregister_blkdev(nd_blk_major, "nd_blk"); 370 371 return rc; 372} 373 374static void __exit nd_blk_exit(void) 375{ 376 driver_unregister(&nd_blk_driver.drv); 377 unregister_blkdev(nd_blk_major, "nd_blk"); 378} 379 380MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>"); 381MODULE_LICENSE("GPL v2"); 382MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_BLK); 383module_init(nd_blk_init); 384module_exit(nd_blk_exit);