at v3.2 48 kB view raw
1/* 2 * linux/drivers/block/loop.c 3 * 4 * Written by Theodore Ts'o, 3/29/93 5 * 6 * Copyright 1993 by Theodore Ts'o. Redistribution of this file is 7 * permitted under the GNU General Public License. 8 * 9 * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993 10 * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996 11 * 12 * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994 13 * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996 14 * 15 * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997 16 * 17 * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998 18 * 19 * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998 20 * 21 * Loadable modules and other fixes by AK, 1998 22 * 23 * Make real block number available to downstream transfer functions, enables 24 * CBC (and relatives) mode encryption requiring unique IVs per data block. 25 * Reed H. Petty, rhp@draper.net 26 * 27 * Maximum number of loop devices now dynamic via max_loop module parameter. 28 * Russell Kroll <rkroll@exploits.org> 19990701 29 * 30 * Maximum number of loop devices when compiled-in now selectable by passing 31 * max_loop=<1-255> to the kernel on boot. 32 * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999 33 * 34 * Completely rewrite request handling to be make_request_fn style and 35 * non blocking, pushing work to a helper thread. Lots of fixes from 36 * Al Viro too. 37 * Jens Axboe <axboe@suse.de>, Nov 2000 38 * 39 * Support up to 256 loop devices 40 * Heinz Mauelshagen <mge@sistina.com>, Feb 2002 41 * 42 * Support for falling back on the write file operation when the address space 43 * operations write_begin is not available on the backing filesystem. 44 * Anton Altaparmakov, 16 Feb 2005 45 * 46 * Still To Fix: 47 * - Advisory locking is ignored here. 48 * - Should use an own CAP_* category instead of CAP_SYS_ADMIN 49 * 50 */ 51 52#include <linux/module.h> 53#include <linux/moduleparam.h> 54#include <linux/sched.h> 55#include <linux/fs.h> 56#include <linux/file.h> 57#include <linux/stat.h> 58#include <linux/errno.h> 59#include <linux/major.h> 60#include <linux/wait.h> 61#include <linux/blkdev.h> 62#include <linux/blkpg.h> 63#include <linux/init.h> 64#include <linux/swap.h> 65#include <linux/slab.h> 66#include <linux/loop.h> 67#include <linux/compat.h> 68#include <linux/suspend.h> 69#include <linux/freezer.h> 70#include <linux/mutex.h> 71#include <linux/writeback.h> 72#include <linux/buffer_head.h> /* for invalidate_bdev() */ 73#include <linux/completion.h> 74#include <linux/highmem.h> 75#include <linux/kthread.h> 76#include <linux/splice.h> 77#include <linux/sysfs.h> 78#include <linux/miscdevice.h> 79#include <linux/falloc.h> 80 81#include <asm/uaccess.h> 82 83static DEFINE_IDR(loop_index_idr); 84static DEFINE_MUTEX(loop_index_mutex); 85 86static int max_part; 87static int part_shift; 88 89/* 90 * Transfer functions 91 */ 92static int transfer_none(struct loop_device *lo, int cmd, 93 struct page *raw_page, unsigned raw_off, 94 struct page *loop_page, unsigned loop_off, 95 int size, sector_t real_block) 96{ 97 char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; 98 char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; 99 100 if (cmd == READ) 101 memcpy(loop_buf, raw_buf, size); 102 else 103 memcpy(raw_buf, loop_buf, size); 104 105 kunmap_atomic(loop_buf, KM_USER1); 106 kunmap_atomic(raw_buf, KM_USER0); 107 cond_resched(); 108 return 0; 109} 110 111static int transfer_xor(struct loop_device *lo, int cmd, 112 struct page *raw_page, unsigned raw_off, 113 struct page *loop_page, unsigned loop_off, 114 int size, sector_t real_block) 115{ 116 char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; 117 char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; 118 char *in, *out, *key; 119 int i, keysize; 120 121 if (cmd == READ) { 122 in = raw_buf; 123 out = loop_buf; 124 } else { 125 in = loop_buf; 126 out = raw_buf; 127 } 128 129 key = lo->lo_encrypt_key; 130 keysize = lo->lo_encrypt_key_size; 131 for (i = 0; i < size; i++) 132 *out++ = *in++ ^ key[(i & 511) % keysize]; 133 134 kunmap_atomic(loop_buf, KM_USER1); 135 kunmap_atomic(raw_buf, KM_USER0); 136 cond_resched(); 137 return 0; 138} 139 140static int xor_init(struct loop_device *lo, const struct loop_info64 *info) 141{ 142 if (unlikely(info->lo_encrypt_key_size <= 0)) 143 return -EINVAL; 144 return 0; 145} 146 147static struct loop_func_table none_funcs = { 148 .number = LO_CRYPT_NONE, 149 .transfer = transfer_none, 150}; 151 152static struct loop_func_table xor_funcs = { 153 .number = LO_CRYPT_XOR, 154 .transfer = transfer_xor, 155 .init = xor_init 156}; 157 158/* xfer_funcs[0] is special - its release function is never called */ 159static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { 160 &none_funcs, 161 &xor_funcs 162}; 163 164static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) 165{ 166 loff_t size, loopsize; 167 168 /* Compute loopsize in bytes */ 169 size = i_size_read(file->f_mapping->host); 170 loopsize = size - offset; 171 /* offset is beyond i_size, wierd but possible */ 172 if (loopsize < 0) 173 return 0; 174 175 if (sizelimit > 0 && sizelimit < loopsize) 176 loopsize = sizelimit; 177 /* 178 * Unfortunately, if we want to do I/O on the device, 179 * the number of 512-byte sectors has to fit into a sector_t. 180 */ 181 return loopsize >> 9; 182} 183 184static loff_t get_loop_size(struct loop_device *lo, struct file *file) 185{ 186 return get_size(lo->lo_offset, lo->lo_sizelimit, file); 187} 188 189static int 190figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) 191{ 192 loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); 193 sector_t x = (sector_t)size; 194 195 if (unlikely((loff_t)x != size)) 196 return -EFBIG; 197 if (lo->lo_offset != offset) 198 lo->lo_offset = offset; 199 if (lo->lo_sizelimit != sizelimit) 200 lo->lo_sizelimit = sizelimit; 201 set_capacity(lo->lo_disk, x); 202 return 0; 203} 204 205static inline int 206lo_do_transfer(struct loop_device *lo, int cmd, 207 struct page *rpage, unsigned roffs, 208 struct page *lpage, unsigned loffs, 209 int size, sector_t rblock) 210{ 211 if (unlikely(!lo->transfer)) 212 return 0; 213 214 return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); 215} 216 217/** 218 * __do_lo_send_write - helper for writing data to a loop device 219 * 220 * This helper just factors out common code between do_lo_send_direct_write() 221 * and do_lo_send_write(). 222 */ 223static int __do_lo_send_write(struct file *file, 224 u8 *buf, const int len, loff_t pos) 225{ 226 ssize_t bw; 227 mm_segment_t old_fs = get_fs(); 228 229 set_fs(get_ds()); 230 bw = file->f_op->write(file, buf, len, &pos); 231 set_fs(old_fs); 232 if (likely(bw == len)) 233 return 0; 234 printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n", 235 (unsigned long long)pos, len); 236 if (bw >= 0) 237 bw = -EIO; 238 return bw; 239} 240 241/** 242 * do_lo_send_direct_write - helper for writing data to a loop device 243 * 244 * This is the fast, non-transforming version that does not need double 245 * buffering. 246 */ 247static int do_lo_send_direct_write(struct loop_device *lo, 248 struct bio_vec *bvec, loff_t pos, struct page *page) 249{ 250 ssize_t bw = __do_lo_send_write(lo->lo_backing_file, 251 kmap(bvec->bv_page) + bvec->bv_offset, 252 bvec->bv_len, pos); 253 kunmap(bvec->bv_page); 254 cond_resched(); 255 return bw; 256} 257 258/** 259 * do_lo_send_write - helper for writing data to a loop device 260 * 261 * This is the slow, transforming version that needs to double buffer the 262 * data as it cannot do the transformations in place without having direct 263 * access to the destination pages of the backing file. 264 */ 265static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec, 266 loff_t pos, struct page *page) 267{ 268 int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page, 269 bvec->bv_offset, bvec->bv_len, pos >> 9); 270 if (likely(!ret)) 271 return __do_lo_send_write(lo->lo_backing_file, 272 page_address(page), bvec->bv_len, 273 pos); 274 printk(KERN_ERR "loop: Transfer error at byte offset %llu, " 275 "length %i.\n", (unsigned long long)pos, bvec->bv_len); 276 if (ret > 0) 277 ret = -EIO; 278 return ret; 279} 280 281static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos) 282{ 283 int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t, 284 struct page *page); 285 struct bio_vec *bvec; 286 struct page *page = NULL; 287 int i, ret = 0; 288 289 if (lo->transfer != transfer_none) { 290 page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); 291 if (unlikely(!page)) 292 goto fail; 293 kmap(page); 294 do_lo_send = do_lo_send_write; 295 } else { 296 do_lo_send = do_lo_send_direct_write; 297 } 298 299 bio_for_each_segment(bvec, bio, i) { 300 ret = do_lo_send(lo, bvec, pos, page); 301 if (ret < 0) 302 break; 303 pos += bvec->bv_len; 304 } 305 if (page) { 306 kunmap(page); 307 __free_page(page); 308 } 309out: 310 return ret; 311fail: 312 printk(KERN_ERR "loop: Failed to allocate temporary page for write.\n"); 313 ret = -ENOMEM; 314 goto out; 315} 316 317struct lo_read_data { 318 struct loop_device *lo; 319 struct page *page; 320 unsigned offset; 321 int bsize; 322}; 323 324static int 325lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 326 struct splice_desc *sd) 327{ 328 struct lo_read_data *p = sd->u.data; 329 struct loop_device *lo = p->lo; 330 struct page *page = buf->page; 331 sector_t IV; 332 int size; 333 334 IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) + 335 (buf->offset >> 9); 336 size = sd->len; 337 if (size > p->bsize) 338 size = p->bsize; 339 340 if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) { 341 printk(KERN_ERR "loop: transfer error block %ld\n", 342 page->index); 343 size = -EINVAL; 344 } 345 346 flush_dcache_page(p->page); 347 348 if (size > 0) 349 p->offset += size; 350 351 return size; 352} 353 354static int 355lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd) 356{ 357 return __splice_from_pipe(pipe, sd, lo_splice_actor); 358} 359 360static int 361do_lo_receive(struct loop_device *lo, 362 struct bio_vec *bvec, int bsize, loff_t pos) 363{ 364 struct lo_read_data cookie; 365 struct splice_desc sd; 366 struct file *file; 367 long retval; 368 369 cookie.lo = lo; 370 cookie.page = bvec->bv_page; 371 cookie.offset = bvec->bv_offset; 372 cookie.bsize = bsize; 373 374 sd.len = 0; 375 sd.total_len = bvec->bv_len; 376 sd.flags = 0; 377 sd.pos = pos; 378 sd.u.data = &cookie; 379 380 file = lo->lo_backing_file; 381 retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor); 382 383 if (retval < 0) 384 return retval; 385 if (retval != bvec->bv_len) 386 return -EIO; 387 return 0; 388} 389 390static int 391lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) 392{ 393 struct bio_vec *bvec; 394 int i, ret = 0; 395 396 bio_for_each_segment(bvec, bio, i) { 397 ret = do_lo_receive(lo, bvec, bsize, pos); 398 if (ret < 0) 399 break; 400 pos += bvec->bv_len; 401 } 402 return ret; 403} 404 405static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) 406{ 407 loff_t pos; 408 int ret; 409 410 pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; 411 412 if (bio_rw(bio) == WRITE) { 413 struct file *file = lo->lo_backing_file; 414 415 if (bio->bi_rw & REQ_FLUSH) { 416 ret = vfs_fsync(file, 0); 417 if (unlikely(ret && ret != -EINVAL)) { 418 ret = -EIO; 419 goto out; 420 } 421 } 422 423 /* 424 * We use punch hole to reclaim the free space used by the 425 * image a.k.a. discard. However we do not support discard if 426 * encryption is enabled, because it may give an attacker 427 * useful information. 428 */ 429 if (bio->bi_rw & REQ_DISCARD) { 430 struct file *file = lo->lo_backing_file; 431 int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; 432 433 if ((!file->f_op->fallocate) || 434 lo->lo_encrypt_key_size) { 435 ret = -EOPNOTSUPP; 436 goto out; 437 } 438 ret = file->f_op->fallocate(file, mode, pos, 439 bio->bi_size); 440 if (unlikely(ret && ret != -EINVAL && 441 ret != -EOPNOTSUPP)) 442 ret = -EIO; 443 goto out; 444 } 445 446 ret = lo_send(lo, bio, pos); 447 448 if ((bio->bi_rw & REQ_FUA) && !ret) { 449 ret = vfs_fsync(file, 0); 450 if (unlikely(ret && ret != -EINVAL)) 451 ret = -EIO; 452 } 453 } else 454 ret = lo_receive(lo, bio, lo->lo_blocksize, pos); 455 456out: 457 return ret; 458} 459 460/* 461 * Add bio to back of pending list 462 */ 463static void loop_add_bio(struct loop_device *lo, struct bio *bio) 464{ 465 bio_list_add(&lo->lo_bio_list, bio); 466} 467 468/* 469 * Grab first pending buffer 470 */ 471static struct bio *loop_get_bio(struct loop_device *lo) 472{ 473 return bio_list_pop(&lo->lo_bio_list); 474} 475 476static void loop_make_request(struct request_queue *q, struct bio *old_bio) 477{ 478 struct loop_device *lo = q->queuedata; 479 int rw = bio_rw(old_bio); 480 481 if (rw == READA) 482 rw = READ; 483 484 BUG_ON(!lo || (rw != READ && rw != WRITE)); 485 486 spin_lock_irq(&lo->lo_lock); 487 if (lo->lo_state != Lo_bound) 488 goto out; 489 if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY))) 490 goto out; 491 loop_add_bio(lo, old_bio); 492 wake_up(&lo->lo_event); 493 spin_unlock_irq(&lo->lo_lock); 494 return; 495 496out: 497 spin_unlock_irq(&lo->lo_lock); 498 bio_io_error(old_bio); 499} 500 501struct switch_request { 502 struct file *file; 503 struct completion wait; 504}; 505 506static void do_loop_switch(struct loop_device *, struct switch_request *); 507 508static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) 509{ 510 if (unlikely(!bio->bi_bdev)) { 511 do_loop_switch(lo, bio->bi_private); 512 bio_put(bio); 513 } else { 514 int ret = do_bio_filebacked(lo, bio); 515 bio_endio(bio, ret); 516 } 517} 518 519/* 520 * worker thread that handles reads/writes to file backed loop devices, 521 * to avoid blocking in our make_request_fn. it also does loop decrypting 522 * on reads for block backed loop, as that is too heavy to do from 523 * b_end_io context where irqs may be disabled. 524 * 525 * Loop explanation: loop_clr_fd() sets lo_state to Lo_rundown before 526 * calling kthread_stop(). Therefore once kthread_should_stop() is 527 * true, make_request will not place any more requests. Therefore 528 * once kthread_should_stop() is true and lo_bio is NULL, we are 529 * done with the loop. 530 */ 531static int loop_thread(void *data) 532{ 533 struct loop_device *lo = data; 534 struct bio *bio; 535 536 set_user_nice(current, -20); 537 538 while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) { 539 540 wait_event_interruptible(lo->lo_event, 541 !bio_list_empty(&lo->lo_bio_list) || 542 kthread_should_stop()); 543 544 if (bio_list_empty(&lo->lo_bio_list)) 545 continue; 546 spin_lock_irq(&lo->lo_lock); 547 bio = loop_get_bio(lo); 548 spin_unlock_irq(&lo->lo_lock); 549 550 BUG_ON(!bio); 551 loop_handle_bio(lo, bio); 552 } 553 554 return 0; 555} 556 557/* 558 * loop_switch performs the hard work of switching a backing store. 559 * First it needs to flush existing IO, it does this by sending a magic 560 * BIO down the pipe. The completion of this BIO does the actual switch. 561 */ 562static int loop_switch(struct loop_device *lo, struct file *file) 563{ 564 struct switch_request w; 565 struct bio *bio = bio_alloc(GFP_KERNEL, 0); 566 if (!bio) 567 return -ENOMEM; 568 init_completion(&w.wait); 569 w.file = file; 570 bio->bi_private = &w; 571 bio->bi_bdev = NULL; 572 loop_make_request(lo->lo_queue, bio); 573 wait_for_completion(&w.wait); 574 return 0; 575} 576 577/* 578 * Helper to flush the IOs in loop, but keeping loop thread running 579 */ 580static int loop_flush(struct loop_device *lo) 581{ 582 /* loop not yet configured, no running thread, nothing to flush */ 583 if (!lo->lo_thread) 584 return 0; 585 586 return loop_switch(lo, NULL); 587} 588 589/* 590 * Do the actual switch; called from the BIO completion routine 591 */ 592static void do_loop_switch(struct loop_device *lo, struct switch_request *p) 593{ 594 struct file *file = p->file; 595 struct file *old_file = lo->lo_backing_file; 596 struct address_space *mapping; 597 598 /* if no new file, only flush of queued bios requested */ 599 if (!file) 600 goto out; 601 602 mapping = file->f_mapping; 603 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); 604 lo->lo_backing_file = file; 605 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ? 606 mapping->host->i_bdev->bd_block_size : PAGE_SIZE; 607 lo->old_gfp_mask = mapping_gfp_mask(mapping); 608 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); 609out: 610 complete(&p->wait); 611} 612 613 614/* 615 * loop_change_fd switched the backing store of a loopback device to 616 * a new file. This is useful for operating system installers to free up 617 * the original file and in High Availability environments to switch to 618 * an alternative location for the content in case of server meltdown. 619 * This can only work if the loop device is used read-only, and if the 620 * new backing store is the same size and type as the old backing store. 621 */ 622static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, 623 unsigned int arg) 624{ 625 struct file *file, *old_file; 626 struct inode *inode; 627 int error; 628 629 error = -ENXIO; 630 if (lo->lo_state != Lo_bound) 631 goto out; 632 633 /* the loop device has to be read-only */ 634 error = -EINVAL; 635 if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) 636 goto out; 637 638 error = -EBADF; 639 file = fget(arg); 640 if (!file) 641 goto out; 642 643 inode = file->f_mapping->host; 644 old_file = lo->lo_backing_file; 645 646 error = -EINVAL; 647 648 if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) 649 goto out_putf; 650 651 /* size of the new backing store needs to be the same */ 652 if (get_loop_size(lo, file) != get_loop_size(lo, old_file)) 653 goto out_putf; 654 655 /* and ... switch */ 656 error = loop_switch(lo, file); 657 if (error) 658 goto out_putf; 659 660 fput(old_file); 661 if (lo->lo_flags & LO_FLAGS_PARTSCAN) 662 ioctl_by_bdev(bdev, BLKRRPART, 0); 663 return 0; 664 665 out_putf: 666 fput(file); 667 out: 668 return error; 669} 670 671static inline int is_loop_device(struct file *file) 672{ 673 struct inode *i = file->f_mapping->host; 674 675 return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR; 676} 677 678/* loop sysfs attributes */ 679 680static ssize_t loop_attr_show(struct device *dev, char *page, 681 ssize_t (*callback)(struct loop_device *, char *)) 682{ 683 struct gendisk *disk = dev_to_disk(dev); 684 struct loop_device *lo = disk->private_data; 685 686 return callback(lo, page); 687} 688 689#define LOOP_ATTR_RO(_name) \ 690static ssize_t loop_attr_##_name##_show(struct loop_device *, char *); \ 691static ssize_t loop_attr_do_show_##_name(struct device *d, \ 692 struct device_attribute *attr, char *b) \ 693{ \ 694 return loop_attr_show(d, b, loop_attr_##_name##_show); \ 695} \ 696static struct device_attribute loop_attr_##_name = \ 697 __ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL); 698 699static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf) 700{ 701 ssize_t ret; 702 char *p = NULL; 703 704 spin_lock_irq(&lo->lo_lock); 705 if (lo->lo_backing_file) 706 p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1); 707 spin_unlock_irq(&lo->lo_lock); 708 709 if (IS_ERR_OR_NULL(p)) 710 ret = PTR_ERR(p); 711 else { 712 ret = strlen(p); 713 memmove(buf, p, ret); 714 buf[ret++] = '\n'; 715 buf[ret] = 0; 716 } 717 718 return ret; 719} 720 721static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf) 722{ 723 return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset); 724} 725 726static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf) 727{ 728 return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit); 729} 730 731static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf) 732{ 733 int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR); 734 735 return sprintf(buf, "%s\n", autoclear ? "1" : "0"); 736} 737 738static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf) 739{ 740 int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN); 741 742 return sprintf(buf, "%s\n", partscan ? "1" : "0"); 743} 744 745LOOP_ATTR_RO(backing_file); 746LOOP_ATTR_RO(offset); 747LOOP_ATTR_RO(sizelimit); 748LOOP_ATTR_RO(autoclear); 749LOOP_ATTR_RO(partscan); 750 751static struct attribute *loop_attrs[] = { 752 &loop_attr_backing_file.attr, 753 &loop_attr_offset.attr, 754 &loop_attr_sizelimit.attr, 755 &loop_attr_autoclear.attr, 756 &loop_attr_partscan.attr, 757 NULL, 758}; 759 760static struct attribute_group loop_attribute_group = { 761 .name = "loop", 762 .attrs= loop_attrs, 763}; 764 765static int loop_sysfs_init(struct loop_device *lo) 766{ 767 return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj, 768 &loop_attribute_group); 769} 770 771static void loop_sysfs_exit(struct loop_device *lo) 772{ 773 sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj, 774 &loop_attribute_group); 775} 776 777static void loop_config_discard(struct loop_device *lo) 778{ 779 struct file *file = lo->lo_backing_file; 780 struct inode *inode = file->f_mapping->host; 781 struct request_queue *q = lo->lo_queue; 782 783 /* 784 * We use punch hole to reclaim the free space used by the 785 * image a.k.a. discard. However we do support discard if 786 * encryption is enabled, because it may give an attacker 787 * useful information. 788 */ 789 if ((!file->f_op->fallocate) || 790 lo->lo_encrypt_key_size) { 791 q->limits.discard_granularity = 0; 792 q->limits.discard_alignment = 0; 793 q->limits.max_discard_sectors = 0; 794 q->limits.discard_zeroes_data = 0; 795 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); 796 return; 797 } 798 799 q->limits.discard_granularity = inode->i_sb->s_blocksize; 800 q->limits.discard_alignment = 0; 801 q->limits.max_discard_sectors = UINT_MAX >> 9; 802 q->limits.discard_zeroes_data = 1; 803 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 804} 805 806static int loop_set_fd(struct loop_device *lo, fmode_t mode, 807 struct block_device *bdev, unsigned int arg) 808{ 809 struct file *file, *f; 810 struct inode *inode; 811 struct address_space *mapping; 812 unsigned lo_blocksize; 813 int lo_flags = 0; 814 int error; 815 loff_t size; 816 817 /* This is safe, since we have a reference from open(). */ 818 __module_get(THIS_MODULE); 819 820 error = -EBADF; 821 file = fget(arg); 822 if (!file) 823 goto out; 824 825 error = -EBUSY; 826 if (lo->lo_state != Lo_unbound) 827 goto out_putf; 828 829 /* Avoid recursion */ 830 f = file; 831 while (is_loop_device(f)) { 832 struct loop_device *l; 833 834 if (f->f_mapping->host->i_bdev == bdev) 835 goto out_putf; 836 837 l = f->f_mapping->host->i_bdev->bd_disk->private_data; 838 if (l->lo_state == Lo_unbound) { 839 error = -EINVAL; 840 goto out_putf; 841 } 842 f = l->lo_backing_file; 843 } 844 845 mapping = file->f_mapping; 846 inode = mapping->host; 847 848 error = -EINVAL; 849 if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) 850 goto out_putf; 851 852 if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) || 853 !file->f_op->write) 854 lo_flags |= LO_FLAGS_READ_ONLY; 855 856 lo_blocksize = S_ISBLK(inode->i_mode) ? 857 inode->i_bdev->bd_block_size : PAGE_SIZE; 858 859 error = -EFBIG; 860 size = get_loop_size(lo, file); 861 if ((loff_t)(sector_t)size != size) 862 goto out_putf; 863 864 error = 0; 865 866 set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); 867 868 lo->lo_blocksize = lo_blocksize; 869 lo->lo_device = bdev; 870 lo->lo_flags = lo_flags; 871 lo->lo_backing_file = file; 872 lo->transfer = transfer_none; 873 lo->ioctl = NULL; 874 lo->lo_sizelimit = 0; 875 lo->old_gfp_mask = mapping_gfp_mask(mapping); 876 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); 877 878 bio_list_init(&lo->lo_bio_list); 879 880 /* 881 * set queue make_request_fn, and add limits based on lower level 882 * device 883 */ 884 blk_queue_make_request(lo->lo_queue, loop_make_request); 885 lo->lo_queue->queuedata = lo; 886 887 if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) 888 blk_queue_flush(lo->lo_queue, REQ_FLUSH); 889 890 set_capacity(lo->lo_disk, size); 891 bd_set_size(bdev, size << 9); 892 loop_sysfs_init(lo); 893 /* let user-space know about the new size */ 894 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 895 896 set_blocksize(bdev, lo_blocksize); 897 898 lo->lo_thread = kthread_create(loop_thread, lo, "loop%d", 899 lo->lo_number); 900 if (IS_ERR(lo->lo_thread)) { 901 error = PTR_ERR(lo->lo_thread); 902 goto out_clr; 903 } 904 lo->lo_state = Lo_bound; 905 wake_up_process(lo->lo_thread); 906 if (part_shift) 907 lo->lo_flags |= LO_FLAGS_PARTSCAN; 908 if (lo->lo_flags & LO_FLAGS_PARTSCAN) 909 ioctl_by_bdev(bdev, BLKRRPART, 0); 910 return 0; 911 912out_clr: 913 loop_sysfs_exit(lo); 914 lo->lo_thread = NULL; 915 lo->lo_device = NULL; 916 lo->lo_backing_file = NULL; 917 lo->lo_flags = 0; 918 set_capacity(lo->lo_disk, 0); 919 invalidate_bdev(bdev); 920 bd_set_size(bdev, 0); 921 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 922 mapping_set_gfp_mask(mapping, lo->old_gfp_mask); 923 lo->lo_state = Lo_unbound; 924 out_putf: 925 fput(file); 926 out: 927 /* This is safe: open() is still holding a reference. */ 928 module_put(THIS_MODULE); 929 return error; 930} 931 932static int 933loop_release_xfer(struct loop_device *lo) 934{ 935 int err = 0; 936 struct loop_func_table *xfer = lo->lo_encryption; 937 938 if (xfer) { 939 if (xfer->release) 940 err = xfer->release(lo); 941 lo->transfer = NULL; 942 lo->lo_encryption = NULL; 943 module_put(xfer->owner); 944 } 945 return err; 946} 947 948static int 949loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, 950 const struct loop_info64 *i) 951{ 952 int err = 0; 953 954 if (xfer) { 955 struct module *owner = xfer->owner; 956 957 if (!try_module_get(owner)) 958 return -EINVAL; 959 if (xfer->init) 960 err = xfer->init(lo, i); 961 if (err) 962 module_put(owner); 963 else 964 lo->lo_encryption = xfer; 965 } 966 return err; 967} 968 969static int loop_clr_fd(struct loop_device *lo) 970{ 971 struct file *filp = lo->lo_backing_file; 972 gfp_t gfp = lo->old_gfp_mask; 973 struct block_device *bdev = lo->lo_device; 974 975 if (lo->lo_state != Lo_bound) 976 return -ENXIO; 977 978 if (lo->lo_refcnt > 1) /* we needed one fd for the ioctl */ 979 return -EBUSY; 980 981 if (filp == NULL) 982 return -EINVAL; 983 984 spin_lock_irq(&lo->lo_lock); 985 lo->lo_state = Lo_rundown; 986 spin_unlock_irq(&lo->lo_lock); 987 988 kthread_stop(lo->lo_thread); 989 990 spin_lock_irq(&lo->lo_lock); 991 lo->lo_backing_file = NULL; 992 spin_unlock_irq(&lo->lo_lock); 993 994 loop_release_xfer(lo); 995 lo->transfer = NULL; 996 lo->ioctl = NULL; 997 lo->lo_device = NULL; 998 lo->lo_encryption = NULL; 999 lo->lo_offset = 0; 1000 lo->lo_sizelimit = 0; 1001 lo->lo_encrypt_key_size = 0; 1002 lo->lo_thread = NULL; 1003 memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); 1004 memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); 1005 memset(lo->lo_file_name, 0, LO_NAME_SIZE); 1006 if (bdev) 1007 invalidate_bdev(bdev); 1008 set_capacity(lo->lo_disk, 0); 1009 loop_sysfs_exit(lo); 1010 if (bdev) { 1011 bd_set_size(bdev, 0); 1012 /* let user-space know about this change */ 1013 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 1014 } 1015 mapping_set_gfp_mask(filp->f_mapping, gfp); 1016 lo->lo_state = Lo_unbound; 1017 /* This is safe: open() is still holding a reference. */ 1018 module_put(THIS_MODULE); 1019 if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) 1020 ioctl_by_bdev(bdev, BLKRRPART, 0); 1021 lo->lo_flags = 0; 1022 if (!part_shift) 1023 lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; 1024 mutex_unlock(&lo->lo_ctl_mutex); 1025 /* 1026 * Need not hold lo_ctl_mutex to fput backing file. 1027 * Calling fput holding lo_ctl_mutex triggers a circular 1028 * lock dependency possibility warning as fput can take 1029 * bd_mutex which is usually taken before lo_ctl_mutex. 1030 */ 1031 fput(filp); 1032 return 0; 1033} 1034 1035static int 1036loop_set_status(struct loop_device *lo, const struct loop_info64 *info) 1037{ 1038 int err; 1039 struct loop_func_table *xfer; 1040 uid_t uid = current_uid(); 1041 1042 if (lo->lo_encrypt_key_size && 1043 lo->lo_key_owner != uid && 1044 !capable(CAP_SYS_ADMIN)) 1045 return -EPERM; 1046 if (lo->lo_state != Lo_bound) 1047 return -ENXIO; 1048 if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) 1049 return -EINVAL; 1050 1051 err = loop_release_xfer(lo); 1052 if (err) 1053 return err; 1054 1055 if (info->lo_encrypt_type) { 1056 unsigned int type = info->lo_encrypt_type; 1057 1058 if (type >= MAX_LO_CRYPT) 1059 return -EINVAL; 1060 xfer = xfer_funcs[type]; 1061 if (xfer == NULL) 1062 return -EINVAL; 1063 } else 1064 xfer = NULL; 1065 1066 err = loop_init_xfer(lo, xfer, info); 1067 if (err) 1068 return err; 1069 1070 if (lo->lo_offset != info->lo_offset || 1071 lo->lo_sizelimit != info->lo_sizelimit) { 1072 if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) 1073 return -EFBIG; 1074 } 1075 loop_config_discard(lo); 1076 1077 memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); 1078 memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); 1079 lo->lo_file_name[LO_NAME_SIZE-1] = 0; 1080 lo->lo_crypt_name[LO_NAME_SIZE-1] = 0; 1081 1082 if (!xfer) 1083 xfer = &none_funcs; 1084 lo->transfer = xfer->transfer; 1085 lo->ioctl = xfer->ioctl; 1086 1087 if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) != 1088 (info->lo_flags & LO_FLAGS_AUTOCLEAR)) 1089 lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; 1090 1091 if ((info->lo_flags & LO_FLAGS_PARTSCAN) && 1092 !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { 1093 lo->lo_flags |= LO_FLAGS_PARTSCAN; 1094 lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; 1095 ioctl_by_bdev(lo->lo_device, BLKRRPART, 0); 1096 } 1097 1098 lo->lo_encrypt_key_size = info->lo_encrypt_key_size; 1099 lo->lo_init[0] = info->lo_init[0]; 1100 lo->lo_init[1] = info->lo_init[1]; 1101 if (info->lo_encrypt_key_size) { 1102 memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, 1103 info->lo_encrypt_key_size); 1104 lo->lo_key_owner = uid; 1105 } 1106 1107 return 0; 1108} 1109 1110static int 1111loop_get_status(struct loop_device *lo, struct loop_info64 *info) 1112{ 1113 struct file *file = lo->lo_backing_file; 1114 struct kstat stat; 1115 int error; 1116 1117 if (lo->lo_state != Lo_bound) 1118 return -ENXIO; 1119 error = vfs_getattr(file->f_path.mnt, file->f_path.dentry, &stat); 1120 if (error) 1121 return error; 1122 memset(info, 0, sizeof(*info)); 1123 info->lo_number = lo->lo_number; 1124 info->lo_device = huge_encode_dev(stat.dev); 1125 info->lo_inode = stat.ino; 1126 info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev); 1127 info->lo_offset = lo->lo_offset; 1128 info->lo_sizelimit = lo->lo_sizelimit; 1129 info->lo_flags = lo->lo_flags; 1130 memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE); 1131 memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE); 1132 info->lo_encrypt_type = 1133 lo->lo_encryption ? lo->lo_encryption->number : 0; 1134 if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) { 1135 info->lo_encrypt_key_size = lo->lo_encrypt_key_size; 1136 memcpy(info->lo_encrypt_key, lo->lo_encrypt_key, 1137 lo->lo_encrypt_key_size); 1138 } 1139 return 0; 1140} 1141 1142static void 1143loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64) 1144{ 1145 memset(info64, 0, sizeof(*info64)); 1146 info64->lo_number = info->lo_number; 1147 info64->lo_device = info->lo_device; 1148 info64->lo_inode = info->lo_inode; 1149 info64->lo_rdevice = info->lo_rdevice; 1150 info64->lo_offset = info->lo_offset; 1151 info64->lo_sizelimit = 0; 1152 info64->lo_encrypt_type = info->lo_encrypt_type; 1153 info64->lo_encrypt_key_size = info->lo_encrypt_key_size; 1154 info64->lo_flags = info->lo_flags; 1155 info64->lo_init[0] = info->lo_init[0]; 1156 info64->lo_init[1] = info->lo_init[1]; 1157 if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI) 1158 memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE); 1159 else 1160 memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE); 1161 memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE); 1162} 1163 1164static int 1165loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info) 1166{ 1167 memset(info, 0, sizeof(*info)); 1168 info->lo_number = info64->lo_number; 1169 info->lo_device = info64->lo_device; 1170 info->lo_inode = info64->lo_inode; 1171 info->lo_rdevice = info64->lo_rdevice; 1172 info->lo_offset = info64->lo_offset; 1173 info->lo_encrypt_type = info64->lo_encrypt_type; 1174 info->lo_encrypt_key_size = info64->lo_encrypt_key_size; 1175 info->lo_flags = info64->lo_flags; 1176 info->lo_init[0] = info64->lo_init[0]; 1177 info->lo_init[1] = info64->lo_init[1]; 1178 if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI) 1179 memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE); 1180 else 1181 memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE); 1182 memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); 1183 1184 /* error in case values were truncated */ 1185 if (info->lo_device != info64->lo_device || 1186 info->lo_rdevice != info64->lo_rdevice || 1187 info->lo_inode != info64->lo_inode || 1188 info->lo_offset != info64->lo_offset) 1189 return -EOVERFLOW; 1190 1191 return 0; 1192} 1193 1194static int 1195loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg) 1196{ 1197 struct loop_info info; 1198 struct loop_info64 info64; 1199 1200 if (copy_from_user(&info, arg, sizeof (struct loop_info))) 1201 return -EFAULT; 1202 loop_info64_from_old(&info, &info64); 1203 return loop_set_status(lo, &info64); 1204} 1205 1206static int 1207loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg) 1208{ 1209 struct loop_info64 info64; 1210 1211 if (copy_from_user(&info64, arg, sizeof (struct loop_info64))) 1212 return -EFAULT; 1213 return loop_set_status(lo, &info64); 1214} 1215 1216static int 1217loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) { 1218 struct loop_info info; 1219 struct loop_info64 info64; 1220 int err = 0; 1221 1222 if (!arg) 1223 err = -EINVAL; 1224 if (!err) 1225 err = loop_get_status(lo, &info64); 1226 if (!err) 1227 err = loop_info64_to_old(&info64, &info); 1228 if (!err && copy_to_user(arg, &info, sizeof(info))) 1229 err = -EFAULT; 1230 1231 return err; 1232} 1233 1234static int 1235loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { 1236 struct loop_info64 info64; 1237 int err = 0; 1238 1239 if (!arg) 1240 err = -EINVAL; 1241 if (!err) 1242 err = loop_get_status(lo, &info64); 1243 if (!err && copy_to_user(arg, &info64, sizeof(info64))) 1244 err = -EFAULT; 1245 1246 return err; 1247} 1248 1249static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev) 1250{ 1251 int err; 1252 sector_t sec; 1253 loff_t sz; 1254 1255 err = -ENXIO; 1256 if (unlikely(lo->lo_state != Lo_bound)) 1257 goto out; 1258 err = figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); 1259 if (unlikely(err)) 1260 goto out; 1261 sec = get_capacity(lo->lo_disk); 1262 /* the width of sector_t may be narrow for bit-shift */ 1263 sz = sec; 1264 sz <<= 9; 1265 mutex_lock(&bdev->bd_mutex); 1266 bd_set_size(bdev, sz); 1267 /* let user-space know about the new size */ 1268 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 1269 mutex_unlock(&bdev->bd_mutex); 1270 1271 out: 1272 return err; 1273} 1274 1275static int lo_ioctl(struct block_device *bdev, fmode_t mode, 1276 unsigned int cmd, unsigned long arg) 1277{ 1278 struct loop_device *lo = bdev->bd_disk->private_data; 1279 int err; 1280 1281 mutex_lock_nested(&lo->lo_ctl_mutex, 1); 1282 switch (cmd) { 1283 case LOOP_SET_FD: 1284 err = loop_set_fd(lo, mode, bdev, arg); 1285 break; 1286 case LOOP_CHANGE_FD: 1287 err = loop_change_fd(lo, bdev, arg); 1288 break; 1289 case LOOP_CLR_FD: 1290 /* loop_clr_fd would have unlocked lo_ctl_mutex on success */ 1291 err = loop_clr_fd(lo); 1292 if (!err) 1293 goto out_unlocked; 1294 break; 1295 case LOOP_SET_STATUS: 1296 err = -EPERM; 1297 if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) 1298 err = loop_set_status_old(lo, 1299 (struct loop_info __user *)arg); 1300 break; 1301 case LOOP_GET_STATUS: 1302 err = loop_get_status_old(lo, (struct loop_info __user *) arg); 1303 break; 1304 case LOOP_SET_STATUS64: 1305 err = -EPERM; 1306 if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) 1307 err = loop_set_status64(lo, 1308 (struct loop_info64 __user *) arg); 1309 break; 1310 case LOOP_GET_STATUS64: 1311 err = loop_get_status64(lo, (struct loop_info64 __user *) arg); 1312 break; 1313 case LOOP_SET_CAPACITY: 1314 err = -EPERM; 1315 if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) 1316 err = loop_set_capacity(lo, bdev); 1317 break; 1318 default: 1319 err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; 1320 } 1321 mutex_unlock(&lo->lo_ctl_mutex); 1322 1323out_unlocked: 1324 return err; 1325} 1326 1327#ifdef CONFIG_COMPAT 1328struct compat_loop_info { 1329 compat_int_t lo_number; /* ioctl r/o */ 1330 compat_dev_t lo_device; /* ioctl r/o */ 1331 compat_ulong_t lo_inode; /* ioctl r/o */ 1332 compat_dev_t lo_rdevice; /* ioctl r/o */ 1333 compat_int_t lo_offset; 1334 compat_int_t lo_encrypt_type; 1335 compat_int_t lo_encrypt_key_size; /* ioctl w/o */ 1336 compat_int_t lo_flags; /* ioctl r/o */ 1337 char lo_name[LO_NAME_SIZE]; 1338 unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ 1339 compat_ulong_t lo_init[2]; 1340 char reserved[4]; 1341}; 1342 1343/* 1344 * Transfer 32-bit compatibility structure in userspace to 64-bit loop info 1345 * - noinlined to reduce stack space usage in main part of driver 1346 */ 1347static noinline int 1348loop_info64_from_compat(const struct compat_loop_info __user *arg, 1349 struct loop_info64 *info64) 1350{ 1351 struct compat_loop_info info; 1352 1353 if (copy_from_user(&info, arg, sizeof(info))) 1354 return -EFAULT; 1355 1356 memset(info64, 0, sizeof(*info64)); 1357 info64->lo_number = info.lo_number; 1358 info64->lo_device = info.lo_device; 1359 info64->lo_inode = info.lo_inode; 1360 info64->lo_rdevice = info.lo_rdevice; 1361 info64->lo_offset = info.lo_offset; 1362 info64->lo_sizelimit = 0; 1363 info64->lo_encrypt_type = info.lo_encrypt_type; 1364 info64->lo_encrypt_key_size = info.lo_encrypt_key_size; 1365 info64->lo_flags = info.lo_flags; 1366 info64->lo_init[0] = info.lo_init[0]; 1367 info64->lo_init[1] = info.lo_init[1]; 1368 if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI) 1369 memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE); 1370 else 1371 memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE); 1372 memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE); 1373 return 0; 1374} 1375 1376/* 1377 * Transfer 64-bit loop info to 32-bit compatibility structure in userspace 1378 * - noinlined to reduce stack space usage in main part of driver 1379 */ 1380static noinline int 1381loop_info64_to_compat(const struct loop_info64 *info64, 1382 struct compat_loop_info __user *arg) 1383{ 1384 struct compat_loop_info info; 1385 1386 memset(&info, 0, sizeof(info)); 1387 info.lo_number = info64->lo_number; 1388 info.lo_device = info64->lo_device; 1389 info.lo_inode = info64->lo_inode; 1390 info.lo_rdevice = info64->lo_rdevice; 1391 info.lo_offset = info64->lo_offset; 1392 info.lo_encrypt_type = info64->lo_encrypt_type; 1393 info.lo_encrypt_key_size = info64->lo_encrypt_key_size; 1394 info.lo_flags = info64->lo_flags; 1395 info.lo_init[0] = info64->lo_init[0]; 1396 info.lo_init[1] = info64->lo_init[1]; 1397 if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI) 1398 memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE); 1399 else 1400 memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE); 1401 memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); 1402 1403 /* error in case values were truncated */ 1404 if (info.lo_device != info64->lo_device || 1405 info.lo_rdevice != info64->lo_rdevice || 1406 info.lo_inode != info64->lo_inode || 1407 info.lo_offset != info64->lo_offset || 1408 info.lo_init[0] != info64->lo_init[0] || 1409 info.lo_init[1] != info64->lo_init[1]) 1410 return -EOVERFLOW; 1411 1412 if (copy_to_user(arg, &info, sizeof(info))) 1413 return -EFAULT; 1414 return 0; 1415} 1416 1417static int 1418loop_set_status_compat(struct loop_device *lo, 1419 const struct compat_loop_info __user *arg) 1420{ 1421 struct loop_info64 info64; 1422 int ret; 1423 1424 ret = loop_info64_from_compat(arg, &info64); 1425 if (ret < 0) 1426 return ret; 1427 return loop_set_status(lo, &info64); 1428} 1429 1430static int 1431loop_get_status_compat(struct loop_device *lo, 1432 struct compat_loop_info __user *arg) 1433{ 1434 struct loop_info64 info64; 1435 int err = 0; 1436 1437 if (!arg) 1438 err = -EINVAL; 1439 if (!err) 1440 err = loop_get_status(lo, &info64); 1441 if (!err) 1442 err = loop_info64_to_compat(&info64, arg); 1443 return err; 1444} 1445 1446static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, 1447 unsigned int cmd, unsigned long arg) 1448{ 1449 struct loop_device *lo = bdev->bd_disk->private_data; 1450 int err; 1451 1452 switch(cmd) { 1453 case LOOP_SET_STATUS: 1454 mutex_lock(&lo->lo_ctl_mutex); 1455 err = loop_set_status_compat( 1456 lo, (const struct compat_loop_info __user *) arg); 1457 mutex_unlock(&lo->lo_ctl_mutex); 1458 break; 1459 case LOOP_GET_STATUS: 1460 mutex_lock(&lo->lo_ctl_mutex); 1461 err = loop_get_status_compat( 1462 lo, (struct compat_loop_info __user *) arg); 1463 mutex_unlock(&lo->lo_ctl_mutex); 1464 break; 1465 case LOOP_SET_CAPACITY: 1466 case LOOP_CLR_FD: 1467 case LOOP_GET_STATUS64: 1468 case LOOP_SET_STATUS64: 1469 arg = (unsigned long) compat_ptr(arg); 1470 case LOOP_SET_FD: 1471 case LOOP_CHANGE_FD: 1472 err = lo_ioctl(bdev, mode, cmd, arg); 1473 break; 1474 default: 1475 err = -ENOIOCTLCMD; 1476 break; 1477 } 1478 return err; 1479} 1480#endif 1481 1482static int lo_open(struct block_device *bdev, fmode_t mode) 1483{ 1484 struct loop_device *lo; 1485 int err = 0; 1486 1487 mutex_lock(&loop_index_mutex); 1488 lo = bdev->bd_disk->private_data; 1489 if (!lo) { 1490 err = -ENXIO; 1491 goto out; 1492 } 1493 1494 mutex_lock(&lo->lo_ctl_mutex); 1495 lo->lo_refcnt++; 1496 mutex_unlock(&lo->lo_ctl_mutex); 1497out: 1498 mutex_unlock(&loop_index_mutex); 1499 return err; 1500} 1501 1502static int lo_release(struct gendisk *disk, fmode_t mode) 1503{ 1504 struct loop_device *lo = disk->private_data; 1505 int err; 1506 1507 mutex_lock(&lo->lo_ctl_mutex); 1508 1509 if (--lo->lo_refcnt) 1510 goto out; 1511 1512 if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { 1513 /* 1514 * In autoclear mode, stop the loop thread 1515 * and remove configuration after last close. 1516 */ 1517 err = loop_clr_fd(lo); 1518 if (!err) 1519 goto out_unlocked; 1520 } else { 1521 /* 1522 * Otherwise keep thread (if running) and config, 1523 * but flush possible ongoing bios in thread. 1524 */ 1525 loop_flush(lo); 1526 } 1527 1528out: 1529 mutex_unlock(&lo->lo_ctl_mutex); 1530out_unlocked: 1531 return 0; 1532} 1533 1534static const struct block_device_operations lo_fops = { 1535 .owner = THIS_MODULE, 1536 .open = lo_open, 1537 .release = lo_release, 1538 .ioctl = lo_ioctl, 1539#ifdef CONFIG_COMPAT 1540 .compat_ioctl = lo_compat_ioctl, 1541#endif 1542}; 1543 1544/* 1545 * And now the modules code and kernel interface. 1546 */ 1547static int max_loop; 1548module_param(max_loop, int, S_IRUGO); 1549MODULE_PARM_DESC(max_loop, "Maximum number of loop devices"); 1550module_param(max_part, int, S_IRUGO); 1551MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device"); 1552MODULE_LICENSE("GPL"); 1553MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR); 1554 1555int loop_register_transfer(struct loop_func_table *funcs) 1556{ 1557 unsigned int n = funcs->number; 1558 1559 if (n >= MAX_LO_CRYPT || xfer_funcs[n]) 1560 return -EINVAL; 1561 xfer_funcs[n] = funcs; 1562 return 0; 1563} 1564 1565static int unregister_transfer_cb(int id, void *ptr, void *data) 1566{ 1567 struct loop_device *lo = ptr; 1568 struct loop_func_table *xfer = data; 1569 1570 mutex_lock(&lo->lo_ctl_mutex); 1571 if (lo->lo_encryption == xfer) 1572 loop_release_xfer(lo); 1573 mutex_unlock(&lo->lo_ctl_mutex); 1574 return 0; 1575} 1576 1577int loop_unregister_transfer(int number) 1578{ 1579 unsigned int n = number; 1580 struct loop_func_table *xfer; 1581 1582 if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL) 1583 return -EINVAL; 1584 1585 xfer_funcs[n] = NULL; 1586 idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer); 1587 return 0; 1588} 1589 1590EXPORT_SYMBOL(loop_register_transfer); 1591EXPORT_SYMBOL(loop_unregister_transfer); 1592 1593static int loop_add(struct loop_device **l, int i) 1594{ 1595 struct loop_device *lo; 1596 struct gendisk *disk; 1597 int err; 1598 1599 lo = kzalloc(sizeof(*lo), GFP_KERNEL); 1600 if (!lo) { 1601 err = -ENOMEM; 1602 goto out; 1603 } 1604 1605 err = idr_pre_get(&loop_index_idr, GFP_KERNEL); 1606 if (err < 0) 1607 goto out_free_dev; 1608 1609 if (i >= 0) { 1610 int m; 1611 1612 /* create specific i in the index */ 1613 err = idr_get_new_above(&loop_index_idr, lo, i, &m); 1614 if (err >= 0 && i != m) { 1615 idr_remove(&loop_index_idr, m); 1616 err = -EEXIST; 1617 } 1618 } else if (i == -1) { 1619 int m; 1620 1621 /* get next free nr */ 1622 err = idr_get_new(&loop_index_idr, lo, &m); 1623 if (err >= 0) 1624 i = m; 1625 } else { 1626 err = -EINVAL; 1627 } 1628 if (err < 0) 1629 goto out_free_dev; 1630 1631 lo->lo_queue = blk_alloc_queue(GFP_KERNEL); 1632 if (!lo->lo_queue) 1633 goto out_free_dev; 1634 1635 disk = lo->lo_disk = alloc_disk(1 << part_shift); 1636 if (!disk) 1637 goto out_free_queue; 1638 1639 /* 1640 * Disable partition scanning by default. The in-kernel partition 1641 * scanning can be requested individually per-device during its 1642 * setup. Userspace can always add and remove partitions from all 1643 * devices. The needed partition minors are allocated from the 1644 * extended minor space, the main loop device numbers will continue 1645 * to match the loop minors, regardless of the number of partitions 1646 * used. 1647 * 1648 * If max_part is given, partition scanning is globally enabled for 1649 * all loop devices. The minors for the main loop devices will be 1650 * multiples of max_part. 1651 * 1652 * Note: Global-for-all-devices, set-only-at-init, read-only module 1653 * parameteters like 'max_loop' and 'max_part' make things needlessly 1654 * complicated, are too static, inflexible and may surprise 1655 * userspace tools. Parameters like this in general should be avoided. 1656 */ 1657 if (!part_shift) 1658 disk->flags |= GENHD_FL_NO_PART_SCAN; 1659 disk->flags |= GENHD_FL_EXT_DEVT; 1660 mutex_init(&lo->lo_ctl_mutex); 1661 lo->lo_number = i; 1662 lo->lo_thread = NULL; 1663 init_waitqueue_head(&lo->lo_event); 1664 spin_lock_init(&lo->lo_lock); 1665 disk->major = LOOP_MAJOR; 1666 disk->first_minor = i << part_shift; 1667 disk->fops = &lo_fops; 1668 disk->private_data = lo; 1669 disk->queue = lo->lo_queue; 1670 sprintf(disk->disk_name, "loop%d", i); 1671 add_disk(disk); 1672 *l = lo; 1673 return lo->lo_number; 1674 1675out_free_queue: 1676 blk_cleanup_queue(lo->lo_queue); 1677out_free_dev: 1678 kfree(lo); 1679out: 1680 return err; 1681} 1682 1683static void loop_remove(struct loop_device *lo) 1684{ 1685 del_gendisk(lo->lo_disk); 1686 blk_cleanup_queue(lo->lo_queue); 1687 put_disk(lo->lo_disk); 1688 kfree(lo); 1689} 1690 1691static int find_free_cb(int id, void *ptr, void *data) 1692{ 1693 struct loop_device *lo = ptr; 1694 struct loop_device **l = data; 1695 1696 if (lo->lo_state == Lo_unbound) { 1697 *l = lo; 1698 return 1; 1699 } 1700 return 0; 1701} 1702 1703static int loop_lookup(struct loop_device **l, int i) 1704{ 1705 struct loop_device *lo; 1706 int ret = -ENODEV; 1707 1708 if (i < 0) { 1709 int err; 1710 1711 err = idr_for_each(&loop_index_idr, &find_free_cb, &lo); 1712 if (err == 1) { 1713 *l = lo; 1714 ret = lo->lo_number; 1715 } 1716 goto out; 1717 } 1718 1719 /* lookup and return a specific i */ 1720 lo = idr_find(&loop_index_idr, i); 1721 if (lo) { 1722 *l = lo; 1723 ret = lo->lo_number; 1724 } 1725out: 1726 return ret; 1727} 1728 1729static struct kobject *loop_probe(dev_t dev, int *part, void *data) 1730{ 1731 struct loop_device *lo; 1732 struct kobject *kobj; 1733 int err; 1734 1735 mutex_lock(&loop_index_mutex); 1736 err = loop_lookup(&lo, MINOR(dev) >> part_shift); 1737 if (err < 0) 1738 err = loop_add(&lo, MINOR(dev) >> part_shift); 1739 if (err < 0) 1740 kobj = ERR_PTR(err); 1741 else 1742 kobj = get_disk(lo->lo_disk); 1743 mutex_unlock(&loop_index_mutex); 1744 1745 *part = 0; 1746 return kobj; 1747} 1748 1749static long loop_control_ioctl(struct file *file, unsigned int cmd, 1750 unsigned long parm) 1751{ 1752 struct loop_device *lo; 1753 int ret = -ENOSYS; 1754 1755 mutex_lock(&loop_index_mutex); 1756 switch (cmd) { 1757 case LOOP_CTL_ADD: 1758 ret = loop_lookup(&lo, parm); 1759 if (ret >= 0) { 1760 ret = -EEXIST; 1761 break; 1762 } 1763 ret = loop_add(&lo, parm); 1764 break; 1765 case LOOP_CTL_REMOVE: 1766 ret = loop_lookup(&lo, parm); 1767 if (ret < 0) 1768 break; 1769 mutex_lock(&lo->lo_ctl_mutex); 1770 if (lo->lo_state != Lo_unbound) { 1771 ret = -EBUSY; 1772 mutex_unlock(&lo->lo_ctl_mutex); 1773 break; 1774 } 1775 if (lo->lo_refcnt > 0) { 1776 ret = -EBUSY; 1777 mutex_unlock(&lo->lo_ctl_mutex); 1778 break; 1779 } 1780 lo->lo_disk->private_data = NULL; 1781 mutex_unlock(&lo->lo_ctl_mutex); 1782 idr_remove(&loop_index_idr, lo->lo_number); 1783 loop_remove(lo); 1784 break; 1785 case LOOP_CTL_GET_FREE: 1786 ret = loop_lookup(&lo, -1); 1787 if (ret >= 0) 1788 break; 1789 ret = loop_add(&lo, -1); 1790 } 1791 mutex_unlock(&loop_index_mutex); 1792 1793 return ret; 1794} 1795 1796static const struct file_operations loop_ctl_fops = { 1797 .open = nonseekable_open, 1798 .unlocked_ioctl = loop_control_ioctl, 1799 .compat_ioctl = loop_control_ioctl, 1800 .owner = THIS_MODULE, 1801 .llseek = noop_llseek, 1802}; 1803 1804static struct miscdevice loop_misc = { 1805 .minor = LOOP_CTRL_MINOR, 1806 .name = "loop-control", 1807 .fops = &loop_ctl_fops, 1808}; 1809 1810MODULE_ALIAS_MISCDEV(LOOP_CTRL_MINOR); 1811MODULE_ALIAS("devname:loop-control"); 1812 1813static int __init loop_init(void) 1814{ 1815 int i, nr; 1816 unsigned long range; 1817 struct loop_device *lo; 1818 int err; 1819 1820 err = misc_register(&loop_misc); 1821 if (err < 0) 1822 return err; 1823 1824 part_shift = 0; 1825 if (max_part > 0) { 1826 part_shift = fls(max_part); 1827 1828 /* 1829 * Adjust max_part according to part_shift as it is exported 1830 * to user space so that user can decide correct minor number 1831 * if [s]he want to create more devices. 1832 * 1833 * Note that -1 is required because partition 0 is reserved 1834 * for the whole disk. 1835 */ 1836 max_part = (1UL << part_shift) - 1; 1837 } 1838 1839 if ((1UL << part_shift) > DISK_MAX_PARTS) 1840 return -EINVAL; 1841 1842 if (max_loop > 1UL << (MINORBITS - part_shift)) 1843 return -EINVAL; 1844 1845 /* 1846 * If max_loop is specified, create that many devices upfront. 1847 * This also becomes a hard limit. If max_loop is not specified, 1848 * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module 1849 * init time. Loop devices can be requested on-demand with the 1850 * /dev/loop-control interface, or be instantiated by accessing 1851 * a 'dead' device node. 1852 */ 1853 if (max_loop) { 1854 nr = max_loop; 1855 range = max_loop << part_shift; 1856 } else { 1857 nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT; 1858 range = 1UL << MINORBITS; 1859 } 1860 1861 if (register_blkdev(LOOP_MAJOR, "loop")) 1862 return -EIO; 1863 1864 blk_register_region(MKDEV(LOOP_MAJOR, 0), range, 1865 THIS_MODULE, loop_probe, NULL, NULL); 1866 1867 /* pre-create number of devices given by config or max_loop */ 1868 mutex_lock(&loop_index_mutex); 1869 for (i = 0; i < nr; i++) 1870 loop_add(&lo, i); 1871 mutex_unlock(&loop_index_mutex); 1872 1873 printk(KERN_INFO "loop: module loaded\n"); 1874 return 0; 1875} 1876 1877static int loop_exit_cb(int id, void *ptr, void *data) 1878{ 1879 struct loop_device *lo = ptr; 1880 1881 loop_remove(lo); 1882 return 0; 1883} 1884 1885static void __exit loop_exit(void) 1886{ 1887 unsigned long range; 1888 1889 range = max_loop ? max_loop << part_shift : 1UL << MINORBITS; 1890 1891 idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); 1892 idr_remove_all(&loop_index_idr); 1893 idr_destroy(&loop_index_idr); 1894 1895 blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); 1896 unregister_blkdev(LOOP_MAJOR, "loop"); 1897 1898 misc_deregister(&loop_misc); 1899} 1900 1901module_init(loop_init); 1902module_exit(loop_exit); 1903 1904#ifndef MODULE 1905static int __init max_loop_setup(char *str) 1906{ 1907 max_loop = simple_strtol(str, NULL, 0); 1908 return 1; 1909} 1910 1911__setup("max_loop=", max_loop_setup); 1912#endif