Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v2.6.24 536 lines 15 kB view raw
1/* 2 * ramdisk.c - Multiple RAM disk driver - gzip-loading version - v. 0.8 beta. 3 * 4 * (C) Chad Page, Theodore Ts'o, et. al, 1995. 5 * 6 * This RAM disk is designed to have filesystems created on it and mounted 7 * just like a regular floppy disk. 8 * 9 * It also does something suggested by Linus: use the buffer cache as the 10 * RAM disk data. This makes it possible to dynamically allocate the RAM disk 11 * buffer - with some consequences I have to deal with as I write this. 12 * 13 * This code is based on the original ramdisk.c, written mostly by 14 * Theodore Ts'o (TYT) in 1991. The code was largely rewritten by 15 * Chad Page to use the buffer cache to store the RAM disk data in 16 * 1995; Theodore then took over the driver again, and cleaned it up 17 * for inclusion in the mainline kernel. 18 * 19 * The original CRAMDISK code was written by Richard Lyons, and 20 * adapted by Chad Page to use the new RAM disk interface. Theodore 21 * Ts'o rewrote it so that both the compressed RAM disk loader and the 22 * kernel decompressor uses the same inflate.c codebase. The RAM disk 23 * loader now also loads into a dynamic (buffer cache based) RAM disk, 24 * not the old static RAM disk. Support for the old static RAM disk has 25 * been completely removed. 26 * 27 * Loadable module support added by Tom Dyas. 28 * 29 * Further cleanups by Chad Page (page0588@sundance.sjsu.edu): 30 * Cosmetic changes in #ifdef MODULE, code movement, etc. 31 * When the RAM disk module is removed, free the protected buffers 32 * Default RAM disk size changed to 2.88 MB 33 * 34 * Added initrd: Werner Almesberger & Hans Lermen, Feb '96 35 * 36 * 4/25/96 : Made RAM disk size a parameter (default is now 4 MB) 37 * - Chad Page 38 * 39 * Add support for fs images split across >1 disk, Paul Gortmaker, Mar '98 40 * 41 * Make block size and block size shift for RAM disks a global macro 42 * and set blk_size for -ENOSPC, Werner Fink <werner@suse.de>, Apr '99 43 */ 44 45#include <linux/string.h> 46#include <linux/slab.h> 47#include <asm/atomic.h> 48#include <linux/bio.h> 49#include <linux/module.h> 50#include <linux/moduleparam.h> 51#include <linux/init.h> 52#include <linux/pagemap.h> 53#include <linux/blkdev.h> 54#include <linux/genhd.h> 55#include <linux/buffer_head.h> /* for invalidate_bdev() */ 56#include <linux/backing-dev.h> 57#include <linux/blkpg.h> 58#include <linux/writeback.h> 59 60#include <asm/uaccess.h> 61 62/* Various static variables go here. Most are used only in the RAM disk code. 63 */ 64 65static struct gendisk *rd_disks[CONFIG_BLK_DEV_RAM_COUNT]; 66static struct block_device *rd_bdev[CONFIG_BLK_DEV_RAM_COUNT];/* Protected device data */ 67static struct request_queue *rd_queue[CONFIG_BLK_DEV_RAM_COUNT]; 68 69/* 70 * Parameters for the boot-loading of the RAM disk. These are set by 71 * init/main.c (from arguments to the kernel command line) or from the 72 * architecture-specific setup routine (from the stored boot sector 73 * information). 74 */ 75int rd_size = CONFIG_BLK_DEV_RAM_SIZE; /* Size of the RAM disks */ 76/* 77 * It would be very desirable to have a soft-blocksize (that in the case 78 * of the ramdisk driver is also the hardblocksize ;) of PAGE_SIZE because 79 * doing that we'll achieve a far better MM footprint. Using a rd_blocksize of 80 * BLOCK_SIZE in the worst case we'll make PAGE_SIZE/BLOCK_SIZE buffer-pages 81 * unfreeable. With a rd_blocksize of PAGE_SIZE instead we are sure that only 82 * 1 page will be protected. Depending on the size of the ramdisk you 83 * may want to change the ramdisk blocksize to achieve a better or worse MM 84 * behaviour. The default is still BLOCK_SIZE (needed by rd_load_image that 85 * supposes the filesystem in the image uses a BLOCK_SIZE blocksize). 86 */ 87static int rd_blocksize = CONFIG_BLK_DEV_RAM_BLOCKSIZE; 88 89/* 90 * Copyright (C) 2000 Linus Torvalds. 91 * 2000 Transmeta Corp. 92 * aops copied from ramfs. 93 */ 94 95/* 96 * If a ramdisk page has buffers, some may be uptodate and some may be not. 97 * To bring the page uptodate we zero out the non-uptodate buffers. The 98 * page must be locked. 99 */ 100static void make_page_uptodate(struct page *page) 101{ 102 if (page_has_buffers(page)) { 103 struct buffer_head *bh = page_buffers(page); 104 struct buffer_head *head = bh; 105 106 do { 107 if (!buffer_uptodate(bh)) { 108 memset(bh->b_data, 0, bh->b_size); 109 /* 110 * akpm: I'm totally undecided about this. The 111 * buffer has just been magically brought "up to 112 * date", but nobody should want to be reading 113 * it anyway, because it hasn't been used for 114 * anything yet. It is still in a "not read 115 * from disk yet" state. 116 * 117 * But non-uptodate buffers against an uptodate 118 * page are against the rules. So do it anyway. 119 */ 120 set_buffer_uptodate(bh); 121 } 122 } while ((bh = bh->b_this_page) != head); 123 } else { 124 memset(page_address(page), 0, PAGE_CACHE_SIZE); 125 } 126 flush_dcache_page(page); 127 SetPageUptodate(page); 128} 129 130static int ramdisk_readpage(struct file *file, struct page *page) 131{ 132 if (!PageUptodate(page)) 133 make_page_uptodate(page); 134 unlock_page(page); 135 return 0; 136} 137 138static int ramdisk_prepare_write(struct file *file, struct page *page, 139 unsigned offset, unsigned to) 140{ 141 if (!PageUptodate(page)) 142 make_page_uptodate(page); 143 return 0; 144} 145 146static int ramdisk_commit_write(struct file *file, struct page *page, 147 unsigned offset, unsigned to) 148{ 149 set_page_dirty(page); 150 return 0; 151} 152 153/* 154 * ->writepage to the blockdev's mapping has to redirty the page so that the 155 * VM doesn't go and steal it. We return AOP_WRITEPAGE_ACTIVATE so that the VM 156 * won't try to (pointlessly) write the page again for a while. 157 * 158 * Really, these pages should not be on the LRU at all. 159 */ 160static int ramdisk_writepage(struct page *page, struct writeback_control *wbc) 161{ 162 if (!PageUptodate(page)) 163 make_page_uptodate(page); 164 SetPageDirty(page); 165 if (wbc->for_reclaim) 166 return AOP_WRITEPAGE_ACTIVATE; 167 unlock_page(page); 168 return 0; 169} 170 171/* 172 * This is a little speedup thing: short-circuit attempts to write back the 173 * ramdisk blockdev inode to its non-existent backing store. 174 */ 175static int ramdisk_writepages(struct address_space *mapping, 176 struct writeback_control *wbc) 177{ 178 return 0; 179} 180 181/* 182 * ramdisk blockdev pages have their own ->set_page_dirty() because we don't 183 * want them to contribute to dirty memory accounting. 184 */ 185static int ramdisk_set_page_dirty(struct page *page) 186{ 187 if (!TestSetPageDirty(page)) 188 return 1; 189 return 0; 190} 191 192/* 193 * releasepage is called by pagevec_strip/try_to_release_page if 194 * buffers_heads_over_limit is true. Without a releasepage function 195 * try_to_free_buffers is called instead. That can unset the dirty 196 * bit of our ram disk pages, which will be eventually freed, even 197 * if the page is still in use. 198 */ 199static int ramdisk_releasepage(struct page *page, gfp_t dummy) 200{ 201 return 0; 202} 203 204static const struct address_space_operations ramdisk_aops = { 205 .readpage = ramdisk_readpage, 206 .prepare_write = ramdisk_prepare_write, 207 .commit_write = ramdisk_commit_write, 208 .writepage = ramdisk_writepage, 209 .set_page_dirty = ramdisk_set_page_dirty, 210 .writepages = ramdisk_writepages, 211 .releasepage = ramdisk_releasepage, 212}; 213 214static int rd_blkdev_pagecache_IO(int rw, struct bio_vec *vec, sector_t sector, 215 struct address_space *mapping) 216{ 217 pgoff_t index = sector >> (PAGE_CACHE_SHIFT - 9); 218 unsigned int vec_offset = vec->bv_offset; 219 int offset = (sector << 9) & ~PAGE_CACHE_MASK; 220 int size = vec->bv_len; 221 int err = 0; 222 223 do { 224 int count; 225 struct page *page; 226 char *src; 227 char *dst; 228 229 count = PAGE_CACHE_SIZE - offset; 230 if (count > size) 231 count = size; 232 size -= count; 233 234 page = grab_cache_page(mapping, index); 235 if (!page) { 236 err = -ENOMEM; 237 goto out; 238 } 239 240 if (!PageUptodate(page)) 241 make_page_uptodate(page); 242 243 index++; 244 245 if (rw == READ) { 246 src = kmap_atomic(page, KM_USER0) + offset; 247 dst = kmap_atomic(vec->bv_page, KM_USER1) + vec_offset; 248 } else { 249 src = kmap_atomic(vec->bv_page, KM_USER0) + vec_offset; 250 dst = kmap_atomic(page, KM_USER1) + offset; 251 } 252 offset = 0; 253 vec_offset += count; 254 255 memcpy(dst, src, count); 256 257 kunmap_atomic(src, KM_USER0); 258 kunmap_atomic(dst, KM_USER1); 259 260 if (rw == READ) 261 flush_dcache_page(vec->bv_page); 262 else 263 set_page_dirty(page); 264 unlock_page(page); 265 put_page(page); 266 } while (size); 267 268 out: 269 return err; 270} 271 272/* 273 * Basically, my strategy here is to set up a buffer-head which can't be 274 * deleted, and make that my Ramdisk. If the request is outside of the 275 * allocated size, we must get rid of it... 276 * 277 * 19-JAN-1998 Richard Gooch <rgooch@atnf.csiro.au> Added devfs support 278 * 279 */ 280static int rd_make_request(struct request_queue *q, struct bio *bio) 281{ 282 struct block_device *bdev = bio->bi_bdev; 283 struct address_space * mapping = bdev->bd_inode->i_mapping; 284 sector_t sector = bio->bi_sector; 285 unsigned long len = bio->bi_size >> 9; 286 int rw = bio_data_dir(bio); 287 struct bio_vec *bvec; 288 int ret = 0, i; 289 290 if (sector + len > get_capacity(bdev->bd_disk)) 291 goto fail; 292 293 if (rw==READA) 294 rw=READ; 295 296 bio_for_each_segment(bvec, bio, i) { 297 ret |= rd_blkdev_pagecache_IO(rw, bvec, sector, mapping); 298 sector += bvec->bv_len >> 9; 299 } 300 if (ret) 301 goto fail; 302 303 bio_endio(bio, 0); 304 return 0; 305fail: 306 bio_io_error(bio); 307 return 0; 308} 309 310static int rd_ioctl(struct inode *inode, struct file *file, 311 unsigned int cmd, unsigned long arg) 312{ 313 int error; 314 struct block_device *bdev = inode->i_bdev; 315 316 if (cmd != BLKFLSBUF) 317 return -ENOTTY; 318 319 /* 320 * special: we want to release the ramdisk memory, it's not like with 321 * the other blockdevices where this ioctl only flushes away the buffer 322 * cache 323 */ 324 error = -EBUSY; 325 mutex_lock(&bdev->bd_mutex); 326 if (bdev->bd_openers <= 2) { 327 truncate_inode_pages(bdev->bd_inode->i_mapping, 0); 328 error = 0; 329 } 330 mutex_unlock(&bdev->bd_mutex); 331 return error; 332} 333 334/* 335 * This is the backing_dev_info for the blockdev inode itself. It doesn't need 336 * writeback and it does not contribute to dirty memory accounting. 337 */ 338static struct backing_dev_info rd_backing_dev_info = { 339 .ra_pages = 0, /* No readahead */ 340 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | BDI_CAP_MAP_COPY, 341 .unplug_io_fn = default_unplug_io_fn, 342}; 343 344/* 345 * This is the backing_dev_info for the files which live atop the ramdisk 346 * "device". These files do need writeback and they do contribute to dirty 347 * memory accounting. 348 */ 349static struct backing_dev_info rd_file_backing_dev_info = { 350 .ra_pages = 0, /* No readahead */ 351 .capabilities = BDI_CAP_MAP_COPY, /* Does contribute to dirty memory */ 352 .unplug_io_fn = default_unplug_io_fn, 353}; 354 355static int rd_open(struct inode *inode, struct file *filp) 356{ 357 unsigned unit = iminor(inode); 358 359 if (rd_bdev[unit] == NULL) { 360 struct block_device *bdev = inode->i_bdev; 361 struct address_space *mapping; 362 unsigned bsize; 363 gfp_t gfp_mask; 364 365 inode = igrab(bdev->bd_inode); 366 rd_bdev[unit] = bdev; 367 bdev->bd_openers++; 368 bsize = bdev_hardsect_size(bdev); 369 bdev->bd_block_size = bsize; 370 inode->i_blkbits = blksize_bits(bsize); 371 inode->i_size = get_capacity(bdev->bd_disk)<<9; 372 373 mapping = inode->i_mapping; 374 mapping->a_ops = &ramdisk_aops; 375 mapping->backing_dev_info = &rd_backing_dev_info; 376 bdev->bd_inode_backing_dev_info = &rd_file_backing_dev_info; 377 378 /* 379 * Deep badness. rd_blkdev_pagecache_IO() needs to allocate 380 * pagecache pages within a request_fn. We cannot recur back 381 * into the filesystem which is mounted atop the ramdisk, because 382 * that would deadlock on fs locks. And we really don't want 383 * to reenter rd_blkdev_pagecache_IO when we're already within 384 * that function. 385 * 386 * So we turn off __GFP_FS and __GFP_IO. 387 * 388 * And to give this thing a hope of working, turn on __GFP_HIGH. 389 * Hopefully, there's enough regular memory allocation going on 390 * for the page allocator emergency pools to keep the ramdisk 391 * driver happy. 392 */ 393 gfp_mask = mapping_gfp_mask(mapping); 394 gfp_mask &= ~(__GFP_FS|__GFP_IO); 395 gfp_mask |= __GFP_HIGH; 396 mapping_set_gfp_mask(mapping, gfp_mask); 397 } 398 399 return 0; 400} 401 402static struct block_device_operations rd_bd_op = { 403 .owner = THIS_MODULE, 404 .open = rd_open, 405 .ioctl = rd_ioctl, 406}; 407 408/* 409 * Before freeing the module, invalidate all of the protected buffers! 410 */ 411static void __exit rd_cleanup(void) 412{ 413 int i; 414 415 for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { 416 struct block_device *bdev = rd_bdev[i]; 417 rd_bdev[i] = NULL; 418 if (bdev) { 419 invalidate_bdev(bdev); 420 blkdev_put(bdev); 421 } 422 del_gendisk(rd_disks[i]); 423 put_disk(rd_disks[i]); 424 blk_cleanup_queue(rd_queue[i]); 425 } 426 unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); 427 428 bdi_destroy(&rd_file_backing_dev_info); 429 bdi_destroy(&rd_backing_dev_info); 430} 431 432/* 433 * This is the registration and initialization section of the RAM disk driver 434 */ 435static int __init rd_init(void) 436{ 437 int i; 438 int err; 439 440 err = bdi_init(&rd_backing_dev_info); 441 if (err) 442 goto out2; 443 444 err = bdi_init(&rd_file_backing_dev_info); 445 if (err) { 446 bdi_destroy(&rd_backing_dev_info); 447 goto out2; 448 } 449 450 err = -ENOMEM; 451 452 if (rd_blocksize > PAGE_SIZE || rd_blocksize < 512 || 453 (rd_blocksize & (rd_blocksize-1))) { 454 printk("RAMDISK: wrong blocksize %d, reverting to defaults\n", 455 rd_blocksize); 456 rd_blocksize = BLOCK_SIZE; 457 } 458 459 for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { 460 rd_disks[i] = alloc_disk(1); 461 if (!rd_disks[i]) 462 goto out; 463 464 rd_queue[i] = blk_alloc_queue(GFP_KERNEL); 465 if (!rd_queue[i]) { 466 put_disk(rd_disks[i]); 467 goto out; 468 } 469 } 470 471 if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) { 472 err = -EIO; 473 goto out; 474 } 475 476 for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { 477 struct gendisk *disk = rd_disks[i]; 478 479 blk_queue_make_request(rd_queue[i], &rd_make_request); 480 blk_queue_hardsect_size(rd_queue[i], rd_blocksize); 481 482 /* rd_size is given in kB */ 483 disk->major = RAMDISK_MAJOR; 484 disk->first_minor = i; 485 disk->fops = &rd_bd_op; 486 disk->queue = rd_queue[i]; 487 disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; 488 sprintf(disk->disk_name, "ram%d", i); 489 set_capacity(disk, rd_size * 2); 490 add_disk(rd_disks[i]); 491 } 492 493 /* rd_size is given in kB */ 494 printk("RAMDISK driver initialized: " 495 "%d RAM disks of %dK size %d blocksize\n", 496 CONFIG_BLK_DEV_RAM_COUNT, rd_size, rd_blocksize); 497 498 return 0; 499out: 500 while (i--) { 501 put_disk(rd_disks[i]); 502 blk_cleanup_queue(rd_queue[i]); 503 } 504 bdi_destroy(&rd_backing_dev_info); 505 bdi_destroy(&rd_file_backing_dev_info); 506out2: 507 return err; 508} 509 510module_init(rd_init); 511module_exit(rd_cleanup); 512 513/* options - nonmodular */ 514#ifndef MODULE 515static int __init ramdisk_size(char *str) 516{ 517 rd_size = simple_strtol(str,NULL,0); 518 return 1; 519} 520static int __init ramdisk_blocksize(char *str) 521{ 522 rd_blocksize = simple_strtol(str,NULL,0); 523 return 1; 524} 525__setup("ramdisk_size=", ramdisk_size); 526__setup("ramdisk_blocksize=", ramdisk_blocksize); 527#endif 528 529/* options - modular */ 530module_param(rd_size, int, 0); 531MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); 532module_param(rd_blocksize, int, 0); 533MODULE_PARM_DESC(rd_blocksize, "Blocksize of each RAM disk in bytes."); 534MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); 535 536MODULE_LICENSE("GPL");