Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/*
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
20#define MAJOR_NR UBD_MAJOR
21#define UBD_SHIFT 4
22
23#include "linux/module.h"
24#include "linux/blkdev.h"
25#include "linux/hdreg.h"
26#include "linux/init.h"
27#include "linux/cdrom.h"
28#include "linux/proc_fs.h"
29#include "linux/ctype.h"
30#include "linux/capability.h"
31#include "linux/mm.h"
32#include "linux/vmalloc.h"
33#include "linux/blkpg.h"
34#include "linux/genhd.h"
35#include "linux/spinlock.h"
36#include "linux/platform_device.h"
37#include "asm/segment.h"
38#include "asm/uaccess.h"
39#include "asm/irq.h"
40#include "asm/types.h"
41#include "asm/tlbflush.h"
42#include "user_util.h"
43#include "mem_user.h"
44#include "kern_util.h"
45#include "kern.h"
46#include "mconsole_kern.h"
47#include "init.h"
48#include "irq_user.h"
49#include "irq_kern.h"
50#include "ubd_user.h"
51#include "os.h"
52#include "mem.h"
53#include "mem_kern.h"
54#include "cow.h"
55
/* Operation requested of the I/O path. */
enum ubd_req { UBD_READ, UBD_WRITE };

/*
 * One I/O request, filled in by prepare_request() and then either passed to
 * do_io() directly or written to the I/O thread through thread_fd.
 */
struct io_thread_req {
	enum ubd_req op;
	/* [0]: the backing file fd when a COW backing file is configured,
	 * otherwise the same fd as [1]; [1]: the ubd device's own fd. */
	int fds[2];
	/* Data offsets matching fds[]: [0] is 0, [1] is the COW data offset. */
	unsigned long offsets[2];
	/* Byte offset of the request on the device (sector << 9). */
	unsigned long long offset;
	/* Length of the request in bytes. */
	unsigned long length;
	char *buffer;
	int sectorsize;
	/* One bit per sector of this request; set up by cowify_req(). */
	unsigned long sector_mask;
	/* File offset of the bitmap words to write back, or -1 for none. */
	unsigned long long cow_offset;
	/* The two bitmap words saved by cowify_bitmap(). */
	unsigned long bitmap_words[2];
	/* Non-zero when the operation failed. */
	int error;
};
71
/*
 * Prototypes for the file/COW helpers.  open_ubd_file(), create_cow_file()
 * and read_cow_bitmap() are defined further down in this file.
 */
extern int open_ubd_file(char *file, struct openflags *openflags, int shared,
			 char **backing_file_out, int *bitmap_offset_out,
			 unsigned long *bitmap_len_out, int *data_offset_out,
			 int *create_cow_out);
extern int create_cow_file(char *cow_file, char *backing_file,
			   struct openflags flags, int sectorsize,
			   int alignment, int *bitmap_offset_out,
			   unsigned long *bitmap_len_out,
			   int *data_offset_out);
extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
extern void do_io(struct io_thread_req *req);
83
84static inline int ubd_test_bit(__u64 bit, unsigned char *data)
85{
86 __u64 n;
87 int bits, off;
88
89 bits = sizeof(data[0]) * 8;
90 n = bit / bits;
91 off = bit % bits;
92 return((data[n] & (1 << off)) != 0);
93}
94
95static inline void ubd_set_bit(__u64 bit, unsigned char *data)
96{
97 __u64 n;
98 int bits, off;
99
100 bits = sizeof(data[0]) * 8;
101 n = bit / bits;
102 off = bit % bits;
103 data[n] |= (1 << off);
104}
105/*End stuff from ubd_user.h*/
106
/* Name used for both the platform driver and the per-unit platform devices. */
#define DRIVER_NAME "uml-blkdev"

/* Can be taken in interrupt context, and is passed to the block layer to lock
 * the request queue. Kernel side code knows that. */
static DEFINE_SPINLOCK(ubd_io_lock);

/* Taken around accesses to the ubd_devs configuration and to fake_major. */
static DEFINE_MUTEX(ubd_lock);

/* XXX - this made sense in 2.4 days, now it's only used as a boolean, and
 * probably it doesn't make sense even for that. */
static int do_ubd;

static int ubd_open(struct inode * inode, struct file * filp);
static int ubd_release(struct inode * inode, struct file * file);
static int ubd_ioctl(struct inode * inode, struct file * file,
		     unsigned int cmd, unsigned long arg);
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);

/* Number of entries in ubd_devs[]; valid unit numbers are 0..MAX_DEV - 1. */
#define MAX_DEV (16)

/* Block device operations installed on every ubd gendisk. */
static struct block_device_operations ubd_blops = {
	.owner		= THIS_MODULE,
	.open		= ubd_open,
	.release	= ubd_release,
	.ioctl		= ubd_ioctl,
	.getgeo		= ubd_getgeo,
};
134
/* Protected by the queue_lock */
static request_queue_t *ubd_queue;

/* Protected by ubd_lock */
static int fake_major = MAJOR_NR;

/* Per-unit disks: the real one and, when a fake major is set, its alias. */
static struct gendisk *ubd_gendisk[MAX_DEV];
static struct gendisk *fake_gendisk[MAX_DEV];

/* Default open flags; .s (sync) is preset with CONFIG_BLK_DEV_UBD_SYNC. */
#ifdef CONFIG_BLK_DEV_UBD_SYNC
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
					 .cl = 1 })
#else
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
					 .cl = 1 })
#endif

/* Not protected - changed only in ubd_setup_common and then only
 * to enable O_SYNC.
 */
static struct openflags global_openflags = OPEN_FLAGS;
156
/* Copy-on-write state of a ubd device that has a backing file. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* In-memory copy of the COW bitmap; vmalloc'ed in ubd_open_dev(). */
	unsigned long *bitmap;
	/* Size passed to vmalloc() and read_cow_bitmap() for the bitmap. */
	unsigned long bitmap_len;
	/* Offset of the bitmap within the COW file. */
	int bitmap_offset;
	/* Data offset used for I/O on the COW file (io_thread_req.offsets[1]). */
	int data_offset;
};

/* Per-unit device state; one entry of ubd_devs[]. */
struct ubd {
	/* name (and fd, below) of the file opened for writing, either the
	 * backing or the cow file. */
	char *file;
	/* Open count: incremented in ubd_open(), decremented in ubd_release(). */
	int count;
	int fd;
	/* Device size in bytes, rounded up to a 512-byte multiple in ubd_add(). */
	__u64 size;
	/* Flags parsed from the configuration string. */
	struct openflags boot_openflags;
	/* Flags in effect for the current open (copied from boot_openflags). */
	struct openflags openflags;
	/* Set by the 'c' flag: the file is not locked on the host. */
	unsigned shared:1;
	/* Set by the 'd' flag: never treat the file as a COW file. */
	unsigned no_cow:1;
	struct cow cow;
	/* Platform device registered for the unit in ubd_disk_register(). */
	struct platform_device pdev;
};
182
/* Initializer for an unconfigured struct cow (bitmap_len is left zeroed). */
#define DEFAULT_COW { \
	.file =			NULL, \
	.fd =			-1, \
	.bitmap =		NULL, \
	.bitmap_offset =	0, \
	.data_offset =		0, \
}

/* Initializer for an unconfigured unit; .file == NULL marks the slot free. */
#define DEFAULT_UBD { \
	.file =			NULL, \
	.count =		0, \
	.fd =			-1, \
	.size =			-1, \
	.boot_openflags =	OPEN_FLAGS, \
	.openflags =		OPEN_FLAGS, \
	.no_cow =		0, \
	.shared =		0, \
	.cow =			DEFAULT_COW, \
}

/* Table of all units, indexed by unit number. */
struct ubd ubd_devs[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
204
/* Only changed by fake_ide_setup which is a setup */
static int fake_ide = 0;
/* /proc "ide" and "ide/ide0" directories, created by make_proc_ide(). */
static struct proc_dir_entry *proc_ide_root = NULL;
static struct proc_dir_entry *proc_ide = NULL;
209
210static void make_proc_ide(void)
211{
212 proc_ide_root = proc_mkdir("ide", NULL);
213 proc_ide = proc_mkdir("ide0", proc_ide_root);
214}
215
/*
 * read_proc handler for the fake "media" entry: always reports "disk\n".
 *
 * The text is copied to 'page'.  Returns the number of bytes available from
 * offset 'off', capped at 'count'.  *eof is set when fewer than 'count' bytes
 * remain; *start is set to page + off unless nothing remains.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	static const char media[] = "disk\n";
	int remaining;

	strcpy(page, media);
	remaining = strlen(media);
	remaining -= off;

	if (remaining >= count) {
		*start = page + off;
		return count;
	}

	*eof = 1;
	if (remaining <= 0)
		return 0;

	*start = page + off;
	return remaining;
}
232
233static void make_ide_entries(char *dev_name)
234{
235 struct proc_dir_entry *dir, *ent;
236 char name[64];
237
238 if(proc_ide_root == NULL) make_proc_ide();
239
240 dir = proc_mkdir(dev_name, proc_ide);
241 if(!dir) return;
242
243 ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
244 if(!ent) return;
245 ent->nlink = 1;
246 ent->data = NULL;
247 ent->read_proc = proc_ide_read_media;
248 ent->write_proc = NULL;
249 sprintf(name,"ide0/%s", dev_name);
250 proc_symlink(dev_name, proc_ide_root, name);
251}
252
253static int fake_ide_setup(char *str)
254{
255 fake_ide = 1;
256 return(1);
257}
258
259__setup("fake_ide", fake_ide_setup);
260
261__uml_help(fake_ide_setup,
262"fake_ide\n"
263" Create ide0 entries that map onto ubd devices.\n\n"
264);
265
/*
 * Parse a unit specifier at *ptr: either a number (as understood by
 * simple_strtoul with base 0) or a single lowercase letter, 'a' being 0.
 * On success *ptr is advanced past the specifier and the unit is returned;
 * otherwise -1 is returned and *ptr is left alone.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur >= 'a') && (*cur <= 'z')) {
		*ptr = cur + 1;
		return *cur - 'a';
	}

	return -1;
}
284
/* If *index_out == -1 at exit, the passed option was a general one;
 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
 * should not be freed on exit.
 *
 * Two forms are parsed:
 *   "=sync" or "=<major>"                    - general options
 *   "<unit><flags>=<file>[(,|:)<backing>]"   - per-unit configuration
 * Returns 0 on success and 1 on failure.  Note that *index_out is written as
 * soon as the unit slot is found free, i.e. before the flags are validated.
 */
static int ubd_setup_common(char *str, int *index_out)
{
	struct ubd *ubd_dev;
	struct openflags flags = global_openflags;
	char *backing_file;
	int n, err, i;

	if(index_out) *index_out = -1;
	n = *str;
	if(n == '='){
		/* General option: either "sync" or a fake major number. */
		char *end;
		int major;

		str++;
		if(!strcmp(str, "sync")){
			global_openflags = of_sync(global_openflags);
			return(0);
		}
		major = simple_strtoul(str, &end, 0);
		if((*end != '\0') || (end == str)){
			printk(KERN_ERR
			       "ubd_setup : didn't parse major number\n");
			return(1);
		}

		err = 1;
		mutex_lock(&ubd_lock);
		/* fake_major still equal to MAJOR_NR means "not assigned yet". */
		if(fake_major != MAJOR_NR){
			printk(KERN_ERR "Can't assign a fake major twice\n");
			goto out1;
		}

		fake_major = major;

		printk(KERN_INFO "Setting extra ubd major number to %d\n",
		       major);
		err = 0;
	out1:
		mutex_unlock(&ubd_lock);
		return(err);
	}

	n = parse_unit(&str);
	if(n < 0){
		printk(KERN_ERR "ubd_setup : couldn't parse unit number "
		       "'%s'\n", str);
		return(1);
	}
	if(n >= MAX_DEV){
		printk(KERN_ERR "ubd_setup : index %d out of range "
		       "(%d devices, from 0 to %d)\n", n, MAX_DEV, MAX_DEV - 1);
		return(1);
	}

	err = 1;
	mutex_lock(&ubd_lock);

	ubd_dev = &ubd_devs[n];
	if(ubd_dev->file != NULL){
		printk(KERN_ERR "ubd_setup : device already configured\n");
		goto out;
	}

	if (index_out)
		*index_out = n;

	/* Consume flag letters up to and including the '='.  The loop bound
	 * allows one iteration per character of "rscd=" plus its terminator. */
	for (i = 0; i < sizeof("rscd="); i++) {
		switch (*str) {
		case 'r':
			flags.w = 0;
			break;
		case 's':
			flags.s = 1;
			break;
		case 'd':
			ubd_dev->no_cow = 1;
			break;
		case 'c':
			ubd_dev->shared = 1;
			break;
		case '=':
			str++;
			goto break_loop;
		default:
			printk(KERN_ERR "ubd_setup : Expected '=' or flag letter (r, s, c, or d)\n");
			goto out;
		}
		str++;
	}

	/* Only reached when the loop ran out without seeing '='. */
	if (*str == '=')
		printk(KERN_ERR "ubd_setup : Too many flags specified\n");
	else
		printk(KERN_ERR "ubd_setup : Expected '='\n");
	goto out;

break_loop:
	err = 0;
	/* The backing file follows the first ',' or, failing that, ':'. */
	backing_file = strchr(str, ',');

	if (!backing_file) {
		backing_file = strchr(str, ':');
	}

	if(backing_file){
		/* With 'd' the separator is only reported; the string is not
		 * split and backing_file keeps pointing at the separator. */
		if(ubd_dev->no_cow)
			printk(KERN_ERR "Can't specify both 'd' and a "
			       "cow file\n");
		else {
			*backing_file = '\0';
			backing_file++;
		}
	}
	ubd_dev->file = str;
	ubd_dev->cow.file = backing_file;
	ubd_dev->boot_openflags = flags;
out:
	mutex_unlock(&ubd_lock);
	return(err);
}
409
410static int ubd_setup(char *str)
411{
412 ubd_setup_common(str, NULL);
413 return(1);
414}
415
416__setup("ubd", ubd_setup);
417__uml_help(ubd_setup,
418"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
419" This is used to associate a device with a file in the underlying\n"
420" filesystem. When specifying two filenames, the first one is the\n"
421" COW name and the second is the backing file name. As separator you can\n"
422" use either a ':' or a ',': the first one allows writing things like;\n"
423" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
424" while with a ',' the shell would not expand the 2nd '~'.\n"
425" When using only one filename, UML will detect whether to thread it like\n"
426" a COW file or a backing file. To override this detection, add the 'd'\n"
427" flag:\n"
428" ubd0d=BackingFile\n"
429" Usually, there is a filesystem in the file, but \n"
430" that's not required. Swap devices containing swap files can be\n"
431" specified like this. Also, a file which doesn't contain a\n"
432" filesystem can have its contents read in the virtual \n"
433" machine by running 'dd' on the device. <n> must be in the range\n"
434" 0 to 7. Appending an 'r' to the number will cause that device\n"
435" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
436" an 's' will cause data to be written to disk on the host immediately.\n\n"
437);
438
439static int udb_setup(char *str)
440{
441 printk("udb%s specified on command line is almost certainly a ubd -> "
442 "udb TYPO\n", str);
443 return(1);
444}
445
446__setup("udb", udb_setup);
447__uml_help(udb_setup,
448"udb\n"
449" This option is here solely to catch ubd -> udb typos, which can be\n"
450" to impossible to catch visually unless you specifically look for\n"
451" them. The only result of any option starting with 'udb' is an error\n"
452" in the boot output.\n\n"
453);
454
455static int fakehd_set = 0;
456static int fakehd(char *str)
457{
458 printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
459 fakehd_set = 1;
460 return 1;
461}
462
463__setup("fakehd", fakehd);
464__uml_help(fakehd,
465"fakehd\n"
466" Change the ubd device name to \"hd\".\n\n"
467);
468
static void do_ubd_request(request_queue_t * q);

/* Only changed by ubd_init, which is an initcall. */
/* While this is -1, do_ubd_request() performs the I/O itself via do_io(). */
int thread_fd = -1;

/* Changed by ubd_handler, which is serialized because interrupts only
 * happen on CPU 0.
 * XXX: currently unused.
 */
static int intr_count = 0;
479
480/* call ubd_finish if you need to serialize */
481static void __ubd_finish(struct request *req, int error)
482{
483 int nsect;
484
485 if(error){
486 end_request(req, 0);
487 return;
488 }
489 nsect = req->current_nr_sectors;
490 req->sector += nsect;
491 req->buffer += nsect << 9;
492 req->errors = 0;
493 req->nr_sectors -= nsect;
494 req->current_nr_sectors = 0;
495 end_request(req, 1);
496}
497
/* Callable only from interrupt context - otherwise you need to do
 * spin_lock_irq()/spin_lock_irqsave() */
/* Runs __ubd_finish() with ubd_io_lock held. */
static inline void ubd_finish(struct request *req, int error)
{
	spin_lock(&ubd_io_lock);
	__ubd_finish(req, error);
	spin_unlock(&ubd_io_lock);
}
506
/* XXX - move this inside ubd_intr. */
/* Called without ubd_io_lock held, and only in interrupt context. */
/*
 * Reads one completed io_thread_req back from thread_fd, finishes the request
 * at the head of ubd_queue with the reported status, re-arms the descriptor
 * and then submits the next queued request.
 */
static void ubd_handler(void)
{
	struct io_thread_req req;
	/* NOTE(review): the head request is fetched before ubd_io_lock is
	 * taken - confirm that this is safe. */
	struct request *rq = elv_next_request(ubd_queue);
	int n;

	/* Nothing is outstanding any more, so do_ubd_request() may submit. */
	do_ubd = 0;
	intr_count++;
	n = os_read_file(thread_fd, &req, sizeof(req));
	if(n != sizeof(req)){
		/* Short or failed read: fail the head request and stop here
		 * (the descriptor is not re-armed on this path). */
		printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
		       "err = %d\n", os_getpid(), -n);
		spin_lock(&ubd_io_lock);
		end_request(rq, 0);
		spin_unlock(&ubd_io_lock);
		return;
	}

	ubd_finish(rq, req.error);
	reactivate_fd(thread_fd, UBD_IRQ);
	spin_lock(&ubd_io_lock);
	do_ubd_request(ubd_queue);
	spin_unlock(&ubd_io_lock);
}
533
534static irqreturn_t ubd_intr(int irq, void *dev)
535{
536 ubd_handler();
537 return(IRQ_HANDLED);
538}
539
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was recorded in io_pid. */
void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
550
551static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
552{
553 char *file;
554
555 file = ubd_dev->cow.file ? ubd_dev->cow.file : ubd_dev->file;
556 return(os_file_size(file, size_out));
557}
558
559static void ubd_close_dev(struct ubd *ubd_dev)
560{
561 os_close_file(ubd_dev->fd);
562 if(ubd_dev->cow.file == NULL)
563 return;
564
565 os_close_file(ubd_dev->cow.fd);
566 vfree(ubd_dev->cow.bitmap);
567 ubd_dev->cow.bitmap = NULL;
568}
569
570static int ubd_open_dev(struct ubd *ubd_dev)
571{
572 struct openflags flags;
573 char **back_ptr;
574 int err, create_cow, *create_ptr;
575 int fd;
576
577 ubd_dev->openflags = ubd_dev->boot_openflags;
578 create_cow = 0;
579 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
580 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
581
582 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
583 back_ptr, &ubd_dev->cow.bitmap_offset,
584 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
585 create_ptr);
586
587 if((fd == -ENOENT) && create_cow){
588 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
589 ubd_dev->openflags, 1 << 9, PAGE_SIZE,
590 &ubd_dev->cow.bitmap_offset,
591 &ubd_dev->cow.bitmap_len,
592 &ubd_dev->cow.data_offset);
593 if(fd >= 0){
594 printk(KERN_INFO "Creating \"%s\" as COW file for "
595 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
596 }
597 }
598
599 if(fd < 0){
600 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
601 -fd);
602 return fd;
603 }
604 ubd_dev->fd = fd;
605
606 if(ubd_dev->cow.file != NULL){
607 err = -ENOMEM;
608 ubd_dev->cow.bitmap = (void *) vmalloc(ubd_dev->cow.bitmap_len);
609 if(ubd_dev->cow.bitmap == NULL){
610 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
611 goto error;
612 }
613 flush_tlb_kernel_vm();
614
615 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
616 ubd_dev->cow.bitmap_offset,
617 ubd_dev->cow.bitmap_len);
618 if(err < 0)
619 goto error;
620
621 flags = ubd_dev->openflags;
622 flags.w = 0;
623 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
624 NULL, NULL, NULL, NULL);
625 if(err < 0) goto error;
626 ubd_dev->cow.fd = err;
627 }
628 return(0);
629 error:
630 os_close_file(ubd_dev->fd);
631 return(err);
632}
633
634static int ubd_disk_register(int major, u64 size, int unit,
635 struct gendisk **disk_out)
636
637{
638 struct gendisk *disk;
639
640 disk = alloc_disk(1 << UBD_SHIFT);
641 if(disk == NULL)
642 return(-ENOMEM);
643
644 disk->major = major;
645 disk->first_minor = unit << UBD_SHIFT;
646 disk->fops = &ubd_blops;
647 set_capacity(disk, size / 512);
648 if(major == MAJOR_NR)
649 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
650 else
651 sprintf(disk->disk_name, "ubd_fake%d", unit);
652
653 /* sysfs register (not for ide fake devices) */
654 if (major == MAJOR_NR) {
655 ubd_devs[unit].pdev.id = unit;
656 ubd_devs[unit].pdev.name = DRIVER_NAME;
657 platform_device_register(&ubd_devs[unit].pdev);
658 disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
659 }
660
661 disk->private_data = &ubd_devs[unit];
662 disk->queue = ubd_queue;
663 add_disk(disk);
664
665 *disk_out = disk;
666 return 0;
667}
668
/*
 * Round n up to the next multiple of 512 (1 << 9).  The argument is
 * parenthesized so that any expression can be passed, and the mask is built
 * with ~ rather than by left-shifting -1, which is undefined behaviour in C.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
670
671static int ubd_add(int n)
672{
673 struct ubd *ubd_dev = &ubd_devs[n];
674 int err;
675
676 err = -ENODEV;
677 if(ubd_dev->file == NULL)
678 goto out;
679
680 err = ubd_file_size(ubd_dev, &ubd_dev->size);
681 if(err < 0)
682 goto out;
683
684 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
685
686 err = ubd_disk_register(MAJOR_NR, ubd_dev->size, n, &ubd_gendisk[n]);
687 if(err)
688 goto out;
689
690 if(fake_major != MAJOR_NR)
691 ubd_disk_register(fake_major, ubd_dev->size, n,
692 &fake_gendisk[n]);
693
694 /* perhaps this should also be under the "if (fake_major)" above */
695 /* using the fake_disk->disk_name and also the fakehd_set name */
696 if (fake_ide)
697 make_ide_entries(ubd_gendisk[n]->disk_name);
698
699 err = 0;
700out:
701 return err;
702}
703
704static int ubd_config(char *str)
705{
706 int n, ret;
707
708 str = kstrdup(str, GFP_KERNEL);
709 if (str == NULL) {
710 printk(KERN_ERR "ubd_config failed to strdup string\n");
711 ret = 1;
712 goto out;
713 }
714 ret = ubd_setup_common(str, &n);
715 if (ret) {
716 ret = -1;
717 goto err_free;
718 }
719 if (n == -1) {
720 ret = 0;
721 goto err_free;
722 }
723
724 mutex_lock(&ubd_lock);
725 ret = ubd_add(n);
726 if (ret)
727 ubd_devs[n].file = NULL;
728 mutex_unlock(&ubd_lock);
729
730out:
731 return ret;
732
733err_free:
734 kfree(str);
735 goto out;
736}
737
/*
 * mconsole "get_config" handler: describe the unit named by 'name' as
 * "<file>[,<backing file>]" (empty for an unconfigured unit).
 *
 * The text is emitted through CONFIG_CHUNK with str/size/len; the value
 * returned is 'len' as left by that macro.
 * NOTE(review): CONFIG_CHUNK is defined elsewhere - presumably it copies at
 * most 'size' bytes and accumulates the needed length in 'len'; confirm.
 * Returns -1 and sets *error_out when the unit number is out of range.
 */
static int ubd_get_config(char *name, char *str, int size, char **error_out)
{
	struct ubd *ubd_dev;
	int n, len = 0;

	n = parse_unit(&name);
	if((n >= MAX_DEV) || (n < 0)){
		*error_out = "ubd_get_config : device number out of range";
		return(-1);
	}

	ubd_dev = &ubd_devs[n];
	mutex_lock(&ubd_lock);

	if(ubd_dev->file == NULL){
		CONFIG_CHUNK(str, size, len, "", 1);
		goto out;
	}

	CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);

	if(ubd_dev->cow.file != NULL){
		CONFIG_CHUNK(str, size, len, ",", 0);
		CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
	}
	else CONFIG_CHUNK(str, size, len, "", 1);

 out:
	mutex_unlock(&ubd_lock);
	return(len);
}
769
770static int ubd_id(char **str, int *start_out, int *end_out)
771{
772 int n;
773
774 n = parse_unit(str);
775 *start_out = 0;
776 *end_out = MAX_DEV - 1;
777 return n;
778}
779
/*
 * mconsole "remove" handler: tear down unit n.
 *
 * Returns -ENODEV when the unit has no disk or no file, -EBUSY while the
 * device is open, and 0 once the disk(s) and the platform device have been
 * removed and the slot has been reset to DEFAULT_UBD.
 */
static int ubd_remove(int n)
{
	struct ubd *ubd_dev;
	int err = -ENODEV;

	mutex_lock(&ubd_lock);

	if(ubd_gendisk[n] == NULL)
		goto out;

	ubd_dev = &ubd_devs[n];

	if(ubd_dev->file == NULL)
		goto out;

	/* you cannot remove a open disk */
	err = -EBUSY;
	if(ubd_dev->count > 0)
		goto out;

	del_gendisk(ubd_gendisk[n]);
	put_disk(ubd_gendisk[n]);
	ubd_gendisk[n] = NULL;

	/* The alias under the fake major only exists if one was registered. */
	if(fake_gendisk[n] != NULL){
		del_gendisk(fake_gendisk[n]);
		put_disk(fake_gendisk[n]);
		fake_gendisk[n] = NULL;
	}

	platform_device_unregister(&ubd_dev->pdev);
	/* NOTE(review): the file name string is dropped here without being
	 * freed - confirm who owns it. */
	*ubd_dev = ((struct ubd) DEFAULT_UBD);
	err = 0;
out:
	mutex_unlock(&ubd_lock);
	return err;
}
817
/* All these are called by mconsole in process context and without ubd-specific locks. */
static struct mc_device ubd_mc = {
	.name		= "ubd",
	.config		= ubd_config,
	.get_config	= ubd_get_config,
	.id		= ubd_id,
	.remove		= ubd_remove,
};

/* Initcall: register the "ubd" device handlers with mconsole. */
static int __init ubd_mc_init(void)
{
	mconsole_register_dev(&ubd_mc);
	return 0;
}

__initcall(ubd_mc_init);
834
835static int __init ubd0_init(void)
836{
837 struct ubd *ubd_dev = &ubd_devs[0];
838
839 if(ubd_dev->file == NULL)
840 ubd_dev->file = "root_fs";
841 return(0);
842}
843
844__initcall(ubd0_init);
845
/* Platform driver matching the per-unit platform devices by name. */
static struct platform_driver ubd_driver = {
	.driver = {
		.name  = DRIVER_NAME,
	},
};

/*
 * Late initcall: register the block major(s), create the shared request
 * queue, register the platform driver and try to add every unit.
 * Returns 0 on success and -1 if a major or the queue cannot be set up.
 */
static int __init ubd_init(void)
{
	int i;

	if (register_blkdev(MAJOR_NR, "ubd"))
		return -1;

	ubd_queue = blk_init_queue(do_ubd_request, &ubd_io_lock);
	if (!ubd_queue) {
		unregister_blkdev(MAJOR_NR, "ubd");
		return -1;
	}

	if (fake_major != MAJOR_NR) {
		/* NOTE(review): 'name' is formatted but never used - the fake
		 * major is registered as "ubd" as well.  This failure path
		 * also leaves MAJOR_NR and the queue registered. */
		char name[sizeof("ubd_nnn\0")];

		snprintf(name, sizeof(name), "ubd_%d", fake_major);
		if (register_blkdev(fake_major, "ubd"))
			return -1;
	}
	platform_driver_register(&ubd_driver);
	/* Units without a configured file make ubd_add() fail; that result
	 * is deliberately not checked here. */
	for (i = 0; i < MAX_DEV; i++)
		ubd_add(i);
	return 0;
}

late_initcall(ubd_init);
879
/*
 * Device initcall: start the I/O thread and hook its descriptor up to
 * UBD_IRQ.  Always returns 0; failures are only logged.
 */
static int __init ubd_driver_init(void){
	unsigned long stack;
	int err;

	/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
	if(global_openflags.s){
		printk(KERN_INFO "ubd: Synchronous mode\n");
		/* Letting ubd=sync be like using ubd#s= instead of ubd#= is
		 * enough. So use anyway the io thread. */
	}
	/* NOTE(review): the result of alloc_stack() is not checked. */
	stack = alloc_stack(0, 0);
	io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
				 &thread_fd);
	if(io_pid < 0){
		/* NOTE(review): the synchronous path in do_ubd_request() is
		 * keyed on thread_fd == -1; presumably start_io_thread()
		 * leaves thread_fd untouched on failure - confirm. */
		printk(KERN_ERR
		       "ubd : Failed to start I/O thread (errno = %d) - "
		       "falling back to synchronous I/O\n", -io_pid);
		io_pid = -1;
		return(0);
	}
	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
			     IRQF_DISABLED, "ubd", ubd_devs);
	if(err != 0)
		printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
	return 0;
}

device_initcall(ubd_driver_init);
908
909static int ubd_open(struct inode *inode, struct file *filp)
910{
911 struct gendisk *disk = inode->i_bdev->bd_disk;
912 struct ubd *ubd_dev = disk->private_data;
913 int err = 0;
914
915 if(ubd_dev->count == 0){
916 err = ubd_open_dev(ubd_dev);
917 if(err){
918 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
919 disk->disk_name, ubd_dev->file, -err);
920 goto out;
921 }
922 }
923 ubd_dev->count++;
924 set_disk_ro(disk, !ubd_dev->openflags.w);
925
926 /* This should no more be needed. And it didn't work anyway to exclude
927 * read-write remounting of filesystems.*/
928 /*if((filp->f_mode & FMODE_WRITE) && !ubd_dev->openflags.w){
929 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
930 err = -EROFS;
931 }*/
932 out:
933 return(err);
934}
935
936static int ubd_release(struct inode * inode, struct file * file)
937{
938 struct gendisk *disk = inode->i_bdev->bd_disk;
939 struct ubd *ubd_dev = disk->private_data;
940
941 if(--ubd_dev->count == 0)
942 ubd_close_dev(ubd_dev);
943 return(0);
944}
945
/*
 * Mark the sectors covered by a write as present in the COW bitmap.
 *
 * For each 512-byte sector of [io_offset, io_offset + length) the matching
 * bit is set in *cow_mask (when cow_mask is not NULL) and in 'bitmap'.  If
 * any bitmap bit actually changed, two bitmap words starting at the word
 * holding the first sector are copied to bitmap_words[] and *cow_offset is
 * set to their byte position (word index * word size + bitmap_offset).
 * When nothing changed, *cow_offset and bitmap_words[] are left alone.
 */
static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
			  __u64 *cow_offset, unsigned long *bitmap,
			  __u64 bitmap_offset, unsigned long *bitmap_words,
			  __u64 bitmap_len)
{
	__u64 sector = io_offset >> 9;
	int i, update_bitmap = 0;

	for(i = 0; i < length >> 9; i++){
		if(cow_mask != NULL)
			ubd_set_bit(i, (unsigned char *) cow_mask);
		/* Already present in the COW file: nothing to record. */
		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
			continue;

		update_bitmap = 1;
		ubd_set_bit(sector + i, (unsigned char *) bitmap);
	}

	if(!update_bitmap)
		return;

	/* Index of the bitmap word that holds the first sector's bit. */
	*cow_offset = sector / (sizeof(unsigned long) * 8);

	/* This takes care of the case where we're exactly at the end of the
	 * device, and *cow_offset + 1 is off the end.  So, just back it up
	 * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
	 * for the original diagnosis.
	 */
	if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
			   sizeof(unsigned long) - 1))
		(*cow_offset)--;

	bitmap_words[0] = bitmap[*cow_offset];
	bitmap_words[1] = bitmap[*cow_offset + 1];

	/* Convert the word index into a byte offset within the COW file. */
	*cow_offset *= sizeof(unsigned long);
	*cow_offset += bitmap_offset;
}
984
/*
 * Apply the COW bitmap to a request.
 *
 * For a read, a bit is set in req->sector_mask for every sector of the
 * request that is marked in the bitmap.  For a write, cowify_bitmap() marks
 * all sectors and records the bitmap words that need to be written back.
 * Panics when the request covers more sectors than sector_mask has bits.
 */
static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
		       __u64 bitmap_offset, __u64 bitmap_len)
{
	__u64 sector = req->offset >> 9;
	int i;

	if(req->length > (sizeof(req->sector_mask) * 8) << 9)
		panic("Operation too long");

	if(req->op == UBD_READ) {
		for(i = 0; i < req->length >> 9; i++){
			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
				ubd_set_bit(i, (unsigned char *)
					    &req->sector_mask);
		}
	}
	else cowify_bitmap(req->offset, req->length, &req->sector_mask,
			   &req->cow_offset, bitmap, bitmap_offset,
			   req->bitmap_words, bitmap_len);
}
1005
1006/* Called with ubd_io_lock held */
1007static int prepare_request(struct request *req, struct io_thread_req *io_req)
1008{
1009 struct gendisk *disk = req->rq_disk;
1010 struct ubd *ubd_dev = disk->private_data;
1011 __u64 offset;
1012 int len;
1013
1014 /* This should be impossible now */
1015 if((rq_data_dir(req) == WRITE) && !ubd_dev->openflags.w){
1016 printk("Write attempted on readonly ubd device %s\n",
1017 disk->disk_name);
1018 end_request(req, 0);
1019 return(1);
1020 }
1021
1022 offset = ((__u64) req->sector) << 9;
1023 len = req->current_nr_sectors << 9;
1024
1025 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd : ubd_dev->fd;
1026 io_req->fds[1] = ubd_dev->fd;
1027 io_req->cow_offset = -1;
1028 io_req->offset = offset;
1029 io_req->length = len;
1030 io_req->error = 0;
1031 io_req->sector_mask = 0;
1032
1033 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1034 io_req->offsets[0] = 0;
1035 io_req->offsets[1] = ubd_dev->cow.data_offset;
1036 io_req->buffer = req->buffer;
1037 io_req->sectorsize = 1 << 9;
1038
1039 if(ubd_dev->cow.file != NULL)
1040 cowify_req(io_req, ubd_dev->cow.bitmap, ubd_dev->cow.bitmap_offset,
1041 ubd_dev->cow.bitmap_len);
1042
1043 return(0);
1044}
1045
/* Called with ubd_io_lock held */
/*
 * Request function of the ubd queue.
 *
 * Without an I/O thread (thread_fd == -1) every queued request is executed
 * in place through do_io().  With one, at most a single request is in flight:
 * the head request is written to the thread and do_ubd stays set until
 * ubd_handler() clears it.
 */
static void do_ubd_request(request_queue_t *q)
{
	struct io_thread_req io_req;
	struct request *req;
	int err, n;

	if(thread_fd == -1){
		while((req = elv_next_request(q)) != NULL){
			err = prepare_request(req, &io_req);
			if(!err){
				do_io(&io_req);
				__ubd_finish(req, io_req.error);
			}
		}
	}
	else {
		/* Nothing to do while a request is outstanding or the queue
		 * is empty. */
		if(do_ubd || (req = elv_next_request(q)) == NULL)
			return;
		err = prepare_request(req, &io_req);
		if(!err){
			do_ubd = 1;
			n = os_write_file(thread_fd, (char *) &io_req,
					 sizeof(io_req));
			/* NOTE(review): on a failed write do_ubd stays set
			 * and the request is not completed. */
			if(n != sizeof(io_req))
				printk("write to io thread failed, "
				       "errno = %d\n", -n);
		}
	}
}
1076
1077static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1078{
1079 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1080
1081 geo->heads = 128;
1082 geo->sectors = 32;
1083 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1084 return 0;
1085}
1086
1087static int ubd_ioctl(struct inode * inode, struct file * file,
1088 unsigned int cmd, unsigned long arg)
1089{
1090 struct ubd *ubd_dev = inode->i_bdev->bd_disk->private_data;
1091 struct hd_driveid ubd_id = {
1092 .cyls = 0,
1093 .heads = 128,
1094 .sectors = 32,
1095 };
1096
1097 switch (cmd) {
1098 struct cdrom_volctrl volume;
1099 case HDIO_GET_IDENTITY:
1100 ubd_id.cyls = ubd_dev->size / (128 * 32 * 512);
1101 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1102 sizeof(ubd_id)))
1103 return(-EFAULT);
1104 return(0);
1105
1106 case CDROMVOLREAD:
1107 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1108 return(-EFAULT);
1109 volume.channel0 = 255;
1110 volume.channel1 = 255;
1111 volume.channel2 = 255;
1112 volume.channel3 = 255;
1113 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1114 return(-EFAULT);
1115 return(0);
1116 }
1117 return(-EINVAL);
1118}
1119
1120static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
1121{
1122 struct uml_stat buf1, buf2;
1123 int err;
1124
1125 if(from_cmdline == NULL)
1126 return 0;
1127 if(!strcmp(from_cmdline, from_cow))
1128 return 0;
1129
1130 err = os_stat_file(from_cmdline, &buf1);
1131 if(err < 0){
1132 printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
1133 return 0;
1134 }
1135 err = os_stat_file(from_cow, &buf2);
1136 if(err < 0){
1137 printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
1138 return 1;
1139 }
1140 if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
1141 return 0;
1142
1143 printk("Backing file mismatch - \"%s\" requested,\n"
1144 "\"%s\" specified in COW header of \"%s\"\n",
1145 from_cmdline, from_cow, cow);
1146 return 1;
1147}
1148
1149static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
1150{
1151 unsigned long modtime;
1152 unsigned long long actual;
1153 int err;
1154
1155 err = os_file_modtime(file, &modtime);
1156 if(err < 0){
1157 printk("Failed to get modification time of backing file "
1158 "\"%s\", err = %d\n", file, -err);
1159 return(err);
1160 }
1161
1162 err = os_file_size(file, &actual);
1163 if(err < 0){
1164 printk("Failed to get size of backing file \"%s\", "
1165 "err = %d\n", file, -err);
1166 return(err);
1167 }
1168
1169 if(actual != size){
1170 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
1171 * the typecast.*/
1172 printk("Size mismatch (%llu vs %llu) of COW header vs backing "
1173 "file\n", (unsigned long long) size, actual);
1174 return(-EINVAL);
1175 }
1176 if(modtime != mtime){
1177 printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
1178 "file\n", mtime, modtime);
1179 return(-EINVAL);
1180 }
1181 return(0);
1182}
1183
/*
 * Read 'len' bytes at 'offset' of fd into buf.
 * Returns 0 unless the seek or the read reports a negative error, which is
 * then returned as is.
 */
int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_seek_file(fd, offset);

	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}
1198
1199int open_ubd_file(char *file, struct openflags *openflags, int shared,
1200 char **backing_file_out, int *bitmap_offset_out,
1201 unsigned long *bitmap_len_out, int *data_offset_out,
1202 int *create_cow_out)
1203{
1204 time_t mtime;
1205 unsigned long long size;
1206 __u32 version, align;
1207 char *backing_file;
1208 int fd, err, sectorsize, asked_switch, mode = 0644;
1209
1210 fd = os_open_file(file, *openflags, mode);
1211 if (fd < 0) {
1212 if ((fd == -ENOENT) && (create_cow_out != NULL))
1213 *create_cow_out = 1;
1214 if (!openflags->w ||
1215 ((fd != -EROFS) && (fd != -EACCES)))
1216 return fd;
1217 openflags->w = 0;
1218 fd = os_open_file(file, *openflags, mode);
1219 if (fd < 0)
1220 return fd;
1221 }
1222
1223 if(shared)
1224 printk("Not locking \"%s\" on the host\n", file);
1225 else {
1226 err = os_lock_file(fd, openflags->w);
1227 if(err < 0){
1228 printk("Failed to lock '%s', err = %d\n", file, -err);
1229 goto out_close;
1230 }
1231 }
1232
1233 /* Successful return case! */
1234 if(backing_file_out == NULL)
1235 return(fd);
1236
1237 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
1238 &size, §orsize, &align, bitmap_offset_out);
1239 if(err && (*backing_file_out != NULL)){
1240 printk("Failed to read COW header from COW file \"%s\", "
1241 "errno = %d\n", file, -err);
1242 goto out_close;
1243 }
1244 if(err)
1245 return(fd);
1246
1247 asked_switch = path_requires_switch(*backing_file_out, backing_file, file);
1248
1249 /* Allow switching only if no mismatch. */
1250 if (asked_switch && !backing_file_mismatch(*backing_file_out, size, mtime)) {
1251 printk("Switching backing file to '%s'\n", *backing_file_out);
1252 err = write_cow_header(file, fd, *backing_file_out,
1253 sectorsize, align, &size);
1254 if (err) {
1255 printk("Switch failed, errno = %d\n", -err);
1256 goto out_close;
1257 }
1258 } else {
1259 *backing_file_out = backing_file;
1260 err = backing_file_mismatch(*backing_file_out, size, mtime);
1261 if (err)
1262 goto out_close;
1263 }
1264
1265 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
1266 bitmap_len_out, data_offset_out);
1267
1268 return fd;
1269 out_close:
1270 os_close_file(fd);
1271 return err;
1272}
1273
1274int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
1275 int sectorsize, int alignment, int *bitmap_offset_out,
1276 unsigned long *bitmap_len_out, int *data_offset_out)
1277{
1278 int err, fd;
1279
1280 flags.c = 1;
1281 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
1282 if(fd < 0){
1283 err = fd;
1284 printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
1285 -err);
1286 goto out;
1287 }
1288
1289 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
1290 bitmap_offset_out, bitmap_len_out,
1291 data_offset_out);
1292 if(!err)
1293 return(fd);
1294 os_close_file(fd);
1295 out:
1296 return(err);
1297}
1298
1299static int update_bitmap(struct io_thread_req *req)
1300{
1301 int n;
1302
1303 if(req->cow_offset == -1)
1304 return(0);
1305
1306 n = os_seek_file(req->fds[1], req->cow_offset);
1307 if(n < 0){
1308 printk("do_io - bitmap lseek failed : err = %d\n", -n);
1309 return(1);
1310 }
1311
1312 n = os_write_file(req->fds[1], &req->bitmap_words,
1313 sizeof(req->bitmap_words));
1314 if(n != sizeof(req->bitmap_words)){
1315 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1316 req->fds[1]);
1317 return(1);
1318 }
1319
1320 return(0);
1321}
1322
1323void do_io(struct io_thread_req *req)
1324{
1325 char *buf;
1326 unsigned long len;
1327 int n, nsectors, start, end, bit;
1328 int err;
1329 __u64 off;
1330
1331 nsectors = req->length / req->sectorsize;
1332 start = 0;
1333 do {
1334 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1335 end = start;
1336 while((end < nsectors) &&
1337 (ubd_test_bit(end, (unsigned char *)
1338 &req->sector_mask) == bit))
1339 end++;
1340
1341 off = req->offset + req->offsets[bit] +
1342 start * req->sectorsize;
1343 len = (end - start) * req->sectorsize;
1344 buf = &req->buffer[start * req->sectorsize];
1345
1346 err = os_seek_file(req->fds[bit], off);
1347 if(err < 0){
1348 printk("do_io - lseek failed : err = %d\n", -err);
1349 req->error = 1;
1350 return;
1351 }
1352 if(req->op == UBD_READ){
1353 n = 0;
1354 do {
1355 buf = &buf[n];
1356 len -= n;
1357 n = os_read_file(req->fds[bit], buf, len);
1358 if (n < 0) {
1359 printk("do_io - read failed, err = %d "
1360 "fd = %d\n", -n, req->fds[bit]);
1361 req->error = 1;
1362 return;
1363 }
1364 } while((n < len) && (n != 0));
1365 if (n < len) memset(&buf[n], 0, len - n);
1366 } else {
1367 n = os_write_file(req->fds[bit], buf, len);
1368 if(n != len){
1369 printk("do_io - write failed err = %d "
1370 "fd = %d\n", -n, req->fds[bit]);
1371 req->error = 1;
1372 return;
1373 }
1374 }
1375
1376 start = end;
1377 } while(start < nsectors);
1378
1379 req->error = update_bitmap(req);
1380}
1381
/*
 * Descriptor the io thread reads requests from and writes finished
 * requests back to.  Changed in start_io_thread, which is serialized by
 * being called only from ubd_init, which is an initcall.
 */
int kernel_fd = -1;

/*
 * Only changed by the io thread, which bumps it once per request.
 * XXX: currently unused - nothing reads it.
 */
static int io_count;
1389
1390int io_thread(void *arg)
1391{
1392 struct io_thread_req req;
1393 int n;
1394
1395 ignore_sigwinch_sig();
1396 while(1){
1397 n = os_read_file(kernel_fd, &req, sizeof(req));
1398 if(n != sizeof(req)){
1399 if(n < 0)
1400 printk("io_thread - read failed, fd = %d, "
1401 "err = %d\n", kernel_fd, -n);
1402 else {
1403 printk("io_thread - short read, fd = %d, "
1404 "length = %d\n", kernel_fd, n);
1405 }
1406 continue;
1407 }
1408 io_count++;
1409 do_io(&req);
1410 n = os_write_file(kernel_fd, &req, sizeof(req));
1411 if(n != sizeof(req))
1412 printk("io_thread - write failed, fd = %d, err = %d\n",
1413 kernel_fd, -n);
1414 }
1415
1416 return 0;
1417}