Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/*
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
/*
 * Number of minor-number bits given to each unit: a disk is allocated with
 * (1 << UBD_SHIFT) minors and unit N starts at minor (N << UBD_SHIFT).
 * Setting this to 0 gives the old, partition-less ubd behaviour.
 */
#define UBD_SHIFT 4
21
22#include "linux/kernel.h"
23#include "linux/module.h"
24#include "linux/blkdev.h"
25#include "linux/ata.h"
26#include "linux/hdreg.h"
27#include "linux/init.h"
28#include "linux/cdrom.h"
29#include "linux/proc_fs.h"
30#include "linux/seq_file.h"
31#include "linux/ctype.h"
32#include "linux/capability.h"
33#include "linux/mm.h"
34#include "linux/slab.h"
35#include "linux/vmalloc.h"
36#include "linux/blkpg.h"
37#include "linux/genhd.h"
38#include "linux/spinlock.h"
39#include "linux/platform_device.h"
40#include "linux/scatterlist.h"
41#include "asm/segment.h"
42#include "asm/uaccess.h"
43#include "asm/irq.h"
44#include "asm/types.h"
45#include "asm/tlbflush.h"
46#include "mem_user.h"
47#include "kern_util.h"
48#include "kern.h"
49#include "mconsole_kern.h"
50#include "init.h"
51#include "irq_user.h"
52#include "irq_kern.h"
53#include "ubd_user.h"
54#include "os.h"
55#include "mem.h"
56#include "mem_kern.h"
57#include "cow.h"
58
/* Direction of a request handed to the I/O thread. */
enum ubd_req {
	UBD_READ,
	UBD_WRITE
};

/*
 * One piece of I/O.  A pointer to this structure is written to thread_fd by
 * do_ubd_request() and read back from thread_fd by ubd_handler(), which
 * completes the block request and frees the structure.
 */
struct io_thread_req {
	/* Block-layer request this piece belongs to. */
	struct request *req;
	enum ubd_req op;
	/* [0]: backing file fd when a COW file is in use, else the device
	 * fd; [1]: the device fd. */
	int fds[2];
	/* [0] is 0, [1] is the COW data offset (see prepare_request). */
	unsigned long offsets[2];
	/* Byte offset of the I/O within the device. */
	unsigned long long offset;
	/* Length of the I/O in bytes. */
	unsigned long length;
	char *buffer;
	int sectorsize;
	/* One bit per sector of this request, filled in by cowify_req. */
	unsigned long sector_mask;
	/* File offset of the bitmap words to write back; -1 when unset. */
	unsigned long long cow_offset;
	unsigned long bitmap_words[2];
	/* Initialised to 0 by prepare_request. */
	int error;
};
75
76static inline int ubd_test_bit(__u64 bit, unsigned char *data)
77{
78 __u64 n;
79 int bits, off;
80
81 bits = sizeof(data[0]) * 8;
82 n = bit / bits;
83 off = bit % bits;
84 return (data[n] & (1 << off)) != 0;
85}
86
87static inline void ubd_set_bit(__u64 bit, unsigned char *data)
88{
89 __u64 n;
90 int bits, off;
91
92 bits = sizeof(data[0]) * 8;
93 n = bit / bits;
94 off = bit % bits;
95 data[n] |= (1 << off);
96}
97/*End stuff from ubd_user.h*/
98
99#define DRIVER_NAME "uml-blkdev"
100
101static DEFINE_MUTEX(ubd_lock);
102
103static int ubd_open(struct block_device *bdev, fmode_t mode);
104static int ubd_release(struct gendisk *disk, fmode_t mode);
105static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
106 unsigned int cmd, unsigned long arg);
107static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
108
109#define MAX_DEV (16)
110
111static const struct block_device_operations ubd_blops = {
112 .owner = THIS_MODULE,
113 .open = ubd_open,
114 .release = ubd_release,
115 .ioctl = ubd_ioctl,
116 .getgeo = ubd_getgeo,
117};
118
119/* Protected by ubd_lock */
120static int fake_major = UBD_MAJOR;
121static struct gendisk *ubd_gendisk[MAX_DEV];
122static struct gendisk *fake_gendisk[MAX_DEV];
123
124#ifdef CONFIG_BLK_DEV_UBD_SYNC
125#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
126 .cl = 1 })
127#else
128#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
129 .cl = 1 })
130#endif
131static struct openflags global_openflags = OPEN_FLAGS;
132
/* Per-device copy-on-write state; only meaningful when 'file' is non-NULL. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* In-memory copy of the COW bitmap (vmalloc'ed in ubd_open_dev). */
	unsigned long *bitmap;
	/* Size passed to vmalloc and to the bitmap read, i.e. in bytes. */
	unsigned long bitmap_len;
	/* Offset of the bitmap within the COW file. */
	int bitmap_offset;
	/* Offset of the data area within the COW file. */
	int data_offset;
};
143
144#define MAX_SG 64
145
146struct ubd {
147 struct list_head restart;
148 /* name (and fd, below) of the file opened for writing, either the
149 * backing or the cow file. */
150 char *file;
151 int count;
152 int fd;
153 __u64 size;
154 struct openflags boot_openflags;
155 struct openflags openflags;
156 unsigned shared:1;
157 unsigned no_cow:1;
158 struct cow cow;
159 struct platform_device pdev;
160 struct request_queue *queue;
161 spinlock_t lock;
162 struct scatterlist sg[MAX_SG];
163 struct request *request;
164 int start_sg, end_sg;
165};
166
/* Initialiser for an unconfigured struct cow. */
#define DEFAULT_COW { \
	.file		= NULL, \
	.fd		= -1, \
	.bitmap		= NULL, \
	.bitmap_offset	= 0, \
	.data_offset	= 0, \
}

/* Initialiser for an unconfigured unit; .file == NULL means "not configured". */
#define DEFAULT_UBD { \
	.file		= NULL, \
	.count		= 0, \
	.fd		= -1, \
	.size		= -1, \
	.boot_openflags	= OPEN_FLAGS, \
	.openflags	= OPEN_FLAGS, \
	.shared		= 0, \
	.no_cow		= 0, \
	.cow		= DEFAULT_COW, \
	.lock		= SPIN_LOCK_UNLOCKED, \
	.request	= NULL, \
	.start_sg	= 0, \
	.end_sg		= 0, \
}
190
191/* Protected by ubd_lock */
192static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
193
194/* Only changed by fake_ide_setup which is a setup */
195static int fake_ide = 0;
196static struct proc_dir_entry *proc_ide_root = NULL;
197static struct proc_dir_entry *proc_ide = NULL;
198
199static void make_proc_ide(void)
200{
201 proc_ide_root = proc_mkdir("ide", NULL);
202 proc_ide = proc_mkdir("ide0", proc_ide_root);
203}
204
/* seq_file show callback for the fake "media" entry: always reports "disk". */
static int fake_ide_media_proc_show(struct seq_file *m, void *v)
{
	seq_puts(m, "disk\n");

	return 0;
}
210
211static int fake_ide_media_proc_open(struct inode *inode, struct file *file)
212{
213 return single_open(file, fake_ide_media_proc_show, NULL);
214}
215
216static const struct file_operations fake_ide_media_proc_fops = {
217 .owner = THIS_MODULE,
218 .open = fake_ide_media_proc_open,
219 .read = seq_read,
220 .llseek = seq_lseek,
221 .release = single_release,
222};
223
224static void make_ide_entries(const char *dev_name)
225{
226 struct proc_dir_entry *dir, *ent;
227 char name[64];
228
229 if(proc_ide_root == NULL) make_proc_ide();
230
231 dir = proc_mkdir(dev_name, proc_ide);
232 if(!dir) return;
233
234 ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops);
235 if(!ent) return;
236 snprintf(name, sizeof(name), "ide0/%s", dev_name);
237 proc_symlink(dev_name, proc_ide_root, name);
238}
239
240static int fake_ide_setup(char *str)
241{
242 fake_ide = 1;
243 return 1;
244}
245
246__setup("fake_ide", fake_ide_setup);
247
248__uml_help(fake_ide_setup,
249"fake_ide\n"
250" Create ide0 entries that map onto ubd devices.\n\n"
251);
252
/*
 * Parse a unit designator at *ptr: either a number (any base understood by
 * simple_strtoul with base 0) or a single lower-case letter, 'a' being 0.
 * On success *ptr is advanced past the designator and the unit is returned;
 * otherwise -1 is returned and *ptr is left alone.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur >= 'a') && (*cur <= 'z')) {
		unit = *cur - 'a';
		*ptr = cur + 1;
		return unit;
	}

	return -1;
}
271
272/* If *index_out == -1 at exit, the passed option was a general one;
273 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
274 * should not be freed on exit.
275 */
276static int ubd_setup_common(char *str, int *index_out, char **error_out)
277{
278 struct ubd *ubd_dev;
279 struct openflags flags = global_openflags;
280 char *backing_file;
281 int n, err = 0, i;
282
283 if(index_out) *index_out = -1;
284 n = *str;
285 if(n == '='){
286 char *end;
287 int major;
288
289 str++;
290 if(!strcmp(str, "sync")){
291 global_openflags = of_sync(global_openflags);
292 goto out1;
293 }
294
295 err = -EINVAL;
296 major = simple_strtoul(str, &end, 0);
297 if((*end != '\0') || (end == str)){
298 *error_out = "Didn't parse major number";
299 goto out1;
300 }
301
302 mutex_lock(&ubd_lock);
303 if (fake_major != UBD_MAJOR) {
304 *error_out = "Can't assign a fake major twice";
305 goto out1;
306 }
307
308 fake_major = major;
309
310 printk(KERN_INFO "Setting extra ubd major number to %d\n",
311 major);
312 err = 0;
313 out1:
314 mutex_unlock(&ubd_lock);
315 return err;
316 }
317
318 n = parse_unit(&str);
319 if(n < 0){
320 *error_out = "Couldn't parse device number";
321 return -EINVAL;
322 }
323 if(n >= MAX_DEV){
324 *error_out = "Device number out of range";
325 return 1;
326 }
327
328 err = -EBUSY;
329 mutex_lock(&ubd_lock);
330
331 ubd_dev = &ubd_devs[n];
332 if(ubd_dev->file != NULL){
333 *error_out = "Device is already configured";
334 goto out;
335 }
336
337 if (index_out)
338 *index_out = n;
339
340 err = -EINVAL;
341 for (i = 0; i < sizeof("rscd="); i++) {
342 switch (*str) {
343 case 'r':
344 flags.w = 0;
345 break;
346 case 's':
347 flags.s = 1;
348 break;
349 case 'd':
350 ubd_dev->no_cow = 1;
351 break;
352 case 'c':
353 ubd_dev->shared = 1;
354 break;
355 case '=':
356 str++;
357 goto break_loop;
358 default:
359 *error_out = "Expected '=' or flag letter "
360 "(r, s, c, or d)";
361 goto out;
362 }
363 str++;
364 }
365
366 if (*str == '=')
367 *error_out = "Too many flags specified";
368 else
369 *error_out = "Missing '='";
370 goto out;
371
372break_loop:
373 backing_file = strchr(str, ',');
374
375 if (backing_file == NULL)
376 backing_file = strchr(str, ':');
377
378 if(backing_file != NULL){
379 if(ubd_dev->no_cow){
380 *error_out = "Can't specify both 'd' and a cow file";
381 goto out;
382 }
383 else {
384 *backing_file = '\0';
385 backing_file++;
386 }
387 }
388 err = 0;
389 ubd_dev->file = str;
390 ubd_dev->cow.file = backing_file;
391 ubd_dev->boot_openflags = flags;
392out:
393 mutex_unlock(&ubd_lock);
394 return err;
395}
396
397static int ubd_setup(char *str)
398{
399 char *error;
400 int err;
401
402 err = ubd_setup_common(str, NULL, &error);
403 if(err)
404 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
405 "%s\n", str, error);
406 return 1;
407}
408
409__setup("ubd", ubd_setup);
410__uml_help(ubd_setup,
411"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
412" This is used to associate a device with a file in the underlying\n"
413" filesystem. When specifying two filenames, the first one is the\n"
414" COW name and the second is the backing file name. As separator you can\n"
415" use either a ':' or a ',': the first one allows writing things like;\n"
416" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
417" while with a ',' the shell would not expand the 2nd '~'.\n"
418" When using only one filename, UML will detect whether to treat it like\n"
419" a COW file or a backing file. To override this detection, add the 'd'\n"
420" flag:\n"
421" ubd0d=BackingFile\n"
422" Usually, there is a filesystem in the file, but \n"
423" that's not required. Swap devices containing swap files can be\n"
424" specified like this. Also, a file which doesn't contain a\n"
425" filesystem can have its contents read in the virtual \n"
426" machine by running 'dd' on the device. <n> must be in the range\n"
427" 0 to 7. Appending an 'r' to the number will cause that device\n"
428" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
429" an 's' will cause data to be written to disk on the host immediately.\n"
430" 'c' will cause the device to be treated as being shared between multiple\n"
431" UMLs and file locking will be turned off - this is appropriate for a\n"
432" cluster filesystem and inappropriate at almost all other times.\n\n"
433);
434
435static int udb_setup(char *str)
436{
437 printk("udb%s specified on command line is almost certainly a ubd -> "
438 "udb TYPO\n", str);
439 return 1;
440}
441
442__setup("udb", udb_setup);
443__uml_help(udb_setup,
444"udb\n"
445" This option is here solely to catch ubd -> udb typos, which can be\n"
446" to impossible to catch visually unless you specifically look for\n"
447" them. The only result of any option starting with 'udb' is an error\n"
448" in the boot output.\n\n"
449);
450
/* Request-queue handler, defined near the end of this file. */
static void do_ubd_request(struct request_queue *q);

/* Only changed by ubd_init, which is an initcall. */
/* Descriptor used to pass io_thread_req pointers to and from the I/O thread. */
static int thread_fd = -1;
/* Devices whose request submission stalled and must be retried. */
static LIST_HEAD(restart);
456
457/* XXX - move this inside ubd_intr. */
458/* Called without dev->lock held, and only in interrupt context. */
459static void ubd_handler(void)
460{
461 struct io_thread_req *req;
462 struct ubd *ubd;
463 struct list_head *list, *next_ele;
464 unsigned long flags;
465 int n;
466
467 while(1){
468 n = os_read_file(thread_fd, &req,
469 sizeof(struct io_thread_req *));
470 if(n != sizeof(req)){
471 if(n == -EAGAIN)
472 break;
473 printk(KERN_ERR "spurious interrupt in ubd_handler, "
474 "err = %d\n", -n);
475 return;
476 }
477
478 blk_end_request(req->req, 0, req->length);
479 kfree(req);
480 }
481 reactivate_fd(thread_fd, UBD_IRQ);
482
483 list_for_each_safe(list, next_ele, &restart){
484 ubd = container_of(list, struct ubd, restart);
485 list_del_init(&ubd->restart);
486 spin_lock_irqsave(&ubd->lock, flags);
487 do_ubd_request(ubd->queue);
488 spin_unlock_irqrestore(&ubd->lock, flags);
489 }
490}
491
492static irqreturn_t ubd_intr(int irq, void *dev)
493{
494 ubd_handler();
495 return IRQ_HANDLED;
496}
497
/* Only changed by ubd_init, which is an initcall. */
/* Pid of the I/O thread, or -1 when there is none. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started. */
static void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
508
509static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
510{
511 char *file;
512
513 file = ubd_dev->cow.file ? ubd_dev->cow.file : ubd_dev->file;
514 return os_file_size(file, size_out);
515}
516
/*
 * Read len bytes at file offset 'offset' of fd into buf.
 * Returns 0 on success or the negative error from the seek or the read.
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_seek_file(fd, offset);

	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}
531
532static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
533{
534 unsigned long modtime;
535 unsigned long long actual;
536 int err;
537
538 err = os_file_modtime(file, &modtime);
539 if (err < 0) {
540 printk(KERN_ERR "Failed to get modification time of backing "
541 "file \"%s\", err = %d\n", file, -err);
542 return err;
543 }
544
545 err = os_file_size(file, &actual);
546 if (err < 0) {
547 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
548 "err = %d\n", file, -err);
549 return err;
550 }
551
552 if (actual != size) {
553 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
554 * the typecast.*/
555 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
556 "vs backing file\n", (unsigned long long) size, actual);
557 return -EINVAL;
558 }
559 if (modtime != mtime) {
560 printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
561 "backing file\n", mtime, modtime);
562 return -EINVAL;
563 }
564 return 0;
565}
566
567static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
568{
569 struct uml_stat buf1, buf2;
570 int err;
571
572 if (from_cmdline == NULL)
573 return 0;
574 if (!strcmp(from_cmdline, from_cow))
575 return 0;
576
577 err = os_stat_file(from_cmdline, &buf1);
578 if (err < 0) {
579 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
580 -err);
581 return 0;
582 }
583 err = os_stat_file(from_cow, &buf2);
584 if (err < 0) {
585 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
586 -err);
587 return 1;
588 }
589 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
590 return 0;
591
592 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
593 "\"%s\" specified in COW header of \"%s\"\n",
594 from_cmdline, from_cow, cow);
595 return 1;
596}
597
598static int open_ubd_file(char *file, struct openflags *openflags, int shared,
599 char **backing_file_out, int *bitmap_offset_out,
600 unsigned long *bitmap_len_out, int *data_offset_out,
601 int *create_cow_out)
602{
603 time_t mtime;
604 unsigned long long size;
605 __u32 version, align;
606 char *backing_file;
607 int fd, err, sectorsize, asked_switch, mode = 0644;
608
609 fd = os_open_file(file, *openflags, mode);
610 if (fd < 0) {
611 if ((fd == -ENOENT) && (create_cow_out != NULL))
612 *create_cow_out = 1;
613 if (!openflags->w ||
614 ((fd != -EROFS) && (fd != -EACCES)))
615 return fd;
616 openflags->w = 0;
617 fd = os_open_file(file, *openflags, mode);
618 if (fd < 0)
619 return fd;
620 }
621
622 if (shared)
623 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
624 else {
625 err = os_lock_file(fd, openflags->w);
626 if (err < 0) {
627 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
628 file, -err);
629 goto out_close;
630 }
631 }
632
633 /* Successful return case! */
634 if (backing_file_out == NULL)
635 return fd;
636
637 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
638 &size, §orsize, &align, bitmap_offset_out);
639 if (err && (*backing_file_out != NULL)) {
640 printk(KERN_ERR "Failed to read COW header from COW file "
641 "\"%s\", errno = %d\n", file, -err);
642 goto out_close;
643 }
644 if (err)
645 return fd;
646
647 asked_switch = path_requires_switch(*backing_file_out, backing_file,
648 file);
649
650 /* Allow switching only if no mismatch. */
651 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
652 mtime)) {
653 printk(KERN_ERR "Switching backing file to '%s'\n",
654 *backing_file_out);
655 err = write_cow_header(file, fd, *backing_file_out,
656 sectorsize, align, &size);
657 if (err) {
658 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
659 goto out_close;
660 }
661 } else {
662 *backing_file_out = backing_file;
663 err = backing_file_mismatch(*backing_file_out, size, mtime);
664 if (err)
665 goto out_close;
666 }
667
668 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
669 bitmap_len_out, data_offset_out);
670
671 return fd;
672 out_close:
673 os_close_file(fd);
674 return err;
675}
676
677static int create_cow_file(char *cow_file, char *backing_file,
678 struct openflags flags,
679 int sectorsize, int alignment, int *bitmap_offset_out,
680 unsigned long *bitmap_len_out, int *data_offset_out)
681{
682 int err, fd;
683
684 flags.c = 1;
685 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
686 if (fd < 0) {
687 err = fd;
688 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
689 cow_file, -err);
690 goto out;
691 }
692
693 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
694 bitmap_offset_out, bitmap_len_out,
695 data_offset_out);
696 if (!err)
697 return fd;
698 os_close_file(fd);
699 out:
700 return err;
701}
702
703static void ubd_close_dev(struct ubd *ubd_dev)
704{
705 os_close_file(ubd_dev->fd);
706 if(ubd_dev->cow.file == NULL)
707 return;
708
709 os_close_file(ubd_dev->cow.fd);
710 vfree(ubd_dev->cow.bitmap);
711 ubd_dev->cow.bitmap = NULL;
712}
713
714static int ubd_open_dev(struct ubd *ubd_dev)
715{
716 struct openflags flags;
717 char **back_ptr;
718 int err, create_cow, *create_ptr;
719 int fd;
720
721 ubd_dev->openflags = ubd_dev->boot_openflags;
722 create_cow = 0;
723 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
724 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
725
726 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
727 back_ptr, &ubd_dev->cow.bitmap_offset,
728 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
729 create_ptr);
730
731 if((fd == -ENOENT) && create_cow){
732 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
733 ubd_dev->openflags, 1 << 9, PAGE_SIZE,
734 &ubd_dev->cow.bitmap_offset,
735 &ubd_dev->cow.bitmap_len,
736 &ubd_dev->cow.data_offset);
737 if(fd >= 0){
738 printk(KERN_INFO "Creating \"%s\" as COW file for "
739 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
740 }
741 }
742
743 if(fd < 0){
744 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
745 -fd);
746 return fd;
747 }
748 ubd_dev->fd = fd;
749
750 if(ubd_dev->cow.file != NULL){
751 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
752
753 err = -ENOMEM;
754 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
755 if(ubd_dev->cow.bitmap == NULL){
756 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
757 goto error;
758 }
759 flush_tlb_kernel_vm();
760
761 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
762 ubd_dev->cow.bitmap_offset,
763 ubd_dev->cow.bitmap_len);
764 if(err < 0)
765 goto error;
766
767 flags = ubd_dev->openflags;
768 flags.w = 0;
769 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
770 NULL, NULL, NULL, NULL);
771 if(err < 0) goto error;
772 ubd_dev->cow.fd = err;
773 }
774 return 0;
775 error:
776 os_close_file(ubd_dev->fd);
777 return err;
778}
779
780static void ubd_device_release(struct device *dev)
781{
782 struct ubd *ubd_dev = dev_get_drvdata(dev);
783
784 blk_cleanup_queue(ubd_dev->queue);
785 *ubd_dev = ((struct ubd) DEFAULT_UBD);
786}
787
788static int ubd_disk_register(int major, u64 size, int unit,
789 struct gendisk **disk_out)
790{
791 struct gendisk *disk;
792
793 disk = alloc_disk(1 << UBD_SHIFT);
794 if(disk == NULL)
795 return -ENOMEM;
796
797 disk->major = major;
798 disk->first_minor = unit << UBD_SHIFT;
799 disk->fops = &ubd_blops;
800 set_capacity(disk, size / 512);
801 if (major == UBD_MAJOR)
802 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
803 else
804 sprintf(disk->disk_name, "ubd_fake%d", unit);
805
806 /* sysfs register (not for ide fake devices) */
807 if (major == UBD_MAJOR) {
808 ubd_devs[unit].pdev.id = unit;
809 ubd_devs[unit].pdev.name = DRIVER_NAME;
810 ubd_devs[unit].pdev.dev.release = ubd_device_release;
811 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
812 platform_device_register(&ubd_devs[unit].pdev);
813 disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
814 }
815
816 disk->private_data = &ubd_devs[unit];
817 disk->queue = ubd_devs[unit].queue;
818 add_disk(disk);
819
820 *disk_out = disk;
821 return 0;
822}
823
/*
 * Round n up to the next multiple of 512 (the sector size).
 * The argument is parenthesised so expressions expand correctly, and the
 * mask is written as -(1 << 9) because left-shifting a negative value is
 * undefined behaviour.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & (-(1 << 9)))
825
826static int ubd_add(int n, char **error_out)
827{
828 struct ubd *ubd_dev = &ubd_devs[n];
829 int err = 0;
830
831 if(ubd_dev->file == NULL)
832 goto out;
833
834 err = ubd_file_size(ubd_dev, &ubd_dev->size);
835 if(err < 0){
836 *error_out = "Couldn't determine size of device's file";
837 goto out;
838 }
839
840 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
841
842 INIT_LIST_HEAD(&ubd_dev->restart);
843 sg_init_table(ubd_dev->sg, MAX_SG);
844
845 err = -ENOMEM;
846 ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
847 if (ubd_dev->queue == NULL) {
848 *error_out = "Failed to initialize device queue";
849 goto out;
850 }
851 ubd_dev->queue->queuedata = ubd_dev;
852
853 blk_queue_max_segments(ubd_dev->queue, MAX_SG);
854 err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
855 if(err){
856 *error_out = "Failed to register device";
857 goto out_cleanup;
858 }
859
860 if (fake_major != UBD_MAJOR)
861 ubd_disk_register(fake_major, ubd_dev->size, n,
862 &fake_gendisk[n]);
863
864 /*
865 * Perhaps this should also be under the "if (fake_major)" above
866 * using the fake_disk->disk_name
867 */
868 if (fake_ide)
869 make_ide_entries(ubd_gendisk[n]->disk_name);
870
871 err = 0;
872out:
873 return err;
874
875out_cleanup:
876 blk_cleanup_queue(ubd_dev->queue);
877 goto out;
878}
879
880static int ubd_config(char *str, char **error_out)
881{
882 int n, ret;
883
884 /* This string is possibly broken up and stored, so it's only
885 * freed if ubd_setup_common fails, or if only general options
886 * were set.
887 */
888 str = kstrdup(str, GFP_KERNEL);
889 if (str == NULL) {
890 *error_out = "Failed to allocate memory";
891 return -ENOMEM;
892 }
893
894 ret = ubd_setup_common(str, &n, error_out);
895 if (ret)
896 goto err_free;
897
898 if (n == -1) {
899 ret = 0;
900 goto err_free;
901 }
902
903 mutex_lock(&ubd_lock);
904 ret = ubd_add(n, error_out);
905 if (ret)
906 ubd_devs[n].file = NULL;
907 mutex_unlock(&ubd_lock);
908
909out:
910 return ret;
911
912err_free:
913 kfree(str);
914 goto out;
915}
916
917static int ubd_get_config(char *name, char *str, int size, char **error_out)
918{
919 struct ubd *ubd_dev;
920 int n, len = 0;
921
922 n = parse_unit(&name);
923 if((n >= MAX_DEV) || (n < 0)){
924 *error_out = "ubd_get_config : device number out of range";
925 return -1;
926 }
927
928 ubd_dev = &ubd_devs[n];
929 mutex_lock(&ubd_lock);
930
931 if(ubd_dev->file == NULL){
932 CONFIG_CHUNK(str, size, len, "", 1);
933 goto out;
934 }
935
936 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
937
938 if(ubd_dev->cow.file != NULL){
939 CONFIG_CHUNK(str, size, len, ",", 0);
940 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
941 }
942 else CONFIG_CHUNK(str, size, len, "", 1);
943
944 out:
945 mutex_unlock(&ubd_lock);
946 return len;
947}
948
949static int ubd_id(char **str, int *start_out, int *end_out)
950{
951 int n;
952
953 n = parse_unit(str);
954 *start_out = 0;
955 *end_out = MAX_DEV - 1;
956 return n;
957}
958
959static int ubd_remove(int n, char **error_out)
960{
961 struct gendisk *disk = ubd_gendisk[n];
962 struct ubd *ubd_dev;
963 int err = -ENODEV;
964
965 mutex_lock(&ubd_lock);
966
967 ubd_dev = &ubd_devs[n];
968
969 if(ubd_dev->file == NULL)
970 goto out;
971
972 /* you cannot remove a open disk */
973 err = -EBUSY;
974 if(ubd_dev->count > 0)
975 goto out;
976
977 ubd_gendisk[n] = NULL;
978 if(disk != NULL){
979 del_gendisk(disk);
980 put_disk(disk);
981 }
982
983 if(fake_gendisk[n] != NULL){
984 del_gendisk(fake_gendisk[n]);
985 put_disk(fake_gendisk[n]);
986 fake_gendisk[n] = NULL;
987 }
988
989 err = 0;
990 platform_device_unregister(&ubd_dev->pdev);
991out:
992 mutex_unlock(&ubd_lock);
993 return err;
994}
995
996/* All these are called by mconsole in process context and without
997 * ubd-specific locks. The structure itself is const except for .list.
998 */
999static struct mc_device ubd_mc = {
1000 .list = LIST_HEAD_INIT(ubd_mc.list),
1001 .name = "ubd",
1002 .config = ubd_config,
1003 .get_config = ubd_get_config,
1004 .id = ubd_id,
1005 .remove = ubd_remove,
1006};
1007
1008static int __init ubd_mc_init(void)
1009{
1010 mconsole_register_dev(&ubd_mc);
1011 return 0;
1012}
1013
1014__initcall(ubd_mc_init);
1015
1016static int __init ubd0_init(void)
1017{
1018 struct ubd *ubd_dev = &ubd_devs[0];
1019
1020 mutex_lock(&ubd_lock);
1021 if(ubd_dev->file == NULL)
1022 ubd_dev->file = "root_fs";
1023 mutex_unlock(&ubd_lock);
1024
1025 return 0;
1026}
1027
1028__initcall(ubd0_init);
1029
1030/* Used in ubd_init, which is an initcall */
1031static struct platform_driver ubd_driver = {
1032 .driver = {
1033 .name = DRIVER_NAME,
1034 },
1035};
1036
1037static int __init ubd_init(void)
1038{
1039 char *error;
1040 int i, err;
1041
1042 if (register_blkdev(UBD_MAJOR, "ubd"))
1043 return -1;
1044
1045 if (fake_major != UBD_MAJOR) {
1046 char name[sizeof("ubd_nnn\0")];
1047
1048 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1049 if (register_blkdev(fake_major, "ubd"))
1050 return -1;
1051 }
1052 platform_driver_register(&ubd_driver);
1053 mutex_lock(&ubd_lock);
1054 for (i = 0; i < MAX_DEV; i++){
1055 err = ubd_add(i, &error);
1056 if(err)
1057 printk(KERN_ERR "Failed to initialize ubd device %d :"
1058 "%s\n", i, error);
1059 }
1060 mutex_unlock(&ubd_lock);
1061 return 0;
1062}
1063
1064late_initcall(ubd_init);
1065
1066static int __init ubd_driver_init(void){
1067 unsigned long stack;
1068 int err;
1069
1070 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1071 if(global_openflags.s){
1072 printk(KERN_INFO "ubd: Synchronous mode\n");
1073 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1074 * enough. So use anyway the io thread. */
1075 }
1076 stack = alloc_stack(0, 0);
1077 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1078 &thread_fd);
1079 if(io_pid < 0){
1080 printk(KERN_ERR
1081 "ubd : Failed to start I/O thread (errno = %d) - "
1082 "falling back to synchronous I/O\n", -io_pid);
1083 io_pid = -1;
1084 return 0;
1085 }
1086 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1087 IRQF_DISABLED, "ubd", ubd_devs);
1088 if(err != 0)
1089 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1090 return 0;
1091}
1092
1093device_initcall(ubd_driver_init);
1094
1095static int ubd_open(struct block_device *bdev, fmode_t mode)
1096{
1097 struct gendisk *disk = bdev->bd_disk;
1098 struct ubd *ubd_dev = disk->private_data;
1099 int err = 0;
1100
1101 if(ubd_dev->count == 0){
1102 err = ubd_open_dev(ubd_dev);
1103 if(err){
1104 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1105 disk->disk_name, ubd_dev->file, -err);
1106 goto out;
1107 }
1108 }
1109 ubd_dev->count++;
1110 set_disk_ro(disk, !ubd_dev->openflags.w);
1111
1112 /* This should no more be needed. And it didn't work anyway to exclude
1113 * read-write remounting of filesystems.*/
1114 /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1115 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1116 err = -EROFS;
1117 }*/
1118 out:
1119 return err;
1120}
1121
1122static int ubd_release(struct gendisk *disk, fmode_t mode)
1123{
1124 struct ubd *ubd_dev = disk->private_data;
1125
1126 if(--ubd_dev->count == 0)
1127 ubd_close_dev(ubd_dev);
1128 return 0;
1129}
1130
1131static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1132 __u64 *cow_offset, unsigned long *bitmap,
1133 __u64 bitmap_offset, unsigned long *bitmap_words,
1134 __u64 bitmap_len)
1135{
1136 __u64 sector = io_offset >> 9;
1137 int i, update_bitmap = 0;
1138
1139 for(i = 0; i < length >> 9; i++){
1140 if(cow_mask != NULL)
1141 ubd_set_bit(i, (unsigned char *) cow_mask);
1142 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1143 continue;
1144
1145 update_bitmap = 1;
1146 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1147 }
1148
1149 if(!update_bitmap)
1150 return;
1151
1152 *cow_offset = sector / (sizeof(unsigned long) * 8);
1153
1154 /* This takes care of the case where we're exactly at the end of the
1155 * device, and *cow_offset + 1 is off the end. So, just back it up
1156 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1157 * for the original diagnosis.
1158 */
1159 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1160 sizeof(unsigned long)) - 1))
1161 (*cow_offset)--;
1162
1163 bitmap_words[0] = bitmap[*cow_offset];
1164 bitmap_words[1] = bitmap[*cow_offset + 1];
1165
1166 *cow_offset *= sizeof(unsigned long);
1167 *cow_offset += bitmap_offset;
1168}
1169
1170static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1171 __u64 bitmap_offset, __u64 bitmap_len)
1172{
1173 __u64 sector = req->offset >> 9;
1174 int i;
1175
1176 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1177 panic("Operation too long");
1178
1179 if(req->op == UBD_READ) {
1180 for(i = 0; i < req->length >> 9; i++){
1181 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1182 ubd_set_bit(i, (unsigned char *)
1183 &req->sector_mask);
1184 }
1185 }
1186 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1187 &req->cow_offset, bitmap, bitmap_offset,
1188 req->bitmap_words, bitmap_len);
1189}
1190
1191/* Called with dev->lock held */
1192static void prepare_request(struct request *req, struct io_thread_req *io_req,
1193 unsigned long long offset, int page_offset,
1194 int len, struct page *page)
1195{
1196 struct gendisk *disk = req->rq_disk;
1197 struct ubd *ubd_dev = disk->private_data;
1198
1199 io_req->req = req;
1200 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1201 ubd_dev->fd;
1202 io_req->fds[1] = ubd_dev->fd;
1203 io_req->cow_offset = -1;
1204 io_req->offset = offset;
1205 io_req->length = len;
1206 io_req->error = 0;
1207 io_req->sector_mask = 0;
1208
1209 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1210 io_req->offsets[0] = 0;
1211 io_req->offsets[1] = ubd_dev->cow.data_offset;
1212 io_req->buffer = page_address(page) + page_offset;
1213 io_req->sectorsize = 1 << 9;
1214
1215 if(ubd_dev->cow.file != NULL)
1216 cowify_req(io_req, ubd_dev->cow.bitmap,
1217 ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
1218
1219}
1220
1221/* Called with dev->lock held */
1222static void do_ubd_request(struct request_queue *q)
1223{
1224 struct io_thread_req *io_req;
1225 struct request *req;
1226 sector_t sector;
1227 int n;
1228
1229 while(1){
1230 struct ubd *dev = q->queuedata;
1231 if(dev->end_sg == 0){
1232 struct request *req = blk_fetch_request(q);
1233 if(req == NULL)
1234 return;
1235
1236 dev->request = req;
1237 dev->start_sg = 0;
1238 dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1239 }
1240
1241 req = dev->request;
1242 sector = blk_rq_pos(req);
1243 while(dev->start_sg < dev->end_sg){
1244 struct scatterlist *sg = &dev->sg[dev->start_sg];
1245
1246 io_req = kmalloc(sizeof(struct io_thread_req),
1247 GFP_ATOMIC);
1248 if(io_req == NULL){
1249 if(list_empty(&dev->restart))
1250 list_add(&dev->restart, &restart);
1251 return;
1252 }
1253 prepare_request(req, io_req,
1254 (unsigned long long)sector << 9,
1255 sg->offset, sg->length, sg_page(sg));
1256
1257 sector += sg->length >> 9;
1258 n = os_write_file(thread_fd, &io_req,
1259 sizeof(struct io_thread_req *));
1260 if(n != sizeof(struct io_thread_req *)){
1261 if(n != -EAGAIN)
1262 printk("write to io thread failed, "
1263 "errno = %d\n", -n);
1264 else if(list_empty(&dev->restart))
1265 list_add(&dev->restart, &restart);
1266 kfree(io_req);
1267 return;
1268 }
1269
1270 dev->start_sg++;
1271 }
1272 dev->end_sg = 0;
1273 dev->request = NULL;
1274 }
1275}
1276
1277static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1278{
1279 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1280
1281 geo->heads = 128;
1282 geo->sectors = 32;
1283 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1284 return 0;
1285}
1286
1287static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1288 unsigned int cmd, unsigned long arg)
1289{
1290 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1291 u16 ubd_id[ATA_ID_WORDS];
1292
1293 switch (cmd) {
1294 struct cdrom_volctrl volume;
1295 case HDIO_GET_IDENTITY:
1296 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1297 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1298 ubd_id[ATA_ID_HEADS] = 128;
1299 ubd_id[ATA_ID_SECTORS] = 32;
1300 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1301 sizeof(ubd_id)))
1302 return -EFAULT;
1303 return 0;
1304
1305 case CDROMVOLREAD:
1306 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1307 return -EFAULT;
1308 volume.channel0 = 255;
1309 volume.channel1 = 255;
1310 volume.channel2 = 255;
1311 volume.channel3 = 255;
1312 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1313 return -EFAULT;
1314 return 0;
1315 }
1316 return -EINVAL;
1317}
1318
1319static int update_bitmap(struct io_thread_req *req)
1320{
1321 int n;
1322
1323 if(req->cow_offset == -1)
1324 return 0;
1325
1326 n = os_seek_file(req->fds[1], req->cow_offset);
1327 if(n < 0){
1328 printk("do_io - bitmap lseek failed : err = %d\n", -n);
1329 return 1;
1330 }
1331
1332 n = os_write_file(req->fds[1], &req->bitmap_words,
1333 sizeof(req->bitmap_words));
1334 if(n != sizeof(req->bitmap_words)){
1335 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1336 req->fds[1]);
1337 return 1;
1338 }
1339
1340 return 0;
1341}
1342
1343static void do_io(struct io_thread_req *req)
1344{
1345 char *buf;
1346 unsigned long len;
1347 int n, nsectors, start, end, bit;
1348 int err;
1349 __u64 off;
1350
1351 nsectors = req->length / req->sectorsize;
1352 start = 0;
1353 do {
1354 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1355 end = start;
1356 while((end < nsectors) &&
1357 (ubd_test_bit(end, (unsigned char *)
1358 &req->sector_mask) == bit))
1359 end++;
1360
1361 off = req->offset + req->offsets[bit] +
1362 start * req->sectorsize;
1363 len = (end - start) * req->sectorsize;
1364 buf = &req->buffer[start * req->sectorsize];
1365
1366 err = os_seek_file(req->fds[bit], off);
1367 if(err < 0){
1368 printk("do_io - lseek failed : err = %d\n", -err);
1369 req->error = 1;
1370 return;
1371 }
1372 if(req->op == UBD_READ){
1373 n = 0;
1374 do {
1375 buf = &buf[n];
1376 len -= n;
1377 n = os_read_file(req->fds[bit], buf, len);
1378 if (n < 0) {
1379 printk("do_io - read failed, err = %d "
1380 "fd = %d\n", -n, req->fds[bit]);
1381 req->error = 1;
1382 return;
1383 }
1384 } while((n < len) && (n != 0));
1385 if (n < len) memset(&buf[n], 0, len - n);
1386 } else {
1387 n = os_write_file(req->fds[bit], buf, len);
1388 if(n != len){
1389 printk("do_io - write failed err = %d "
1390 "fd = %d\n", -n, req->fds[bit]);
1391 req->error = 1;
1392 return;
1393 }
1394 }
1395
1396 start = end;
1397 } while(start < nsectors);
1398
1399 req->error = update_bitmap(req);
1400}
1401
/*
 * File descriptor io_thread() reads request pointers from and writes
 * finished request pointers back to; -1 until it has been set up.
 *
 * Changed in start_io_thread, which is serialized by being called only
 * from ubd_init, which is an initcall.
 */
int kernel_fd = -1;

/*
 * Number of requests io_thread() has picked up.  Only changed by the io
 * thread.  XXX: incremented there but not read anywhere in this part of
 * the file.
 */
static int io_count = 0;
1409
1410int io_thread(void *arg)
1411{
1412 struct io_thread_req *req;
1413 int n;
1414
1415 ignore_sigwinch_sig();
1416 while(1){
1417 n = os_read_file(kernel_fd, &req,
1418 sizeof(struct io_thread_req *));
1419 if(n != sizeof(struct io_thread_req *)){
1420 if(n < 0)
1421 printk("io_thread - read failed, fd = %d, "
1422 "err = %d\n", kernel_fd, -n);
1423 else {
1424 printk("io_thread - short read, fd = %d, "
1425 "length = %d\n", kernel_fd, n);
1426 }
1427 continue;
1428 }
1429 io_count++;
1430 do_io(req);
1431 n = os_write_file(kernel_fd, &req,
1432 sizeof(struct io_thread_req *));
1433 if(n != sizeof(struct io_thread_req *))
1434 printk("io_thread - write failed, fd = %d, err = %d\n",
1435 kernel_fd, -n);
1436 }
1437
1438 return 0;
1439}