/*
   linear.c : Multiple Devices driver for Linux
	      Copyright (C) 1994-96 Marc ZYNGIER
	      <zyngier@ufr-info-p7.ibp.fr> or
	      <maz@gloups.fdn.fr>

   Linear mode management functions.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <linux/blkdev.h>
#include <linux/raid/md_u.h>
#include <linux/seq_file.h>
#include "md.h"
#include "linear.h"

/*
 * find which device holds a particular offset
 */
static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector)
{
	dev_info_t *hash;
	linear_conf_t *conf = mddev_to_conf(mddev);
	sector_t idx = sector >> conf->sector_shift;

	/*
	 * sector_div(a,b) returns the remainder and sets a to a/b
	 */
	(void)sector_div(idx, conf->spacing);
	hash = conf->hash_table[idx];

	while (sector >= hash->num_sectors + hash->start_sector)
		hash++;
	return hash;
}
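
/*
 * Worked example (member sizes assumed purely for illustration): with two
 * members of 1000 and 3000 sectors, linear_conf() below ends up with a
 * spacing of 1000 sectors and no pre-shift, so a request at sector 2500
 * gives idx = 2500 / 1000 = 2, and hash_table[2] already points at the
 * second member (which starts at sector 1000).  The while loop above only
 * has to walk forward when a hash slot begins before the member that
 * actually holds the sector.
 */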

/**
 * linear_mergeable_bvec -- tell bio layer if two requests can be merged
 * @q: request queue
 * @bvm: properties of new bio
 * @biovec: the request that could be merged to it.
 *
 * Return amount of bytes we can take at this offset
 */
static int linear_mergeable_bvec(struct request_queue *q,
				 struct bvec_merge_data *bvm,
				 struct bio_vec *biovec)
{
	mddev_t *mddev = q->queuedata;
	dev_info_t *dev0;
	unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9;
	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);

	dev0 = which_dev(mddev, sector);
	maxsectors = dev0->num_sectors - (sector - dev0->start_sector);

	if (maxsectors < bio_sectors)
		maxsectors = 0;
	else
		maxsectors -= bio_sectors;

	if (maxsectors <= (PAGE_SIZE >> 9) && bio_sectors == 0)
		return biovec->bv_len;
	/* The bytes available at this offset could be really big,
	 * so we cap at 2^31 to avoid overflow */
	if (maxsectors > (1 << (31-9)))
		return 1<<31;
	return maxsectors << 9;
}

static void linear_unplug(struct request_queue *q)
{
	mddev_t *mddev = q->queuedata;
	linear_conf_t *conf = mddev_to_conf(mddev);
	int i;

	for (i=0; i < mddev->raid_disks; i++) {
		struct request_queue *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev);
		blk_unplug(r_queue);
	}
}

static int linear_congested(void *data, int bits)
{
	mddev_t *mddev = data;
	linear_conf_t *conf = mddev_to_conf(mddev);
	int i, ret = 0;

	for (i = 0; i < mddev->raid_disks && !ret ; i++) {
		struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev);
		ret |= bdi_congested(&q->backing_dev_info, bits);
	}
	return ret;
}

static sector_t linear_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
	linear_conf_t *conf = mddev_to_conf(mddev);

	WARN_ONCE(sectors || raid_disks,
		  "%s does not support generic reshape\n", __func__);

	return conf->array_sectors;
}
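
/*
 * Build the private configuration for @raid_disks members: record each
 * member's size and cumulative start offset, pick a hash "spacing" small
 * enough that the lookup table fits in one page, and fill that table so
 * which_dev() can jump straight to (or just before) the right member.
 * Returns NULL if a member is missing or misnumbered, or on allocation
 * failure.
 */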
static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
{
	linear_conf_t *conf;
	dev_info_t **table;
	mdk_rdev_t *rdev;
	int i, nb_zone, cnt;
	sector_t min_sectors;
	sector_t curr_sector;

	conf = kzalloc(sizeof(*conf) + raid_disks*sizeof(dev_info_t),
		       GFP_KERNEL);
	if (!conf)
		return NULL;

	cnt = 0;
	conf->array_sectors = 0;

	list_for_each_entry(rdev, &mddev->disks, same_set) {
		int j = rdev->raid_disk;
		dev_info_t *disk = conf->disks + j;

		if (j < 0 || j >= raid_disks || disk->rdev) {
			printk("linear: disk numbering problem. Aborting!\n");
			goto out;
		}

		disk->rdev = rdev;

		blk_queue_stack_limits(mddev->queue,
				       rdev->bdev->bd_disk->queue);
		/* as we don't honour merge_bvec_fn, we must never risk
		 * violating it, so limit ->max_sector to one PAGE, as
		 * a one page request is never in violation.
		 */
		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);

		disk->num_sectors = rdev->sectors;
		conf->array_sectors += rdev->sectors;

		cnt++;
	}
	if (cnt != raid_disks) {
		printk("linear: not enough drives present. Aborting!\n");
		goto out;
	}

	min_sectors = conf->array_sectors;
	sector_div(min_sectors, PAGE_SIZE/sizeof(struct dev_info *));
	if (min_sectors == 0)
		min_sectors = 1;

	/* min_sectors is the minimum spacing that will fit the hash
	 * table in one PAGE. This may be much smaller than needed.
	 * We find the smallest non-terminal set of consecutive devices
	 * that is larger than min_sectors and use the size of that as
	 * the actual spacing
	 */
	conf->spacing = conf->array_sectors;
	for (i=0; i < cnt-1 ; i++) {
		sector_t tmp = 0;
		int j;
		for (j = i; j < cnt - 1 && tmp < min_sectors; j++)
			tmp += conf->disks[j].num_sectors;
		if (tmp >= min_sectors && tmp < conf->spacing)
			conf->spacing = tmp;
	}

	/* spacing may be too large for sector_div to work with,
	 * so we might need to pre-shift
	 */
	conf->sector_shift = 0;
	if (sizeof(sector_t) > sizeof(u32)) {
		sector_t space = conf->spacing;
		while (space > (sector_t)(~(u32)0)) {
			space >>= 1;
			conf->sector_shift++;
		}
	}
	/*
	 * This code was restructured to work around a gcc-2.95.3 internal
	 * compiler error.  Alter it with care.
	 */
	{
		sector_t sz;
		unsigned round;
		unsigned long base;

		sz = conf->array_sectors >> conf->sector_shift;
		sz += 1; /* force round-up */
		base = conf->spacing >> conf->sector_shift;
		round = sector_div(sz, base);
		nb_zone = sz + (round ? 1 : 0);
	}
	BUG_ON(nb_zone > PAGE_SIZE / sizeof(struct dev_info *));

	conf->hash_table = kmalloc(sizeof(struct dev_info *) * nb_zone,
				   GFP_KERNEL);
	if (!conf->hash_table)
		goto out;

	/*
	 * Here we generate the linear hash table
	 * First calculate the device offsets.
	 */
	conf->disks[0].start_sector = 0;
	for (i = 1; i < raid_disks; i++)
		conf->disks[i].start_sector =
			conf->disks[i-1].start_sector +
			conf->disks[i-1].num_sectors;

	table = conf->hash_table;
	i = 0;
	for (curr_sector = 0;
	     curr_sector < conf->array_sectors;
	     curr_sector += conf->spacing) {

		while (i < raid_disks-1 &&
		       curr_sector >= conf->disks[i+1].start_sector)
			i++;

		*table++ = conf->disks + i;
	}

	if (conf->sector_shift) {
		conf->spacing >>= conf->sector_shift;
		/* round spacing up so that when we divide by it,
		 * we err on the side of "too-low", which is safest.
		 */
		conf->spacing++;
	}

	BUG_ON(table - conf->hash_table > nb_zone);

	return conf;

out:
	kfree(conf);
	return NULL;
}

static int linear_run(mddev_t *mddev)
{
	linear_conf_t *conf;

	mddev->queue->queue_lock = &mddev->queue->__queue_lock;
	conf = linear_conf(mddev, mddev->raid_disks);

	if (!conf)
		return 1;
	mddev->private = conf;
	md_set_array_sectors(mddev, linear_size(mddev, 0, 0));

	blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
	mddev->queue->unplug_fn = linear_unplug;
	mddev->queue->backing_dev_info.congested_fn = linear_congested;
	mddev->queue->backing_dev_info.congested_data = mddev;
	return 0;
}

static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
{
	/* Adding a drive to a linear array allows the array to grow.
	 * It is permitted if the new drive has a matching superblock
	 * already on it, with raid_disk equal to raid_disks.
	 * It is achieved by creating a new linear_private_data structure
	 * and swapping it in place of the current one.
	 * The current one is never freed until the array is stopped.
	 * This avoids races.
	 */
	linear_conf_t *newconf;

	if (rdev->saved_raid_disk != mddev->raid_disks)
		return -EINVAL;

	rdev->raid_disk = rdev->saved_raid_disk;

	newconf = linear_conf(mddev, mddev->raid_disks+1);

	if (!newconf)
		return -ENOMEM;

	newconf->prev = mddev_to_conf(mddev);
	mddev->private = newconf;
	mddev->raid_disks++;
	md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
	set_capacity(mddev->gendisk, mddev->array_sectors);
	return 0;
}

static int linear_stop(mddev_t *mddev)
{
	linear_conf_t *conf = mddev_to_conf(mddev);

	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf' */
	do {
		linear_conf_t *t = conf->prev;
		kfree(conf->hash_table);
		kfree(conf);
		conf = t;
	} while (conf);

	return 0;
}
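
/*
 * Map an incoming bio onto the member device that holds its start sector.
 * Barriers are not supported, out-of-range requests are errored, and a bio
 * that straddles a member boundary is split and each half resubmitted.
 * Returning 1 tells the block layer to queue the (now retargeted) bio to
 * the member device.
 */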
static int linear_make_request(struct request_queue *q, struct bio *bio)
{
	const int rw = bio_data_dir(bio);
	mddev_t *mddev = q->queuedata;
	dev_info_t *tmp_dev;
	int cpu;

	if (unlikely(bio_barrier(bio))) {
		bio_endio(bio, -EOPNOTSUPP);
		return 0;
	}

	cpu = part_stat_lock();
	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
		      bio_sectors(bio));
	part_stat_unlock();

	tmp_dev = which_dev(mddev, bio->bi_sector);

	if (unlikely(bio->bi_sector >= (tmp_dev->num_sectors +
					tmp_dev->start_sector)
		     || (bio->bi_sector <
			 tmp_dev->start_sector))) {
		char b[BDEVNAME_SIZE];

		printk("linear_make_request: Sector %llu out of bounds on "
			"dev %s: %llu sectors, offset %llu\n",
			(unsigned long long)bio->bi_sector,
			bdevname(tmp_dev->rdev->bdev, b),
			(unsigned long long)tmp_dev->num_sectors,
			(unsigned long long)tmp_dev->start_sector);
		bio_io_error(bio);
		return 0;
	}
	if (unlikely(bio->bi_sector + (bio->bi_size >> 9) >
		     tmp_dev->start_sector + tmp_dev->num_sectors)) {
		/* This bio crosses a device boundary, so we have to
		 * split it.
		 */
		struct bio_pair *bp;

		bp = bio_split(bio,
			       tmp_dev->start_sector + tmp_dev->num_sectors
			       - bio->bi_sector);

		if (linear_make_request(q, &bp->bio1))
			generic_make_request(&bp->bio1);
		if (linear_make_request(q, &bp->bio2))
			generic_make_request(&bp->bio2);
		bio_pair_release(bp);
		return 0;
	}

	bio->bi_bdev = tmp_dev->rdev->bdev;
	bio->bi_sector = bio->bi_sector - tmp_dev->start_sector
		+ tmp_dev->rdev->data_offset;

	return 1;
}

static void linear_status(struct seq_file *seq, mddev_t *mddev)
{

	seq_printf(seq, " %dk rounding", mddev->chunk_size/1024);
}


static struct mdk_personality linear_personality =
{
	.name		= "linear",
	.level		= LEVEL_LINEAR,
	.owner		= THIS_MODULE,
	.make_request	= linear_make_request,
	.run		= linear_run,
	.stop		= linear_stop,
	.status		= linear_status,
	.hot_add_disk	= linear_add,
	.size		= linear_size,
};

static int __init linear_init(void)
{
	return register_md_personality(&linear_personality);
}

static void linear_exit(void)
{
	unregister_md_personality(&linear_personality);
}


module_init(linear_init);
module_exit(linear_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated */
MODULE_ALIAS("md-linear");
MODULE_ALIAS("md-level--1");