/*
   md_k.h : kernel internal structure of the Linux MD driver
            Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifndef _MD_K_H
#define _MD_K_H

/* and dm-bio-list.h is not under include/linux because.... ??? */
#include "../../../drivers/md/dm-bio-list.h"

#define LEVEL_MULTIPATH         (-4)
#define LEVEL_LINEAR            (-1)
#define LEVEL_FAULTY            (-5)

/* we need a value for 'no level specified' and 0
 * means 'raid0', so we need something else.  This is
 * for internal use only
 */
#define LEVEL_NONE              (-1000000)

#define MaxSector (~(sector_t)0)
#define MD_THREAD_NAME_MAX 14

typedef struct mddev_s mddev_t;
typedef struct mdk_rdev_s mdk_rdev_t;

#define MAX_MD_DEVS  256        /* Max number of md dev */

/*
 * options passed in raidrun:
 */

#define MAX_CHUNK_SIZE (4096*1024)

/*
 * MD's 'extended' device
 */
struct mdk_rdev_s
{
        struct list_head same_set;      /* RAID devices within the same set */

        sector_t size;                  /* Device size (in blocks) */
        mddev_t *mddev;                 /* RAID array if running */
        unsigned long last_events;      /* IO event timestamp */

        struct block_device *bdev;      /* block device handle */

        struct page     *sb_page;
        int             sb_loaded;
        sector_t        data_offset;    /* start of data in array */
        sector_t        sb_offset;
        int             sb_size;        /* bytes in the superblock */
        int             preferred_minor;        /* autorun support */

        struct kobject  kobj;

        /* A device can be in one of three states based on two flags:
         * Not working:    faulty==1 in_sync==0
         * Fully working:  faulty==0 in_sync==1
         * Working, but not
         * in sync with array
         *                 faulty==0 in_sync==0
         *
         * It can never have faulty==1, in_sync==1
         * This reduces the burden of testing multiple flags in many cases
         */

        unsigned long   flags;
#define Faulty          1               /* device is known to have a fault */
#define In_sync         2               /* device is in_sync with rest of array */
#define WriteMostly     4               /* Avoid reading if at all possible */
#define BarriersNotsupp 5               /* BIO_RW_BARRIER is not supported */

        int desc_nr;                    /* descriptor index in the superblock */
        int raid_disk;                  /* role of device in array */
        int saved_raid_disk;            /* role that device used to have in the
                                         * array and could again if we did a partial
                                         * resync from the bitmap
                                         */

        atomic_t        nr_pending;     /* number of pending requests.
                                         * only maintained for arrays that
                                         * support hot removal
                                         */
        atomic_t        read_errors;    /* number of consecutive read errors that
                                         * we have tried to ignore.
                                         */
        atomic_t        corrected_errors; /* number of corrected read errors,
                                           * for reporting to userspace and storing
                                           * in superblock.
                                           */
};
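
/*
 * Illustrative sketch only (not part of the original header, compiled out):
 * the state table above reduces to plain test_bit() checks on ->flags.
 * rdev_state_example() is a hypothetical helper, shown just to make the
 * three states concrete.
 */
#if 0
static inline const char *rdev_state_example(mdk_rdev_t *rdev)
{
        int faulty  = test_bit(Faulty, &rdev->flags);
        int in_sync = test_bit(In_sync, &rdev->flags);

        if (faulty)                     /* faulty==1, in_sync==0 */
                return "not working";
        if (in_sync)                    /* faulty==0, in_sync==1 */
                return "fully working";
        return "working, not in sync";  /* faulty==0, in_sync==0 */
}
#endif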

struct mddev_s
{
        void                            *private;
        struct mdk_personality          *pers;
        dev_t                           unit;
        int                             md_minor;
        struct list_head                disks;
        int                             sb_dirty;
        int                             ro;

        struct gendisk                  *gendisk;

        struct kobject                  kobj;

        /* Superblock information */
        int                             major_version,
                                        minor_version,
                                        patch_version;
        int                             persistent;
        int                             chunk_size;
        time_t                          ctime, utime;
        int                             level, layout;
        char                            clevel[16];
        int                             raid_disks;
        int                             max_disks;
        sector_t                        size;           /* used size of component devices */
        sector_t                        array_size;     /* exported array size */
        __u64                           events;

        char                            uuid[16];

        struct mdk_thread_s             *thread;        /* management thread */
        struct mdk_thread_s             *sync_thread;   /* doing resync or reconstruct */
        sector_t                        curr_resync;    /* blocks scheduled */
        unsigned long                   resync_mark;    /* a recent timestamp */
        sector_t                        resync_mark_cnt;/* blocks written at resync_mark */

        sector_t                        resync_max_sectors; /* may be set by personality */

        sector_t                        resync_mismatches; /* count of sectors where
                                                            * parity/replica mismatch found
                                                            */
        /* if zero, use the system-wide default */
        int                             sync_speed_min;
        int                             sync_speed_max;

        int                             ok_start_degraded;
        /* recovery/resync flags
         * NEEDED:   we might need to start a resync/recover
         * RUNNING:  a thread is running, or about to be started
         * SYNC:     actually doing a resync, not a recovery
         * ERR:      an IO error was detected - abort the resync/recovery
         * INTR:     someone requested a (clean) early abort.
         * DONE:     thread is done and is waiting to be reaped
         * REQUEST:  user-space has requested a sync (used with SYNC)
         * CHECK:    user-space request for check-only, no repair
         */
#define MD_RECOVERY_RUNNING     0
#define MD_RECOVERY_SYNC        1
#define MD_RECOVERY_ERR         2
#define MD_RECOVERY_INTR        3
#define MD_RECOVERY_DONE        4
#define MD_RECOVERY_NEEDED      5
#define MD_RECOVERY_REQUESTED   6
#define MD_RECOVERY_CHECK       7
        unsigned long                   recovery;

        int                             in_sync;        /* known to not need resync */
        struct semaphore                reconfig_sem;
        atomic_t                        active;

        int                             changed;        /* true if we might need to reread partition info */
        int                             degraded;       /* whether md should consider
                                                         * adding a spare
                                                         */
        int                             barriers_work;  /* initialised to true, cleared as soon
                                                         * as a barrier request to slave
                                                         * fails.  Only supported
                                                         */
        struct bio                      *biolist;       /* bios that need to be retried
                                                         * because BIO_RW_BARRIER is not supported
                                                         */

        atomic_t                        recovery_active; /* blocks scheduled, but not written */
        wait_queue_head_t               recovery_wait;
        sector_t                        recovery_cp;

        spinlock_t                      write_lock;
        wait_queue_head_t               sb_wait;        /* for waiting on superblock updates */
        atomic_t                        pending_writes; /* number of active superblock writes */

        unsigned int                    safemode;       /* if set, update "clean" superblock
                                                         * when no writes pending.
                                                         */
        unsigned int                    safemode_delay;
        struct timer_list               safemode_timer;
        atomic_t                        writes_pending;
        request_queue_t                 *queue;         /* for plugging ... */

        atomic_t                        write_behind;   /* outstanding async IO */
        unsigned int                    max_write_behind; /* 0 = sync */

        struct bitmap                   *bitmap;        /* the bitmap for the device */
        struct file                     *bitmap_file;   /* the bitmap file */
        long                            bitmap_offset;  /* offset from superblock of
                                                         * start of bitmap. May be
                                                         * negative, but not '0'
                                                         */
        long                            default_bitmap_offset; /* this is the offset to use when
                                                                * hot-adding a bitmap.  It should
                                                                * eventually be settable by sysfs.
                                                                */

        struct list_head                all_mddevs;
};
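
/*
 * Illustrative sketch only (not part of the original header, compiled out):
 * the MD_RECOVERY_* values are bit numbers within mddev->recovery and are
 * driven with the atomic bitops.  A caller that wants a resync considered
 * typically does something along these lines (md_wakeup_thread() is
 * declared in md.h):
 */
#if 0
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        md_wakeup_thread(mddev->thread);

        /* ... and the management thread later tests, e.g.: */
        if (test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) &&
            !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                /* start or schedule a sync thread here */;
#endif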

static inline void rdev_dec_pending(mdk_rdev_t *rdev, mddev_t *mddev)
{
        int faulty = test_bit(Faulty, &rdev->flags);
        if (atomic_dec_and_test(&rdev->nr_pending) && faulty)
                set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}

static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
{
        atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io);
}

struct mdk_personality
{
        char *name;
        int level;
        struct list_head list;
        struct module *owner;
        int (*make_request)(request_queue_t *q, struct bio *bio);
        int (*run)(mddev_t *mddev);
        int (*stop)(mddev_t *mddev);
        void (*status)(struct seq_file *seq, mddev_t *mddev);
        /* error_handler must set ->faulty and clear ->in_sync
         * if appropriate, and should abort recovery if needed
         */
        void (*error_handler)(mddev_t *mddev, mdk_rdev_t *rdev);
        int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev);
        int (*hot_remove_disk) (mddev_t *mddev, int number);
        int (*spare_active) (mddev_t *mddev);
        sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster);
        int (*resize) (mddev_t *mddev, sector_t sectors);
        int (*reshape) (mddev_t *mddev, int raid_disks);
        int (*reconfig) (mddev_t *mddev, int layout, int chunk_size);
        /* quiesce moves between quiescence states
         * 0 - fully active
         * 1 - no new requests allowed
         * others - reserved
         */
        void (*quiesce) (mddev_t *mddev, int state);
};


struct md_sysfs_entry {
        struct attribute attr;
        ssize_t (*show)(mddev_t *, char *);
        ssize_t (*store)(mddev_t *, const char *, size_t);
};


static inline char * mdname (mddev_t * mddev)
{
        return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
}

/*
 * iterates through some rdev ringlist. It's safe to remove the
 * current 'rdev'. Don't touch 'tmp' though.
 */
#define ITERATE_RDEV_GENERIC(head,rdev,tmp)                             \
                                                                        \
        for ((tmp) = (head).next;                                       \
                (rdev) = (list_entry((tmp), mdk_rdev_t, same_set)),     \
                        (tmp) = (tmp)->next, (tmp)->prev != &(head)     \
                ; )
/*
 * iterates through the 'same array disks' ringlist
 */
#define ITERATE_RDEV(mddev,rdev,tmp)                                    \
        ITERATE_RDEV_GENERIC((mddev)->disks,rdev,tmp)

/*
 * Iterates through 'pending RAID disks'
 */
#define ITERATE_RDEV_PENDING(rdev,tmp)                                  \
        ITERATE_RDEV_GENERIC(pending_raid_disks,rdev,tmp)
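
/*
 * Illustrative sketch only (not part of the original header, compiled out):
 * typical use of ITERATE_RDEV.  'tmp' is just the cursor the macro needs so
 * that the current 'rdev' can safely be removed from the list mid-walk.
 */
#if 0
        char b[BDEVNAME_SIZE];
        mdk_rdev_t *rdev;
        struct list_head *tmp;

        ITERATE_RDEV(mddev, rdev, tmp)
                if (test_bit(Faulty, &rdev->flags))
                        printk(KERN_INFO "%s: %s is faulty\n",
                               mdname(mddev), bdevname(rdev->bdev, b));
#endif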

typedef struct mdk_thread_s {
        void                    (*run) (mddev_t *mddev);
        mddev_t                 *mddev;
        wait_queue_head_t       wqueue;
        unsigned long           flags;
        struct task_struct      *tsk;
        unsigned long           timeout;
} mdk_thread_t;

#define THREAD_WAKEUP  0

#define __wait_event_lock_irq(wq, condition, lock, cmd)                 \
do {                                                                    \
        wait_queue_t __wait;                                            \
        init_waitqueue_entry(&__wait, current);                         \
                                                                        \
        add_wait_queue(&wq, &__wait);                                   \
        for (;;) {                                                      \
                set_current_state(TASK_UNINTERRUPTIBLE);                \
                if (condition)                                          \
                        break;                                          \
                spin_unlock_irq(&lock);                                 \
                cmd;                                                    \
                schedule();                                             \
                spin_lock_irq(&lock);                                   \
        }                                                               \
        current->state = TASK_RUNNING;                                  \
        remove_wait_queue(&wq, &__wait);                                \
} while (0)

#define wait_event_lock_irq(wq, condition, lock, cmd)                   \
do {                                                                    \
        if (condition)                                                  \
                break;                                                  \
        __wait_event_lock_irq(wq, condition, lock, cmd);                \
} while (0)

static inline void safe_put_page(struct page *p)
{
        if (p) put_page(p);
}

#endif
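
/*
 * Illustrative note only (not part of the original header): callers of
 * wait_event_lock_irq() must already hold 'lock' via spin_lock_irq(); the
 * macro drops the lock while sleeping and retakes it before re-testing
 * 'condition'.  A hypothetical caller looks roughly like:
 *
 *      spin_lock_irq(&conf->resync_lock);
 *      wait_event_lock_irq(conf->wait_idle, !conf->pending,
 *                          conf->resync_lock,
 *                          unplug_slaves(conf->mddev));
 *      ...
 *      spin_unlock_irq(&conf->resync_lock);
 *
 * where 'conf', 'wait_idle', 'pending' and unplug_slaves() are made-up
 * names standing in for a personality's own bookkeeping.
 */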