/*
   md_k.h : kernel internal structure of the Linux MD driver
          Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifndef _MD_K_H
#define _MD_K_H

/* and dm-bio-list.h is not under include/linux because.... ??? */
#include "../../../drivers/md/dm-bio-list.h"

#define MD_RESERVED       0UL
#define LINEAR            1UL
#define RAID0             2UL
#define RAID1             3UL
#define RAID5             4UL
#define TRANSLUCENT       5UL
#define HSM               6UL
#define MULTIPATH         7UL
#define RAID6             8UL
#define RAID10            9UL
#define FAULTY           10UL
#define MAX_PERSONALITY  11UL

#define LEVEL_MULTIPATH		(-4)
#define LEVEL_LINEAR		(-1)
#define LEVEL_FAULTY		(-5)

#define MaxSector (~(sector_t)0)
#define MD_THREAD_NAME_MAX 14

static inline int pers_to_level (int pers)
{
	switch (pers) {
	case FAULTY:		return LEVEL_FAULTY;
	case MULTIPATH:		return LEVEL_MULTIPATH;
	case HSM:		return -3;
	case TRANSLUCENT:	return -2;
	case LINEAR:		return LEVEL_LINEAR;
	case RAID0:		return 0;
	case RAID1:		return 1;
	case RAID5:		return 5;
	case RAID6:		return 6;
	case RAID10:		return 10;
	}
	BUG();
	return MD_RESERVED;
}

static inline int level_to_pers (int level)
{
	switch (level) {
	case LEVEL_FAULTY:	return FAULTY;
	case LEVEL_MULTIPATH:	return MULTIPATH;
	case -3:		return HSM;
	case -2:		return TRANSLUCENT;
	case LEVEL_LINEAR:	return LINEAR;
	case 0:			return RAID0;
	case 1:			return RAID1;
	case 4:
	case 5:			return RAID5;
	case 6:			return RAID6;
	case 10:		return RAID10;
	}
	return MD_RESERVED;
}

typedef struct mddev_s mddev_t;
typedef struct mdk_rdev_s mdk_rdev_t;

#define MAX_MD_DEVS  256	/* Max number of md devices */

/*
 * options passed in raidrun:
 */

#define MAX_CHUNK_SIZE (4096*1024)

/*
 * MD's 'extended' device
 */
struct mdk_rdev_s
{
	struct list_head same_set;	/* RAID devices within the same set */

	sector_t size;			/* Device size (in blocks) */
	mddev_t *mddev;			/* RAID array if running */
	unsigned long last_events;	/* IO event timestamp */

	struct block_device *bdev;	/* block device handle */

	struct page *sb_page;
	int sb_loaded;
	sector_t data_offset;		/* start of data in array */
	sector_t sb_offset;
	int sb_size;			/* bytes in the superblock */
	int preferred_minor;		/* autorun support */

	struct kobject kobj;

	/* A device can be in one of three states based on two flags:
	 * Not working:     faulty==1  in_sync==0
	 * Fully working:   faulty==0  in_sync==1
	 * Working, but not
	 * in sync with array
	 *                  faulty==0  in_sync==0
	 *
	 * It can never have faulty==1, in_sync==1
	 * This reduces the burden of testing multiple flags in many cases
	 */

	unsigned long flags;
#define	Faulty		1		/* device is known to have a fault */
#define	In_sync		2		/* device is in_sync with rest of array */
#define	WriteMostly	4		/* Avoid reading if at all possible */
#define	BarriersNotsupp	5		/* BIO_RW_BARRIER is not supported */

	int desc_nr;			/* descriptor index in the superblock */
	int raid_disk;			/* role of device in array */
	int saved_raid_disk;		/* role that device used to have in the
					 * array and could again if we did a partial
					 * resync from the bitmap
					 */

	atomic_t nr_pending;		/* number of pending requests.
					 * only maintained for arrays that
					 * support hot removal
					 */
	atomic_t read_errors;		/* number of consecutive read errors that
					 * we have tried to ignore.
					 */
};
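/*
 * Illustrative sketch, not part of the original header: how the two flag
 * bits above encode the three device states described in the comment.
 * The helper name rdev_state_name() is hypothetical; real callers simply
 * open-code these test_bit() checks (see rdev_dec_pending() below).
 */
static inline const char *rdev_state_name(mdk_rdev_t *rdev)
{
	if (test_bit(Faulty, &rdev->flags))
		return "not working";		/* faulty==1, in_sync==0 */
	if (test_bit(In_sync, &rdev->flags))
		return "fully working";		/* faulty==0, in_sync==1 */
	return "working, not in sync";		/* faulty==0, in_sync==0 */
}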
typedef struct mdk_personality_s mdk_personality_t;

struct mddev_s
{
	void				*private;
	mdk_personality_t		*pers;
	dev_t				unit;
	int				md_minor;
	struct list_head		disks;
	int				sb_dirty;
	int				ro;

	struct gendisk			*gendisk;

	struct kobject			kobj;

	/* Superblock information */
	int				major_version,
					minor_version,
					patch_version;
	int				persistent;
	int				chunk_size;
	time_t				ctime, utime;
	int				level, layout;
	int				raid_disks;
	int				max_disks;
	sector_t			size;		/* used size of component devices */
	sector_t			array_size;	/* exported array size */
	__u64				events;

	char				uuid[16];

	struct mdk_thread_s		*thread;	/* management thread */
	struct mdk_thread_s		*sync_thread;	/* doing resync or reconstruct */
	sector_t			curr_resync;	/* blocks scheduled */
	unsigned long			resync_mark;	/* a recent timestamp */
	sector_t			resync_mark_cnt;/* blocks written at resync_mark */

	sector_t			resync_max_sectors; /* may be set by personality */

	sector_t			resync_mismatches; /* count of sectors where
							    * parity/replica mismatch found
							    */
	/* recovery/resync flags
	 * NEEDED:   we might need to start a resync/recover
	 * RUNNING:  a thread is running, or about to be started
	 * SYNC:     actually doing a resync, not a recovery
	 * ERR:      an IO error was detected - abort the resync/recovery
	 * INTR:     someone requested a (clean) early abort.
	 * DONE:     thread is done and is waiting to be reaped
	 * REQUEST:  user-space has requested a sync (used with SYNC)
	 * CHECK:    user-space request for check-only, no repair
	 */
#define	MD_RECOVERY_RUNNING	0
#define	MD_RECOVERY_SYNC	1
#define	MD_RECOVERY_ERR		2
#define	MD_RECOVERY_INTR	3
#define	MD_RECOVERY_DONE	4
#define	MD_RECOVERY_NEEDED	5
#define	MD_RECOVERY_REQUESTED	6
#define	MD_RECOVERY_CHECK	7
	unsigned long			recovery;

	int				in_sync;	/* known to not need resync */
	struct semaphore		reconfig_sem;
	atomic_t			active;

	int				changed;	/* true if we might need to
							 * reread partition info */
	int				degraded;	/* whether md should consider
							 * adding a spare
							 */
	int				barriers_work;	/* initialised to true, cleared as soon
							 * as a barrier request to slave
							 * fails.  Only supported
							 */
	struct bio			*biolist;	/* bios that need to be retried
							 * because BIO_RW_BARRIER is not supported
							 */

	atomic_t			recovery_active; /* blocks scheduled, but not written */
	wait_queue_head_t		recovery_wait;
	sector_t			recovery_cp;

	spinlock_t			write_lock;
	wait_queue_head_t		sb_wait;	/* for waiting on superblock updates */
	atomic_t			pending_writes;	/* number of active superblock writes */

	unsigned int			safemode;	/* if set, update "clean" superblock
							 * when no writes pending.
							 */
	unsigned int			safemode_delay;
	struct timer_list		safemode_timer;
	atomic_t			writes_pending;
	request_queue_t			*queue;		/* for plugging ... */

	atomic_t			write_behind;	/* outstanding async IO */
	unsigned int			max_write_behind; /* 0 = sync */

	struct bitmap			*bitmap;	/* the bitmap for the device */
	struct file			*bitmap_file;	/* the bitmap file */
	long				bitmap_offset;	/* offset from superblock of
							 * start of bitmap. May be
							 * negative, but not '0'
							 */
	long				default_bitmap_offset; /* this is the offset to use when
								* hot-adding a bitmap.  It should
								* eventually be settable by sysfs.
								*/

	struct list_head		all_mddevs;
};
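/*
 * Usage sketch (hypothetical caller, not part of the original header):
 * the recovery word above is driven with atomic bit operations, much as
 * md.c does when user-space requests a check-only pass; md_wakeup_thread()
 * is declared in md.h:
 *
 *	set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
 *	set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
 *	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 *	md_wakeup_thread(mddev->thread);
 */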
static inline void rdev_dec_pending(mdk_rdev_t *rdev, mddev_t *mddev)
{
	int faulty = test_bit(Faulty, &rdev->flags);
	if (atomic_dec_and_test(&rdev->nr_pending) && faulty)
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}

static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
{
	atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io);
}

struct mdk_personality_s
{
	char *name;
	struct module *owner;
	int (*make_request)(request_queue_t *q, struct bio *bio);
	int (*run)(mddev_t *mddev);
	int (*stop)(mddev_t *mddev);
	void (*status)(struct seq_file *seq, mddev_t *mddev);
	/* error_handler must set ->faulty and clear ->in_sync
	 * if appropriate, and should abort recovery if needed
	 */
	void (*error_handler)(mddev_t *mddev, mdk_rdev_t *rdev);
	int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev);
	int (*hot_remove_disk) (mddev_t *mddev, int number);
	int (*spare_active) (mddev_t *mddev);
	sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster);
	int (*resize) (mddev_t *mddev, sector_t sectors);
	int (*reshape) (mddev_t *mddev, int raid_disks);
	int (*reconfig) (mddev_t *mddev, int layout, int chunk_size);
	/* quiesce moves between quiescence states
	 * 0 - fully active
	 * 1 - no new requests allowed
	 * others - reserved
	 */
	void (*quiesce) (mddev_t *mddev, int state);
};
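/*
 * Registration sketch (abridged and hypothetical; modelled on what a
 * personality such as drivers/md/raid0.c does in this era of the API):
 * a personality fills in only the operations it supports and registers
 * itself under one of the personality numbers defined at the top of
 * this file:
 *
 *	static mdk_personality_t raid0_personality = {
 *		.name		= "raid0",
 *		.owner		= THIS_MODULE,
 *		.make_request	= raid0_make_request,
 *		.run		= raid0_run,
 *		.stop		= raid0_stop,
 *		.status		= raid0_status,
 *	};
 *
 *	register_md_personality(RAID0, &raid0_personality);
 */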
struct md_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(mddev_t *, char *);
	ssize_t (*store)(mddev_t *, const char *, size_t);
};


static inline char * mdname (mddev_t * mddev)
{
	return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
}

extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr);

/*
 * iterates through some rdev ringlist. It's safe to remove the
 * current 'rdev'. Don't touch 'tmp' though.
 */
#define ITERATE_RDEV_GENERIC(head,rdev,tmp)				\
									\
	for ((tmp) = (head).next;					\
		(rdev) = (list_entry((tmp), mdk_rdev_t, same_set)),	\
			(tmp) = (tmp)->next, (tmp)->prev != &(head)	\
		; )
/*
 * iterates through the 'same array disks' ringlist
 */
#define ITERATE_RDEV(mddev,rdev,tmp)					\
	ITERATE_RDEV_GENERIC((mddev)->disks,rdev,tmp)

/*
 * Iterates through 'pending RAID disks'
 */
#define ITERATE_RDEV_PENDING(rdev,tmp)					\
	ITERATE_RDEV_GENERIC(pending_raid_disks,rdev,tmp)

typedef struct mdk_thread_s {
	void			(*run) (mddev_t *mddev);
	mddev_t			*mddev;
	wait_queue_head_t	wqueue;
	unsigned long		flags;
	struct task_struct	*tsk;
	unsigned long		timeout;
} mdk_thread_t;

#define THREAD_WAKEUP  0

#define __wait_event_lock_irq(wq, condition, lock, cmd)			\
do {									\
	wait_queue_t __wait;						\
	init_waitqueue_entry(&__wait, current);				\
									\
	add_wait_queue(&wq, &__wait);					\
	for (;;) {							\
		set_current_state(TASK_UNINTERRUPTIBLE);		\
		if (condition)						\
			break;						\
		spin_unlock_irq(&lock);					\
		cmd;							\
		schedule();						\
		spin_lock_irq(&lock);					\
	}								\
	current->state = TASK_RUNNING;					\
	remove_wait_queue(&wq, &__wait);				\
} while (0)

#define wait_event_lock_irq(wq, condition, lock, cmd)			\
do {									\
	if (condition)							\
		break;							\
	__wait_event_lock_irq(wq, condition, lock, cmd);		\
} while (0)

#endif
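/*
 * Usage sketch for wait_event_lock_irq() (hypothetical caller and field
 * names, not part of the original header): the caller enters with 'lock'
 * held and interrupts disabled; the macro drops the lock around 'cmd' and
 * schedule(), then re-takes it before re-testing the condition:
 *
 *	spin_lock_irq(&conf->resync_lock);
 *	wait_event_lock_irq(conf->wait_resume, !conf->barrier,
 *			    conf->resync_lock, md_unplug_example(mddev));
 *	spin_unlock_irq(&conf->resync_lock);
 */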