at v2.6.14 9.8 kB view raw
1/* 2 md_k.h : kernel internal structure of the Linux MD driver 3 Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 2, or (at your option) 8 any later version. 9 10 You should have received a copy of the GNU General Public License 11 (for example /usr/src/linux/COPYING); if not, write to the Free 12 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 13*/ 14 15#ifndef _MD_K_H 16#define _MD_K_H 17 18/* and dm-bio-list.h is not under include/linux because.... ??? */ 19#include "../../../drivers/md/dm-bio-list.h" 20 21#define MD_RESERVED 0UL 22#define LINEAR 1UL 23#define RAID0 2UL 24#define RAID1 3UL 25#define RAID5 4UL 26#define TRANSLUCENT 5UL 27#define HSM 6UL 28#define MULTIPATH 7UL 29#define RAID6 8UL 30#define RAID10 9UL 31#define FAULTY 10UL 32#define MAX_PERSONALITY 11UL 33 34#define LEVEL_MULTIPATH (-4) 35#define LEVEL_LINEAR (-1) 36#define LEVEL_FAULTY (-5) 37 38#define MaxSector (~(sector_t)0) 39#define MD_THREAD_NAME_MAX 14 40 41static inline int pers_to_level (int pers) 42{ 43 switch (pers) { 44 case FAULTY: return LEVEL_FAULTY; 45 case MULTIPATH: return LEVEL_MULTIPATH; 46 case HSM: return -3; 47 case TRANSLUCENT: return -2; 48 case LINEAR: return LEVEL_LINEAR; 49 case RAID0: return 0; 50 case RAID1: return 1; 51 case RAID5: return 5; 52 case RAID6: return 6; 53 case RAID10: return 10; 54 } 55 BUG(); 56 return MD_RESERVED; 57} 58 59static inline int level_to_pers (int level) 60{ 61 switch (level) { 62 case LEVEL_FAULTY: return FAULTY; 63 case LEVEL_MULTIPATH: return MULTIPATH; 64 case -3: return HSM; 65 case -2: return TRANSLUCENT; 66 case LEVEL_LINEAR: return LINEAR; 67 case 0: return RAID0; 68 case 1: return RAID1; 69 case 4: 70 case 5: return RAID5; 71 case 6: return RAID6; 72 case 10: return RAID10; 73 } 74 return MD_RESERVED; 75} 76 77typedef struct mddev_s mddev_t; 78typedef struct mdk_rdev_s mdk_rdev_t; 79 80#define MAX_MD_DEVS 256 /* Max number of md dev */ 81 82/* 83 * options passed in raidrun: 84 */ 85 86#define MAX_CHUNK_SIZE (4096*1024) 87 88/* 89 * MD's 'extended' device 90 */ 91struct mdk_rdev_s 92{ 93 struct list_head same_set; /* RAID devices within the same set */ 94 95 sector_t size; /* Device size (in blocks) */ 96 mddev_t *mddev; /* RAID array if running */ 97 unsigned long last_events; /* IO event timestamp */ 98 99 struct block_device *bdev; /* block device handle */ 100 101 struct page *sb_page; 102 int sb_loaded; 103 sector_t data_offset; /* start of data in array */ 104 sector_t sb_offset; 105 int sb_size; /* bytes in the superblock */ 106 int preferred_minor; /* autorun support */ 107 108 /* A device can be in one of three states based on two flags: 109 * Not working: faulty==1 in_sync==0 110 * Fully working: faulty==0 in_sync==1 111 * Working, but not 112 * in sync with array 113 * faulty==0 in_sync==0 114 * 115 * It can never have faulty==1, in_sync==1 116 * This reduces the burden of testing multiple flags in many cases 117 */ 118 int faulty; /* if faulty do not issue IO requests */ 119 int in_sync; /* device is a full member of the array */ 120 121 unsigned long flags; /* Should include faulty and in_sync here. */ 122#define WriteMostly 4 /* Avoid reading if at all possible */ 123 124 int desc_nr; /* descriptor index in the superblock */ 125 int raid_disk; /* role of device in array */ 126 int saved_raid_disk; /* role that device used to have in the 127 * array and could again if we did a partial 128 * resync from the bitmap 129 */ 130 131 atomic_t nr_pending; /* number of pending requests. 132 * only maintained for arrays that 133 * support hot removal 134 */ 135}; 136 137typedef struct mdk_personality_s mdk_personality_t; 138 139struct mddev_s 140{ 141 void *private; 142 mdk_personality_t *pers; 143 dev_t unit; 144 int md_minor; 145 struct list_head disks; 146 int sb_dirty; 147 int ro; 148 149 struct gendisk *gendisk; 150 151 /* Superblock information */ 152 int major_version, 153 minor_version, 154 patch_version; 155 int persistent; 156 int chunk_size; 157 time_t ctime, utime; 158 int level, layout; 159 int raid_disks; 160 int max_disks; 161 sector_t size; /* used size of component devices */ 162 sector_t array_size; /* exported array size */ 163 __u64 events; 164 165 char uuid[16]; 166 167 struct mdk_thread_s *thread; /* management thread */ 168 struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ 169 sector_t curr_resync; /* blocks scheduled */ 170 unsigned long resync_mark; /* a recent timestamp */ 171 sector_t resync_mark_cnt;/* blocks written at resync_mark */ 172 173 sector_t resync_max_sectors; /* may be set by personality */ 174 /* recovery/resync flags 175 * NEEDED: we might need to start a resync/recover 176 * RUNNING: a thread is running, or about to be started 177 * SYNC: actually doing a resync, not a recovery 178 * ERR: and IO error was detected - abort the resync/recovery 179 * INTR: someone requested a (clean) early abort. 180 * DONE: thread is done and is waiting to be reaped 181 */ 182#define MD_RECOVERY_RUNNING 0 183#define MD_RECOVERY_SYNC 1 184#define MD_RECOVERY_ERR 2 185#define MD_RECOVERY_INTR 3 186#define MD_RECOVERY_DONE 4 187#define MD_RECOVERY_NEEDED 5 188 unsigned long recovery; 189 190 int in_sync; /* know to not need resync */ 191 struct semaphore reconfig_sem; 192 atomic_t active; 193 194 int changed; /* true if we might need to reread partition info */ 195 int degraded; /* whether md should consider 196 * adding a spare 197 */ 198 199 atomic_t recovery_active; /* blocks scheduled, but not written */ 200 wait_queue_head_t recovery_wait; 201 sector_t recovery_cp; 202 203 spinlock_t write_lock; 204 wait_queue_head_t sb_wait; /* for waiting on superblock updates */ 205 atomic_t pending_writes; /* number of active superblock writes */ 206 207 unsigned int safemode; /* if set, update "clean" superblock 208 * when no writes pending. 209 */ 210 unsigned int safemode_delay; 211 struct timer_list safemode_timer; 212 atomic_t writes_pending; 213 request_queue_t *queue; /* for plugging ... */ 214 215 atomic_t write_behind; /* outstanding async IO */ 216 unsigned int max_write_behind; /* 0 = sync */ 217 218 struct bitmap *bitmap; /* the bitmap for the device */ 219 struct file *bitmap_file; /* the bitmap file */ 220 long bitmap_offset; /* offset from superblock of 221 * start of bitmap. May be 222 * negative, but not '0' 223 */ 224 long default_bitmap_offset; /* this is the offset to use when 225 * hot-adding a bitmap. It should 226 * eventually be settable by sysfs. 227 */ 228 229 struct list_head all_mddevs; 230}; 231 232 233static inline void rdev_dec_pending(mdk_rdev_t *rdev, mddev_t *mddev) 234{ 235 int faulty = rdev->faulty; 236 if (atomic_dec_and_test(&rdev->nr_pending) && faulty) 237 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 238} 239 240static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors) 241{ 242 atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io); 243} 244 245struct mdk_personality_s 246{ 247 char *name; 248 struct module *owner; 249 int (*make_request)(request_queue_t *q, struct bio *bio); 250 int (*run)(mddev_t *mddev); 251 int (*stop)(mddev_t *mddev); 252 void (*status)(struct seq_file *seq, mddev_t *mddev); 253 /* error_handler must set ->faulty and clear ->in_sync 254 * if appropriate, and should abort recovery if needed 255 */ 256 void (*error_handler)(mddev_t *mddev, mdk_rdev_t *rdev); 257 int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev); 258 int (*hot_remove_disk) (mddev_t *mddev, int number); 259 int (*spare_active) (mddev_t *mddev); 260 sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster); 261 int (*resize) (mddev_t *mddev, sector_t sectors); 262 int (*reshape) (mddev_t *mddev, int raid_disks); 263 int (*reconfig) (mddev_t *mddev, int layout, int chunk_size); 264 /* quiesce moves between quiescence states 265 * 0 - fully active 266 * 1 - no new requests allowed 267 * others - reserved 268 */ 269 void (*quiesce) (mddev_t *mddev, int state); 270}; 271 272 273static inline char * mdname (mddev_t * mddev) 274{ 275 return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; 276} 277 278extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr); 279 280/* 281 * iterates through some rdev ringlist. It's safe to remove the 282 * current 'rdev'. Dont touch 'tmp' though. 283 */ 284#define ITERATE_RDEV_GENERIC(head,rdev,tmp) \ 285 \ 286 for ((tmp) = (head).next; \ 287 (rdev) = (list_entry((tmp), mdk_rdev_t, same_set)), \ 288 (tmp) = (tmp)->next, (tmp)->prev != &(head) \ 289 ; ) 290/* 291 * iterates through the 'same array disks' ringlist 292 */ 293#define ITERATE_RDEV(mddev,rdev,tmp) \ 294 ITERATE_RDEV_GENERIC((mddev)->disks,rdev,tmp) 295 296/* 297 * Iterates through 'pending RAID disks' 298 */ 299#define ITERATE_RDEV_PENDING(rdev,tmp) \ 300 ITERATE_RDEV_GENERIC(pending_raid_disks,rdev,tmp) 301 302typedef struct mdk_thread_s { 303 void (*run) (mddev_t *mddev); 304 mddev_t *mddev; 305 wait_queue_head_t wqueue; 306 unsigned long flags; 307 struct completion *event; 308 struct task_struct *tsk; 309 unsigned long timeout; 310 const char *name; 311} mdk_thread_t; 312 313#define THREAD_WAKEUP 0 314 315#define __wait_event_lock_irq(wq, condition, lock, cmd) \ 316do { \ 317 wait_queue_t __wait; \ 318 init_waitqueue_entry(&__wait, current); \ 319 \ 320 add_wait_queue(&wq, &__wait); \ 321 for (;;) { \ 322 set_current_state(TASK_UNINTERRUPTIBLE); \ 323 if (condition) \ 324 break; \ 325 spin_unlock_irq(&lock); \ 326 cmd; \ 327 schedule(); \ 328 spin_lock_irq(&lock); \ 329 } \ 330 current->state = TASK_RUNNING; \ 331 remove_wait_queue(&wq, &__wait); \ 332} while (0) 333 334#define wait_event_lock_irq(wq, condition, lock, cmd) \ 335do { \ 336 if (condition) \ 337 break; \ 338 __wait_event_lock_irq(wq, condition, lock, cmd); \ 339} while (0) 340 341#endif 342