/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _LINUX_BCACHE_H
#define _LINUX_BCACHE_H

/*
 * Bcache on disk data structures
 */

#include <linux/types.h>

#define BITMASK(name, type, field, offset, size)                \
static inline __u64 name(const type *k)                         \
{ return (k->field >> offset) & ~(~0ULL << size); }             \
                                                                \
static inline void SET_##name(type *k, __u64 v)                 \
{                                                               \
        k->field &= ~(~(~0ULL << size) << offset);              \
        k->field |= (v & ~(~0ULL << size)) << offset;           \
}

/* Btree keys - all units are in sectors */

struct bkey {
        __u64 high;
        __u64 low;
        __u64 ptr[];
};

#define KEY_FIELD(name, field, offset, size)                    \
        BITMASK(name, struct bkey, field, offset, size)

#define PTR_FIELD(name, offset, size)                                   \
static inline __u64 name(const struct bkey *k, unsigned int i)          \
{ return (k->ptr[i] >> offset) & ~(~0ULL << size); }                    \
                                                                        \
static inline void SET_##name(struct bkey *k, unsigned int i, __u64 v)  \
{                                                                       \
        k->ptr[i] &= ~(~(~0ULL << size) << offset);                     \
        k->ptr[i] |= (v & ~(~0ULL << size)) << offset;                  \
}

#define KEY_SIZE_BITS   16
#define KEY_MAX_U64S    8

KEY_FIELD(KEY_PTRS,     high, 60, 3)
KEY_FIELD(HEADER_SIZE,  high, 58, 2)
KEY_FIELD(KEY_CSUM,     high, 56, 2)
KEY_FIELD(KEY_PINNED,   high, 55, 1)
KEY_FIELD(KEY_DIRTY,    high, 36, 1)

KEY_FIELD(KEY_SIZE,     high, 20, KEY_SIZE_BITS)
KEY_FIELD(KEY_INODE,    high, 0,  20)

/* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */

static inline __u64 KEY_OFFSET(const struct bkey *k)
{
        return k->low;
}

static inline void SET_KEY_OFFSET(struct bkey *k, __u64 v)
{
        k->low = v;
}

/*
 * The high bit being set is a relic from when we used it to do binary
 * searches - it told you where a key started. It's not used anymore,
 * and can probably be safely dropped.
 */
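/*
 * Illustrative note (not part of the on-disk format): each KEY_FIELD()
 * invocation above expands BITMASK() into a getter/setter pair. For
 * example, KEY_FIELD(KEY_DIRTY, high, 36, 1) is roughly equivalent to:
 *
 *      static inline __u64 KEY_DIRTY(const struct bkey *k)
 *      { return (k->high >> 36) & 1; }
 *
 *      static inline void SET_KEY_DIRTY(struct bkey *k, __u64 v)
 *      {
 *              k->high &= ~(1ULL << 36);
 *              k->high |= (v & 1) << 36;
 *      }
 */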
#define KEY(inode, offset, size)                                        \
((struct bkey) {                                                        \
        .high = (1ULL << 63) | ((__u64) (size) << 20) | (inode),        \
        .low = (offset)                                                 \
})

#define ZERO_KEY        KEY(0, 0, 0)

#define MAX_KEY_INODE   (~(~0 << 20))
#define MAX_KEY_OFFSET  (~0ULL >> 1)
#define MAX_KEY         KEY(MAX_KEY_INODE, MAX_KEY_OFFSET, 0)

#define KEY_START(k)    (KEY_OFFSET(k) - KEY_SIZE(k))
#define START_KEY(k)    KEY(KEY_INODE(k), KEY_START(k), 0)

#define PTR_DEV_BITS    12

PTR_FIELD(PTR_DEV,      51, PTR_DEV_BITS)
PTR_FIELD(PTR_OFFSET,   8,  43)
PTR_FIELD(PTR_GEN,      0,  8)

#define PTR_CHECK_DEV   ((1 << PTR_DEV_BITS) - 1)

#define MAKE_PTR(gen, offset, dev)                              \
        ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen)

/* Bkey utility code */

static inline unsigned long bkey_u64s(const struct bkey *k)
{
        return (sizeof(struct bkey) / sizeof(__u64)) + KEY_PTRS(k);
}

static inline unsigned long bkey_bytes(const struct bkey *k)
{
        return bkey_u64s(k) * sizeof(__u64);
}

#define bkey_copy(_dest, _src)  memcpy(_dest, _src, bkey_bytes(_src))

static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src)
{
        SET_KEY_INODE(dest, KEY_INODE(src));
        SET_KEY_OFFSET(dest, KEY_OFFSET(src));
}

static inline struct bkey *bkey_next(const struct bkey *k)
{
        __u64 *d = (void *) k;

        return (struct bkey *) (d + bkey_u64s(k));
}

static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys)
{
        __u64 *d = (void *) k;

        return (struct bkey *) (d + nr_keys);
}

/* Enough for a key with 6 pointers */
#define BKEY_PAD        8

#define BKEY_PADDED(key)                                        \
        union { struct bkey key; __u64 key ## _pad[BKEY_PAD]; }

/* Superblock */

/* Version 0: Cache device
 * Version 1: Backing device
 * Version 2: Seed pointer into btree node checksum
 * Version 3: Cache device with new UUID format
 * Version 4: Backing device with data offset
 */
#define BCACHE_SB_VERSION_CDEV                  0
#define BCACHE_SB_VERSION_BDEV                  1
#define BCACHE_SB_VERSION_CDEV_WITH_UUID        3
#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET      4
#define BCACHE_SB_MAX_VERSION                   4

#define SB_SECTOR               8
#define SB_OFFSET               (SB_SECTOR << SECTOR_SHIFT)
#define SB_SIZE                 4096
#define SB_LABEL_SIZE           32
#define SB_JOURNAL_BUCKETS      256U
/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */
#define MAX_CACHES_PER_SET      8

#define BDEV_DATA_START_DEFAULT 16      /* sectors */

struct cache_sb_disk {
        __le64          csum;
        __le64          offset; /* sector where this sb was written */
        __le64          version;

        __u8            magic[16];

        __u8            uuid[16];
        union {
                __u8    set_uuid[16];
                __le64  set_magic;
        };
        __u8            label[SB_LABEL_SIZE];

        __le64          flags;
        __le64          seq;
        __le64          pad[8];

        union {
        struct {
                /* Cache devices */
                __le64  nbuckets;       /* device size */

                __le16  block_size;     /* sectors */
                __le16  bucket_size;    /* sectors */

                __le16  nr_in_set;
                __le16  nr_this_dev;
        };
        struct {
                /* Backing devices */
                __le64  data_offset;

                /*
                 * block_size from the cache device section is still used by
                 * backing devices, so don't add anything here until we fix
                 * things to not need it for backing devices anymore
                 */
        };
        };

        __le32          last_mount;     /* time overflow in y2106 */

        __le16          first_bucket;
        union {
                __le16  njournal_buckets;
                __le16  keys;
        };
        __le64          d[SB_JOURNAL_BUCKETS];  /* journal buckets */
};
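/*
 * Illustrative note: struct cache_sb below is the native-endian, in-memory
 * counterpart of struct cache_sb_disk above; the two layouts match field for
 * field. A minimal userspace sketch of reading the on-disk superblock might
 * look like this (error handling omitted; assumes 512-byte sectors and a
 * little-endian host, so the __le* fields can be read directly):
 *
 *      struct cache_sb_disk sb;
 *
 *      pread(fd, &sb, sizeof(sb), SB_SECTOR * 512);
 *      if (sb.version > BCACHE_SB_MAX_VERSION)
 *              return -1;      // unknown format
 */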
struct cache_sb {
        __u64           csum;
        __u64           offset; /* sector where this sb was written */
        __u64           version;

        __u8            magic[16];

        __u8            uuid[16];
        union {
                __u8    set_uuid[16];
                __u64   set_magic;
        };
        __u8            label[SB_LABEL_SIZE];

        __u64           flags;
        __u64           seq;
        __u64           pad[8];

        union {
        struct {
                /* Cache devices */
                __u64   nbuckets;       /* device size */

                __u16   block_size;     /* sectors */
                __u16   bucket_size;    /* sectors */

                __u16   nr_in_set;
                __u16   nr_this_dev;
        };
        struct {
                /* Backing devices */
                __u64   data_offset;

                /*
                 * block_size from the cache device section is still used by
                 * backing devices, so don't add anything here until we fix
                 * things to not need it for backing devices anymore
                 */
        };
        };

        __u32           last_mount;     /* time overflow in y2106 */

        __u16           first_bucket;
        union {
                __u16   njournal_buckets;
                __u16   keys;
        };
        __u64           d[SB_JOURNAL_BUCKETS];  /* journal buckets */
};

static inline _Bool SB_IS_BDEV(const struct cache_sb *sb)
{
        return sb->version == BCACHE_SB_VERSION_BDEV
                || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
}

BITMASK(CACHE_SYNC,             struct cache_sb, flags, 0, 1);
BITMASK(CACHE_DISCARD,          struct cache_sb, flags, 1, 1);
BITMASK(CACHE_REPLACEMENT,      struct cache_sb, flags, 2, 3);
#define CACHE_REPLACEMENT_LRU           0U
#define CACHE_REPLACEMENT_FIFO          1U
#define CACHE_REPLACEMENT_RANDOM        2U

BITMASK(BDEV_CACHE_MODE,        struct cache_sb, flags, 0, 4);
#define CACHE_MODE_WRITETHROUGH         0U
#define CACHE_MODE_WRITEBACK            1U
#define CACHE_MODE_WRITEAROUND          2U
#define CACHE_MODE_NONE                 3U

BITMASK(BDEV_STATE,             struct cache_sb, flags, 61, 2);
#define BDEV_STATE_NONE                 0U
#define BDEV_STATE_CLEAN                1U
#define BDEV_STATE_DIRTY                2U
#define BDEV_STATE_STALE                3U

/*
 * Magic numbers
 *
 * The various other data structures have their own magic numbers, which are
 * xored with the first part of the cache set's UUID
 */

#define JSET_MAGIC      0x245235c1a3625032ULL
#define PSET_MAGIC      0x6750e15f87337f91ULL
#define BSET_MAGIC      0x90135c78b99e07f5ULL

static inline __u64 jset_magic(struct cache_sb *sb)
{
        return sb->set_magic ^ JSET_MAGIC;
}

static inline __u64 pset_magic(struct cache_sb *sb)
{
        return sb->set_magic ^ PSET_MAGIC;
}

static inline __u64 bset_magic(struct cache_sb *sb)
{
        return sb->set_magic ^ BSET_MAGIC;
}

/*
 * Journal
 *
 * On disk format for a journal entry:
 * seq is monotonically increasing; every journal entry has its own unique
 * sequence number.
 *
 * last_seq is the oldest journal entry that still has keys the btree hasn't
 * flushed to disk yet.
 *
 * version is for on disk format changes.
 */
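/*
 * Illustrative note: during recovery the seq/last_seq pair tells a reader
 * which entries still matter. A sketch of the replay rule (hypothetical
 * helper, not kernel code): entries older than the newest entry's last_seq
 * have all their keys flushed to the btree already and can be skipped.
 *
 *      static int jset_needs_replay(const struct jset *j,
 *                                   __u64 newest_last_seq)
 *      {
 *              return j->seq >= newest_last_seq;
 *      }
 */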
#define BCACHE_JSET_VERSION_UUIDv1      1
#define BCACHE_JSET_VERSION_UUID        1       /* Always latest UUID format */
#define BCACHE_JSET_VERSION             1

struct jset {
        __u64           csum;
        __u64           magic;
        __u64           seq;
        __u32           version;
        __u32           keys;

        __u64           last_seq;

        BKEY_PADDED(uuid_bucket);
        BKEY_PADDED(btree_root);
        __u16           btree_level;
        __u16           pad[3];

        __u64           prio_bucket[MAX_CACHES_PER_SET];

        union {
                struct bkey     start[0];
                __u64           d[0];
        };
};

/* Bucket prios/gens */

struct prio_set {
        __u64           csum;
        __u64           magic;
        __u64           seq;
        __u32           version;
        __u32           pad;

        __u64           next_bucket;

        struct bucket_disk {
                __u16   prio;
                __u8    gen;
        } __attribute((packed)) data[];
};

/* UUIDS - per backing device/flash only volume metadata */

struct uuid_entry {
        union {
                struct {
                        __u8    uuid[16];
                        __u8    label[32];
                        __u32   first_reg;      /* time overflow in y2106 */
                        __u32   last_reg;
                        __u32   invalidated;

                        __u32   flags;
                        /* Size of flash only volumes */
                        __u64   sectors;
                };

                __u8    pad[128];
        };
};

BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1);

/* Btree nodes */

/* Version 1: Seed pointer into btree node checksum */
#define BCACHE_BSET_CSUM        1
#define BCACHE_BSET_VERSION     1

/*
 * Btree nodes
 *
 * On disk a btree node is a list/log of these; within each set the keys are
 * sorted
 */
struct bset {
        __u64           csum;
        __u64           magic;
        __u64           seq;
        __u32           version;
        __u32           keys;

        union {
                struct bkey     start[0];
                __u64           d[0];
        };
};

/* OBSOLETE */

/* UUIDS - per backing device/flash only volume metadata */

struct uuid_entry_v0 {
        __u8    uuid[16];
        __u8    label[32];
        __u32   first_reg;
        __u32   last_reg;
        __u32   invalidated;
        __u32   pad;
};

#endif /* _LINUX_BCACHE_H */
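/*
 * Illustrative note: walking the keys of a struct bset *i. The keys field
 * appears to count __u64 words of key data rather than individual keys
 * (which is why the end is computed with bkey_idx()); this is a sketch
 * under that assumption, not kernel code:
 *
 *      struct bkey *k = i->start;
 *      struct bkey *end = bkey_idx(i->start, i->keys);
 *
 *      while (k < end) {
 *              // KEY_PTRS(k) pointers follow the two header words
 *              k = bkey_next(k);
 *      }
 */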