/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Block data types and constants. Directly include this file only to
 * break include dependency loop.
 */
#ifndef __LINUX_BLK_TYPES_H
#define __LINUX_BLK_TYPES_H

#include <linux/types.h>
#include <linux/bvec.h>
#include <linux/device.h>
#include <linux/ktime.h>

struct bio_set;
struct bio;
struct bio_integrity_payload;
struct page;
struct io_context;
struct cgroup_subsys_state;
typedef void (bio_end_io_t) (struct bio *);
struct bio_crypt_ctx;

struct block_device {
	sector_t		bd_start_sect;
	struct disk_stats __percpu *bd_stats;
	unsigned long		bd_stamp;
	bool			bd_read_only;	/* read-only policy */
	dev_t			bd_dev;
	int			bd_openers;
	struct inode *		bd_inode;	/* will die */
	struct super_block *	bd_super;
	struct mutex		bd_mutex;	/* open/close mutex */
	void *			bd_claiming;
	struct device		bd_device;
	void *			bd_holder;
	int			bd_holders;
	bool			bd_write_holder;
#ifdef CONFIG_SYSFS
	struct list_head	bd_holder_disks;
#endif
	struct kobject		*bd_holder_dir;
	u8			bd_partno;
	/* number of times partitions within this device have been opened. */
	unsigned		bd_part_count;

	spinlock_t		bd_size_lock;	/* for bd_inode->i_size updates */
	struct gendisk *	bd_disk;
	struct backing_dev_info	*bd_bdi;

	/* The counter of freeze processes */
	int			bd_fsfreeze_count;
	/* Mutex for freeze */
	struct mutex		bd_fsfreeze_mutex;
	struct super_block	*bd_fsfreeze_sb;

	struct partition_meta_info *bd_meta_info;
#ifdef CONFIG_FAIL_MAKE_REQUEST
	bool			bd_make_it_fail;
#endif
} __randomize_layout;

#define bdev_whole(_bdev) \
	((_bdev)->bd_disk->part0)

#define dev_to_bdev(device) \
	container_of((device), struct block_device, bd_device)

#define bdev_kobj(_bdev) \
	(&((_bdev)->bd_device.kobj))

/*
 * Block error status values. See block/blk-core:blk_errors for the details.
 * Alpha cannot write a byte atomically, so we need to use 32-bit value.
 */
#if defined(CONFIG_ALPHA) && !defined(__alpha_bwx__)
typedef u32 __bitwise blk_status_t;
#else
typedef u8 __bitwise blk_status_t;
#endif
#define	BLK_STS_OK 0
#define BLK_STS_NOTSUPP		((__force blk_status_t)1)
#define BLK_STS_TIMEOUT		((__force blk_status_t)2)
#define BLK_STS_NOSPC		((__force blk_status_t)3)
#define BLK_STS_TRANSPORT	((__force blk_status_t)4)
#define BLK_STS_TARGET		((__force blk_status_t)5)
#define BLK_STS_NEXUS		((__force blk_status_t)6)
#define BLK_STS_MEDIUM		((__force blk_status_t)7)
#define BLK_STS_PROTECTION	((__force blk_status_t)8)
#define BLK_STS_RESOURCE	((__force blk_status_t)9)
#define BLK_STS_IOERR		((__force blk_status_t)10)

/* hack for device mapper, don't use elsewhere: */
#define BLK_STS_DM_REQUEUE	((__force blk_status_t)11)

#define BLK_STS_AGAIN		((__force blk_status_t)12)
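/*
 * Example: a driver completion path typically maps an internal errno or
 * protocol result onto one of the blk_status_t codes above (BLK_STS_OK is
 * plain 0, so success is usually checked with "if (!status)").  A minimal
 * sketch; foo_result_to_status() is a hypothetical helper, not part of
 * this header:
 *
 *	static blk_status_t foo_result_to_status(int err)
 *	{
 *		switch (err) {
 *		case 0:
 *			return BLK_STS_OK;
 *		case -ENOSPC:
 *			return BLK_STS_NOSPC;
 *		case -ETIMEDOUT:
 *			return BLK_STS_TIMEOUT;
 *		default:
 *			return BLK_STS_IOERR;
 *		}
 *	}
 */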
/*
 * BLK_STS_DEV_RESOURCE is returned from the driver to the block layer if
 * device related resources are unavailable, but the driver can guarantee
 * that the queue will be rerun in the future once resources become
 * available again. This is typically the case for device specific
 * resources that are consumed for IO. If the driver fails allocating these
 * resources, we know that inflight (or pending) IO will free these
 * resources upon completion.
 *
 * This is different from BLK_STS_RESOURCE in that it explicitly references
 * a device specific resource. For resources of wider scope, allocation
 * failure can happen without having pending IO. This means that we can't
 * rely on request completions freeing these resources, as IO may not be in
 * flight. Examples of that are kernel memory allocations, DMA mappings, or
 * any other system wide resources.
 */
#define BLK_STS_DEV_RESOURCE	((__force blk_status_t)13)

/*
 * BLK_STS_ZONE_RESOURCE is returned from the driver to the block layer if zone
 * related resources are unavailable, but the driver can guarantee the queue
 * will be rerun in the future once the resources become available again.
 *
 * This is different from BLK_STS_DEV_RESOURCE in that it explicitly references
 * a zone specific resource and IO to a different zone on the same device could
 * still be served. An example is a zone write lock: while a zone is
 * write-locked, writes to it must wait, but a read from the same zone can
 * still be served.
 */
#define BLK_STS_ZONE_RESOURCE	((__force blk_status_t)14)

/*
 * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion
 * path if the device returns a status indicating that too many zone resources
 * are currently open. The same command should be successful if resubmitted
 * after the number of open zones decreases below the device's limits, which is
 * reported in the request_queue's max_open_zones.
 */
#define BLK_STS_ZONE_OPEN_RESOURCE	((__force blk_status_t)15)

/*
 * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion
 * path if the device returns a status indicating that too many zone resources
 * are currently active. The same command should be successful if resubmitted
 * after the number of active zones decreases below the device's limits, which
 * is reported in the request_queue's max_active_zones.
 */
#define BLK_STS_ZONE_ACTIVE_RESOURCE	((__force blk_status_t)16)
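/*
 * Example: the distinction between the *_RESOURCE codes matters most in a
 * blk-mq ->queue_rq() handler.  Failing to get a device-private resource
 * (say a hardware tag) can return BLK_STS_DEV_RESOURCE, because an
 * in-flight completion is guaranteed to free one and rerun the queue;
 * failing a system-wide allocation (memory, a DMA mapping) must return
 * BLK_STS_RESOURCE instead.  A hedged sketch, with hypothetical foo_*
 * helpers that are not part of this header:
 *
 *	static blk_status_t foo_queue_rq(struct blk_mq_hw_ctx *hctx,
 *					 const struct blk_mq_queue_data *bd)
 *	{
 *		if (!foo_get_hw_tag(hctx->driver_data))
 *			return BLK_STS_DEV_RESOURCE;
 *		if (!foo_map_dma(hctx->driver_data, bd->rq))
 *			return BLK_STS_RESOURCE;
 *		...
 *		return BLK_STS_OK;
 *	}
 */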
/**
 * blk_path_error - returns true if error may be path related
 * @error: status the request was completed with
 *
 * Description:
 *     This classifies block error status into non-retryable errors and ones
 *     that may be successful if retried on a failover path.
 *
 * Return:
 *     %false - retrying failover path will not help
 *     %true  - may succeed if retried
 */
static inline bool blk_path_error(blk_status_t error)
{
	switch (error) {
	case BLK_STS_NOTSUPP:
	case BLK_STS_NOSPC:
	case BLK_STS_TARGET:
	case BLK_STS_NEXUS:
	case BLK_STS_MEDIUM:
	case BLK_STS_PROTECTION:
		return false;
	}

	/* Anything else could be a path failure, so should be retried */
	return true;
}

/*
 * From most significant bit:
 * 1 bit: reserved for other usage, see below
 * 12 bits: original size of bio
 * 51 bits: issue time of bio
 */
#define BIO_ISSUE_RES_BITS      1
#define BIO_ISSUE_SIZE_BITS     12
#define BIO_ISSUE_RES_SHIFT     (64 - BIO_ISSUE_RES_BITS)
#define BIO_ISSUE_SIZE_SHIFT    (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS)
#define BIO_ISSUE_TIME_MASK     ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1)
#define BIO_ISSUE_SIZE_MASK     \
	(((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT)
#define BIO_ISSUE_RES_MASK      (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1))

/* Reserved bit for blk-throtl */
#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63)

struct bio_issue {
	u64 value;
};

static inline u64 __bio_issue_time(u64 time)
{
	return time & BIO_ISSUE_TIME_MASK;
}

static inline u64 bio_issue_time(struct bio_issue *issue)
{
	return __bio_issue_time(issue->value);
}

static inline sector_t bio_issue_size(struct bio_issue *issue)
{
	return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT);
}

static inline void bio_issue_init(struct bio_issue *issue,
				  sector_t size)
{
	size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1;
	issue->value = ((issue->value & BIO_ISSUE_RES_MASK) |
			(ktime_get_ns() & BIO_ISSUE_TIME_MASK) |
			((u64)size << BIO_ISSUE_SIZE_SHIFT));
}
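/*
 * Example: with BIO_ISSUE_RES_BITS = 1 and BIO_ISSUE_SIZE_BITS = 12, the
 * word packed by bio_issue_init() is laid out as
 *
 *	bit  63      reserved (BIO_ISSUE_THROTL_SKIP_LATENCY)
 *	bits 62..51  bio size, truncated to 12 bits
 *	bits 50..0   ktime_get_ns() issue time, truncated to 51 bits
 *
 * and the accessors above simply mask and shift it back out.  A minimal
 * usage sketch (bio_sectors() comes from <linux/bio.h>, and the bi_issue
 * field of struct bio below only exists under CONFIG_BLK_CGROUP):
 *
 *	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
 *	...
 *	u64 issue_ns = bio_issue_time(&bio->bi_issue);
 *	sector_t issue_sectors = bio_issue_size(&bio->bi_issue);
 */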
/*
 * main unit of I/O for the block layer and lower layers (i.e. drivers and
 * stacking drivers)
 */
struct bio {
	struct bio		*bi_next;	/* request queue link */
	struct gendisk		*bi_disk;
	unsigned int		bi_opf;		/* bottom bits req flags,
						 * top bits REQ_OP. Use
						 * accessors.
						 */
	unsigned short		bi_flags;	/* status, etc. and bvec pool number */
	unsigned short		bi_ioprio;
	unsigned short		bi_write_hint;
	blk_status_t		bi_status;
	u8			bi_partno;
	atomic_t		__bi_remaining;

	struct bvec_iter	bi_iter;

	bio_end_io_t		*bi_end_io;

	void			*bi_private;
#ifdef CONFIG_BLK_CGROUP
	/*
	 * Represents the association of the css and request_queue for the bio.
	 * If a bio goes direct to device, it will not have a blkg as it will
	 * not have a request_queue associated with it. The reference is put
	 * on release of the bio.
	 */
	struct blkcg_gq		*bi_blkg;
	struct bio_issue	bi_issue;
#ifdef CONFIG_BLK_CGROUP_IOCOST
	u64			bi_iocost_cost;
#endif
#endif

#ifdef CONFIG_BLK_INLINE_ENCRYPTION
	struct bio_crypt_ctx	*bi_crypt_context;
#endif

	union {
#if defined(CONFIG_BLK_DEV_INTEGRITY)
		struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
	};

	unsigned short		bi_vcnt;	/* how many bio_vec's */

	/*
	 * Everything starting with bi_max_vecs will be preserved by bio_reset()
	 */

	unsigned short		bi_max_vecs;	/* max bvl_vecs we can hold */

	atomic_t		__bi_cnt;	/* pin count */

	struct bio_vec		*bi_io_vec;	/* the actual vec list */

	struct bio_set		*bi_pool;

	/*
	 * We can inline a number of vecs at the end of the bio, to avoid
	 * double allocations for a small number of bio_vecs. This member
	 * MUST obviously be kept at the very end of the bio.
	 */
	struct bio_vec		bi_inline_vecs[];
};

#define BIO_RESET_BYTES		offsetof(struct bio, bi_max_vecs)

/*
 * bio flags
 */
enum {
	BIO_NO_PAGE_REF,	/* don't put bvec pages on release */
	BIO_CLONED,		/* doesn't own data */
	BIO_BOUNCED,		/* bio is a bounce bio */
	BIO_WORKINGSET,		/* contains userspace workingset pages */
	BIO_QUIET,		/* Make BIO Quiet */
	BIO_CHAIN,		/* chained bio, ->bi_remaining in effect */
	BIO_REFFED,		/* bio has elevated ->bi_cnt */
	BIO_THROTTLED,		/* This bio has already been subjected to
				 * throttling rules. Don't do it again. */
	BIO_TRACE_COMPLETION,	/* bio_endio() should trace the final completion
				 * of this bio. */
	BIO_CGROUP_ACCT,	/* has been accounted to a cgroup */
	BIO_TRACKED,		/* set if bio goes through the rq_qos path */
	BIO_FLAG_LAST
};

/* See BVEC_POOL_OFFSET below before adding new flags */

/*
 * We support 6 different bvec pools, the last one is magic in that it
 * is backed by a mempool.
 */
#define BVEC_POOL_NR		6
#define BVEC_POOL_MAX		(BVEC_POOL_NR - 1)

/*
 * Top 3 bits of bio flags indicate the pool the bvecs came from. We add
 * 1 to the actual index so that 0 indicates that there are no bvecs to be
 * freed.
 */
#define BVEC_POOL_BITS		(3)
#define BVEC_POOL_OFFSET	(16 - BVEC_POOL_BITS)
#define BVEC_POOL_IDX(bio)	((bio)->bi_flags >> BVEC_POOL_OFFSET)
#if (1 << BVEC_POOL_BITS) < (BVEC_POOL_NR + 1)
# error "BVEC_POOL_BITS is too small"
#endif

/*
 * Flags starting here get preserved by bio_reset() - this includes
 * only BVEC_POOL_IDX()
 */
#define BIO_RESET_BITS	BVEC_POOL_OFFSET
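/*
 * Example: with BVEC_POOL_OFFSET = 13, the 16-bit bi_flags word is split as
 *
 *	bits 15..13  BVEC_POOL_IDX(), preserved across bio_reset()
 *	bits 12..0   the BIO_* flags above
 *
 * Individual flags are normally tested and updated through the
 * bio_flagged(), bio_set_flag() and bio_clear_flag() helpers declared in
 * <linux/bio.h>, e.g.:
 *
 *	if (!bio_flagged(bio, BIO_CLONED))
 *		bio_set_flag(bio, BIO_QUIET);
 */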
typedef __u32 __bitwise blk_mq_req_flags_t;

/*
 * Operations and flags common to the bio and request structures.
 * We use 8 bits for encoding the operation, and the remaining 24 for flags.
 *
 * The least significant bit of the operation number indicates the data
 * transfer direction:
 *
 *   - if the least significant bit is set transfers are TO the device
 *   - if the least significant bit is not set transfers are FROM the device
 *
 * If an operation does not transfer data the least significant bit has no
 * meaning.
 */
#define REQ_OP_BITS	8
#define REQ_OP_MASK	((1 << REQ_OP_BITS) - 1)
#define REQ_FLAG_BITS	24

enum req_opf {
	/* read sectors from the device */
	REQ_OP_READ		= 0,
	/* write sectors to the device */
	REQ_OP_WRITE		= 1,
	/* flush the volatile write cache */
	REQ_OP_FLUSH		= 2,
	/* discard sectors */
	REQ_OP_DISCARD		= 3,
	/* securely erase sectors */
	REQ_OP_SECURE_ERASE	= 5,
	/* write the same sector many times */
	REQ_OP_WRITE_SAME	= 7,
	/* write the zero-filled sector many times */
	REQ_OP_WRITE_ZEROES	= 9,
	/* Open a zone */
	REQ_OP_ZONE_OPEN	= 10,
	/* Close a zone */
	REQ_OP_ZONE_CLOSE	= 11,
	/* Transition a zone to full */
	REQ_OP_ZONE_FINISH	= 12,
	/* write data at the current zone write pointer */
	REQ_OP_ZONE_APPEND	= 13,
	/* reset a zone write pointer */
	REQ_OP_ZONE_RESET	= 15,
	/* reset all the zones present on the device */
	REQ_OP_ZONE_RESET_ALL	= 17,

	/* SCSI passthrough using struct scsi_request */
	REQ_OP_SCSI_IN		= 32,
	REQ_OP_SCSI_OUT		= 33,
	/* Driver private requests */
	REQ_OP_DRV_IN		= 34,
	REQ_OP_DRV_OUT		= 35,

	REQ_OP_LAST,
};
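/*
 * Example: bi_opf (and a request's cmd_flags) carries one REQ_OP_* value in
 * its low REQ_OP_BITS bits and REQ_* flag bits above it, so an operation
 * plus modifiers is built with a plain OR and taken apart with the
 * bio_op()/req_op() accessors defined further down:
 *
 *	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_FUA;
 *	...
 *	if (bio_op(bio) == REQ_OP_WRITE && op_is_sync(bio->bi_opf))
 *		...
 *
 * Note how the encoding keeps the data direction in the lowest bit:
 * REQ_OP_READ (0), REQ_OP_SCSI_IN (32) and REQ_OP_DRV_IN (34) are even
 * (transfer from the device), while REQ_OP_WRITE (1), REQ_OP_WRITE_SAME (7)
 * and REQ_OP_DRV_OUT (35) are odd, which is what op_is_write() relies on.
 */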
enum req_flag_bits {
	__REQ_FAILFAST_DEV =	/* no driver retries of device errors */
		REQ_OP_BITS,
	__REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
	__REQ_FAILFAST_DRIVER,	/* no driver retries of driver errors */
	__REQ_SYNC,		/* request is sync (sync write or read) */
	__REQ_META,		/* metadata io request */
	__REQ_PRIO,		/* boost priority in cfq */
	__REQ_NOMERGE,		/* don't touch this for merging */
	__REQ_IDLE,		/* anticipate more IO after this one */
	__REQ_INTEGRITY,	/* I/O includes block integrity payload */
	__REQ_FUA,		/* forced unit access */
	__REQ_PREFLUSH,		/* request for cache flush */
	__REQ_RAHEAD,		/* read ahead, can fail anytime */
	__REQ_BACKGROUND,	/* background IO */
	__REQ_NOWAIT,		/* Don't wait if request will block */
	/*
	 * When a shared kthread needs to issue a bio for a cgroup, doing
	 * so synchronously can lead to priority inversions as the kthread
	 * can be trapped waiting for that cgroup. CGROUP_PUNT flag makes
	 * submit_bio() punt the actual issuing to a dedicated per-blkcg
	 * work item to avoid such priority inversions.
	 */
	__REQ_CGROUP_PUNT,

	/* command specific flags for REQ_OP_WRITE_ZEROES: */
	__REQ_NOUNMAP,		/* do not free blocks when zeroing */

	__REQ_HIPRI,

	/* for driver use */
	__REQ_DRV,
	__REQ_SWAP,		/* swapping request. */
	__REQ_NR_BITS,		/* stops here */
};

#define REQ_FAILFAST_DEV	(1ULL << __REQ_FAILFAST_DEV)
#define REQ_FAILFAST_TRANSPORT	(1ULL << __REQ_FAILFAST_TRANSPORT)
#define REQ_FAILFAST_DRIVER	(1ULL << __REQ_FAILFAST_DRIVER)
#define REQ_SYNC		(1ULL << __REQ_SYNC)
#define REQ_META		(1ULL << __REQ_META)
#define REQ_PRIO		(1ULL << __REQ_PRIO)
#define REQ_NOMERGE		(1ULL << __REQ_NOMERGE)
#define REQ_IDLE		(1ULL << __REQ_IDLE)
#define REQ_INTEGRITY		(1ULL << __REQ_INTEGRITY)
#define REQ_FUA			(1ULL << __REQ_FUA)
#define REQ_PREFLUSH		(1ULL << __REQ_PREFLUSH)
#define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
#define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
#define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
#define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)

#define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
#define REQ_HIPRI		(1ULL << __REQ_HIPRI)

#define REQ_DRV			(1ULL << __REQ_DRV)
#define REQ_SWAP		(1ULL << __REQ_SWAP)

#define REQ_FAILFAST_MASK \
	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)

#define REQ_NOMERGE_FLAGS \
	(REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA)

enum stat_group {
	STAT_READ,
	STAT_WRITE,
	STAT_DISCARD,
	STAT_FLUSH,

	NR_STAT_GROUPS
};

#define bio_op(bio) \
	((bio)->bi_opf & REQ_OP_MASK)
#define req_op(req) \
	((req)->cmd_flags & REQ_OP_MASK)

/* obsolete, don't use in new code */
static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
		unsigned op_flags)
{
	bio->bi_opf = op | op_flags;
}

static inline bool op_is_write(unsigned int op)
{
	return (op & 1);
}

/*
 * Check if the bio or request is one that needs special treatment in the
 * flush state machine.
 */
static inline bool op_is_flush(unsigned int op)
{
	return op & (REQ_FUA | REQ_PREFLUSH);
}

/*
 * Reads are always treated as synchronous, as are requests with the FUA or
 * PREFLUSH flag. Other operations may be marked as synchronous using the
 * REQ_SYNC flag.
 */
static inline bool op_is_sync(unsigned int op)
{
	return (op & REQ_OP_MASK) == REQ_OP_READ ||
		(op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
}

static inline bool op_is_discard(unsigned int op)
{
	return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
}

/*
 * Check if a bio or request operation is a zone management operation, with
 * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case
 * due to its different handling in the block layer and device response in
 * case of command failure.
 */
static inline bool op_is_zone_mgmt(enum req_opf op)
{
	switch (op & REQ_OP_MASK) {
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_OPEN:
	case REQ_OP_ZONE_CLOSE:
	case REQ_OP_ZONE_FINISH:
		return true;
	default:
		return false;
	}
}

static inline int op_stat_group(unsigned int op)
{
	if (op_is_discard(op))
		return STAT_DISCARD;
	return op_is_write(op);
}

typedef unsigned int blk_qc_t;
#define BLK_QC_T_NONE		-1U
#define BLK_QC_T_SHIFT		16
#define BLK_QC_T_INTERNAL	(1U << 31)

static inline bool blk_qc_t_valid(blk_qc_t cookie)
{
	return cookie != BLK_QC_T_NONE;
}

static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
{
	return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT;
}

static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
{
	return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
}

static inline bool blk_qc_t_is_internal(blk_qc_t cookie)
{
	return (cookie & BLK_QC_T_INTERNAL) != 0;
}
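/*
 * Example: a polling cookie encodes which hardware queue an I/O was sent to
 * and which tag it used, so the completion can be polled for later:
 *
 *	bit  31      BLK_QC_T_INTERNAL (tag comes from the internal tag set)
 *	bits 30..16  hardware queue number, see blk_qc_t_to_queue_num()
 *	bits 15..0   tag, see blk_qc_t_to_tag()
 *
 * A cookie is composed accordingly, e.g. (a sketch mirroring what blk-mq
 * does at submission time):
 *
 *	blk_qc_t cookie = (hctx->queue_num << BLK_QC_T_SHIFT) | rq->tag;
 *
 * and BLK_QC_T_NONE (-1U) is returned when there is nothing to poll for.
 */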
struct blk_rq_stat {
	u64 mean;
	u64 min;
	u64 max;
	u32 nr_samples;
	u64 batch;
};

#endif /* __LINUX_BLK_TYPES_H */