Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _LINUX_BLKDEV_H
3#define _LINUX_BLKDEV_H
4
5#include <linux/sched.h>
6#include <linux/sched/clock.h>
7
8#ifdef CONFIG_BLOCK
9
10#include <linux/major.h>
11#include <linux/genhd.h>
12#include <linux/list.h>
13#include <linux/llist.h>
14#include <linux/timer.h>
15#include <linux/workqueue.h>
16#include <linux/pagemap.h>
17#include <linux/backing-dev-defs.h>
18#include <linux/wait.h>
19#include <linux/mempool.h>
20#include <linux/pfn.h>
21#include <linux/bio.h>
22#include <linux/stringify.h>
23#include <linux/gfp.h>
24#include <linux/bsg.h>
25#include <linux/smp.h>
26#include <linux/rcupdate.h>
27#include <linux/percpu-refcount.h>
28#include <linux/scatterlist.h>
29#include <linux/blkzoned.h>
30
31struct module;
32struct scsi_ioctl_command;
33
34struct request_queue;
35struct elevator_queue;
36struct blk_trace;
37struct request;
38struct sg_io_hdr;
39struct bsg_job;
40struct blkcg_gq;
41struct blk_flush_queue;
42struct pr_ops;
43struct rq_qos;
44struct blk_queue_stats;
45struct blk_stat_callback;
46struct blk_keyslot_manager;
47
#define BLKDEV_MIN_RQ	4
#define BLKDEV_MAX_RQ	128	/* Default maximum */

/* Must be consistent with blk_mq_poll_stats_bkt() */
#define BLK_MQ_POLL_STATS_BKTS 16

/*
 * Doing classic polling.
 *
 * The negative literal is parenthesized so that the macro always expands
 * as a single operand, whatever expression it is pasted into.
 */
#define BLK_MQ_POLL_CLASSIC (-1)

/*
 * Maximum number of blkcg policies allowed to be registered concurrently.
 * Defined here to simplify include dependency.
 */
#define BLKCG_MAX_POLS 5
62
63typedef void (rq_end_io_fn)(struct request *, blk_status_t);
64
65/*
66 * request flags */
67typedef __u32 __bitwise req_flags_t;
68
69/* elevator knows about this request */
70#define RQF_SORTED ((__force req_flags_t)(1 << 0))
71/* drive already may have started this one */
72#define RQF_STARTED ((__force req_flags_t)(1 << 1))
73/* may not be passed by ioscheduler */
74#define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3))
75/* request for flush sequence */
76#define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4))
77/* merge of different types, fail separately */
78#define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5))
79/* track inflight for MQ */
80#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6))
81/* don't call prep for this one */
82#define RQF_DONTPREP ((__force req_flags_t)(1 << 7))
83/* set for "ide_preempt" requests and also for requests for which the SCSI
84 "quiesce" state must be ignored. */
85#define RQF_PREEMPT ((__force req_flags_t)(1 << 8))
86/* vaguely specified driver internal error. Ignored by the block layer */
87#define RQF_FAILED ((__force req_flags_t)(1 << 10))
88/* don't warn about errors */
89#define RQF_QUIET ((__force req_flags_t)(1 << 11))
90/* elevator private data attached */
91#define RQF_ELVPRIV ((__force req_flags_t)(1 << 12))
92/* account into disk and partition IO statistics */
93#define RQF_IO_STAT ((__force req_flags_t)(1 << 13))
94/* request came from our alloc pool */
95#define RQF_ALLOCED ((__force req_flags_t)(1 << 14))
96/* runtime pm request */
97#define RQF_PM ((__force req_flags_t)(1 << 15))
98/* on IO scheduler merge hash */
99#define RQF_HASHED ((__force req_flags_t)(1 << 16))
100/* track IO completion time */
101#define RQF_STATS ((__force req_flags_t)(1 << 17))
102/* Look at ->special_vec for the actual data payload instead of the
103 bio chain. */
104#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18))
105/* The per-zone write lock is held for this request */
106#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
107/* already slept for hybrid poll */
108#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20))
109/* ->timeout has been called, don't expire again */
110#define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21))
111
112/* flags that prevent us from merging requests: */
113#define RQF_NOMERGE_FLAGS \
114 (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
115
/* Request state for blk-mq; the numeric values are assigned explicitly. */
enum mq_rq_state {
	MQ_RQ_IDLE	= 0,
	MQ_RQ_IN_FLIGHT	= 1,
	MQ_RQ_COMPLETE	= 2,
};
124
125/*
126 * Try to put the fields that are referenced together in the same cacheline.
127 *
128 * If you modify this structure, make sure to update blk_rq_init() and
129 * especially blk_mq_rq_ctx_init() to take care of the added fields.
130 */
131struct request {
132 struct request_queue *q;
133 struct blk_mq_ctx *mq_ctx;
134 struct blk_mq_hw_ctx *mq_hctx;
135
136 unsigned int cmd_flags; /* op and common flags */
137 req_flags_t rq_flags;
138
139 int tag;
140 int internal_tag;
141
142 /* the following two fields are internal, NEVER access directly */
143 unsigned int __data_len; /* total data len */
144 sector_t __sector; /* sector cursor */
145
146 struct bio *bio;
147 struct bio *biotail;
148
149 struct list_head queuelist;
150
151 /*
152 * The hash is used inside the scheduler, and killed once the
153 * request reaches the dispatch list. The ipi_list is only used
154 * to queue the request for softirq completion, which is long
155 * after the request has been unhashed (and even removed from
156 * the dispatch list).
157 */
158 union {
159 struct hlist_node hash; /* merge hash */
160 struct list_head ipi_list;
161 };
162
163 /*
164 * The rb_node is only used inside the io scheduler, requests
165 * are pruned when moved to the dispatch queue. So let the
166 * completion_data share space with the rb_node.
167 */
168 union {
169 struct rb_node rb_node; /* sort/lookup */
170 struct bio_vec special_vec;
171 void *completion_data;
172 int error_count; /* for legacy drivers, don't use */
173 };
174
175 /*
176 * Three pointers are available for the IO schedulers, if they need
177 * more they have to dynamically allocate it. Flush requests are
178 * never put on the IO scheduler. So let the flush fields share
179 * space with the elevator data.
180 */
181 union {
182 struct {
183 struct io_cq *icq;
184 void *priv[2];
185 } elv;
186
187 struct {
188 unsigned int seq;
189 struct list_head list;
190 rq_end_io_fn *saved_end_io;
191 } flush;
192 };
193
194 struct gendisk *rq_disk;
195 struct hd_struct *part;
196#ifdef CONFIG_BLK_RQ_ALLOC_TIME
197 /* Time that the first bio started allocating this request. */
198 u64 alloc_time_ns;
199#endif
200 /* Time that this request was allocated for this IO. */
201 u64 start_time_ns;
202 /* Time that I/O was submitted to the device. */
203 u64 io_start_time_ns;
204
205#ifdef CONFIG_BLK_WBT
206 unsigned short wbt_flags;
207#endif
208 /*
209 * rq sectors used for blk stats. It has the same value
210 * with blk_rq_sectors(rq), except that it never be zeroed
211 * by completion.
212 */
213 unsigned short stats_sectors;
214
215 /*
216 * Number of scatter-gather DMA addr+len pairs after
217 * physical address coalescing is performed.
218 */
219 unsigned short nr_phys_segments;
220
221#if defined(CONFIG_BLK_DEV_INTEGRITY)
222 unsigned short nr_integrity_segments;
223#endif
224
225#ifdef CONFIG_BLK_INLINE_ENCRYPTION
226 struct bio_crypt_ctx *crypt_ctx;
227 struct blk_ksm_keyslot *crypt_keyslot;
228#endif
229
230 unsigned short write_hint;
231 unsigned short ioprio;
232
233 enum mq_rq_state state;
234 refcount_t ref;
235
236 unsigned int timeout;
237 unsigned long deadline;
238
239 union {
240 struct __call_single_data csd;
241 u64 fifo_time;
242 };
243
244 /*
245 * completion callback.
246 */
247 rq_end_io_fn *end_io;
248 void *end_io_data;
249};
250
251static inline bool blk_op_is_scsi(unsigned int op)
252{
253 return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT;
254}
255
256static inline bool blk_op_is_private(unsigned int op)
257{
258 return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
259}
260
/* Request-level wrapper around blk_op_is_scsi(). */
static inline bool blk_rq_is_scsi(struct request *rq)
{
	const unsigned int op = req_op(rq);

	return blk_op_is_scsi(op);
}
265
/* Request-level wrapper around blk_op_is_private(). */
static inline bool blk_rq_is_private(struct request *rq)
{
	const unsigned int op = req_op(rq);

	return blk_op_is_private(op);
}
270
/* A passthrough request is either a SCSI or a driver-private request. */
static inline bool blk_rq_is_passthrough(struct request *rq)
{
	if (blk_rq_is_scsi(rq))
		return true;

	return blk_rq_is_private(rq);
}
275
/* Same test as blk_rq_is_passthrough(), applied to the operation of @bio. */
static inline bool bio_is_passthrough(struct bio *bio)
{
	unsigned op = bio_op(bio);

	if (blk_op_is_scsi(op))
		return true;

	return blk_op_is_private(op);
}
282
283static inline unsigned short req_get_ioprio(struct request *req)
284{
285 return req->ioprio;
286}
287
288#include <linux/elevator.h>
289
290struct blk_queue_ctx;
291
292typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio);
293
294struct bio_vec;
295
enum blk_eh_timer_return {
	/* the driver has completed the command */
	BLK_EH_DONE,
	/* reset timer and try again */
	BLK_EH_RESET_TIMER,
};
300
/* Two-valued queue state; Queue_down is 0 and Queue_up is 1. */
enum blk_queue_state {
	Queue_down,
	Queue_up,
};
305
/* allocate starting from 0 */
#define BLK_TAG_ALLOC_FIFO	0
/* allocate starting from last allocated tag */
#define BLK_TAG_ALLOC_RR	1

/* Number of SCSI commands, and how many longs hold one bit per command. */
#define BLK_SCSI_MAX_CMDS	(256)
#define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
311
/* Zoned block device models (stored in queue_limits::zoned). */
enum blk_zoned_model {
	/* Regular block device */
	BLK_ZONED_NONE,
	/* Host-aware zoned block device */
	BLK_ZONED_HA,
	/* Host-managed zoned block device */
	BLK_ZONED_HM,
};
320
321struct queue_limits {
322 unsigned long bounce_pfn;
323 unsigned long seg_boundary_mask;
324 unsigned long virt_boundary_mask;
325
326 unsigned int max_hw_sectors;
327 unsigned int max_dev_sectors;
328 unsigned int chunk_sectors;
329 unsigned int max_sectors;
330 unsigned int max_segment_size;
331 unsigned int physical_block_size;
332 unsigned int logical_block_size;
333 unsigned int alignment_offset;
334 unsigned int io_min;
335 unsigned int io_opt;
336 unsigned int max_discard_sectors;
337 unsigned int max_hw_discard_sectors;
338 unsigned int max_write_same_sectors;
339 unsigned int max_write_zeroes_sectors;
340 unsigned int max_zone_append_sectors;
341 unsigned int discard_granularity;
342 unsigned int discard_alignment;
343
344 unsigned short max_segments;
345 unsigned short max_integrity_segments;
346 unsigned short max_discard_segments;
347
348 unsigned char misaligned;
349 unsigned char discard_misaligned;
350 unsigned char raid_partial_stripes_expensive;
351 enum blk_zoned_model zoned;
352};
353
354typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
355 void *data);
356
357#ifdef CONFIG_BLK_DEV_ZONED
358
359#define BLK_ALL_ZONES ((unsigned int)-1)
360int blkdev_report_zones(struct block_device *bdev, sector_t sector,
361 unsigned int nr_zones, report_zones_cb cb, void *data);
362unsigned int blkdev_nr_zones(struct gendisk *disk);
363extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
364 sector_t sectors, sector_t nr_sectors,
365 gfp_t gfp_mask);
366int blk_revalidate_disk_zones(struct gendisk *disk,
367 void (*update_driver_data)(struct gendisk *disk));
368
369extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
370 unsigned int cmd, unsigned long arg);
371extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
372 unsigned int cmd, unsigned long arg);
373
374#else /* CONFIG_BLK_DEV_ZONED */
375
376static inline unsigned int blkdev_nr_zones(struct gendisk *disk)
377{
378 return 0;
379}
380
381static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
382 fmode_t mode, unsigned int cmd,
383 unsigned long arg)
384{
385 return -ENOTTY;
386}
387
388static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
389 fmode_t mode, unsigned int cmd,
390 unsigned long arg)
391{
392 return -ENOTTY;
393}
394
395#endif /* CONFIG_BLK_DEV_ZONED */
396
397struct request_queue {
398 struct request *last_merge;
399 struct elevator_queue *elevator;
400
401 struct blk_queue_stats *stats;
402 struct rq_qos *rq_qos;
403
404 make_request_fn *make_request_fn;
405
406 const struct blk_mq_ops *mq_ops;
407
408 /* sw queues */
409 struct blk_mq_ctx __percpu *queue_ctx;
410
411 unsigned int queue_depth;
412
413 /* hw dispatch queues */
414 struct blk_mq_hw_ctx **queue_hw_ctx;
415 unsigned int nr_hw_queues;
416
417 struct backing_dev_info *backing_dev_info;
418
419 /*
420 * The queue owner gets to use this for whatever they like.
421 * ll_rw_blk doesn't touch it.
422 */
423 void *queuedata;
424
425 /*
426 * various queue flags, see QUEUE_* below
427 */
428 unsigned long queue_flags;
429 /*
430 * Number of contexts that have called blk_set_pm_only(). If this
431 * counter is above zero then only RQF_PM and RQF_PREEMPT requests are
432 * processed.
433 */
434 atomic_t pm_only;
435
436 /*
437 * ida allocated id for this queue. Used to index queues from
438 * ioctx.
439 */
440 int id;
441
442 /*
443 * queue needs bounce pages for pages above this limit
444 */
445 gfp_t bounce_gfp;
446
447 spinlock_t queue_lock;
448
449 /*
450 * queue kobject
451 */
452 struct kobject kobj;
453
454 /*
455 * mq queue kobject
456 */
457 struct kobject *mq_kobj;
458
459#ifdef CONFIG_BLK_DEV_INTEGRITY
460 struct blk_integrity integrity;
461#endif /* CONFIG_BLK_DEV_INTEGRITY */
462
463#ifdef CONFIG_PM
464 struct device *dev;
465 int rpm_status;
466 unsigned int nr_pending;
467#endif
468
469 /*
470 * queue settings
471 */
472 unsigned long nr_requests; /* Max # of requests */
473
474 unsigned int dma_pad_mask;
475 unsigned int dma_alignment;
476
477#ifdef CONFIG_BLK_INLINE_ENCRYPTION
478 /* Inline crypto capabilities */
479 struct blk_keyslot_manager *ksm;
480#endif
481
482 unsigned int rq_timeout;
483 int poll_nsec;
484
485 struct blk_stat_callback *poll_cb;
486 struct blk_rq_stat poll_stat[BLK_MQ_POLL_STATS_BKTS];
487
488 struct timer_list timeout;
489 struct work_struct timeout_work;
490
491 struct list_head icq_list;
492#ifdef CONFIG_BLK_CGROUP
493 DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS);
494 struct blkcg_gq *root_blkg;
495 struct list_head blkg_list;
496#endif
497
498 struct queue_limits limits;
499
500 unsigned int required_elevator_features;
501
502#ifdef CONFIG_BLK_DEV_ZONED
503 /*
504 * Zoned block device information for request dispatch control.
505 * nr_zones is the total number of zones of the device. This is always
506 * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones
507 * bits which indicates if a zone is conventional (bit set) or
508 * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones
509 * bits which indicates if a zone is write locked, that is, if a write
510 * request targeting the zone was dispatched. All three fields are
511 * initialized by the low level device driver (e.g. scsi/sd.c).
512 * Stacking drivers (device mappers) may or may not initialize
513 * these fields.
514 *
515 * Reads of this information must be protected with blk_queue_enter() /
516 * blk_queue_exit(). Modifying this information is only allowed while
517 * no requests are being processed. See also blk_mq_freeze_queue() and
518 * blk_mq_unfreeze_queue().
519 */
520 unsigned int nr_zones;
521 unsigned long *conv_zones_bitmap;
522 unsigned long *seq_zones_wlock;
523#endif /* CONFIG_BLK_DEV_ZONED */
524
525 /*
526 * sg stuff
527 */
528 unsigned int sg_timeout;
529 unsigned int sg_reserved_size;
530 int node;
531#ifdef CONFIG_BLK_DEV_IO_TRACE
532 struct blk_trace __rcu *blk_trace;
533 struct mutex blk_trace_mutex;
534#endif
535 /*
536 * for flush operations
537 */
538 struct blk_flush_queue *fq;
539
540 struct list_head requeue_list;
541 spinlock_t requeue_lock;
542 struct delayed_work requeue_work;
543
544 struct mutex sysfs_lock;
545 struct mutex sysfs_dir_lock;
546
547 /*
548 * for reusing dead hctx instance in case of updating
549 * nr_hw_queues
550 */
551 struct list_head unused_hctx_list;
552 spinlock_t unused_hctx_lock;
553
554 int mq_freeze_depth;
555
556#if defined(CONFIG_BLK_DEV_BSG)
557 struct bsg_class_device bsg_dev;
558#endif
559
560#ifdef CONFIG_BLK_DEV_THROTTLING
561 /* Throttle data */
562 struct throtl_data *td;
563#endif
564 struct rcu_head rcu_head;
565 wait_queue_head_t mq_freeze_wq;
566 /*
567 * Protect concurrent access to q_usage_counter by
568 * percpu_ref_kill() and percpu_ref_reinit().
569 */
570 struct mutex mq_freeze_lock;
571 struct percpu_ref q_usage_counter;
572
573 struct blk_mq_tag_set *tag_set;
574 struct list_head tag_set_list;
575 struct bio_set bio_split;
576
577#ifdef CONFIG_BLK_DEBUG_FS
578 struct dentry *debugfs_dir;
579 struct dentry *sched_debugfs_dir;
580 struct dentry *rqos_debugfs_dir;
581#endif
582
583 bool mq_sysfs_init_done;
584
585 size_t cmd_size;
586
587 struct work_struct release_work;
588
589#define BLK_MAX_WRITE_HINTS 5
590 u64 write_hints[BLK_MAX_WRITE_HINTS];
591};
592
/*
 * Bit numbers within request_queue::queue_flags.  Bits 2 and 15 are not
 * assigned here; QUEUE_FLAG_VIRT is an alias of QUEUE_FLAG_NONROT.
 */
#define QUEUE_FLAG_STOPPED		0	/* queue is stopped */
#define QUEUE_FLAG_DYING		1	/* queue being torn down */
#define QUEUE_FLAG_NOMERGES		3	/* disable merge attempts */
#define QUEUE_FLAG_SAME_COMP		4	/* complete on same CPU-group */
#define QUEUE_FLAG_FAIL_IO		5	/* fake timeout */
#define QUEUE_FLAG_NONROT		6	/* non-rotational device (SSD) */
#define QUEUE_FLAG_VIRT			QUEUE_FLAG_NONROT /* paravirt device */
#define QUEUE_FLAG_IO_STAT		7	/* do disk/partitions IO accounting */
#define QUEUE_FLAG_DISCARD		8	/* supports DISCARD */
#define QUEUE_FLAG_NOXMERGES		9	/* No extended merges */
#define QUEUE_FLAG_ADD_RANDOM		10	/* Contributes to random pool */
#define QUEUE_FLAG_SECERASE		11	/* supports secure erase */
#define QUEUE_FLAG_SAME_FORCE		12	/* force complete on same CPU */
#define QUEUE_FLAG_DEAD			13	/* queue tear-down finished */
#define QUEUE_FLAG_INIT_DONE		14	/* queue is initialized */
#define QUEUE_FLAG_POLL			16	/* IO polling enabled if set */
#define QUEUE_FLAG_WC			17	/* Write back caching */
#define QUEUE_FLAG_FUA			18	/* device supports FUA writes */
#define QUEUE_FLAG_DAX			19	/* device supports DAX */
#define QUEUE_FLAG_STATS		20	/* track IO start and completion times */
#define QUEUE_FLAG_POLL_STATS		21	/* collecting stats for hybrid polling */
#define QUEUE_FLAG_REGISTERED		22	/* queue has been registered to a disk */
#define QUEUE_FLAG_SCSI_PASSTHROUGH	23	/* queue supports SCSI commands */
#define QUEUE_FLAG_QUIESCED		24	/* queue has been quiesced */
#define QUEUE_FLAG_PCI_P2PDMA		25	/* device supports PCI p2p requests */
#define QUEUE_FLAG_ZONE_RESETALL	26	/* supports Zone Reset All */
#define QUEUE_FLAG_RQ_ALLOC_TIME	27	/* record rq->alloc_time_ns */

/* Flag mask (not bit numbers): IO_STAT and SAME_COMP set. */
#define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
				 (1 << QUEUE_FLAG_SAME_COMP))
623
void blk_queue_flag_set(unsigned int flag, struct request_queue *q);
void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);

/* Each helper below tests one QUEUE_FLAG_* bit in q->queue_flags. */
#define blk_queue_stopped(q)	\
	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
#define blk_queue_dying(q)	\
	test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
#define blk_queue_dead(q)	\
	test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
#define blk_queue_init_done(q)	\
	test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
#define blk_queue_nomerges(q)	\
	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
#define blk_queue_noxmerges(q)	\
	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
#define blk_queue_nonrot(q)	\
	test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
#define blk_queue_io_stat(q)	\
	test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
#define blk_queue_add_random(q)	\
	test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
#define blk_queue_discard(q)	\
	test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
#define blk_queue_zone_resetall(q)	\
	test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags)
#define blk_queue_secure_erase(q) \
	(test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
#define blk_queue_dax(q)	\
	test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
#define blk_queue_scsi_passthrough(q)	\
	test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
#define blk_queue_pci_p2pdma(q)	\
	test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)

/* Constant false unless CONFIG_BLK_RQ_ALLOC_TIME is enabled. */
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
#define blk_queue_rq_alloc_time(q)	\
	test_bit(QUEUE_FLAG_RQ_ALLOC_TIME, &(q)->queue_flags)
#else
#define blk_queue_rq_alloc_time(q)	false
#endif

/* Non-zero when any of the three REQ_FAILFAST_* bits is set in cmd_flags. */
#define blk_noretry_request(rq) \
	((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
			     REQ_FAILFAST_DRIVER))
#define blk_queue_quiesced(q)	\
	test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
/* Current value of the pm_only counter (not a flag bit). */
#define blk_queue_pm_only(q)	atomic_read(&(q)->pm_only)
#define blk_queue_fua(q)	\
	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
#define blk_queue_registered(q)	\
	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)

void blk_set_pm_only(struct request_queue *q);
void blk_clear_pm_only(struct request_queue *q);
665
666static inline bool blk_account_rq(struct request *rq)
667{
668 return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq);
669}
670
/* Map a queuelist list_head back to the struct request that embeds it. */
#define list_entry_rq(ptr)	\
	list_entry((ptr), struct request, queuelist)

/* WRITE when the request's operation is a write, READ otherwise. */
#define rq_data_dir(rq)		\
	(op_is_write(req_op(rq)) ? WRITE : READ)

/* DMA_TO_DEVICE for write operations, DMA_FROM_DEVICE otherwise. */
#define rq_dma_dir(rq)		\
	(op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE)

/* dma_map_page_attrs() on the page, offset and length held by a bio_vec. */
#define dma_map_bvec(dev, bv, dir, attrs)			\
	dma_map_page_attrs(dev, (bv)->bv_page, (bv)->bv_offset,	\
			   (bv)->bv_len, (dir), (attrs))
681
682static inline bool queue_is_mq(struct request_queue *q)
683{
684 return q->mq_ops;
685}
686
687static inline enum blk_zoned_model
688blk_queue_zoned_model(struct request_queue *q)
689{
690 return q->limits.zoned;
691}
692
693static inline bool blk_queue_is_zoned(struct request_queue *q)
694{
695 switch (blk_queue_zoned_model(q)) {
696 case BLK_ZONED_HA:
697 case BLK_ZONED_HM:
698 return true;
699 default:
700 return false;
701 }
702}
703
704static inline sector_t blk_queue_zone_sectors(struct request_queue *q)
705{
706 return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
707}
708
709#ifdef CONFIG_BLK_DEV_ZONED
710static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
711{
712 return blk_queue_is_zoned(q) ? q->nr_zones : 0;
713}
714
715static inline unsigned int blk_queue_zone_no(struct request_queue *q,
716 sector_t sector)
717{
718 if (!blk_queue_is_zoned(q))
719 return 0;
720 return sector >> ilog2(q->limits.chunk_sectors);
721}
722
723static inline bool blk_queue_zone_is_seq(struct request_queue *q,
724 sector_t sector)
725{
726 if (!blk_queue_is_zoned(q))
727 return false;
728 if (!q->conv_zones_bitmap)
729 return true;
730 return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
731}
732#else /* CONFIG_BLK_DEV_ZONED */
733static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
734{
735 return 0;
736}
737static inline bool blk_queue_zone_is_seq(struct request_queue *q,
738 sector_t sector)
739{
740 return false;
741}
742static inline unsigned int blk_queue_zone_no(struct request_queue *q,
743 sector_t sector)
744{
745 return 0;
746}
747#endif /* CONFIG_BLK_DEV_ZONED */
748
749static inline bool rq_is_sync(struct request *rq)
750{
751 return op_is_sync(rq->cmd_flags);
752}
753
754static inline bool rq_mergeable(struct request *rq)
755{
756 if (blk_rq_is_passthrough(rq))
757 return false;
758
759 if (req_op(rq) == REQ_OP_FLUSH)
760 return false;
761
762 if (req_op(rq) == REQ_OP_WRITE_ZEROES)
763 return false;
764
765 if (req_op(rq) == REQ_OP_ZONE_APPEND)
766 return false;
767
768 if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
769 return false;
770 if (rq->rq_flags & RQF_NOMERGE_FLAGS)
771 return false;
772
773 return true;
774}
775
/* Two bios qualify when they refer to the same page at the same offset. */
static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
{
	return bio_page(a) == bio_page(b) &&
	       bio_offset(a) == bio_offset(b);
}
784
785static inline unsigned int blk_queue_depth(struct request_queue *q)
786{
787 if (q->queue_depth)
788 return q->queue_depth;
789
790 return q->nr_requests;
791}
792
extern unsigned long blk_max_low_pfn, blk_max_pfn;

/*
 * standard bounce addresses:
 *
 * BLK_BOUNCE_HIGH	: bounce all highmem pages
 * BLK_BOUNCE_ANY	: don't bounce anything
 * BLK_BOUNCE_ISA	: bounce pages above ISA DMA boundary
 *
 * Every definition is fully parenthesized so that it expands as a single
 * operand.  When BITS_PER_LONG is not 32, BLK_BOUNCE_HIGH has the same value
 * as BLK_BOUNCE_ANY.
 */

#if BITS_PER_LONG == 32
#define BLK_BOUNCE_HIGH		((u64)blk_max_low_pfn << PAGE_SHIFT)
#else
#define BLK_BOUNCE_HIGH		(-1ULL)
#endif
#define BLK_BOUNCE_ANY		(-1ULL)
#define BLK_BOUNCE_ISA		(DMA_BIT_MASK(24))
810
/* SG_IO timeouts, in units of HZ: the default when none is given, and the minimum. */
#define BLK_DEFAULT_SG_TIMEOUT	(60 * HZ)
#define BLK_MIN_SG_TIMEOUT	(7 * HZ)
816
/* Mapping parameters taken by blk_rq_map_user() and blk_rq_map_user_iov(). */
struct rq_map_data {
	struct page **pages;
	int page_order, nr_entries;
	unsigned long offset;
	int null_mapped, from_user;
};
825
826struct req_iterator {
827 struct bvec_iter iter;
828 struct bio *bio;
829};
830
/*
 * Low-level bio chain walkers.  These should not be used directly - use
 * rq_for_each_segment.
 *
 * for_each_bio() advances @_bio itself along ->bi_next until it is NULL.
 * __rq_for_each_bio() visits every bio of @rq and leaves @_bio untouched
 * when the request has no bio at all.
 *
 * All arguments are parenthesized so that expressions such as "rqs + 1" or
 * "*biop" expand correctly.  __rq_for_each_bio() expands to an unbraced
 * "if (...) for (...)", so it must not be followed by an "else".
 */
#define for_each_bio(_bio)		\
	for (; (_bio); (_bio) = (_bio)->bi_next)
#define __rq_for_each_bio(_bio, rq)	\
	if ((rq)->bio)			\
		for ((_bio) = (rq)->bio; (_bio); (_bio) = (_bio)->bi_next)
837
/*
 * Iterate over every segment (rq_for_each_segment) or every bvec
 * (rq_for_each_bvec) of every bio in @_rq, using the struct req_iterator
 * lvalue @_iter as cursor.
 *
 * @_iter and @_rq are parenthesized before member access so that
 * non-trivial arguments such as "*iterp" expand correctly.
 */
#define rq_for_each_segment(bvl, _rq, _iter)			\
	__rq_for_each_bio((_iter).bio, (_rq))			\
		bio_for_each_segment(bvl, (_iter).bio, (_iter).iter)

#define rq_for_each_bvec(bvl, _rq, _iter)			\
	__rq_for_each_bio((_iter).bio, (_rq))			\
		bio_for_each_bvec(bvl, (_iter).bio, (_iter).iter)

/* True when @_iter sits on the last bio and bio_iter_last() holds for it. */
#define rq_iter_last(bvec, _iter)				\
		((_iter).bio->bi_next == NULL &&		\
		 bio_iter_last(bvec, (_iter).iter))
849
850#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
851# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
852#endif
853#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
854extern void rq_flush_dcache_pages(struct request *rq);
855#else
856static inline void rq_flush_dcache_pages(struct request *rq)
857{
858}
859#endif
860
861extern int blk_register_queue(struct gendisk *disk);
862extern void blk_unregister_queue(struct gendisk *disk);
863extern blk_qc_t generic_make_request(struct bio *bio);
864extern blk_qc_t direct_make_request(struct bio *bio);
865extern void blk_rq_init(struct request_queue *q, struct request *rq);
866extern void blk_put_request(struct request *);
867extern struct request *blk_get_request(struct request_queue *, unsigned int op,
868 blk_mq_req_flags_t flags);
869extern int blk_lld_busy(struct request_queue *q);
870extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
871 struct bio_set *bs, gfp_t gfp_mask,
872 int (*bio_ctr)(struct bio *, struct bio *, void *),
873 void *data);
874extern void blk_rq_unprep_clone(struct request *rq);
875extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
876 struct request *rq);
877extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
878extern void blk_queue_split(struct request_queue *, struct bio **);
879extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
880extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
881 unsigned int, void __user *);
882extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
883 unsigned int, void __user *);
884extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
885 struct scsi_ioctl_command __user *);
886extern int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp);
887extern int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp);
888
889extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags);
890extern void blk_queue_exit(struct request_queue *q);
891extern void blk_sync_queue(struct request_queue *q);
892extern int blk_rq_map_user(struct request_queue *, struct request *,
893 struct rq_map_data *, void __user *, unsigned long,
894 gfp_t);
895extern int blk_rq_unmap_user(struct bio *);
896extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
897extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
898 struct rq_map_data *, const struct iov_iter *,
899 gfp_t);
900extern void blk_execute_rq(struct request_queue *, struct gendisk *,
901 struct request *, int);
902extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
903 struct request *, int, rq_end_io_fn *);
904
905/* Helper to convert REQ_OP_XXX to its string format XXX */
906extern const char *blk_op_str(unsigned int op);
907
908int blk_status_to_errno(blk_status_t status);
909blk_status_t errno_to_blk_status(int errno);
910
911int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin);
912
913static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
914{
915 return bdev->bd_disk->queue; /* this is never NULL */
916}
917
/*
 * A sector is the basic unit of block I/O and is used in several places in
 * Linux (blk, bio, genhd).  One sector is 512 = 2**9 bytes, and a sector_t
 * variable holds an offset or size that is a multiple of 512 bytes - hence
 * the two constants below.  Each is defined only if not already defined.
 */
#ifndef SECTOR_SHIFT
#define SECTOR_SHIFT	9
#endif
#ifndef SECTOR_SIZE
#define SECTOR_SIZE	(1 << SECTOR_SHIFT)
#endif
930
931/*
932 * blk_rq_pos() : the current sector
933 * blk_rq_bytes() : bytes left in the entire request
934 * blk_rq_cur_bytes() : bytes left in the current segment
935 * blk_rq_err_bytes() : bytes left till the next error boundary
936 * blk_rq_sectors() : sectors left in the entire request
937 * blk_rq_cur_sectors() : sectors left in the current segment
938 * blk_rq_stats_sectors() : sectors of the entire request used for stats
939 */
940static inline sector_t blk_rq_pos(const struct request *rq)
941{
942 return rq->__sector;
943}
944
945static inline unsigned int blk_rq_bytes(const struct request *rq)
946{
947 return rq->__data_len;
948}
949
950static inline int blk_rq_cur_bytes(const struct request *rq)
951{
952 return rq->bio ? bio_cur_bytes(rq->bio) : 0;
953}
954
955extern unsigned int blk_rq_err_bytes(const struct request *rq);
956
957static inline unsigned int blk_rq_sectors(const struct request *rq)
958{
959 return blk_rq_bytes(rq) >> SECTOR_SHIFT;
960}
961
962static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
963{
964 return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
965}
966
967static inline unsigned int blk_rq_stats_sectors(const struct request *rq)
968{
969 return rq->stats_sectors;
970}
971
#ifdef CONFIG_BLK_DEV_ZONED

/* Helper to convert an enum blk_zone_cond value to its string format */
const char *blk_zone_cond_str(enum blk_zone_cond zone_cond);

/* Zone index of the request's current sector. */
static inline unsigned int blk_rq_zone_no(struct request *rq)
{
	const sector_t pos = blk_rq_pos(rq);

	return blk_queue_zone_no(rq->q, pos);
}

/* Non-zero when the request's current sector lies in a sequential zone. */
static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
{
	const sector_t pos = blk_rq_pos(rq);

	return blk_queue_zone_is_seq(rq->q, pos);
}
#endif /* CONFIG_BLK_DEV_ZONED */
987
988/*
989 * Some commands like WRITE SAME have a payload or data transfer size which
990 * is different from the size of the request. Any driver that supports such
991 * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to
992 * calculate the data transfer size.
993 */
994static inline unsigned int blk_rq_payload_bytes(struct request *rq)
995{
996 if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
997 return rq->special_vec.bv_len;
998 return blk_rq_bytes(rq);
999}
1000
1001/*
1002 * Return the first full biovec in the request. The caller needs to check that
1003 * there are any bvecs before calling this helper.
1004 */
1005static inline struct bio_vec req_bvec(struct request *rq)
1006{
1007 if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
1008 return rq->special_vec;
1009 return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter);
1010}
1011
1012static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
1013 int op)
1014{
1015 if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
1016 return min(q->limits.max_discard_sectors,
1017 UINT_MAX >> SECTOR_SHIFT);
1018
1019 if (unlikely(op == REQ_OP_WRITE_SAME))
1020 return q->limits.max_write_same_sectors;
1021
1022 if (unlikely(op == REQ_OP_WRITE_ZEROES))
1023 return q->limits.max_write_zeroes_sectors;
1024
1025 return q->limits.max_sectors;
1026}
1027
1028/*
1029 * Return maximum size of a request at given offset. Only valid for
1030 * file system requests.
1031 */
1032static inline unsigned int blk_max_size_offset(struct request_queue *q,
1033 sector_t offset)
1034{
1035 if (!q->limits.chunk_sectors)
1036 return q->limits.max_sectors;
1037
1038 return min(q->limits.max_sectors, (unsigned int)(q->limits.chunk_sectors -
1039 (offset & (q->limits.chunk_sectors - 1))));
1040}
1041
1042static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
1043 sector_t offset)
1044{
1045 struct request_queue *q = rq->q;
1046
1047 if (blk_rq_is_passthrough(rq))
1048 return q->limits.max_hw_sectors;
1049
1050 if (!q->limits.chunk_sectors ||
1051 req_op(rq) == REQ_OP_DISCARD ||
1052 req_op(rq) == REQ_OP_SECURE_ERASE)
1053 return blk_queue_get_max_sectors(q, req_op(rq));
1054
1055 return min(blk_max_size_offset(q, offset),
1056 blk_queue_get_max_sectors(q, req_op(rq)));
1057}
1058
1059static inline unsigned int blk_rq_count_bios(struct request *rq)
1060{
1061 unsigned int nr_bios = 0;
1062 struct bio *bio;
1063
1064 __rq_for_each_bio(bio, rq)
1065 nr_bios++;
1066
1067 return nr_bios;
1068}
1069
1070void blk_steal_bios(struct bio_list *list, struct request *rq);
1071
1072/*
1073 * Request completion related functions.
1074 *
1075 * blk_update_request() completes given number of bytes and updates
1076 * the request without completing it.
1077 */
1078extern bool blk_update_request(struct request *rq, blk_status_t error,
1079 unsigned int nr_bytes);
1080
1081extern void __blk_complete_request(struct request *);
1082extern void blk_abort_request(struct request *);
1083
1084/*
1085 * Access functions for manipulating queue properties
1086 */
1087extern void blk_cleanup_queue(struct request_queue *);
1088extern void blk_queue_bounce_limit(struct request_queue *, u64);
1089extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
1090extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
1091extern void blk_queue_max_segments(struct request_queue *, unsigned short);
1092extern void blk_queue_max_discard_segments(struct request_queue *,
1093 unsigned short);
1094extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
1095extern void blk_queue_max_discard_sectors(struct request_queue *q,
1096 unsigned int max_discard_sectors);
1097extern void blk_queue_max_write_same_sectors(struct request_queue *q,
1098 unsigned int max_write_same_sectors);
/*
 * Counterpart of blk_queue_max_write_same_sectors() for write-zeroes
 * operations; the implementation lives outside this header.  The parameter
 * is named after the limit it carries (it used to be a copy-paste of the
 * write-same name).
 */
extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
		unsigned int max_write_zeroes_sectors);
1101extern void blk_queue_logical_block_size(struct request_queue *, unsigned int);
1102extern void blk_queue_max_zone_append_sectors(struct request_queue *q,
1103 unsigned int max_zone_append_sectors);
1104extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
1105extern void blk_queue_alignment_offset(struct request_queue *q,
1106 unsigned int alignment);
1107extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
1108extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
1109extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
1110extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
1111extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
1112extern void blk_set_default_limits(struct queue_limits *lim);
1113extern void blk_set_stacking_limits(struct queue_limits *lim);
1114extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
1115 sector_t offset);
1116extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
1117 sector_t offset);
1118extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
1119 sector_t offset);
1120extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
1121extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
1122extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
1123extern void blk_queue_virt_boundary(struct request_queue *, unsigned long);
1124extern void blk_queue_dma_alignment(struct request_queue *, int);
1125extern void blk_queue_update_dma_alignment(struct request_queue *, int);
1126extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
1127extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
1128extern void blk_queue_required_elevator_features(struct request_queue *q,
1129 unsigned int features);
1130extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
1131 struct device *dev);
1132
1133/*
1134 * Number of physical segments as sent to the device.
1135 *
1136 * Normally this is the number of discontiguous data segments sent by the
1137 * submitter. But for data-less command like discard we might have no
1138 * actual data segments submitted, but the driver might have to add it's
1139 * own special payload. In that case we still return 1 here so that this
1140 * special payload will be mapped.
1141 */
1142static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
1143{
1144 if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
1145 return 1;
1146 return rq->nr_phys_segments;
1147}
1148
1149/*
1150 * Number of discard segments (or ranges) the driver needs to fill in.
1151 * Each discard bio merged into a request is counted as one segment.
1152 */
1153static inline unsigned short blk_rq_nr_discard_segments(struct request *rq)
1154{
1155 return max_t(unsigned short, rq->nr_phys_segments, 1);
1156}
1157
1158int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
1159 struct scatterlist *sglist, struct scatterlist **last_sg);
1160static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq,
1161 struct scatterlist *sglist)
1162{
1163 struct scatterlist *last_sg = NULL;
1164
1165 return __blk_rq_map_sg(q, rq, sglist, &last_sg);
1166}
1167extern void blk_dump_rq_flags(struct request *, char *);
1168extern long nr_blockdev_pages(void);
1169
1170bool __must_check blk_get_queue(struct request_queue *);
1171struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id);
1172extern void blk_put_queue(struct request_queue *);
1173extern void blk_set_queue_dying(struct request_queue *);
1174
/*
 * blk_plug permits building a queue of related requests by holding the I/O
 * fragments for a short period. This allows merging of sequential requests
 * into a single larger request. As the requests are moved from a per-task
 * list to the device's request_queue in a batch, this results in improved
 * scalability as the lock contention for the request_queue lock is reduced.
 *
 * It is ok not to disable preemption when adding the request to the plug list
 * or when attempting a merge, because blk_schedule_flush_plug() will only
 * flush the plug list when the task sleeps by itself. For details, please see
 * schedule() where blk_schedule_flush_plug() is called.
 */
struct blk_plug {
	struct list_head mq_list; /* blk-mq requests */
	struct list_head cb_list; /* md requires an unplug callback */
	unsigned short rq_count; /* presumably the number of entries on mq_list */
	bool multiple_queues; /* NOTE(review): looks like "requests span several queues" - confirm */
};
/* NOTE(review): presumably the plug thresholds used by the flush logic - confirm at the users */
#define BLK_MAX_REQUEST_COUNT 16
#define BLK_PLUG_FLUSH_SIZE (128 * 1024)
1195
struct blk_plug_cb;
/* Unplug callback: receives the callback record and a boolean flag. */
typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool);
/* One registered unplug callback; @list links it into a list. */
struct blk_plug_cb {
	struct list_head list;
	blk_plug_cb_fn callback;
	void *data; /* opaque pointer supplied at registration */
};
extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug,
		void *data, int size);
extern void blk_start_plug(struct blk_plug *);
extern void blk_finish_plug(struct blk_plug *);
/* The inline helpers in this header pass false (flush) or true (schedule). */
extern void blk_flush_plug_list(struct blk_plug *, bool);
1208
1209static inline void blk_flush_plug(struct task_struct *tsk)
1210{
1211 struct blk_plug *plug = tsk->plug;
1212
1213 if (plug)
1214 blk_flush_plug_list(plug, false);
1215}
1216
1217static inline void blk_schedule_flush_plug(struct task_struct *tsk)
1218{
1219 struct blk_plug *plug = tsk->plug;
1220
1221 if (plug)
1222 blk_flush_plug_list(plug, true);
1223}
1224
1225static inline bool blk_needs_flush_plug(struct task_struct *tsk)
1226{
1227 struct blk_plug *plug = tsk->plug;
1228
1229 return plug &&
1230 (!list_empty(&plug->mq_list) ||
1231 !list_empty(&plug->cb_list));
1232}
1233
1234extern void blk_io_schedule(void);
1235
1236int blkdev_issue_flush(struct block_device *, gfp_t);
1237extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
1238 sector_t nr_sects, gfp_t gfp_mask, struct page *page);
1239
1240#define BLKDEV_DISCARD_SECURE (1 << 0) /* issue a secure erase */
1241
1242extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
1243 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
1244extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
1245 sector_t nr_sects, gfp_t gfp_mask, int flags,
1246 struct bio **biop);
1247
1248#define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */
1249#define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */
1250
1251extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
1252 sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
1253 unsigned flags);
1254extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
1255 sector_t nr_sects, gfp_t gfp_mask, unsigned flags);
1256
1257static inline int sb_issue_discard(struct super_block *sb, sector_t block,
1258 sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
1259{
1260 return blkdev_issue_discard(sb->s_bdev,
1261 block << (sb->s_blocksize_bits -
1262 SECTOR_SHIFT),
1263 nr_blocks << (sb->s_blocksize_bits -
1264 SECTOR_SHIFT),
1265 gfp_mask, flags);
1266}
1267static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
1268 sector_t nr_blocks, gfp_t gfp_mask)
1269{
1270 return blkdev_issue_zeroout(sb->s_bdev,
1271 block << (sb->s_blocksize_bits -
1272 SECTOR_SHIFT),
1273 nr_blocks << (sb->s_blocksize_bits -
1274 SECTOR_SHIFT),
1275 gfp_mask, 0);
1276}
1277
1278extern int blk_verify_command(unsigned char *cmd, fmode_t mode);
1279
/*
 * Default values for queue limits.
 * NOTE(review): units are inferred from the names (segments, sectors, bytes,
 * address mask) - confirm against the code that applies them.
 */
enum blk_default_limits {
	BLK_MAX_SEGMENTS = 128,
	BLK_SAFE_MAX_SECTORS = 255,
	BLK_DEF_MAX_SECTORS = 2560,
	BLK_MAX_SEGMENT_SIZE = 65536,
	BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL,
};
1287
1288static inline unsigned long queue_segment_boundary(const struct request_queue *q)
1289{
1290 return q->limits.seg_boundary_mask;
1291}
1292
1293static inline unsigned long queue_virt_boundary(const struct request_queue *q)
1294{
1295 return q->limits.virt_boundary_mask;
1296}
1297
1298static inline unsigned int queue_max_sectors(const struct request_queue *q)
1299{
1300 return q->limits.max_sectors;
1301}
1302
1303static inline unsigned int queue_max_hw_sectors(const struct request_queue *q)
1304{
1305 return q->limits.max_hw_sectors;
1306}
1307
1308static inline unsigned short queue_max_segments(const struct request_queue *q)
1309{
1310 return q->limits.max_segments;
1311}
1312
1313static inline unsigned short queue_max_discard_segments(const struct request_queue *q)
1314{
1315 return q->limits.max_discard_segments;
1316}
1317
1318static inline unsigned int queue_max_segment_size(const struct request_queue *q)
1319{
1320 return q->limits.max_segment_size;
1321}
1322
1323static inline unsigned int queue_max_zone_append_sectors(const struct request_queue *q)
1324{
1325 return q->limits.max_zone_append_sectors;
1326}
1327
1328static inline unsigned queue_logical_block_size(const struct request_queue *q)
1329{
1330 int retval = 512;
1331
1332 if (q && q->limits.logical_block_size)
1333 retval = q->limits.logical_block_size;
1334
1335 return retval;
1336}
1337
/* queue_logical_block_size() of the queue backing @bdev. */
static inline unsigned int bdev_logical_block_size(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	return queue_logical_block_size(q);
}
1342
1343static inline unsigned int queue_physical_block_size(const struct request_queue *q)
1344{
1345 return q->limits.physical_block_size;
1346}
1347
/* queue_physical_block_size() of the queue backing @bdev. */
static inline unsigned int bdev_physical_block_size(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	return queue_physical_block_size(q);
}
1352
1353static inline unsigned int queue_io_min(const struct request_queue *q)
1354{
1355 return q->limits.io_min;
1356}
1357
/* queue_io_min() of the queue backing @bdev (returned as int). */
static inline int bdev_io_min(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	return queue_io_min(q);
}
1362
1363static inline unsigned int queue_io_opt(const struct request_queue *q)
1364{
1365 return q->limits.io_opt;
1366}
1367
/* queue_io_opt() of the queue backing @bdev (returned as int). */
static inline int bdev_io_opt(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	return queue_io_opt(q);
}
1372
1373static inline int queue_alignment_offset(const struct request_queue *q)
1374{
1375 if (q->limits.misaligned)
1376 return -1;
1377
1378 return q->limits.alignment_offset;
1379}
1380
1381static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector)
1382{
1383 unsigned int granularity = max(lim->physical_block_size, lim->io_min);
1384 unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT)
1385 << SECTOR_SHIFT;
1386
1387 return (granularity + lim->alignment_offset - alignment) % granularity;
1388}
1389
1390static inline int bdev_alignment_offset(struct block_device *bdev)
1391{
1392 struct request_queue *q = bdev_get_queue(bdev);
1393
1394 if (q->limits.misaligned)
1395 return -1;
1396
1397 if (bdev != bdev->bd_contains)
1398 return bdev->bd_part->alignment_offset;
1399
1400 return q->limits.alignment_offset;
1401}
1402
1403static inline int queue_discard_alignment(const struct request_queue *q)
1404{
1405 if (q->limits.discard_misaligned)
1406 return -1;
1407
1408 return q->limits.discard_alignment;
1409}
1410
1411static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
1412{
1413 unsigned int alignment, granularity, offset;
1414
1415 if (!lim->max_discard_sectors)
1416 return 0;
1417
1418 /* Why are these in bytes, not sectors? */
1419 alignment = lim->discard_alignment >> SECTOR_SHIFT;
1420 granularity = lim->discard_granularity >> SECTOR_SHIFT;
1421 if (!granularity)
1422 return 0;
1423
1424 /* Offset of the partition start in 'granularity' sectors */
1425 offset = sector_div(sector, granularity);
1426
1427 /* And why do we do this modulus *again* in blkdev_issue_discard()? */
1428 offset = (granularity + alignment - offset) % granularity;
1429
1430 /* Turn it back into bytes, gaah */
1431 return offset << SECTOR_SHIFT;
1432}
1433
1434static inline int bdev_discard_alignment(struct block_device *bdev)
1435{
1436 struct request_queue *q = bdev_get_queue(bdev);
1437
1438 if (bdev != bdev->bd_contains)
1439 return bdev->bd_part->discard_alignment;
1440
1441 return q->limits.discard_alignment;
1442}
1443
1444static inline unsigned int bdev_write_same(struct block_device *bdev)
1445{
1446 struct request_queue *q = bdev_get_queue(bdev);
1447
1448 if (q)
1449 return q->limits.max_write_same_sectors;
1450
1451 return 0;
1452}
1453
1454static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
1455{
1456 struct request_queue *q = bdev_get_queue(bdev);
1457
1458 if (q)
1459 return q->limits.max_write_zeroes_sectors;
1460
1461 return 0;
1462}
1463
1464static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)
1465{
1466 struct request_queue *q = bdev_get_queue(bdev);
1467
1468 if (q)
1469 return blk_queue_zoned_model(q);
1470
1471 return BLK_ZONED_NONE;
1472}
1473
1474static inline bool bdev_is_zoned(struct block_device *bdev)
1475{
1476 struct request_queue *q = bdev_get_queue(bdev);
1477
1478 if (q)
1479 return blk_queue_is_zoned(q);
1480
1481 return false;
1482}
1483
1484static inline sector_t bdev_zone_sectors(struct block_device *bdev)
1485{
1486 struct request_queue *q = bdev_get_queue(bdev);
1487
1488 if (q)
1489 return blk_queue_zone_sectors(q);
1490 return 0;
1491}
1492
1493static inline int queue_dma_alignment(const struct request_queue *q)
1494{
1495 return q ? q->dma_alignment : 511;
1496}
1497
1498static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
1499 unsigned int len)
1500{
1501 unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask;
1502 return !(addr & alignment) && !(len & alignment);
1503}
1504
/*
 * Number of bits needed to express a block size, i.e. log2(size) for
 * power-of-two sizes.  Assumes size > 256; anything at or below 512
 * yields 9.
 */
static inline unsigned int blksize_bits(unsigned int size)
{
	unsigned int bits = 9;

	/* Each further halving that stays above 256 adds one bit. */
	for (size >>= 1; size > 256; size >>= 1)
		bits++;

	return bits;
}
1515
1516static inline unsigned int block_size(struct block_device *bdev)
1517{
1518 return bdev->bd_block_size;
1519}
1520
/* Work scheduling helpers implemented outside this header. */
int kblockd_schedule_work(struct work_struct *work);
int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);

/* Module alias of the form "block-major-<major>-<minor>". */
#define MODULE_ALIAS_BLOCKDEV(major,minor) \
	MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
/* Module alias of the form "block-major-<major>-*". */
#define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
	MODULE_ALIAS("block-major-" __stringify(major) "-*")
1528
1529#if defined(CONFIG_BLK_DEV_INTEGRITY)
1530
/* Integrity flag bits; each constant occupies a distinct bit. */
enum blk_integrity_flags {
	BLK_INTEGRITY_VERIFY = 1 << 0,
	BLK_INTEGRITY_GENERATE = 1 << 1,
	BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2,
	BLK_INTEGRITY_IP_CHECKSUM = 1 << 3,
};
1537
/*
 * Argument block handed to integrity_processing_fn callbacks.
 * NOTE(review): field meanings below are inferred from names - confirm
 * against the profile implementations.
 */
struct blk_integrity_iter {
	void *prot_buf; /* presumably the protection (integrity) buffer */
	void *data_buf; /* presumably the data buffer being covered */
	sector_t seed;
	unsigned int data_size;
	unsigned short interval;
	const char *disk_name;
};
1546
/* Callback signatures used by an integrity profile. */
typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
typedef void (integrity_prepare_fn) (struct request *);
typedef void (integrity_complete_fn) (struct request *, unsigned int);

/* Set of integrity callbacks plus a name identifying the profile. */
struct blk_integrity_profile {
	integrity_processing_fn *generate_fn;
	integrity_processing_fn *verify_fn;
	integrity_prepare_fn *prepare_fn;
	integrity_complete_fn *complete_fn;
	const char *name;
};
1558
1559extern void blk_integrity_register(struct gendisk *, struct blk_integrity *);
1560extern void blk_integrity_unregister(struct gendisk *);
1561extern int blk_integrity_compare(struct gendisk *, struct gendisk *);
1562extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *,
1563 struct scatterlist *);
1564extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
1565extern bool blk_integrity_merge_rq(struct request_queue *, struct request *,
1566 struct request *);
1567extern bool blk_integrity_merge_bio(struct request_queue *, struct request *,
1568 struct bio *);
1569
1570static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
1571{
1572 struct blk_integrity *bi = &disk->queue->integrity;
1573
1574 if (!bi->profile)
1575 return NULL;
1576
1577 return bi;
1578}
1579
1580static inline
1581struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
1582{
1583 return blk_get_integrity(bdev->bd_disk);
1584}
1585
1586static inline bool
1587blk_integrity_queue_supports_integrity(struct request_queue *q)
1588{
1589 return q->integrity.profile;
1590}
1591
1592static inline bool blk_integrity_rq(struct request *rq)
1593{
1594 return rq->cmd_flags & REQ_INTEGRITY;
1595}
1596
1597static inline void blk_queue_max_integrity_segments(struct request_queue *q,
1598 unsigned int segs)
1599{
1600 q->limits.max_integrity_segments = segs;
1601}
1602
1603static inline unsigned short
1604queue_max_integrity_segments(const struct request_queue *q)
1605{
1606 return q->limits.max_integrity_segments;
1607}
1608
1609/**
1610 * bio_integrity_intervals - Return number of integrity intervals for a bio
1611 * @bi: blk_integrity profile for device
1612 * @sectors: Size of the bio in 512-byte sectors
1613 *
1614 * Description: The block layer calculates everything in 512 byte
1615 * sectors but integrity metadata is done in terms of the data integrity
1616 * interval size of the storage device. Convert the block layer sectors
1617 * to the appropriate number of integrity intervals.
1618 */
1619static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
1620 unsigned int sectors)
1621{
1622 return sectors >> (bi->interval_exp - 9);
1623}
1624
1625static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
1626 unsigned int sectors)
1627{
1628 return bio_integrity_intervals(bi, sectors) * bi->tuple_size;
1629}
1630
1631/*
1632 * Return the first bvec that contains integrity data. Only drivers that are
1633 * limited to a single integrity segment should use this helper.
1634 */
1635static inline struct bio_vec *rq_integrity_vec(struct request *rq)
1636{
1637 if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1))
1638 return NULL;
1639 return rq->bio->bi_integrity->bip_vec;
1640}
1641
1642#else /* CONFIG_BLK_DEV_INTEGRITY */
1643
1644struct bio;
1645struct block_device;
1646struct gendisk;
1647struct blk_integrity;
1648
/* Integrity support compiled out: no request carries integrity data. */
static inline int blk_integrity_rq(struct request *rq)
{
	return 0;
}
/* Integrity support compiled out: there are no integrity segments. */
static inline int blk_rq_count_integrity_sg(struct request_queue *q,
					    struct bio *b)
{
	return 0;
}
/* Integrity support compiled out: nothing is mapped, 0 entries reported. */
static inline int blk_rq_map_integrity_sg(struct request_queue *q,
					  struct bio *b,
					  struct scatterlist *s)
{
	return 0;
}
/* Integrity support compiled out: block devices have no integrity profile. */
static inline struct blk_integrity *bdev_get_integrity(struct block_device *b)
{
	return NULL;
}
/* Integrity support compiled out: disks have no integrity profile. */
static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
{
	return NULL;
}
/* Integrity support compiled out: no queue supports integrity. */
static inline bool
blk_integrity_queue_supports_integrity(struct request_queue *q)
{
	return false;
}
/* Integrity support compiled out: comparison always yields 0. */
static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b)
{
	return 0;
}
/* Integrity support compiled out: registration is a no-op. */
static inline void blk_integrity_register(struct gendisk *d,
					  struct blk_integrity *b)
{
}
/* Integrity support compiled out: unregistration is a no-op. */
static inline void blk_integrity_unregister(struct gendisk *d)
{
}
/* Integrity support compiled out: the limit is ignored. */
static inline void blk_queue_max_integrity_segments(struct request_queue *q,
						    unsigned int segs)
{
}
/* Integrity support compiled out: no integrity segments are allowed. */
static inline unsigned short queue_max_integrity_segments(const struct request_queue *q)
{
	return 0;
}
/* Integrity support compiled out: integrity never prevents a request merge. */
static inline bool blk_integrity_merge_rq(struct request_queue *rq,
					  struct request *r1,
					  struct request *r2)
{
	return true;
}
/* Integrity support compiled out: integrity never prevents a bio merge. */
static inline bool blk_integrity_merge_bio(struct request_queue *rq,
					   struct request *r,
					   struct bio *b)
{
	return true;
}
1708
/* Integrity support compiled out: there are no integrity intervals. */
static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
						   unsigned int sectors)
{
	return 0;
}
1714
/* Integrity support compiled out: no integrity metadata bytes are needed. */
static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
					       unsigned int sectors)
{
	return 0;
}
1720
/* Integrity support compiled out: requests have no integrity bvec. */
static inline struct bio_vec *rq_integrity_vec(struct request *rq)
{
	return NULL;
}
1725
1726#endif /* CONFIG_BLK_DEV_INTEGRITY */
1727
1728#ifdef CONFIG_BLK_INLINE_ENCRYPTION
1729
/*
 * Keyslot manager registration for a request queue, available with
 * CONFIG_BLK_INLINE_ENCRYPTION.  Implemented outside this header.
 */
bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q);

void blk_ksm_unregister(struct request_queue *q);
1733
1734#else /* CONFIG_BLK_INLINE_ENCRYPTION */
1735
/* Inline encryption compiled out: registration trivially reports success. */
static inline bool blk_ksm_register(struct blk_keyslot_manager *ksm,
				    struct request_queue *q)
{
	return true;
}
1741
/* Inline encryption compiled out: nothing to unregister. */
static inline void blk_ksm_unregister(struct request_queue *q)
{
}
1743
1744#endif /* CONFIG_BLK_INLINE_ENCRYPTION */
1745
1746
/*
 * Table of callbacks a block driver provides for a device, together with
 * the owning module and optional persistent-reservation operations.
 */
struct block_device_operations {
	int (*open) (struct block_device *, fmode_t);
	void (*release) (struct gendisk *, fmode_t);
	int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int);
	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
	unsigned int (*check_events) (struct gendisk *disk,
				      unsigned int clearing);
	/* ->media_changed() is DEPRECATED, use ->check_events() instead */
	int (*media_changed) (struct gendisk *);
	void (*unlock_native_capacity) (struct gendisk *);
	int (*revalidate_disk) (struct gendisk *);
	int (*getgeo)(struct block_device *, struct hd_geometry *);
	/* this callback is with swap_lock and sometimes page table lock held */
	void (*swap_slot_free_notify) (struct block_device *, unsigned long);
	int (*report_zones)(struct gendisk *, sector_t sector,
			unsigned int nr_zones, report_zones_cb cb, void *data);
	char *(*devnode)(struct gendisk *disk, umode_t *mode);
	struct module *owner;
	const struct pr_ops *pr_ops;
};
1768
/*
 * blkdev_compat_ptr_ioctl is a real function only with CONFIG_COMPAT;
 * otherwise the name expands to NULL so it can still be used as an
 * initializer for a function-pointer member.
 */
#ifdef CONFIG_COMPAT
extern int blkdev_compat_ptr_ioctl(struct block_device *, fmode_t,
				      unsigned int, unsigned long);
#else
#define blkdev_compat_ptr_ioctl NULL
#endif

extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
				 unsigned long);
/* Page-granular read/write helpers implemented outside this header. */
extern int bdev_read_page(struct block_device *, sector_t, struct page *);
extern int bdev_write_page(struct block_device *, sector_t, struct page *,
						struct writeback_control *);
1781
1782#ifdef CONFIG_BLK_DEV_ZONED
/*
 * Zone write locking primitives, implemented outside this header.  The
 * inline wrappers in this header call the __-prefixed variants only after
 * checking that locking/unlocking applies to the request.
 */
bool blk_req_needs_zone_write_lock(struct request *rq);
bool blk_req_zone_write_trylock(struct request *rq);
void __blk_req_zone_write_lock(struct request *rq);
void __blk_req_zone_write_unlock(struct request *rq);
1787
/* Take the zone write lock for @rq, but only if the request needs one. */
static inline void blk_req_zone_write_lock(struct request *rq)
{
	if (!blk_req_needs_zone_write_lock(rq))
		return;

	__blk_req_zone_write_lock(rq);
}
1793
1794static inline void blk_req_zone_write_unlock(struct request *rq)
1795{
1796 if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
1797 __blk_req_zone_write_unlock(rq);
1798}
1799
1800static inline bool blk_req_zone_is_write_locked(struct request *rq)
1801{
1802 return rq->q->seq_zones_wlock &&
1803 test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock);
1804}
1805
1806static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
1807{
1808 if (!blk_req_needs_zone_write_lock(rq))
1809 return true;
1810 return !blk_req_zone_is_write_locked(rq);
1811}
1812#else
/* Zoned block device support compiled out: no request needs a zone lock. */
static inline bool blk_req_needs_zone_write_lock(struct request *rq)
{
	return false;
}
1817
/* Zoned block device support compiled out: locking is a no-op. */
static inline void blk_req_zone_write_lock(struct request *rq)
{
}
1821
/* Zoned block device support compiled out: unlocking is a no-op. */
static inline void blk_req_zone_write_unlock(struct request *rq)
{
}
/* Zoned block device support compiled out: no zone is ever write locked. */
static inline bool blk_req_zone_is_write_locked(struct request *rq)
{
	return false;
}
1829
/* Zoned block device support compiled out: dispatch is always allowed. */
static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
{
	return true;
}
1834#endif /* CONFIG_BLK_DEV_ZONED */
1835
1836#else /* CONFIG_BLOCK */
1837
1838struct block_device;
1839
1840/*
1841 * stubs for when the block layer is configured out
1842 */
1843#define buffer_heads_over_limit 0
1844
/* Block layer configured out: there are no block device pages. */
static inline long nr_blockdev_pages(void)
{
	return 0L;
}
1849
/* Block layer configured out: placeholder so users of struct blk_plug still build. */
struct blk_plug {
};
1852
/* Block layer configured out: plugging is a no-op. */
static inline void blk_start_plug(struct blk_plug *plug)
{
}
1856
/* Block layer configured out: unplugging is a no-op. */
static inline void blk_finish_plug(struct blk_plug *plug)
{
}
1860
/* Block layer configured out: there is never anything to flush. */
static inline void blk_flush_plug(struct task_struct *task)
{
}
1864
/* Block layer configured out: there is never anything to flush. */
static inline void blk_schedule_flush_plug(struct task_struct *task)
{
}
1868
1869
/* Block layer configured out: a task never has a plug needing a flush. */
static inline bool blk_needs_flush_plug(struct task_struct *tsk)
{
	return false;
}
1874
1875static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask)
1876{
1877 return 0;
1878}
1879
1880#endif /* CONFIG_BLOCK */
1881
1882static inline void blk_wake_io_task(struct task_struct *waiter)
1883{
1884 /*
1885 * If we're polling, the task itself is doing the completions. For
1886 * that case, we don't need to signal a wakeup, it's enough to just
1887 * mark us as RUNNING.
1888 */
1889 if (waiter == current)
1890 __set_current_state(TASK_RUNNING);
1891 else
1892 wake_up_process(waiter);
1893}
1894
1895#ifdef CONFIG_BLOCK
/*
 * Disk-level I/O accounting, implemented outside this header.  The value
 * returned by disk_start_io_acct() is what the bio wrappers in this header
 * later pass to disk_end_io_acct() as @start_time.
 */
unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
		unsigned int op);
void disk_end_io_acct(struct gendisk *disk, unsigned int op,
		unsigned long start_time);
1900
1901/**
1902 * bio_start_io_acct - start I/O accounting for bio based drivers
1903 * @bio: bio to start account for
1904 *
1905 * Returns the start time that should be passed back to bio_end_io_acct().
1906 */
1907static inline unsigned long bio_start_io_acct(struct bio *bio)
1908{
1909 return disk_start_io_acct(bio->bi_disk, bio_sectors(bio), bio_op(bio));
1910}
1911
1912/**
1913 * bio_end_io_acct - end I/O accounting for bio based drivers
1914 * @bio: bio to end account for
1915 * @start: start time returned by bio_start_io_acct()
1916 */
1917static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time)
1918{
1919 return disk_end_io_acct(bio->bi_disk, bio_op(bio), start_time);
1920}
1921#endif /* CONFIG_BLOCK */
1922
1923#endif