include/linux/blkdev.h at v5.0-rc2 · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / include / linux / blkdev.h
at v5.0-rc2 52 kB view raw
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#ifndef _LINUX_BLKDEV_H
   3#define _LINUX_BLKDEV_H
   4
   5#include <linux/sched.h>
   6#include <linux/sched/clock.h>
   7
   8#ifdef CONFIG_BLOCK
   9
  10#include <linux/major.h>
  11#include <linux/genhd.h>
  12#include <linux/list.h>
  13#include <linux/llist.h>
  14#include <linux/timer.h>
  15#include <linux/workqueue.h>
  16#include <linux/pagemap.h>
  17#include <linux/backing-dev-defs.h>
  18#include <linux/wait.h>
  19#include <linux/mempool.h>
  20#include <linux/pfn.h>
  21#include <linux/bio.h>
  22#include <linux/stringify.h>
  23#include <linux/gfp.h>
  24#include <linux/bsg.h>
  25#include <linux/smp.h>
  26#include <linux/rcupdate.h>
  27#include <linux/percpu-refcount.h>
  28#include <linux/scatterlist.h>
  29#include <linux/blkzoned.h>
  30
  31struct module;
  32struct scsi_ioctl_command;
  33
  34struct request_queue;
  35struct elevator_queue;
  36struct blk_trace;
  37struct request;
  38struct sg_io_hdr;
  39struct bsg_job;
  40struct blkcg_gq;
  41struct blk_flush_queue;
  42struct pr_ops;
  43struct rq_qos;
  44struct blk_queue_stats;
  45struct blk_stat_callback;
  46
/*
 * Request-count constants.
 * NOTE(review): presumably these bound request_queue.nr_requests; the users
 * live outside this header - confirm before relying on that.
 */
#define BLKDEV_MIN_RQ	4
#define BLKDEV_MAX_RQ	128	/* Default maximum */

/*
 * Number of entries in request_queue.poll_stat[].
 * Must be consistent with blk_mq_poll_stats_bkt()
 */
#define BLK_MQ_POLL_STATS_BKTS 16

/*
 * Maximum number of blkcg policies allowed to be registered concurrently.
 * Defined here to simplify include dependency.  It also sizes the
 * request_queue.blkcg_pols bitmap (CONFIG_BLK_CGROUP only).
 */
#define BLKCG_MAX_POLS		5
  58
/*
 * Completion callback type; struct request stores one in ->end_io and the
 * flush machinery keeps a saved copy in ->flush.saved_end_io.
 */
typedef void (rq_end_io_fn)(struct request *, blk_status_t);

/*
 * request flags
 *
 * Bitmask type of request->rq_flags.  Each RQF_* value below is a single
 * bit; bit 2 is not assigned in this list.
 */
typedef __u32 __bitwise req_flags_t;

/* elevator knows about this request */
#define RQF_SORTED		((__force req_flags_t)(1 << 0))
/* drive already may have started this one */
#define RQF_STARTED		((__force req_flags_t)(1 << 1))
/* may not be passed by ioscheduler */
#define RQF_SOFTBARRIER		((__force req_flags_t)(1 << 3))
/* request for flush sequence */
#define RQF_FLUSH_SEQ		((__force req_flags_t)(1 << 4))
/* merge of different types, fail separately */
#define RQF_MIXED_MERGE		((__force req_flags_t)(1 << 5))
/* track inflight for MQ */
#define RQF_MQ_INFLIGHT		((__force req_flags_t)(1 << 6))
/* don't call prep for this one */
#define RQF_DONTPREP		((__force req_flags_t)(1 << 7))
/* set for "ide_preempt" requests and also for requests for which the SCSI
   "quiesce" state must be ignored. */
#define RQF_PREEMPT		((__force req_flags_t)(1 << 8))
/* contains copies of user pages */
#define RQF_COPY_USER		((__force req_flags_t)(1 << 9))
/* vaguely specified driver internal error.  Ignored by the block layer */
#define RQF_FAILED		((__force req_flags_t)(1 << 10))
/* don't warn about errors */
#define RQF_QUIET		((__force req_flags_t)(1 << 11))
/* elevator private data attached */
#define RQF_ELVPRIV		((__force req_flags_t)(1 << 12))
/* account into disk and partition IO statistics */
#define RQF_IO_STAT		((__force req_flags_t)(1 << 13))
/* request came from our alloc pool */
#define RQF_ALLOCED		((__force req_flags_t)(1 << 14))
/* runtime pm request */
#define RQF_PM			((__force req_flags_t)(1 << 15))
/* on IO scheduler merge hash */
#define RQF_HASHED		((__force req_flags_t)(1 << 16))
/* track IO completion time */
#define RQF_STATS		((__force req_flags_t)(1 << 17))
/* Look at ->special_vec for the actual data payload instead of the
   bio chain. */
#define RQF_SPECIAL_PAYLOAD	((__force req_flags_t)(1 << 18))
/* The per-zone write lock is held for this request */
#define RQF_ZONE_WRITE_LOCKED	((__force req_flags_t)(1 << 19))
/* already slept for hybrid poll */
#define RQF_MQ_POLL_SLEPT	((__force req_flags_t)(1 << 20))
/* ->timeout has been called, don't expire again */
#define RQF_TIMED_OUT		((__force req_flags_t)(1 << 21))

/*
 * flags that prevent us from merging requests:
 * rq_mergeable() refuses any request that has one of these set.
 */
#define RQF_NOMERGE_FLAGS \
	(RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
 113
 114/*
 115 * Request state for blk-mq.
 116 */
 117enum mq_rq_state {
 118	MQ_RQ_IDLE		= 0,
 119	MQ_RQ_IN_FLIGHT		= 1,
 120	MQ_RQ_COMPLETE		= 2,
 121};
 122
/*
 * Descriptor for one block-layer request.
 *
 * Try to put the fields that are referenced together in the same cacheline.
 *
 * If you modify this structure, make sure to update blk_rq_init() and
 * especially blk_mq_rq_ctx_init() to take care of the added fields.
 */
struct request {
	struct request_queue *q;	/* queue used by the blk_rq_zone_*() helpers */
	struct blk_mq_ctx *mq_ctx;
	struct blk_mq_hw_ctx *mq_hctx;

	unsigned int cmd_flags;		/* op and common flags */
	req_flags_t rq_flags;		/* RQF_* bits */

	int internal_tag;

	/* the following two fields are internal, NEVER access directly */
	unsigned int __data_len;	/* total data len */
	int tag;
	sector_t __sector;		/* sector cursor */

	/* bio chain, linked through ->bi_next (see __rq_for_each_bio()) */
	struct bio *bio;
	struct bio *biotail;

	/* linkage used by list_entry_rq() */
	struct list_head queuelist;

	/*
	 * The hash is used inside the scheduler, and killed once the
	 * request reaches the dispatch list. The ipi_list is only used
	 * to queue the request for softirq completion, which is long
	 * after the request has been unhashed (and even removed from
	 * the dispatch list).
	 */
	union {
		struct hlist_node hash;	/* merge hash */
		struct list_head ipi_list;
	};

	/*
	 * The rb_node is only used inside the io scheduler, requests
	 * are pruned when moved to the dispatch queue. So let the
	 * completion_data share space with the rb_node.
	 *
	 * special_vec is the payload read by blk_rq_payload_bytes() when
	 * RQF_SPECIAL_PAYLOAD is set.
	 */
	union {
		struct rb_node rb_node;	/* sort/lookup */
		struct bio_vec special_vec;
		void *completion_data;
		int error_count; /* for legacy drivers, don't use */
	};

	/*
	 * Three pointers are available for the IO schedulers, if they need
	 * more they have to dynamically allocate it.  Flush requests are
	 * never put on the IO scheduler. So let the flush fields share
	 * space with the elevator data.
	 */
	union {
		struct {
			struct io_cq		*icq;
			void			*priv[2];
		} elv;

		struct {
			unsigned int		seq;
			struct list_head	list;
			rq_end_io_fn		*saved_end_io;
		} flush;
	};

	struct gendisk *rq_disk;
	struct hd_struct *part;
	/* Time that I/O was submitted to the kernel. */
	u64 start_time_ns;
	/* Time that I/O was submitted to the device. */
	u64 io_start_time_ns;

#ifdef CONFIG_BLK_WBT
	unsigned short wbt_flags;
#endif
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
	unsigned short throtl_size;
#endif

	/*
	 * Number of scatter-gather DMA addr+len pairs after
	 * physical address coalescing is performed.
	 */
	unsigned short nr_phys_segments;

#if defined(CONFIG_BLK_DEV_INTEGRITY)
	unsigned short nr_integrity_segments;
#endif

	unsigned short write_hint;
	unsigned short ioprio;	/* returned by req_get_ioprio() */

	void *special;		/* opaque pointer available for LLD use */

	unsigned int extra_len;	/* length of alignment and padding */

	enum mq_rq_state state;	/* one of the MQ_RQ_* values */
	refcount_t ref;

	unsigned int timeout;
	unsigned long deadline;

	/* csd and fifo_time share storage */
	union {
		struct __call_single_data csd;
		u64 fifo_time;
	};

	/*
	 * completion callback.
	 */
	rq_end_io_fn *end_io;
	void *end_io_data;

	/* for bidi; blk_bidi_rq() tests this against NULL */
	struct request *next_rq;
};
 243
 244static inline bool blk_op_is_scsi(unsigned int op)
 245{
 246	return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT;
 247}
 248
 249static inline bool blk_op_is_private(unsigned int op)
 250{
 251	return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
 252}
 253
 254static inline bool blk_rq_is_scsi(struct request *rq)
 255{
 256	return blk_op_is_scsi(req_op(rq));
 257}
 258
 259static inline bool blk_rq_is_private(struct request *rq)
 260{
 261	return blk_op_is_private(req_op(rq));
 262}
 263
 264static inline bool blk_rq_is_passthrough(struct request *rq)
 265{
 266	return blk_rq_is_scsi(rq) || blk_rq_is_private(rq);
 267}
 268
 269static inline bool bio_is_passthrough(struct bio *bio)
 270{
 271	unsigned op = bio_op(bio);
 272
 273	return blk_op_is_scsi(op) || blk_op_is_private(op);
 274}
 275
 276static inline unsigned short req_get_ioprio(struct request *req)
 277{
 278	return req->ioprio;
 279}
 280
 281#include <linux/elevator.h>
 282
 283struct blk_queue_ctx;
 284
/* Function type of the request_queue.make_request_fn hook. */
typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio);

struct bio_vec;
/* Function type of the request_queue.dma_drain_needed hook. */
typedef int (dma_drain_needed_fn)(struct request *);
 289
 290enum blk_eh_timer_return {
 291	BLK_EH_DONE,		/* drivers has completed the command */
 292	BLK_EH_RESET_TIMER,	/* reset timer and try again */
 293};
 294
 295enum blk_queue_state {
 296	Queue_down,
 297	Queue_up,
 298};
 299
/* Tag allocation policies. */
#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */

#define BLK_SCSI_MAX_CMDS	(256)
/*
 * BLK_SCSI_MAX_CMDS divided by the number of bits in a long, i.e. how many
 * longs a bitmap with one bit per command occupies.
 */
#define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
 305
 306/*
 307 * Zoned block device models (zoned limit).
 308 */
 309enum blk_zoned_model {
 310	BLK_ZONED_NONE,	/* Regular block device */
 311	BLK_ZONED_HA,	/* Host-aware zoned block device */
 312	BLK_ZONED_HM,	/* Host-managed zoned block device */
 313};
 314
/*
 * Per-queue I/O limits, embedded in struct request_queue as ->limits.
 * The *_sectors members are sector counts (see SECTOR_SHIFT).
 */
struct queue_limits {
	unsigned long		bounce_pfn;
	unsigned long		seg_boundary_mask;
	unsigned long		virt_boundary_mask;

	/* returned by blk_rq_get_max_sectors() for passthrough requests */
	unsigned int		max_hw_sectors;
	unsigned int		max_dev_sectors;
	/*
	 * When non-zero, blk_max_size_offset() caps a request so it does not
	 * cross a chunk boundary.  For zoned queues this is also the value
	 * returned by blk_queue_zone_sectors().
	 */
	unsigned int		chunk_sectors;
	/* default answer of blk_queue_get_max_sectors() */
	unsigned int		max_sectors;
	unsigned int		max_segment_size;
	unsigned int		physical_block_size;
	unsigned int		alignment_offset;
	unsigned int		io_min;
	unsigned int		io_opt;
	/* used for REQ_OP_DISCARD and REQ_OP_SECURE_ERASE */
	unsigned int		max_discard_sectors;
	unsigned int		max_hw_discard_sectors;
	/* used for REQ_OP_WRITE_SAME */
	unsigned int		max_write_same_sectors;
	/* used for REQ_OP_WRITE_ZEROES */
	unsigned int		max_write_zeroes_sectors;
	unsigned int		discard_granularity;
	unsigned int		discard_alignment;

	unsigned short		logical_block_size;
	unsigned short		max_segments;
	unsigned short		max_integrity_segments;
	unsigned short		max_discard_segments;

	unsigned char		misaligned;
	unsigned char		discard_misaligned;
	unsigned char		raid_partial_stripes_expensive;
	/* device model, read through blk_queue_zoned_model() */
	enum blk_zoned_model	zoned;
};
 346
 347#ifdef CONFIG_BLK_DEV_ZONED
 348
 349extern unsigned int blkdev_nr_zones(struct block_device *bdev);
 350extern int blkdev_report_zones(struct block_device *bdev,
 351			       sector_t sector, struct blk_zone *zones,
 352			       unsigned int *nr_zones, gfp_t gfp_mask);
 353extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
 354			      sector_t nr_sectors, gfp_t gfp_mask);
 355extern int blk_revalidate_disk_zones(struct gendisk *disk);
 356
 357extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
 358				     unsigned int cmd, unsigned long arg);
 359extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
 360				    unsigned int cmd, unsigned long arg);
 361
 362#else /* CONFIG_BLK_DEV_ZONED */
 363
 364static inline unsigned int blkdev_nr_zones(struct block_device *bdev)
 365{
 366	return 0;
 367}
 368
 369static inline int blk_revalidate_disk_zones(struct gendisk *disk)
 370{
 371	return 0;
 372}
 373
 374static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
 375					    fmode_t mode, unsigned int cmd,
 376					    unsigned long arg)
 377{
 378	return -ENOTTY;
 379}
 380
 381static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
 382					   fmode_t mode, unsigned int cmd,
 383					   unsigned long arg)
 384{
 385	return -ENOTTY;
 386}
 387
 388#endif /* CONFIG_BLK_DEV_ZONED */
 389
/*
 * Per-device request queue state: queue limits, flags, blk-mq contexts and
 * the optional (config-dependent) subsystem hooks.
 */
struct request_queue {
	/*
	 * Together with queue_head for cacheline sharing
	 */
	struct list_head	queue_head;
	struct request		*last_merge;
	struct elevator_queue	*elevator;

	struct blk_queue_stats	*stats;
	struct rq_qos		*rq_qos;

	make_request_fn		*make_request_fn;
	dma_drain_needed_fn	*dma_drain_needed;

	/* queue_is_mq() reports whether this pointer is set */
	const struct blk_mq_ops	*mq_ops;

	/* sw queues */
	struct blk_mq_ctx __percpu	*queue_ctx;
	unsigned int		nr_queues;

	/* when non-zero, blk_queue_depth() returns this, else nr_requests */
	unsigned int		queue_depth;

	/* hw dispatch queues */
	struct blk_mq_hw_ctx	**queue_hw_ctx;
	unsigned int		nr_hw_queues;

	struct backing_dev_info	*backing_dev_info;

	/*
	 * The queue owner gets to use this for whatever they like.
	 * ll_rw_blk doesn't touch it.
	 */
	void			*queuedata;

	/*
	 * various queue flags, see QUEUE_FLAG_* below (bit numbers, tested
	 * with test_bit() by the blk_queue_*() macros)
	 */
	unsigned long		queue_flags;
	/*
	 * Number of contexts that have called blk_set_pm_only(). If this
	 * counter is above zero then only RQF_PM and RQF_PREEMPT requests are
	 * processed.
	 */
	atomic_t		pm_only;

	/*
	 * ida allocated id for this queue.  Used to index queues from
	 * ioctx.
	 */
	int			id;

	/*
	 * queue needs bounce pages for pages above this limit
	 */
	gfp_t			bounce_gfp;

	spinlock_t		queue_lock;

	/*
	 * queue kobject
	 */
	struct kobject kobj;

	/*
	 * mq queue kobject
	 */
	struct kobject *mq_kobj;

#ifdef  CONFIG_BLK_DEV_INTEGRITY
	struct blk_integrity integrity;
#endif	/* CONFIG_BLK_DEV_INTEGRITY */

#ifdef CONFIG_PM
	struct device		*dev;
	int			rpm_status;
	unsigned int		nr_pending;
#endif

	/*
	 * queue settings
	 */
	unsigned long		nr_requests;	/* Max # of requests */

	unsigned int		dma_drain_size;
	void			*dma_drain_buffer;
	unsigned int		dma_pad_mask;
	unsigned int		dma_alignment;

	unsigned int		rq_timeout;
	int			poll_nsec;

	struct blk_stat_callback	*poll_cb;
	/* one entry per bucket, BLK_MQ_POLL_STATS_BKTS in total */
	struct blk_rq_stat	poll_stat[BLK_MQ_POLL_STATS_BKTS];

	struct timer_list	timeout;
	struct work_struct	timeout_work;

	struct list_head	icq_list;
#ifdef CONFIG_BLK_CGROUP
	DECLARE_BITMAP		(blkcg_pols, BLKCG_MAX_POLS);
	struct blkcg_gq		*root_blkg;
	struct list_head	blkg_list;
#endif

	struct queue_limits	limits;

#ifdef CONFIG_BLK_DEV_ZONED
	/*
	 * Zoned block device information for request dispatch control.
	 * nr_zones is the total number of zones of the device. This is always
	 * 0 for regular block devices. seq_zones_bitmap is a bitmap of nr_zones
	 * bits which indicates if a zone is conventional (bit clear) or
	 * sequential (bit set). seq_zones_wlock is a bitmap of nr_zones
	 * bits which indicates if a zone is write locked, that is, if a write
	 * request targeting the zone was dispatched. All three fields are
	 * initialized by the low level device driver (e.g. scsi/sd.c).
	 * Stacking drivers (device mappers) may or may not initialize
	 * these fields.
	 *
	 * Reads of this information must be protected with blk_queue_enter() /
	 * blk_queue_exit(). Modifying this information is only allowed while
	 * no requests are being processed. See also blk_mq_freeze_queue() and
	 * blk_mq_unfreeze_queue().
	 */
	unsigned int		nr_zones;
	unsigned long		*seq_zones_bitmap;
	unsigned long		*seq_zones_wlock;
#endif /* CONFIG_BLK_DEV_ZONED */

	/*
	 * sg stuff
	 */
	unsigned int		sg_timeout;
	unsigned int		sg_reserved_size;
	int			node;
#ifdef CONFIG_BLK_DEV_IO_TRACE
	struct blk_trace	*blk_trace;
	struct mutex		blk_trace_mutex;
#endif
	/*
	 * for flush operations
	 */
	struct blk_flush_queue	*fq;

	struct list_head	requeue_list;
	spinlock_t		requeue_lock;
	struct delayed_work	requeue_work;

	struct mutex		sysfs_lock;

	atomic_t		mq_freeze_depth;

#if defined(CONFIG_BLK_DEV_BSG)
	struct bsg_class_device bsg_dev;
#endif

#ifdef CONFIG_BLK_DEV_THROTTLING
	/* Throttle data */
	struct throtl_data *td;
#endif
	struct rcu_head		rcu_head;
	wait_queue_head_t	mq_freeze_wq;
	struct percpu_ref	q_usage_counter;
	struct list_head	all_q_node;

	struct blk_mq_tag_set	*tag_set;
	struct list_head	tag_set_list;
	struct bio_set		bio_split;

#ifdef CONFIG_BLK_DEBUG_FS
	struct dentry		*debugfs_dir;
	struct dentry		*sched_debugfs_dir;
	struct dentry		*rqos_debugfs_dir;
#endif

	bool			mq_sysfs_init_done;

	size_t			cmd_size;

	struct work_struct	release_work;

	/* number of entries in write_hints[] */
#define BLK_MAX_WRITE_HINTS	5
	u64			write_hints[BLK_MAX_WRITE_HINTS];
};
 574
/*
 * Bit numbers (not masks) within request_queue.queue_flags.
 * Numbers 0, 3 and 8 are not assigned in this list.
 */
#define QUEUE_FLAG_STOPPED	1	/* queue is stopped */
#define QUEUE_FLAG_DYING	2	/* queue being torn down */
#define QUEUE_FLAG_BIDI		4	/* queue supports bidi requests */
#define QUEUE_FLAG_NOMERGES     5	/* disable merge attempts */
#define QUEUE_FLAG_SAME_COMP	6	/* complete on same CPU-group */
#define QUEUE_FLAG_FAIL_IO	7	/* fake timeout */
#define QUEUE_FLAG_NONROT	9	/* non-rotational device (SSD) */
#define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
#define QUEUE_FLAG_IO_STAT     10	/* do disk/partitions IO accounting */
#define QUEUE_FLAG_DISCARD     11	/* supports DISCARD */
#define QUEUE_FLAG_NOXMERGES   12	/* No extended merges */
#define QUEUE_FLAG_ADD_RANDOM  13	/* Contributes to random pool */
#define QUEUE_FLAG_SECERASE    14	/* supports secure erase */
#define QUEUE_FLAG_SAME_FORCE  15	/* force complete on same CPU */
#define QUEUE_FLAG_DEAD        16	/* queue tear-down finished */
#define QUEUE_FLAG_INIT_DONE   17	/* queue is initialized */
#define QUEUE_FLAG_NO_SG_MERGE 18	/* don't attempt to merge SG segments*/
#define QUEUE_FLAG_POLL	       19	/* IO polling enabled if set */
#define QUEUE_FLAG_WC	       20	/* Write back caching */
#define QUEUE_FLAG_FUA	       21	/* device supports FUA writes */
#define QUEUE_FLAG_FLUSH_NQ    22	/* flush not queueable */
#define QUEUE_FLAG_DAX         23	/* device supports DAX */
#define QUEUE_FLAG_STATS       24	/* track IO start and completion times */
#define QUEUE_FLAG_POLL_STATS  25	/* collecting stats for hybrid polling */
#define QUEUE_FLAG_REGISTERED  26	/* queue has been registered to a disk */
#define QUEUE_FLAG_SCSI_PASSTHROUGH 27	/* queue supports SCSI commands */
#define QUEUE_FLAG_QUIESCED    28	/* queue has been quiesced */
#define QUEUE_FLAG_PCI_P2PDMA  29	/* device supports PCI p2p requests */

/*
 * Default flag sets, expressed as masks (1 << bit number).  The two differ
 * only in QUEUE_FLAG_ADD_RANDOM, which is absent from the MQ default.
 */
#define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
				 (1 << QUEUE_FLAG_ADD_RANDOM))

#define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
				 (1 << QUEUE_FLAG_SAME_COMP))
 610
 611void blk_queue_flag_set(unsigned int flag, struct request_queue *q);
 612void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
 613bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 614
/*
 * Queue flag predicates: each blk_queue_*() macro below tests one
 * QUEUE_FLAG_* bit of (q)->queue_flags with test_bit(), except
 * blk_queue_pm_only(), which reads the pm_only counter.
 */
#define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
#define blk_queue_dying(q)	test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
#define blk_queue_dead(q)	test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
#define blk_queue_init_done(q)	test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
#define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
#define blk_queue_noxmerges(q)	\
	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
#define blk_queue_nonrot(q)	test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
#define blk_queue_io_stat(q)	test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
#define blk_queue_add_random(q)	test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
#define blk_queue_discard(q)	test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
#define blk_queue_secure_erase(q) \
	(test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
#define blk_queue_dax(q)	test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
#define blk_queue_scsi_passthrough(q)	\
	test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
#define blk_queue_pci_p2pdma(q)	\
	test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)

/* Non-zero when any of the three REQ_FAILFAST_* bits is set in ->cmd_flags. */
#define blk_noretry_request(rq) \
	((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
			     REQ_FAILFAST_DRIVER))
#define blk_queue_quiesced(q)	test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
/* Current value of the pm_only counter (non-zero means "pm only"). */
#define blk_queue_pm_only(q)	atomic_read(&(q)->pm_only)
#define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
 640
 641extern void blk_set_pm_only(struct request_queue *q);
 642extern void blk_clear_pm_only(struct request_queue *q);
 643
 644static inline bool blk_account_rq(struct request *rq)
 645{
 646	return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq);
 647}
 648
/* True when the request has a second request chained through ->next_rq. */
#define blk_bidi_rq(rq)		((rq)->next_rq != NULL)

/* Convert a list_head embedded as ->queuelist back to its struct request. */
#define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)

/* WRITE when op_is_write() holds for the request's opcode, READ otherwise. */
#define rq_data_dir(rq)		(op_is_write(req_op(rq)) ? WRITE : READ)
 654
 655static inline bool queue_is_mq(struct request_queue *q)
 656{
 657	return q->mq_ops;
 658}
 659
 660static inline enum blk_zoned_model
 661blk_queue_zoned_model(struct request_queue *q)
 662{
 663	return q->limits.zoned;
 664}
 665
 666static inline bool blk_queue_is_zoned(struct request_queue *q)
 667{
 668	switch (blk_queue_zoned_model(q)) {
 669	case BLK_ZONED_HA:
 670	case BLK_ZONED_HM:
 671		return true;
 672	default:
 673		return false;
 674	}
 675}
 676
 677static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
 678{
 679	return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
 680}
 681
 682#ifdef CONFIG_BLK_DEV_ZONED
 683static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
 684{
 685	return blk_queue_is_zoned(q) ? q->nr_zones : 0;
 686}
 687
 688static inline unsigned int blk_queue_zone_no(struct request_queue *q,
 689					     sector_t sector)
 690{
 691	if (!blk_queue_is_zoned(q))
 692		return 0;
 693	return sector >> ilog2(q->limits.chunk_sectors);
 694}
 695
 696static inline bool blk_queue_zone_is_seq(struct request_queue *q,
 697					 sector_t sector)
 698{
 699	if (!blk_queue_is_zoned(q) || !q->seq_zones_bitmap)
 700		return false;
 701	return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap);
 702}
 703#else /* CONFIG_BLK_DEV_ZONED */
 704static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
 705{
 706	return 0;
 707}
 708#endif /* CONFIG_BLK_DEV_ZONED */
 709
 710static inline bool rq_is_sync(struct request *rq)
 711{
 712	return op_is_sync(rq->cmd_flags);
 713}
 714
 715static inline bool rq_mergeable(struct request *rq)
 716{
 717	if (blk_rq_is_passthrough(rq))
 718		return false;
 719
 720	if (req_op(rq) == REQ_OP_FLUSH)
 721		return false;
 722
 723	if (req_op(rq) == REQ_OP_WRITE_ZEROES)
 724		return false;
 725
 726	if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
 727		return false;
 728	if (rq->rq_flags & RQF_NOMERGE_FLAGS)
 729		return false;
 730
 731	return true;
 732}
 733
 734static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
 735{
 736	if (bio_page(a) == bio_page(b) &&
 737	    bio_offset(a) == bio_offset(b))
 738		return true;
 739
 740	return false;
 741}
 742
 743static inline unsigned int blk_queue_depth(struct request_queue *q)
 744{
 745	if (q->queue_depth)
 746		return q->queue_depth;
 747
 748	return q->nr_requests;
 749}
 750
 751extern unsigned long blk_max_low_pfn, blk_max_pfn;
 752
 753/*
 754 * standard bounce addresses:
 755 *
 756 * BLK_BOUNCE_HIGH	: bounce all highmem pages
 757 * BLK_BOUNCE_ANY	: don't bounce anything
 758 * BLK_BOUNCE_ISA	: bounce pages above ISA DMA boundary
 759 */
 760
 761#if BITS_PER_LONG == 32
 762#define BLK_BOUNCE_HIGH		((u64)blk_max_low_pfn << PAGE_SHIFT)
 763#else
 764#define BLK_BOUNCE_HIGH		-1ULL
 765#endif
 766#define BLK_BOUNCE_ANY		(-1ULL)
 767#define BLK_BOUNCE_ISA		(DMA_BIT_MASK(24))
 768
/*
 * default timeout for SG_IO if none specified
 *
 * Both values are multiples of HZ: 60 * HZ for the default and 7 * HZ for
 * the minimum.
 */
#define BLK_DEFAULT_SG_TIMEOUT	(60 * HZ)
#define BLK_MIN_SG_TIMEOUT	(7 * HZ)
 774
/*
 * Mapping descriptor accepted by blk_rq_map_user() and
 * blk_rq_map_user_iov().
 * NOTE(review): the field meanings are defined by those functions, which
 * are not visible in this header - confirm there before relying on them.
 */
struct rq_map_data {
	struct page **pages;
	int page_order;
	int nr_entries;
	unsigned long offset;
	int null_mapped;
	int from_user;
};
 783
/*
 * Cursor used by rq_for_each_segment(): ->bio is the bio currently being
 * walked and ->iter is the position inside that bio.
 */
struct req_iterator {
	struct bvec_iter iter;
	struct bio *bio;
};
 788
 789/* This should not be used directly - use rq_for_each_segment */
 790#define for_each_bio(_bio)		\
 791	for (; _bio; _bio = _bio->bi_next)
 792#define __rq_for_each_bio(_bio, rq)	\
 793	if ((rq->bio))			\
 794		for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
 795
 796#define rq_for_each_segment(bvl, _rq, _iter)			\
 797	__rq_for_each_bio(_iter.bio, _rq)			\
 798		bio_for_each_segment(bvl, _iter.bio, _iter.iter)
 799
 800#define rq_iter_last(bvec, _iter)				\
 801		(_iter.bio->bi_next == NULL &&			\
 802		 bio_iter_last(bvec, _iter.iter))
 803
 804#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
 805# error	"You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
 806#endif
 807#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
 808extern void rq_flush_dcache_pages(struct request *rq);
 809#else
 810static inline void rq_flush_dcache_pages(struct request *rq)
 811{
 812}
 813#endif
 814
 815extern int blk_register_queue(struct gendisk *disk);
 816extern void blk_unregister_queue(struct gendisk *disk);
 817extern blk_qc_t generic_make_request(struct bio *bio);
 818extern blk_qc_t direct_make_request(struct bio *bio);
 819extern void blk_rq_init(struct request_queue *q, struct request *rq);
 820extern void blk_init_request_from_bio(struct request *req, struct bio *bio);
 821extern void blk_put_request(struct request *);
 822extern struct request *blk_get_request(struct request_queue *, unsigned int op,
 823				       blk_mq_req_flags_t flags);
 824extern int blk_lld_busy(struct request_queue *q);
 825extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 826			     struct bio_set *bs, gfp_t gfp_mask,
 827			     int (*bio_ctr)(struct bio *, struct bio *, void *),
 828			     void *data);
 829extern void blk_rq_unprep_clone(struct request *rq);
 830extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
 831				     struct request *rq);
 832extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
 833extern void blk_queue_split(struct request_queue *, struct bio **);
 834extern void blk_recount_segments(struct request_queue *, struct bio *);
 835extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
 836extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
 837			      unsigned int, void __user *);
 838extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 839			  unsigned int, void __user *);
 840extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 841			 struct scsi_ioctl_command __user *);
 842
 843extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags);
 844extern void blk_queue_exit(struct request_queue *q);
 845extern void blk_sync_queue(struct request_queue *q);
 846extern int blk_rq_map_user(struct request_queue *, struct request *,
 847			   struct rq_map_data *, void __user *, unsigned long,
 848			   gfp_t);
 849extern int blk_rq_unmap_user(struct bio *);
 850extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
 851extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
 852			       struct rq_map_data *, const struct iov_iter *,
 853			       gfp_t);
 854extern void blk_execute_rq(struct request_queue *, struct gendisk *,
 855			  struct request *, int);
 856extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
 857				  struct request *, int, rq_end_io_fn *);
 858
 859int blk_status_to_errno(blk_status_t status);
 860blk_status_t errno_to_blk_status(int errno);
 861
 862int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin);
 863
 864static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 865{
 866	return bdev->bd_disk->queue;	/* this is never NULL */
 867}
 868
 869/*
 870 * The basic unit of block I/O is a sector. It is used in a number of contexts
 871 * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9
 872 * bytes. Variables of type sector_t represent an offset or size that is a
 873 * multiple of 512 bytes. Hence these two constants.
 874 */
/* Only defined here when no earlier definition is in effect. */
#ifndef SECTOR_SHIFT
#define SECTOR_SHIFT 9		/* log2 of the sector size in bytes */
#endif
#ifndef SECTOR_SIZE
#define SECTOR_SIZE (1 << SECTOR_SHIFT)	/* 512 when SECTOR_SHIFT is 9 */
#endif
 881
 882/*
 883 * blk_rq_pos()			: the current sector
 884 * blk_rq_bytes()		: bytes left in the entire request
 885 * blk_rq_cur_bytes()		: bytes left in the current segment
 886 * blk_rq_err_bytes()		: bytes left till the next error boundary
 887 * blk_rq_sectors()		: sectors left in the entire request
 888 * blk_rq_cur_sectors()		: sectors left in the current segment
 889 */
 890static inline sector_t blk_rq_pos(const struct request *rq)
 891{
 892	return rq->__sector;
 893}
 894
 895static inline unsigned int blk_rq_bytes(const struct request *rq)
 896{
 897	return rq->__data_len;
 898}
 899
 900static inline int blk_rq_cur_bytes(const struct request *rq)
 901{
 902	return rq->bio ? bio_cur_bytes(rq->bio) : 0;
 903}
 904
 905extern unsigned int blk_rq_err_bytes(const struct request *rq);
 906
 907static inline unsigned int blk_rq_sectors(const struct request *rq)
 908{
 909	return blk_rq_bytes(rq) >> SECTOR_SHIFT;
 910}
 911
 912static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 913{
 914	return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
 915}
 916
 917#ifdef CONFIG_BLK_DEV_ZONED
 918static inline unsigned int blk_rq_zone_no(struct request *rq)
 919{
 920	return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
 921}
 922
 923static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
 924{
 925	return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
 926}
 927#endif /* CONFIG_BLK_DEV_ZONED */
 928
 929/*
 930 * Some commands like WRITE SAME have a payload or data transfer size which
 931 * is different from the size of the request.  Any driver that supports such
 932 * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to
 933 * calculate the data transfer size.
 934 */
 935static inline unsigned int blk_rq_payload_bytes(struct request *rq)
 936{
 937	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
 938		return rq->special_vec.bv_len;
 939	return blk_rq_bytes(rq);
 940}
 941
 942static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
 943						     int op)
 944{
 945	if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
 946		return min(q->limits.max_discard_sectors,
 947			   UINT_MAX >> SECTOR_SHIFT);
 948
 949	if (unlikely(op == REQ_OP_WRITE_SAME))
 950		return q->limits.max_write_same_sectors;
 951
 952	if (unlikely(op == REQ_OP_WRITE_ZEROES))
 953		return q->limits.max_write_zeroes_sectors;
 954
 955	return q->limits.max_sectors;
 956}
 957
 958/*
 959 * Return maximum size of a request at given offset. Only valid for
 960 * file system requests.
 961 */
 962static inline unsigned int blk_max_size_offset(struct request_queue *q,
 963					       sector_t offset)
 964{
 965	if (!q->limits.chunk_sectors)
 966		return q->limits.max_sectors;
 967
 968	return min(q->limits.max_sectors, (unsigned int)(q->limits.chunk_sectors -
 969			(offset & (q->limits.chunk_sectors - 1))));
 970}
 971
 972static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
 973						  sector_t offset)
 974{
 975	struct request_queue *q = rq->q;
 976
 977	if (blk_rq_is_passthrough(rq))
 978		return q->limits.max_hw_sectors;
 979
 980	if (!q->limits.chunk_sectors ||
 981	    req_op(rq) == REQ_OP_DISCARD ||
 982	    req_op(rq) == REQ_OP_SECURE_ERASE)
 983		return blk_queue_get_max_sectors(q, req_op(rq));
 984
 985	return min(blk_max_size_offset(q, offset),
 986			blk_queue_get_max_sectors(q, req_op(rq)));
 987}
 988
 989static inline unsigned int blk_rq_count_bios(struct request *rq)
 990{
 991	unsigned int nr_bios = 0;
 992	struct bio *bio;
 993
 994	__rq_for_each_bio(bio, rq)
 995		nr_bios++;
 996
 997	return nr_bios;
 998}
 999
1000void blk_steal_bios(struct bio_list *list, struct request *rq);
1001
1002/*
1003 * Request completion related functions.
1004 *
1005 * blk_update_request() completes given number of bytes and updates
1006 * the request without completing it.
1007 *
1008 * blk_end_request() and friends.  __blk_end_request() must be called
1009 * with the request queue spinlock acquired.
1010 *
1011 * Several drivers define their own end_request and call
1012 * blk_end_request() for parts of the original function.
1013 * This prevents code duplication in drivers.
1014 */
1015extern bool blk_update_request(struct request *rq, blk_status_t error,
1016			       unsigned int nr_bytes);
1017extern void blk_end_request_all(struct request *rq, blk_status_t error);
1018extern bool __blk_end_request(struct request *rq, blk_status_t error,
1019			      unsigned int nr_bytes);
1020extern void __blk_end_request_all(struct request *rq, blk_status_t error);
1021extern bool __blk_end_request_cur(struct request *rq, blk_status_t error);
1022
1023extern void __blk_complete_request(struct request *);
1024extern void blk_abort_request(struct request *);
1025
1026/*
1027 * Access functions for manipulating queue properties
1028 */
1029extern void blk_cleanup_queue(struct request_queue *);
1030extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
1031extern void blk_queue_bounce_limit(struct request_queue *, u64);
1032extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
1033extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
1034extern void blk_queue_max_segments(struct request_queue *, unsigned short);
1035extern void blk_queue_max_discard_segments(struct request_queue *,
1036		unsigned short);
1037extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
1038extern void blk_queue_max_discard_sectors(struct request_queue *q,
1039		unsigned int max_discard_sectors);
1040extern void blk_queue_max_write_same_sectors(struct request_queue *q,
1041		unsigned int max_write_same_sectors);
/*
 * Setter for the write-zeroes sector limit of @q; defined outside this
 * header.  NOTE(review): presumably updates
 * q->limits.max_write_zeroes_sectors, the field read by
 * blk_queue_get_max_sectors() -- confirm against the definition.
 */
extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
		unsigned int max_write_zeroes_sectors);
1044extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
1045extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
1046extern void blk_queue_alignment_offset(struct request_queue *q,
1047				       unsigned int alignment);
1048extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
1049extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
1050extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
1051extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
1052extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
1053extern void blk_set_default_limits(struct queue_limits *lim);
1054extern void blk_set_stacking_limits(struct queue_limits *lim);
1055extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
1056			    sector_t offset);
1057extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
1058			    sector_t offset);
1059extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
1060			      sector_t offset);
1061extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
1062extern void blk_queue_dma_pad(struct request_queue *, unsigned int);
1063extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
1064extern int blk_queue_dma_drain(struct request_queue *q,
1065			       dma_drain_needed_fn *dma_drain_needed,
1066			       void *buf, unsigned int size);
1067extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
1068extern void blk_queue_virt_boundary(struct request_queue *, unsigned long);
1069extern void blk_queue_dma_alignment(struct request_queue *, int);
1070extern void blk_queue_update_dma_alignment(struct request_queue *, int);
1071extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
1072extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
1073extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
1074
1075/*
1076 * Number of physical segments as sent to the device.
1077 *
1078 * Normally this is the number of discontiguous data segments sent by the
1079 * submitter.  But for data-less command like discard we might have no
1080 * actual data segments submitted, but the driver might have to add it's
1081 * own special payload.  In that case we still return 1 here so that this
1082 * special payload will be mapped.
1083 */
1084static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
1085{
1086	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
1087		return 1;
1088	return rq->nr_phys_segments;
1089}
1090
1091/*
1092 * Number of discard segments (or ranges) the driver needs to fill in.
1093 * Each discard bio merged into a request is counted as one segment.
1094 */
1095static inline unsigned short blk_rq_nr_discard_segments(struct request *rq)
1096{
1097	return max_t(unsigned short, rq->nr_phys_segments, 1);
1098}
1099
1100extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
1101extern void blk_dump_rq_flags(struct request *, char *);
1102extern long nr_blockdev_pages(void);
1103
1104bool __must_check blk_get_queue(struct request_queue *);
1105struct request_queue *blk_alloc_queue(gfp_t);
1106struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id);
1107extern void blk_put_queue(struct request_queue *);
1108extern void blk_set_queue_dying(struct request_queue *);
1109
1110/*
1111 * blk_plug permits building a queue of related requests by holding the I/O
1112 * fragments for a short period. This allows merging of sequential requests
1113 * into single larger request. As the requests are moved from a per-task list to
1114 * the device's request_queue in a batch, this results in improved scalability
1115 * as the lock contention for request_queue lock is reduced.
1116 *
 * It is ok not to disable preemption when adding the request to the plug list
 * or when attempting a merge, because blk_schedule_flush_plug() will only flush
 * the plug list when the task sleeps by itself. For details, please see
 * schedule() where blk_schedule_flush_plug() is called.
1121 */
/* Plug state; the flush helpers in this header reach it through tsk->plug. */
struct blk_plug {
	struct list_head mq_list; /* blk-mq requests */
	struct list_head cb_list; /* md requires an unplug callback */
	/* NOTE(review): presumably the number of requests on mq_list -- confirm */
	unsigned short rq_count;
	/* NOTE(review): presumably set when mq_list spans several queues -- confirm */
	bool multiple_queues;
};
1128#define BLK_MAX_REQUEST_COUNT 16
1129#define BLK_PLUG_FLUSH_SIZE (128 * 1024)
1130
struct blk_plug_cb;
/* Callback type: receives the blk_plug_cb itself plus a bool argument. */
typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool);
/*
 * One registered plug callback.
 * NOTE(review): @list presumably links the entry onto blk_plug.cb_list and
 * @data presumably carries the pointer given to blk_check_plugged() --
 * confirm against the implementation.
 */
struct blk_plug_cb {
	struct list_head list;
	blk_plug_cb_fn callback;
	void *data;
};
1138extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug,
1139					     void *data, int size);
1140extern void blk_start_plug(struct blk_plug *);
1141extern void blk_finish_plug(struct blk_plug *);
1142extern void blk_flush_plug_list(struct blk_plug *, bool);
1143
1144static inline void blk_flush_plug(struct task_struct *tsk)
1145{
1146	struct blk_plug *plug = tsk->plug;
1147
1148	if (plug)
1149		blk_flush_plug_list(plug, false);
1150}
1151
1152static inline void blk_schedule_flush_plug(struct task_struct *tsk)
1153{
1154	struct blk_plug *plug = tsk->plug;
1155
1156	if (plug)
1157		blk_flush_plug_list(plug, true);
1158}
1159
1160static inline bool blk_needs_flush_plug(struct task_struct *tsk)
1161{
1162	struct blk_plug *plug = tsk->plug;
1163
1164	return plug &&
1165		 (!list_empty(&plug->mq_list) ||
1166		 !list_empty(&plug->cb_list));
1167}
1168
1169extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
1170extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
1171		sector_t nr_sects, gfp_t gfp_mask, struct page *page);
1172
1173#define BLKDEV_DISCARD_SECURE	(1 << 0)	/* issue a secure erase */
1174
1175extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
1176		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
1177extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
1178		sector_t nr_sects, gfp_t gfp_mask, int flags,
1179		struct bio **biop);
1180
1181#define BLKDEV_ZERO_NOUNMAP	(1 << 0)  /* do not free blocks */
1182#define BLKDEV_ZERO_NOFALLBACK	(1 << 1)  /* don't write explicit zeroes */
1183
1184extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
1185		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
1186		unsigned flags);
1187extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
1188		sector_t nr_sects, gfp_t gfp_mask, unsigned flags);
1189
1190static inline int sb_issue_discard(struct super_block *sb, sector_t block,
1191		sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
1192{
1193	return blkdev_issue_discard(sb->s_bdev,
1194				    block << (sb->s_blocksize_bits -
1195					      SECTOR_SHIFT),
1196				    nr_blocks << (sb->s_blocksize_bits -
1197						  SECTOR_SHIFT),
1198				    gfp_mask, flags);
1199}
1200static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
1201		sector_t nr_blocks, gfp_t gfp_mask)
1202{
1203	return blkdev_issue_zeroout(sb->s_bdev,
1204				    block << (sb->s_blocksize_bits -
1205					      SECTOR_SHIFT),
1206				    nr_blocks << (sb->s_blocksize_bits -
1207						  SECTOR_SHIFT),
1208				    gfp_mask, 0);
1209}
1210
1211extern int blk_verify_command(unsigned char *cmd, fmode_t mode);
1212
/*
 * Named constants for queue limit defaults.
 * NOTE(review): where each value is applied is not visible in this part of
 * the header; the units (sectors, bytes, mask) are inferred from the names.
 */
enum blk_default_limits {
	BLK_MAX_SEGMENTS	= 128,
	BLK_SAFE_MAX_SECTORS	= 255,
	BLK_DEF_MAX_SECTORS	= 2560,
	BLK_MAX_SEGMENT_SIZE	= 65536,
	BLK_SEG_BOUNDARY_MASK	= 0xFFFFFFFFUL,
};
1220
1221static inline unsigned long queue_segment_boundary(struct request_queue *q)
1222{
1223	return q->limits.seg_boundary_mask;
1224}
1225
1226static inline unsigned long queue_virt_boundary(struct request_queue *q)
1227{
1228	return q->limits.virt_boundary_mask;
1229}
1230
1231static inline unsigned int queue_max_sectors(struct request_queue *q)
1232{
1233	return q->limits.max_sectors;
1234}
1235
1236static inline unsigned int queue_max_hw_sectors(struct request_queue *q)
1237{
1238	return q->limits.max_hw_sectors;
1239}
1240
1241static inline unsigned short queue_max_segments(struct request_queue *q)
1242{
1243	return q->limits.max_segments;
1244}
1245
1246static inline unsigned short queue_max_discard_segments(struct request_queue *q)
1247{
1248	return q->limits.max_discard_segments;
1249}
1250
1251static inline unsigned int queue_max_segment_size(struct request_queue *q)
1252{
1253	return q->limits.max_segment_size;
1254}
1255
1256static inline unsigned short queue_logical_block_size(struct request_queue *q)
1257{
1258	int retval = 512;
1259
1260	if (q && q->limits.logical_block_size)
1261		retval = q->limits.logical_block_size;
1262
1263	return retval;
1264}
1265
/* queue_logical_block_size() applied to the queue of @bdev. */
static inline unsigned short bdev_logical_block_size(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	return queue_logical_block_size(q);
}
1270
1271static inline unsigned int queue_physical_block_size(struct request_queue *q)
1272{
1273	return q->limits.physical_block_size;
1274}
1275
/* queue_physical_block_size() applied to the queue of @bdev. */
static inline unsigned int bdev_physical_block_size(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	return queue_physical_block_size(q);
}
1280
1281static inline unsigned int queue_io_min(struct request_queue *q)
1282{
1283	return q->limits.io_min;
1284}
1285
/* queue_io_min() applied to the queue of @bdev (returned as int). */
static inline int bdev_io_min(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	return queue_io_min(q);
}
1290
1291static inline unsigned int queue_io_opt(struct request_queue *q)
1292{
1293	return q->limits.io_opt;
1294}
1295
/* queue_io_opt() applied to the queue of @bdev (returned as int). */
static inline int bdev_io_opt(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	return queue_io_opt(q);
}
1300
1301static inline int queue_alignment_offset(struct request_queue *q)
1302{
1303	if (q->limits.misaligned)
1304		return -1;
1305
1306	return q->limits.alignment_offset;
1307}
1308
1309static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector)
1310{
1311	unsigned int granularity = max(lim->physical_block_size, lim->io_min);
1312	unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT)
1313		<< SECTOR_SHIFT;
1314
1315	return (granularity + lim->alignment_offset - alignment) % granularity;
1316}
1317
1318static inline int bdev_alignment_offset(struct block_device *bdev)
1319{
1320	struct request_queue *q = bdev_get_queue(bdev);
1321
1322	if (q->limits.misaligned)
1323		return -1;
1324
1325	if (bdev != bdev->bd_contains)
1326		return bdev->bd_part->alignment_offset;
1327
1328	return q->limits.alignment_offset;
1329}
1330
1331static inline int queue_discard_alignment(struct request_queue *q)
1332{
1333	if (q->limits.discard_misaligned)
1334		return -1;
1335
1336	return q->limits.discard_alignment;
1337}
1338
1339static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
1340{
1341	unsigned int alignment, granularity, offset;
1342
1343	if (!lim->max_discard_sectors)
1344		return 0;
1345
1346	/* Why are these in bytes, not sectors? */
1347	alignment = lim->discard_alignment >> SECTOR_SHIFT;
1348	granularity = lim->discard_granularity >> SECTOR_SHIFT;
1349	if (!granularity)
1350		return 0;
1351
1352	/* Offset of the partition start in 'granularity' sectors */
1353	offset = sector_div(sector, granularity);
1354
1355	/* And why do we do this modulus *again* in blkdev_issue_discard()? */
1356	offset = (granularity + alignment - offset) % granularity;
1357
1358	/* Turn it back into bytes, gaah */
1359	return offset << SECTOR_SHIFT;
1360}
1361
1362static inline int bdev_discard_alignment(struct block_device *bdev)
1363{
1364	struct request_queue *q = bdev_get_queue(bdev);
1365
1366	if (bdev != bdev->bd_contains)
1367		return bdev->bd_part->discard_alignment;
1368
1369	return q->limits.discard_alignment;
1370}
1371
1372static inline unsigned int bdev_write_same(struct block_device *bdev)
1373{
1374	struct request_queue *q = bdev_get_queue(bdev);
1375
1376	if (q)
1377		return q->limits.max_write_same_sectors;
1378
1379	return 0;
1380}
1381
1382static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
1383{
1384	struct request_queue *q = bdev_get_queue(bdev);
1385
1386	if (q)
1387		return q->limits.max_write_zeroes_sectors;
1388
1389	return 0;
1390}
1391
1392static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)
1393{
1394	struct request_queue *q = bdev_get_queue(bdev);
1395
1396	if (q)
1397		return blk_queue_zoned_model(q);
1398
1399	return BLK_ZONED_NONE;
1400}
1401
1402static inline bool bdev_is_zoned(struct block_device *bdev)
1403{
1404	struct request_queue *q = bdev_get_queue(bdev);
1405
1406	if (q)
1407		return blk_queue_is_zoned(q);
1408
1409	return false;
1410}
1411
/* blk_queue_zone_sectors() for the queue of @bdev; 0 if it has no queue. */
static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (!q)
		return 0;

	return blk_queue_zone_sectors(q);
}
1420
1421static inline int queue_dma_alignment(struct request_queue *q)
1422{
1423	return q ? q->dma_alignment : 511;
1424}
1425
1426static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
1427				 unsigned int len)
1428{
1429	unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask;
1430	return !(addr & alignment) && !(len & alignment);
1431}
1432
/*
 * Number of bits needed to express a block size, i.e. log2(size) for a
 * power-of-two @size.  Assumes size > 256; anything smaller yields 9.
 */
static inline unsigned int blksize_bits(unsigned int size)
{
	unsigned int bits = 9;

	for (size >>= 1; size > 256; size >>= 1)
		bits++;

	return bits;
}
1443
1444static inline unsigned int block_size(struct block_device *bdev)
1445{
1446	return bdev->bd_block_size;
1447}
1448
1449static inline bool queue_flush_queueable(struct request_queue *q)
1450{
1451	return !test_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags);
1452}
1453
/* Holds the struct page pointer that put_dev_sector() hands to put_page(). */
typedef struct {struct page *v;} Sector;
1455
1456unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
1457
1458static inline void put_dev_sector(Sector p)
1459{
1460	put_page(p.v);
1461}
1462
1463int kblockd_schedule_work(struct work_struct *work);
1464int kblockd_schedule_work_on(int cpu, struct work_struct *work);
1465int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
1466
/* Declares the module alias "block-major-<major>-<minor>". */
#define MODULE_ALIAS_BLOCKDEV(major,minor) \
	MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
/* Declares the wildcard module alias "block-major-<major>-*". */
#define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
	MODULE_ALIAS("block-major-" __stringify(major) "-*")
1471
1472#if defined(CONFIG_BLK_DEV_INTEGRITY)
1473
/*
 * Integrity flag bits; each enumerator is a distinct single bit.
 * NOTE(review): the meaning of each flag is only inferable from its name
 * here -- confirm against the integrity code before relying on it.
 */
enum blk_integrity_flags {
	BLK_INTEGRITY_VERIFY		= 1 << 0,
	BLK_INTEGRITY_GENERATE		= 1 << 1,
	BLK_INTEGRITY_DEVICE_CAPABLE	= 1 << 2,
	BLK_INTEGRITY_IP_CHECKSUM	= 1 << 3,
};
1480
/*
 * Argument block passed to integrity_processing_fn callbacks.
 * NOTE(review): field meanings below are inferred from names only
 * (protection buffer, data buffer, starting seed, data length, interval
 * size, disk name) -- confirm against the generate/verify implementations.
 */
struct blk_integrity_iter {
	void			*prot_buf;
	void			*data_buf;
	sector_t		seed;
	unsigned int		data_size;
	unsigned short		interval;
	const char		*disk_name;
};
1489
/* Callback type: processes one blk_integrity_iter and returns a blk_status_t. */
typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);

/*
 * A named pair of integrity callbacks.  blk_get_integrity() treats a NULL
 * profile pointer in struct blk_integrity as "no integrity support".
 */
struct blk_integrity_profile {
	integrity_processing_fn		*generate_fn;
	integrity_processing_fn		*verify_fn;
	const char			*name;
};
1497
1498extern void blk_integrity_register(struct gendisk *, struct blk_integrity *);
1499extern void blk_integrity_unregister(struct gendisk *);
1500extern int blk_integrity_compare(struct gendisk *, struct gendisk *);
1501extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *,
1502				   struct scatterlist *);
1503extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
1504extern bool blk_integrity_merge_rq(struct request_queue *, struct request *,
1505				   struct request *);
1506extern bool blk_integrity_merge_bio(struct request_queue *, struct request *,
1507				    struct bio *);
1508
1509static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
1510{
1511	struct blk_integrity *bi = &disk->queue->integrity;
1512
1513	if (!bi->profile)
1514		return NULL;
1515
1516	return bi;
1517}
1518
1519static inline
1520struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
1521{
1522	return blk_get_integrity(bdev->bd_disk);
1523}
1524
1525static inline bool blk_integrity_rq(struct request *rq)
1526{
1527	return rq->cmd_flags & REQ_INTEGRITY;
1528}
1529
1530static inline void blk_queue_max_integrity_segments(struct request_queue *q,
1531						    unsigned int segs)
1532{
1533	q->limits.max_integrity_segments = segs;
1534}
1535
1536static inline unsigned short
1537queue_max_integrity_segments(struct request_queue *q)
1538{
1539	return q->limits.max_integrity_segments;
1540}
1541
1542/**
1543 * bio_integrity_intervals - Return number of integrity intervals for a bio
1544 * @bi:		blk_integrity profile for device
1545 * @sectors:	Size of the bio in 512-byte sectors
1546 *
1547 * Description: The block layer calculates everything in 512 byte
1548 * sectors but integrity metadata is done in terms of the data integrity
1549 * interval size of the storage device.  Convert the block layer sectors
1550 * to the appropriate number of integrity intervals.
1551 */
1552static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
1553						   unsigned int sectors)
1554{
1555	return sectors >> (bi->interval_exp - 9);
1556}
1557
1558static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
1559					       unsigned int sectors)
1560{
1561	return bio_integrity_intervals(bi, sectors) * bi->tuple_size;
1562}
1563
1564#else /* CONFIG_BLK_DEV_INTEGRITY */
1565
1566struct bio;
1567struct block_device;
1568struct gendisk;
1569struct blk_integrity;
1570
/*
 * Stubs used when CONFIG_BLK_DEV_INTEGRITY is not set.  Query helpers
 * return 0 or NULL, setters do nothing, and the merge checks return true.
 */
static inline int blk_integrity_rq(struct request *rq)
{
	return 0;
}
static inline int blk_rq_count_integrity_sg(struct request_queue *q,
					    struct bio *b)
{
	return 0;
}
static inline int blk_rq_map_integrity_sg(struct request_queue *q,
					  struct bio *b,
					  struct scatterlist *s)
{
	return 0;
}
static inline struct blk_integrity *bdev_get_integrity(struct block_device *b)
{
	return NULL;
}
static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
{
	return NULL;
}
static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b)
{
	return 0;
}
static inline void blk_integrity_register(struct gendisk *d,
					 struct blk_integrity *b)
{
}
static inline void blk_integrity_unregister(struct gendisk *d)
{
}
static inline void blk_queue_max_integrity_segments(struct request_queue *q,
						    unsigned int segs)
{
}
static inline unsigned short queue_max_integrity_segments(struct request_queue *q)
{
	return 0;
}
/* Always true here, so these checks never veto a merge. */
static inline bool blk_integrity_merge_rq(struct request_queue *rq,
					  struct request *r1,
					  struct request *r2)
{
	return true;
}
static inline bool blk_integrity_merge_bio(struct request_queue *rq,
					   struct request *r,
					   struct bio *b)
{
	return true;
}

static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
						   unsigned int sectors)
{
	return 0;
}

static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
					       unsigned int sectors)
{
	return 0;
}
1637
1638#endif /* CONFIG_BLK_DEV_INTEGRITY */
1639
/*
 * Table of callbacks a block driver supplies for its device, plus the
 * owning module and persistent-reservation ops.
 * NOTE(review): which callbacks may be left NULL is not visible in this
 * header -- check the call sites before omitting one.
 */
struct block_device_operations {
	int (*open) (struct block_device *, fmode_t);
	void (*release) (struct gendisk *, fmode_t);
	int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int);
	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
	unsigned int (*check_events) (struct gendisk *disk,
				      unsigned int clearing);
	/* ->media_changed() is DEPRECATED, use ->check_events() instead */
	int (*media_changed) (struct gendisk *);
	void (*unlock_native_capacity) (struct gendisk *);
	int (*revalidate_disk) (struct gendisk *);
	int (*getgeo)(struct block_device *, struct hd_geometry *);
	/* this callback is with swap_lock and sometimes page table lock held */
	void (*swap_slot_free_notify) (struct block_device *, unsigned long);
	int (*report_zones)(struct gendisk *, sector_t sector,
			    struct blk_zone *zones, unsigned int *nr_zones,
			    gfp_t gfp_mask);
	struct module *owner;
	const struct pr_ops *pr_ops;
};
1661
1662extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
1663				 unsigned long);
1664extern int bdev_read_page(struct block_device *, sector_t, struct page *);
1665extern int bdev_write_page(struct block_device *, sector_t, struct page *,
1666						struct writeback_control *);
1667
1668#ifdef CONFIG_BLK_DEV_ZONED
1669bool blk_req_needs_zone_write_lock(struct request *rq);
1670void __blk_req_zone_write_lock(struct request *rq);
1671void __blk_req_zone_write_unlock(struct request *rq);
1672
/*
 * Call __blk_req_zone_write_lock() for @rq, but only when
 * blk_req_needs_zone_write_lock() says the request needs it.
 */
static inline void blk_req_zone_write_lock(struct request *rq)
{
	if (!blk_req_needs_zone_write_lock(rq))
		return;

	__blk_req_zone_write_lock(rq);
}
1678
1679static inline void blk_req_zone_write_unlock(struct request *rq)
1680{
1681	if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
1682		__blk_req_zone_write_unlock(rq);
1683}
1684
1685static inline bool blk_req_zone_is_write_locked(struct request *rq)
1686{
1687	return rq->q->seq_zones_wlock &&
1688		test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock);
1689}
1690
1691static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
1692{
1693	if (!blk_req_needs_zone_write_lock(rq))
1694		return true;
1695	return !blk_req_zone_is_write_locked(rq);
1696}
1697#else
/*
 * Stubs used when CONFIG_BLK_DEV_ZONED is not set: no request needs or
 * holds a zone write lock, and dispatch is always allowed.
 */
static inline bool blk_req_needs_zone_write_lock(struct request *rq)
{
	return false;
}

static inline void blk_req_zone_write_lock(struct request *rq)
{
}

static inline void blk_req_zone_write_unlock(struct request *rq)
{
}
static inline bool blk_req_zone_is_write_locked(struct request *rq)
{
	return false;
}

static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
{
	return true;
}
1719#endif /* CONFIG_BLK_DEV_ZONED */
1720
1721#else /* CONFIG_BLOCK */
1722
1723struct block_device;
1724
1725/*
1726 * stubs for when the block layer is configured out
1727 */
1728#define buffer_heads_over_limit 0
1729
/* With the block layer configured out, this reports zero pages. */
static inline long nr_blockdev_pages(void)
{
	return 0;
}

/* Empty placeholder so that code declaring a struct blk_plug still compiles. */
struct blk_plug {
};

/* The plugging helpers below are no-ops in this configuration. */
static inline void blk_start_plug(struct blk_plug *plug)
{
}

static inline void blk_finish_plug(struct blk_plug *plug)
{
}

static inline void blk_flush_plug(struct task_struct *task)
{
}

static inline void blk_schedule_flush_plug(struct task_struct *task)
{
}


static inline bool blk_needs_flush_plug(struct task_struct *tsk)
{
	return false;
}

/* Returns 0 without doing anything; @error_sector is left untouched. */
static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
				     sector_t *error_sector)
{
	return 0;
}
1765
1766#endif /* CONFIG_BLOCK */
1767
1768static inline void blk_wake_io_task(struct task_struct *waiter)
1769{
1770	/*
1771	 * If we're polling, the task itself is doing the completions. For
1772	 * that case, we don't need to signal a wakeup, it's enough to just
1773	 * mark us as RUNNING.
1774	 */
1775	if (waiter == current)
1776		__set_current_state(TASK_RUNNING);
1777	else
1778		wake_up_process(waiter);
1779}
1780
1781#endif