Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

rust: block: introduce `kernel::block::mq` module

Add initial abstractions for working with blk-mq.

This patch is a maintained, refactored subset of code originally published
by Wedson Almeida Filho <wedsonaf@gmail.com> [1].

[1] https://github.com/wedsonaf/linux/tree/f2cfd2fe0e2ca4e90994f96afe268bbd4382a891/rust/kernel/blk/mq.rs

Cc: Wedson Almeida Filho <wedsonaf@gmail.com>
Signed-off-by: Andreas Hindborg <a.hindborg@samsung.com>
Reviewed-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/r/20240611114551.228679-2-nmi@metaspace.dk
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Andreas Hindborg and committed by
Jens Axboe
3253aba3 c2670cf7

+984
+3
rust/bindings/bindings_helper.h
··· 7 7 */ 8 8 9 9 #include <kunit/test.h> 10 + #include <linux/blk_types.h> 11 + #include <linux/blk-mq.h> 10 12 #include <linux/errname.h> 11 13 #include <linux/ethtool.h> 12 14 #include <linux/jiffies.h> ··· 22 20 23 21 /* `bindgen` gets confused at certain things. */ 24 22 const size_t RUST_CONST_HELPER_ARCH_SLAB_MINALIGN = ARCH_SLAB_MINALIGN; 23 + const size_t RUST_CONST_HELPER_PAGE_SIZE = PAGE_SIZE; 25 24 const gfp_t RUST_CONST_HELPER_GFP_ATOMIC = GFP_ATOMIC; 26 25 const gfp_t RUST_CONST_HELPER_GFP_KERNEL = GFP_KERNEL; 27 26 const gfp_t RUST_CONST_HELPER_GFP_KERNEL_ACCOUNT = GFP_KERNEL_ACCOUNT;
+16
rust/helpers.c
··· 186 186 __alignof__(size_t) == __alignof__(uintptr_t), 187 187 "Rust code expects C `size_t` to match Rust `usize`" 188 188 ); 189 + 190 + // This will soon be moved to a separate file, so no need to merge with above. 191 + #include <linux/blk-mq.h> 192 + #include <linux/blkdev.h> 193 + 194 + void *rust_helper_blk_mq_rq_to_pdu(struct request *rq) 195 + { 196 + return blk_mq_rq_to_pdu(rq); 197 + } 198 + EXPORT_SYMBOL_GPL(rust_helper_blk_mq_rq_to_pdu); 199 + 200 + struct request *rust_helper_blk_mq_rq_from_pdu(void *pdu) 201 + { 202 + return blk_mq_rq_from_pdu(pdu); 203 + } 204 + EXPORT_SYMBOL_GPL(rust_helper_blk_mq_rq_from_pdu);
+5
rust/kernel/block.rs
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + //! Types for working with the block layer. 4 + 5 + pub mod mq;
+98
rust/kernel/block/mq.rs
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + //! This module provides types for implementing block drivers that interface the 4 + //! blk-mq subsystem. 5 + //! 6 + //! To implement a block device driver, a Rust module must do the following: 7 + //! 8 + //! - Implement [`Operations`] for a type `T`. 9 + //! - Create a [`TagSet<T>`]. 10 + //! - Create a [`GenDisk<T>`], via the [`GenDiskBuilder`]. 11 + //! - Add the disk to the system by calling [`GenDiskBuilder::build`] passing in 12 + //! the `TagSet` reference. 13 + //! 14 + //! The types available in this module that have direct C counterparts are: 15 + //! 16 + //! - The [`TagSet`] type that abstracts the C type `struct tag_set`. 17 + //! - The [`GenDisk`] type that abstracts the C type `struct gendisk`. 18 + //! - The [`Request`] type that abstracts the C type `struct request`. 19 + //! 20 + //! The kernel will interface with the block device driver by calling the method 21 + //! implementations of the `Operations` trait. 22 + //! 23 + //! IO requests are passed to the driver as [`kernel::types::ARef<Request>`] 24 + //! instances. The `Request` type is a wrapper around the C `struct request`. 25 + //! The driver must mark end of processing by calling one of the 26 + //! `Request::end`, methods. Failure to do so can lead to deadlock or timeout 27 + //! errors. Please note that the C function `blk_mq_start_request` is implicitly 28 + //! called when the request is queued with the driver. 29 + //! 30 + //! The `TagSet` is responsible for creating and maintaining a mapping between 31 + //! `Request`s and integer ids as well as carrying a pointer to the vtable 32 + //! generated by `Operations`. This mapping is useful for associating 33 + //! completions from hardware with the correct `Request` instance. The `TagSet` 34 + //! determines the maximum queue depth by setting the number of `Request` 35 + //! instances available to the driver, and it determines the number of queues to 36 + //! 
instantiate for the driver. If possible, a driver should allocate one queue 37 + //! per core, to keep queue data local to a core. 38 + //! 39 + //! One `TagSet` instance can be shared between multiple `GenDisk` instances. 40 + //! This can be useful when implementing drivers where one piece of hardware 41 + //! with one set of IO resources are represented to the user as multiple disks. 42 + //! 43 + //! One significant difference between block device drivers implemented with 44 + //! these Rust abstractions and drivers implemented in C, is that the Rust 45 + //! drivers have to own a reference count on the `Request` type when the IO is 46 + //! in flight. This is to ensure that the C `struct request` instances backing 47 + //! the Rust `Request` instances are live while the Rust driver holds a 48 + //! reference to the `Request`. In addition, the conversion of an integer tag to 49 + //! a `Request` via the `TagSet` would not be sound without this bookkeeping. 50 + //! 51 + //! [`GenDisk`]: gen_disk::GenDisk 52 + //! [`GenDisk<T>`]: gen_disk::GenDisk 53 + //! [`GenDiskBuilder`]: gen_disk::GenDiskBuilder 54 + //! [`GenDiskBuilder::build`]: gen_disk::GenDiskBuilder::build 55 + //! 56 + //! # Example 57 + //! 58 + //! ```rust 59 + //! use kernel::{ 60 + //! alloc::flags, 61 + //! block::mq::*, 62 + //! new_mutex, 63 + //! prelude::*, 64 + //! sync::{Arc, Mutex}, 65 + //! types::{ARef, ForeignOwnable}, 66 + //! }; 67 + //! 68 + //! struct MyBlkDevice; 69 + //! 70 + //! #[vtable] 71 + //! impl Operations for MyBlkDevice { 72 + //! 73 + //! fn queue_rq(rq: ARef<Request<Self>>, _is_last: bool) -> Result { 74 + //! Request::end_ok(rq); 75 + //! Ok(()) 76 + //! } 77 + //! 78 + //! fn commit_rqs() {} 79 + //! } 80 + //! 81 + //! let tagset: Arc<TagSet<MyBlkDevice>> = 82 + //! Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?; 83 + //! let mut disk = gen_disk::GenDiskBuilder::new() 84 + //! .capacity_sectors(4096) 85 + //! 
.build(format_args!("myblk"), tagset)?; 86 + //! 87 + //! # Ok::<(), kernel::error::Error>(()) 88 + //! ``` 89 + 90 + pub mod gen_disk; 91 + mod operations; 92 + mod raw_writer; 93 + mod request; 94 + mod tag_set; 95 + 96 + pub use operations::Operations; 97 + pub use request::Request; 98 + pub use tag_set::TagSet;
+215
rust/kernel/block/mq/gen_disk.rs
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + //! Generic disk abstraction. 4 + //! 5 + //! C header: [`include/linux/blkdev.h`](srctree/include/linux/blkdev.h) 6 + //! C header: [`include/linux/blk_mq.h`](srctree/include/linux/blk_mq.h) 7 + 8 + use crate::block::mq::{raw_writer::RawWriter, Operations, TagSet}; 9 + use crate::error; 10 + use crate::{bindings, error::from_err_ptr, error::Result, sync::Arc}; 11 + use core::fmt::{self, Write}; 12 + 13 + /// A builder for [`GenDisk`]. 14 + /// 15 + /// Use this struct to configure and add new [`GenDisk`] to the VFS. 16 + pub struct GenDiskBuilder { 17 + rotational: bool, 18 + logical_block_size: u32, 19 + physical_block_size: u32, 20 + capacity_sectors: u64, 21 + } 22 + 23 + impl Default for GenDiskBuilder { 24 + fn default() -> Self { 25 + Self { 26 + rotational: false, 27 + logical_block_size: bindings::PAGE_SIZE as u32, 28 + physical_block_size: bindings::PAGE_SIZE as u32, 29 + capacity_sectors: 0, 30 + } 31 + } 32 + } 33 + 34 + impl GenDiskBuilder { 35 + /// Create a new instance. 36 + pub fn new() -> Self { 37 + Self::default() 38 + } 39 + 40 + /// Set the rotational media attribute for the device to be built. 41 + pub fn rotational(mut self, rotational: bool) -> Self { 42 + self.rotational = rotational; 43 + self 44 + } 45 + 46 + /// Validate block size by verifying that it is between 512 and `PAGE_SIZE`, 47 + /// and that it is a power of two. 48 + fn validate_block_size(size: u32) -> Result<()> { 49 + if !(512..=bindings::PAGE_SIZE as u32).contains(&size) || !size.is_power_of_two() { 50 + Err(error::code::EINVAL) 51 + } else { 52 + Ok(()) 53 + } 54 + } 55 + 56 + /// Set the logical block size of the device to be built. 57 + /// 58 + /// This method will check that block size is a power of two and between 512 59 + /// and 4096. If not, an error is returned and the block size is not set. 60 + /// 61 + /// This is the smallest unit the storage device can address. It is 62 + /// typically 4096 bytes. 
63 + pub fn logical_block_size(mut self, block_size: u32) -> Result<Self> { 64 + Self::validate_block_size(block_size)?; 65 + self.logical_block_size = block_size; 66 + Ok(self) 67 + } 68 + 69 + /// Set the physical block size of the device to be built. 70 + /// 71 + /// This method will check that block size is a power of two and between 512 72 + /// and `PAGE_SIZE`. If not, an error is returned and the block size is not set. 73 + /// 74 + /// This is the smallest unit a physical storage device can write 75 + /// atomically. It is usually the same as the logical block size but may be 76 + /// bigger. One example is SATA drives with 4096 byte physical block size 77 + /// that expose a 512 byte logical block size to the operating system. 78 + pub fn physical_block_size(mut self, block_size: u32) -> Result<Self> { 79 + Self::validate_block_size(block_size)?; 80 + self.physical_block_size = block_size; 81 + Ok(self) 82 + } 83 + 84 + /// Set the capacity of the device to be built, in sectors (512 bytes). 85 + pub fn capacity_sectors(mut self, capacity: u64) -> Self { 86 + self.capacity_sectors = capacity; 87 + self 88 + } 89 + 90 + /// Build a new `GenDisk` and add it to the VFS.
91 + pub fn build<T: Operations>( 92 + self, 93 + name: fmt::Arguments<'_>, 94 + tagset: Arc<TagSet<T>>, 95 + ) -> Result<GenDisk<T>> { 96 + let lock_class_key = crate::sync::LockClassKey::new(); 97 + 98 + // SAFETY: `tagset.raw_tag_set()` points to a valid and initialized tag set 99 + let gendisk = from_err_ptr(unsafe { 100 + bindings::__blk_mq_alloc_disk( 101 + tagset.raw_tag_set(), 102 + core::ptr::null_mut(), // TODO: We can pass queue limits right here 103 + core::ptr::null_mut(), 104 + lock_class_key.as_ptr(), 105 + ) 106 + })?; 107 + 108 + const TABLE: bindings::block_device_operations = bindings::block_device_operations { 109 + submit_bio: None, 110 + open: None, 111 + release: None, 112 + ioctl: None, 113 + compat_ioctl: None, 114 + check_events: None, 115 + unlock_native_capacity: None, 116 + getgeo: None, 117 + set_read_only: None, 118 + swap_slot_free_notify: None, 119 + report_zones: None, 120 + devnode: None, 121 + alternative_gpt_sector: None, 122 + get_unique_id: None, 123 + // TODO: Set to THIS_MODULE. Waiting for const_refs_to_static feature to 124 + // be merged (unstable in rustc 1.78 which is staged for linux 6.10) 125 + // https://github.com/rust-lang/rust/issues/119618 126 + owner: core::ptr::null_mut(), 127 + pr_ops: core::ptr::null_mut(), 128 + free_disk: None, 129 + poll_bio: None, 130 + }; 131 + 132 + // SAFETY: `gendisk` is a valid pointer as we initialized it above 133 + unsafe { (*gendisk).fops = &TABLE }; 134 + 135 + let mut raw_writer = RawWriter::from_array( 136 + // SAFETY: `gendisk` points to a valid and initialized instance. We 137 + // have exclusive access, since the disk is not added to the VFS 138 + // yet. 139 + unsafe { &mut (*gendisk).disk_name }, 140 + )?; 141 + raw_writer.write_fmt(name)?; 142 + raw_writer.write_char('\0')?; 143 + 144 + // SAFETY: `gendisk` points to a valid and initialized instance of 145 + // `struct gendisk`. We have exclusive access, so we cannot race. 
146 + unsafe { 147 + bindings::blk_queue_logical_block_size((*gendisk).queue, self.logical_block_size) 148 + }; 149 + 150 + // SAFETY: `gendisk` points to a valid and initialized instance of 151 + // `struct gendisk`. We have exclusive access, so we cannot race. 152 + unsafe { 153 + bindings::blk_queue_physical_block_size((*gendisk).queue, self.physical_block_size) 154 + }; 155 + 156 + // SAFETY: `gendisk` points to a valid and initialized instance of 157 + // `struct gendisk`. `set_capacity` takes a lock to synchronize this 158 + // operation, so we will not race. 159 + unsafe { bindings::set_capacity(gendisk, self.capacity_sectors) }; 160 + 161 + if !self.rotational { 162 + // SAFETY: `gendisk` points to a valid and initialized instance of 163 + // `struct gendisk`. This operation uses a relaxed atomic bit flip 164 + // operation, so there is no race on this field. 165 + unsafe { bindings::blk_queue_flag_set(bindings::QUEUE_FLAG_NONROT, (*gendisk).queue) }; 166 + } else { 167 + // SAFETY: `gendisk` points to a valid and initialized instance of 168 + // `struct gendisk`. This operation uses a relaxed atomic bit flip 169 + // operation, so there is no race on this field. 170 + unsafe { 171 + bindings::blk_queue_flag_clear(bindings::QUEUE_FLAG_NONROT, (*gendisk).queue) 172 + }; 173 + } 174 + 175 + crate::error::to_result( 176 + // SAFETY: `gendisk` points to a valid and initialized instance of 177 + // `struct gendisk`. 178 + unsafe { 179 + bindings::device_add_disk(core::ptr::null_mut(), gendisk, core::ptr::null_mut()) 180 + }, 181 + )?; 182 + 183 + // INVARIANT: `gendisk` was initialized above. 184 + // INVARIANT: `gendisk` was added to the VFS via `device_add_disk` above. 185 + Ok(GenDisk { 186 + _tagset: tagset, 187 + gendisk, 188 + }) 189 + } 190 + } 191 + 192 + /// A generic block device. 193 + /// 194 + /// # Invariants 195 + /// 196 + /// - `gendisk` must always point to an initialized and valid `struct gendisk`. 
197 + /// - `gendisk` was added to the VFS through a call to 198 + /// `bindings::device_add_disk`. 199 + pub struct GenDisk<T: Operations> { 200 + _tagset: Arc<TagSet<T>>, 201 + gendisk: *mut bindings::gendisk, 202 + } 203 + 204 + // SAFETY: `GenDisk` is an owned pointer to a `struct gendisk` and an `Arc` to a 205 + // `TagSet`. It is safe to send this to other threads as long as `T` is `Send`. 206 + unsafe impl<T: Operations + Send> Send for GenDisk<T> {} 207 + 208 + impl<T: Operations> Drop for GenDisk<T> { 209 + fn drop(&mut self) { 210 + // SAFETY: By type invariant, `self.gendisk` points to a valid and 211 + // initialized instance of `struct gendisk`, and it was previously added 212 + // to the VFS. 213 + unsafe { bindings::del_gendisk(self.gendisk) }; 214 + } 215 + }
+245
rust/kernel/block/mq/operations.rs
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + //! This module provides an interface for blk-mq drivers to implement. 4 + //! 5 + //! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h) 6 + 7 + use crate::{ 8 + bindings, 9 + block::mq::request::RequestDataWrapper, 10 + block::mq::Request, 11 + error::{from_result, Result}, 12 + types::ARef, 13 + }; 14 + use core::{marker::PhantomData, sync::atomic::AtomicU64, sync::atomic::Ordering}; 15 + 16 + /// Implement this trait to interface blk-mq as block devices. 17 + /// 18 + /// To implement a block device driver, implement this trait as described in the 19 + /// [module level documentation]. The kernel will use the implementation of the 20 + /// functions defined in this trait to interface a block device driver. Note: 21 + /// There is no need for an `exit_request()` implementation, because the `drop` 22 + /// implementation of the [`Request`] type will be invoked automatically by 23 + /// the C/Rust glue logic. 24 + /// 25 + /// [module level documentation]: kernel::block::mq 26 + #[macros::vtable] 27 + pub trait Operations: Sized { 28 + /// Called by the kernel to queue a request with the driver. If `is_last` is 29 + /// `false`, the driver is allowed to defer committing the request. 30 + fn queue_rq(rq: ARef<Request<Self>>, is_last: bool) -> Result; 31 + 32 + /// Called by the kernel to indicate that queued requests should be submitted. 33 + fn commit_rqs(); 34 + 35 + /// Called by the kernel to poll the device for completed requests. Only 36 + /// used for poll queues. 37 + fn poll() -> bool { 38 + crate::build_error(crate::error::VTABLE_DEFAULT_ERROR) 39 + } 40 + } 41 + 42 + /// A vtable for blk-mq to interact with a block device driver. 43 + /// 44 + /// A `bindings::blk_mq_ops` vtable is constructed from pointers to the `extern 45 + /// "C"` functions of this struct, exposed through the `OperationsVTable::VTABLE`.
46 + /// 47 + /// For general documentation of these methods, see the kernel source 48 + /// documentation related to `struct blk_mq_ops` in 49 + /// [`include/linux/blk-mq.h`]. 50 + /// 51 + /// [`include/linux/blk-mq.h`]: srctree/include/linux/blk-mq.h 52 + pub(crate) struct OperationsVTable<T: Operations>(PhantomData<T>); 53 + 54 + impl<T: Operations> OperationsVTable<T> { 55 + /// This function is called by the C kernel. A pointer to this function is 56 + /// installed in the `blk_mq_ops` vtable for the driver. 57 + /// 58 + /// # Safety 59 + /// 60 + /// - The caller of this function must ensure that the pointee of `bd` is 61 + /// valid for reads for the duration of this function. 62 + /// - This function must be called for an initialized and live `hctx`. That 63 + /// is, `Self::init_hctx_callback` was called and 64 + /// `Self::exit_hctx_callback()` was not yet called. 65 + /// - `(*bd).rq` must point to an initialized and live `bindings::request`. 66 + /// That is, `Self::init_request_callback` was called but 67 + /// `Self::exit_request_callback` was not yet called for the request. 68 + /// - `(*bd).rq` must be owned by the driver. That is, the block layer must 69 + /// promise to not access the request until the driver calls 70 + /// `bindings::blk_mq_end_request` for the request. 71 + unsafe extern "C" fn queue_rq_callback( 72 + _hctx: *mut bindings::blk_mq_hw_ctx, 73 + bd: *const bindings::blk_mq_queue_data, 74 + ) -> bindings::blk_status_t { 75 + // SAFETY: `bd.rq` is valid as required by the safety requirement for 76 + // this function. 77 + let request = unsafe { &*(*bd).rq.cast::<Request<T>>() }; 78 + 79 + // One refcount for the ARef, one for being in flight 80 + request.wrapper_ref().refcount().store(2, Ordering::Relaxed); 81 + 82 + // SAFETY: 83 + // - We own a refcount that we took above. We pass that to `ARef`.
84 + // - By the safety requirements of this function, `request` is a valid 85 + // `struct request` and the private data is properly initialized. 86 + // - `rq` will be alive until `blk_mq_end_request` is called and is 87 + // reference counted by `ARef` until then. 88 + let rq = unsafe { Request::aref_from_raw((*bd).rq) }; 89 + 90 + // SAFETY: We have exclusive access and we just set the refcount above. 91 + unsafe { Request::start_unchecked(&rq) }; 92 + 93 + let ret = T::queue_rq( 94 + rq, 95 + // SAFETY: `bd` is valid as required by the safety requirement for 96 + // this function. 97 + unsafe { (*bd).last }, 98 + ); 99 + 100 + if let Err(e) = ret { 101 + e.to_blk_status() 102 + } else { 103 + bindings::BLK_STS_OK as _ 104 + } 105 + } 106 + 107 + /// This function is called by the C kernel. A pointer to this function is 108 + /// installed in the `blk_mq_ops` vtable for the driver. 109 + /// 110 + /// # Safety 111 + /// 112 + /// This function may only be called by blk-mq C infrastructure. 113 + unsafe extern "C" fn commit_rqs_callback(_hctx: *mut bindings::blk_mq_hw_ctx) { 114 + T::commit_rqs() 115 + } 116 + 117 + /// This function is called by the C kernel. It is not currently 118 + /// implemented, and there is no way to exercise this code path. 119 + /// 120 + /// # Safety 121 + /// 122 + /// This function may only be called by blk-mq C infrastructure. 123 + unsafe extern "C" fn complete_callback(_rq: *mut bindings::request) {} 124 + 125 + /// This function is called by the C kernel. A pointer to this function is 126 + /// installed in the `blk_mq_ops` vtable for the driver. 127 + /// 128 + /// # Safety 129 + /// 130 + /// This function may only be called by blk-mq C infrastructure. 131 + unsafe extern "C" fn poll_callback( 132 + _hctx: *mut bindings::blk_mq_hw_ctx, 133 + _iob: *mut bindings::io_comp_batch, 134 + ) -> core::ffi::c_int { 135 + T::poll().into() 136 + } 137 + 138 + /// This function is called by the C kernel. 
A pointer to this function is 139 + /// installed in the `blk_mq_ops` vtable for the driver. 140 + /// 141 + /// # Safety 142 + /// 143 + /// This function may only be called by blk-mq C infrastructure. This 144 + /// function may only be called once before `exit_hctx_callback` is called 145 + /// for the same context. 146 + unsafe extern "C" fn init_hctx_callback( 147 + _hctx: *mut bindings::blk_mq_hw_ctx, 148 + _tagset_data: *mut core::ffi::c_void, 149 + _hctx_idx: core::ffi::c_uint, 150 + ) -> core::ffi::c_int { 151 + from_result(|| Ok(0)) 152 + } 153 + 154 + /// This function is called by the C kernel. A pointer to this function is 155 + /// installed in the `blk_mq_ops` vtable for the driver. 156 + /// 157 + /// # Safety 158 + /// 159 + /// This function may only be called by blk-mq C infrastructure. 160 + unsafe extern "C" fn exit_hctx_callback( 161 + _hctx: *mut bindings::blk_mq_hw_ctx, 162 + _hctx_idx: core::ffi::c_uint, 163 + ) { 164 + } 165 + 166 + /// This function is called by the C kernel. A pointer to this function is 167 + /// installed in the `blk_mq_ops` vtable for the driver. 168 + /// 169 + /// # Safety 170 + /// 171 + /// - This function may only be called by blk-mq C infrastructure. 172 + /// - `_set` must point to an initialized `TagSet<T>`. 173 + /// - `rq` must point to an initialized `bindings::request`. 174 + /// - The allocation pointed to by `rq` must be at the size of `Request` 175 + /// plus the size of `RequestDataWrapper`. 176 + unsafe extern "C" fn init_request_callback( 177 + _set: *mut bindings::blk_mq_tag_set, 178 + rq: *mut bindings::request, 179 + _hctx_idx: core::ffi::c_uint, 180 + _numa_node: core::ffi::c_uint, 181 + ) -> core::ffi::c_int { 182 + from_result(|| { 183 + // SAFETY: By the safety requirements of this function, `rq` points 184 + // to a valid allocation. 
185 + let pdu = unsafe { Request::wrapper_ptr(rq.cast::<Request<T>>()) }; 186 + 187 + // SAFETY: The refcount field is allocated but not initialized, so 188 + // it is valid for writes. 189 + unsafe { RequestDataWrapper::refcount_ptr(pdu.as_ptr()).write(AtomicU64::new(0)) }; 190 + 191 + Ok(0) 192 + }) 193 + } 194 + 195 + /// This function is called by the C kernel. A pointer to this function is 196 + /// installed in the `blk_mq_ops` vtable for the driver. 197 + /// 198 + /// # Safety 199 + /// 200 + /// - This function may only be called by blk-mq C infrastructure. 201 + /// - `_set` must point to an initialized `TagSet<T>`. 202 + /// - `rq` must point to an initialized and valid `Request`. 203 + unsafe extern "C" fn exit_request_callback( 204 + _set: *mut bindings::blk_mq_tag_set, 205 + rq: *mut bindings::request, 206 + _hctx_idx: core::ffi::c_uint, 207 + ) { 208 + // SAFETY: The tagset invariants guarantee that all requests are allocated with extra memory 209 + // for the request data. 210 + let pdu = unsafe { bindings::blk_mq_rq_to_pdu(rq) }.cast::<RequestDataWrapper>(); 211 + 212 + // SAFETY: `pdu` is valid for read and write and is properly initialised. 
213 + unsafe { core::ptr::drop_in_place(pdu) }; 214 + } 215 + 216 + const VTABLE: bindings::blk_mq_ops = bindings::blk_mq_ops { 217 + queue_rq: Some(Self::queue_rq_callback), 218 + queue_rqs: None, 219 + commit_rqs: Some(Self::commit_rqs_callback), 220 + get_budget: None, 221 + put_budget: None, 222 + set_rq_budget_token: None, 223 + get_rq_budget_token: None, 224 + timeout: None, 225 + poll: if T::HAS_POLL { 226 + Some(Self::poll_callback) 227 + } else { 228 + None 229 + }, 230 + complete: Some(Self::complete_callback), 231 + init_hctx: Some(Self::init_hctx_callback), 232 + exit_hctx: Some(Self::exit_hctx_callback), 233 + init_request: Some(Self::init_request_callback), 234 + exit_request: Some(Self::exit_request_callback), 235 + cleanup_rq: None, 236 + busy: None, 237 + map_queues: None, 238 + #[cfg(CONFIG_BLK_DEBUG_FS)] 239 + show_rq: None, 240 + }; 241 + 242 + pub(crate) const fn build() -> &'static bindings::blk_mq_ops { 243 + &Self::VTABLE 244 + } 245 + }
+55
rust/kernel/block/mq/raw_writer.rs
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + use core::fmt::{self, Write}; 4 + 5 + use crate::error::Result; 6 + use crate::prelude::EINVAL; 7 + 8 + /// A mutable reference to a byte buffer where a string can be written into. 9 + /// 10 + /// # Invariants 11 + /// 12 + /// `buffer` is always null terminated. 13 + pub(crate) struct RawWriter<'a> { 14 + buffer: &'a mut [u8], 15 + pos: usize, 16 + } 17 + 18 + impl<'a> RawWriter<'a> { 19 + /// Create a new `RawWriter` instance. 20 + fn new(buffer: &'a mut [u8]) -> Result<RawWriter<'a>> { 21 + *(buffer.last_mut().ok_or(EINVAL)?) = 0; 22 + 23 + // INVARIANT: We null terminated the buffer above. 24 + Ok(Self { buffer, pos: 0 }) 25 + } 26 + 27 + pub(crate) fn from_array<const N: usize>( 28 + a: &'a mut [core::ffi::c_char; N], 29 + ) -> Result<RawWriter<'a>> { 30 + Self::new( 31 + // SAFETY: the buffer of `a` is valid for read and write as `u8` for 32 + // at least `N` bytes. 33 + unsafe { core::slice::from_raw_parts_mut(a.as_mut_ptr().cast::<u8>(), N) }, 34 + ) 35 + } 36 + } 37 + 38 + impl Write for RawWriter<'_> { 39 + fn write_str(&mut self, s: &str) -> fmt::Result { 40 + let bytes = s.as_bytes(); 41 + let len = bytes.len(); 42 + 43 + // We do not want to overwrite our null terminator 44 + if self.pos + len > self.buffer.len() - 1 { 45 + return Err(fmt::Error); 46 + } 47 + 48 + // INVARIANT: We are not overwriting the last byte 49 + self.buffer[self.pos..self.pos + len].copy_from_slice(bytes); 50 + 51 + self.pos += len; 52 + 53 + Ok(()) 54 + } 55 + }
+253
rust/kernel/block/mq/request.rs
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + //! This module provides a wrapper for the C `struct request` type. 4 + //! 5 + //! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h) 6 + 7 + use crate::{ 8 + bindings, 9 + block::mq::Operations, 10 + error::Result, 11 + types::{ARef, AlwaysRefCounted, Opaque}, 12 + }; 13 + use core::{ 14 + marker::PhantomData, 15 + ptr::{addr_of_mut, NonNull}, 16 + sync::atomic::{AtomicU64, Ordering}, 17 + }; 18 + 19 + /// A wrapper around a blk-mq `struct request`. This represents an IO request. 20 + /// 21 + /// # Implementation details 22 + /// 23 + /// There are four states for a request that the Rust bindings care about: 24 + /// 25 + /// A) Request is owned by block layer (refcount 0) 26 + /// B) Request is owned by driver but with zero `ARef`s in existence 27 + /// (refcount 1) 28 + /// C) Request is owned by driver with exactly one `ARef` in existence 29 + /// (refcount 2) 30 + /// D) Request is owned by driver with more than one `ARef` in existence 31 + /// (refcount > 2) 32 + /// 33 + /// 34 + /// We need to track A and B to ensure we fail tag to request conversions for 35 + /// requests that are not owned by the driver. 36 + /// 37 + /// We need to track C and D to ensure that it is safe to end the request and hand 38 + /// back ownership to the block layer. 39 + /// 40 + /// The states are tracked through the private `refcount` field of 41 + /// `RequestDataWrapper`. This structure lives in the private data area of the C 42 + /// `struct request`. 43 + /// 44 + /// # Invariants 45 + /// 46 + /// * `self.0` is a valid `struct request` created by the C portion of the kernel. 47 + /// * The private data area associated with this request must be an initialized 48 + /// and valid `RequestDataWrapper<T>`. 49 + /// * `self` is reference counted by atomic modification of 50 + /// self.wrapper_ref().refcount(). 
51 + /// 52 + #[repr(transparent)] 53 + pub struct Request<T: Operations>(Opaque<bindings::request>, PhantomData<T>); 54 + 55 + impl<T: Operations> Request<T> { 56 + /// Create an `ARef<Request>` from a `struct request` pointer. 57 + /// 58 + /// # Safety 59 + /// 60 + /// * The caller must own a refcount on `ptr` that is transferred to the 61 + /// returned `ARef`. 62 + /// * The type invariants for `Request` must hold for the pointee of `ptr`. 63 + pub(crate) unsafe fn aref_from_raw(ptr: *mut bindings::request) -> ARef<Self> { 64 + // INVARIANT: By the safety requirements of this function, invariants are upheld. 65 + // SAFETY: By the safety requirement of this function, we own a 66 + // reference count that we can pass to `ARef`. 67 + unsafe { ARef::from_raw(NonNull::new_unchecked(ptr as *const Self as *mut Self)) } 68 + } 69 + 70 + /// Notify the block layer that a request is going to be processed now. 71 + /// 72 + /// The block layer uses this hook to do proper initializations such as 73 + /// starting the timeout timer. It is a requirement that block device 74 + /// drivers call this function when starting to process a request. 75 + /// 76 + /// # Safety 77 + /// 78 + /// The caller must have exclusive ownership of `self`, that is 79 + /// `self.wrapper_ref().refcount() == 2`. 80 + pub(crate) unsafe fn start_unchecked(this: &ARef<Self>) { 81 + // SAFETY: By type invariant, `self.0` is a valid `struct request` and 82 + // we have exclusive access. 83 + unsafe { bindings::blk_mq_start_request(this.0.get()) }; 84 + } 85 + 86 + /// Try to take exclusive ownership of `this` by dropping the refcount to 0. 87 + /// This fails if `this` is not the only `ARef` pointing to the underlying 88 + /// `Request`. 89 + /// 90 + /// If the operation is successful, `Ok` is returned with a pointer to the 91 + /// C `struct request`. If the operation fails, `this` is returned in the 92 + /// `Err` variant. 
93 + fn try_set_end(this: ARef<Self>) -> Result<*mut bindings::request, ARef<Self>> { 94 + // We can race with `TagSet::tag_to_rq` 95 + if let Err(_old) = this.wrapper_ref().refcount().compare_exchange( 96 + 2, 97 + 0, 98 + Ordering::Relaxed, 99 + Ordering::Relaxed, 100 + ) { 101 + return Err(this); 102 + } 103 + 104 + let request_ptr = this.0.get(); 105 + core::mem::forget(this); 106 + 107 + Ok(request_ptr) 108 + } 109 + 110 + /// Notify the block layer that the request has been completed without errors. 111 + /// 112 + /// This function will return `Err` if `this` is not the only `ARef` 113 + /// referencing the request. 114 + pub fn end_ok(this: ARef<Self>) -> Result<(), ARef<Self>> { 115 + let request_ptr = Self::try_set_end(this)?; 116 + 117 + // SAFETY: By type invariant, `this.0` was a valid `struct request`. The 118 + // success of the call to `try_set_end` guarantees that there are no 119 + // `ARef`s pointing to this request. Therefore it is safe to hand it 120 + // back to the block layer. 121 + unsafe { bindings::blk_mq_end_request(request_ptr, bindings::BLK_STS_OK as _) }; 122 + 123 + Ok(()) 124 + } 125 + 126 + /// Return a pointer to the `RequestDataWrapper` stored in the private area 127 + /// of the request structure. 128 + /// 129 + /// # Safety 130 + /// 131 + /// - `this` must point to a valid allocation of size at least size of 132 + /// `Self` plus size of `RequestDataWrapper`. 133 + pub(crate) unsafe fn wrapper_ptr(this: *mut Self) -> NonNull<RequestDataWrapper> { 134 + let request_ptr = this.cast::<bindings::request>(); 135 + // SAFETY: By safety requirements for this function, `this` is a 136 + // valid allocation. 137 + let wrapper_ptr = 138 + unsafe { bindings::blk_mq_rq_to_pdu(request_ptr).cast::<RequestDataWrapper>() }; 139 + // SAFETY: By C API contract, wrapper_ptr points to a valid allocation 140 + // and is not null. 
141 + unsafe { NonNull::new_unchecked(wrapper_ptr) } 142 + } 143 + 144 + /// Return a reference to the `RequestDataWrapper` stored in the private 145 + /// area of the request structure. 146 + pub(crate) fn wrapper_ref(&self) -> &RequestDataWrapper { 147 + // SAFETY: By type invariant, `self.0` is a valid allocation. Further, 148 + // the private data associated with this request is initialized and 149 + // valid. The existence of `&self` guarantees that the private data is 150 + // valid as a shared reference. 151 + unsafe { Self::wrapper_ptr(self as *const Self as *mut Self).as_ref() } 152 + } 153 + } 154 + 155 + /// A wrapper around data stored in the private area of the C `struct request`. 156 + pub(crate) struct RequestDataWrapper { 157 + /// The Rust request refcount has the following states: 158 + /// 159 + /// - 0: The request is owned by C block layer. 160 + /// - 1: The request is owned by Rust abstractions but there are no ARef references to it. 161 + /// - 2+: There are `ARef` references to the request. 162 + refcount: AtomicU64, 163 + } 164 + 165 + impl RequestDataWrapper { 166 + /// Return a reference to the refcount of the request that is embedding 167 + /// `self`. 168 + pub(crate) fn refcount(&self) -> &AtomicU64 { 169 + &self.refcount 170 + } 171 + 172 + /// Return a pointer to the refcount of the request that is embedding the 173 + /// pointee of `this`. 174 + /// 175 + /// # Safety 176 + /// 177 + /// - `this` must point to a live allocation of at least the size of `Self`. 178 + pub(crate) unsafe fn refcount_ptr(this: *mut Self) -> *mut AtomicU64 { 179 + // SAFETY: Because of the safety requirements of this function, the 180 + // field projection is safe. 181 + unsafe { addr_of_mut!((*this).refcount) } 182 + } 183 + } 184 + 185 + // SAFETY: Exclusive access is thread-safe for `Request`. `Request` has no `&mut 186 + // self` methods and `&self` methods that mutate `self` are internally 187 + // synchronized. 
unsafe impl<T: Operations> Send for Request<T> {}

// SAFETY: Shared access is thread-safe for `Request`. `&self` methods that
// mutate `self` are internally synchronized.
unsafe impl<T: Operations> Sync for Request<T> {}

/// Store the result of `op(target.load())` in `target`, returning the new
/// value of `target`.
fn atomic_relaxed_op_return(target: &AtomicU64, op: impl Fn(u64) -> u64) -> u64 {
    let old = target.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| Some(op(x)));

    // SAFETY: Because the operation passed to `fetch_update` above always
    // returns `Some`, `old` will always be `Ok`.
    let old = unsafe { old.unwrap_unchecked() };

    // `op` is applied once more to the captured old value; `fetch_update`
    // already stored this same result, so no extra atomic access is needed.
    op(old)
}

/// Store the result of `op(target.load())` in `target` if `target.load() !=
/// pred`, returning `true` if the target was updated.
fn atomic_relaxed_op_unless(target: &AtomicU64, op: impl Fn(u64) -> u64, pred: u64) -> bool {
    target
        .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| {
            // Returning `None` aborts the update, making `fetch_update`
            // return `Err` when the predicate value is observed.
            if x == pred {
                None
            } else {
                Some(op(x))
            }
        })
        .is_ok()
}

// SAFETY: All instances of `Request<T>` are reference counted. This
// implementation of `AlwaysRefCounted` ensures that increments to the ref count
// keep the object alive in memory at least until a matching reference count
// decrement is executed.
unsafe impl<T: Operations> AlwaysRefCounted for Request<T> {
    fn inc_ref(&self) {
        let refcount = &self.wrapper_ref().refcount();

        // A refcount of 0 means the request is owned by the C block layer;
        // handing out a new `ARef` in that state would be a use-after-free,
        // so the increment is refused when the count is 0.
        #[cfg_attr(not(CONFIG_DEBUG_MISC), allow(unused_variables))]
        let updated = atomic_relaxed_op_unless(refcount, |x| x + 1, 0);

        // With CONFIG_DEBUG_MISC the refused increment is treated as a fatal
        // bug rather than silently ignored.
        #[cfg(CONFIG_DEBUG_MISC)]
        if !updated {
            panic!("Request refcount zero on clone")
        }
    }

    unsafe fn dec_ref(obj: core::ptr::NonNull<Self>) {
        // SAFETY: The type invariants of `ARef` guarantee that `obj` is valid
        // for read.
        let wrapper_ptr = unsafe { Self::wrapper_ptr(obj.as_ptr()).as_ptr() };
        // SAFETY: The type invariant of `Request` guarantees that the private
        // data area is initialized and valid.
        let refcount = unsafe { &*RequestDataWrapper::refcount_ptr(wrapper_ptr) };

        #[cfg_attr(not(CONFIG_DEBUG_MISC), allow(unused_variables))]
        let new_refcount = atomic_relaxed_op_return(refcount, |x| x - 1);

        // Dropping the last `ARef` must leave the count at 1 (owned by the
        // Rust abstractions, no `ARef`s); only `try_set_end` may take the
        // count to 0, so reaching 0 here indicates a refcounting bug.
        #[cfg(CONFIG_DEBUG_MISC)]
        if new_refcount == 0 {
            panic!("Request reached refcount zero in Rust abstractions");
        }
    }
}
+86
rust/kernel/block/mq/tag_set.rs
// SPDX-License-Identifier: GPL-2.0

//! This module provides the `TagSet` struct to wrap the C `struct blk_mq_tag_set`.
//!
//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h)

use core::pin::Pin;

use crate::{
    bindings,
    block::mq::{operations::OperationsVTable, request::RequestDataWrapper, Operations},
    error,
    prelude::PinInit,
    try_pin_init,
    types::Opaque,
};
use core::{convert::TryInto, marker::PhantomData};
use macros::{pin_data, pinned_drop};

/// A wrapper for the C `struct blk_mq_tag_set`.
///
/// `struct blk_mq_tag_set` contains a `struct list_head` and so must be pinned.
///
/// # Invariants
///
/// - `inner` is initialized and valid.
#[pin_data(PinnedDrop)]
#[repr(transparent)]
pub struct TagSet<T: Operations> {
    #[pin]
    inner: Opaque<bindings::blk_mq_tag_set>,
    // Ties the tag set to the `Operations` implementation whose vtable it
    // carries, without storing a `T`.
    _p: PhantomData<T>,
}

impl<T: Operations> TagSet<T> {
    /// Try to create a new tag set
    ///
    /// `nr_hw_queues` is the number of hardware queues, `num_tags` the queue
    /// depth, and `num_maps` the number of queue maps; they are passed through
    /// to the C `struct blk_mq_tag_set` fields of the same purpose.
    pub fn new(
        nr_hw_queues: u32,
        num_tags: u32,
        num_maps: u32,
    ) -> impl PinInit<Self, error::Error> {
        // SAFETY: `blk_mq_tag_set` only contains integers and pointers, which
        // all are allowed to be 0.
        let tag_set: bindings::blk_mq_tag_set = unsafe { core::mem::zeroed() };
        // `cmd_size` is the per-request private-data size; `try_into` fails
        // (and the error propagates below) if it does not fit the C field.
        let tag_set = core::mem::size_of::<RequestDataWrapper>()
            .try_into()
            .map(|cmd_size| {
                bindings::blk_mq_tag_set {
                    ops: OperationsVTable::<T>::build(),
                    nr_hw_queues,
                    timeout: 0, // 0 means default which is 30Hz in C
                    numa_node: bindings::NUMA_NO_NODE,
                    queue_depth: num_tags,
                    cmd_size,
                    flags: bindings::BLK_MQ_F_SHOULD_MERGE,
                    driver_data: core::ptr::null_mut::<core::ffi::c_void>(),
                    nr_maps: num_maps,
                    // Remaining fields stay zeroed, as prepared above.
                    ..tag_set
                }
            });

        // Two-phase init: first move the filled-in struct into place, then let
        // the C side finish initialization via `blk_mq_alloc_tag_set`.
        try_pin_init!(TagSet {
            inner <- PinInit::<_, error::Error>::pin_chain(Opaque::new(tag_set?), |tag_set| {
                // SAFETY: we do not move out of `tag_set`.
                let tag_set = unsafe { Pin::get_unchecked_mut(tag_set) };
                // SAFETY: `tag_set` is a reference to an initialized `blk_mq_tag_set`.
                error::to_result( unsafe { bindings::blk_mq_alloc_tag_set(tag_set.get())})
            }),
            _p: PhantomData,
        })
    }

    /// Return the pointer to the wrapped `struct blk_mq_tag_set`
    pub(crate) fn raw_tag_set(&self) -> *mut bindings::blk_mq_tag_set {
        self.inner.get()
    }
}

#[pinned_drop]
impl<T: Operations> PinnedDrop for TagSet<T> {
    fn drop(self: Pin<&mut Self>) {
        // SAFETY: By type invariant `inner` is valid and has been properly
        // initialized during construction.
        unsafe { bindings::blk_mq_free_tag_set(self.inner.get()) };
    }
}
+6
rust/kernel/error.rs
        self.0
    }

    /// Convert this error into a C `blk_status_t`, for handing results back
    /// to the block layer.
    #[cfg(CONFIG_BLOCK)]
    pub(crate) fn to_blk_status(self) -> bindings::blk_status_t {
        // SAFETY: `self.0` is a valid error due to its invariant.
        unsafe { bindings::errno_to_blk_status(self.0) }
    }

    /// Returns the error encoded as a pointer.
    #[allow(dead_code)]
    pub(crate) fn to_ptr<T>(self) -> *mut T {
+2
rust/kernel/lib.rs
··· 27 27 extern crate self as kernel; 28 28 29 29 pub mod alloc; 30 + #[cfg(CONFIG_BLOCK)] 31 + pub mod block; 30 32 mod build_assert; 31 33 pub mod error; 32 34 pub mod init;