Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

NTB: Introduce MSI library

The NTB MSI library allows passing MSI interrupts across a memory
window. This offers similar functionality to doorbells or messages,
except that it will often have much better latency, and the client can
potentially use significantly more remote interrupts than typical hardware
provides for doorbells (which can be important in high-multiport
setups).

The library utilizes one memory window per peer and uses the highest
index memory windows. Before any ntb_msi function may be used, the user
must call ntb_msi_init(). It may then set up and tear down the memory
windows when the link state changes using ntb_msi_setup_mws() and
ntb_msi_clear_mws().

The peer which receives the interrupt must call ntbm_msi_request_irq()
to assign the interrupt handler (this function is functionally
similar to devm_request_irq()) and the returned descriptor must be
transferred to the peer which can use it to trigger the interrupt.
The triggering peer, once having received the descriptor, can
trigger the interrupt by calling ntb_msi_peer_trigger().

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Allen Hubbe <allenbh@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>

authored by

Logan Gunthorpe and committed by
Jon Mason
26b3a37b d217e07b

+501 -1
+11
drivers/ntb/Kconfig
··· 12 12 13 13 if NTB 14 14 15 + config NTB_MSI 16 + bool "MSI Interrupt Support" 17 + depends on PCI_MSI 18 + help 19 + Support using MSI interrupt forwarding instead of (or in addition to) 20 + hardware doorbells. MSI interrupts typically offer lower latency 21 + than doorbells and more MSI interrupts can be made available to 22 + clients. However this requires an extra memory window and support 23 + in the hardware driver for creating the MSI interrupts. 24 + 25 + If unsure, say N. 15 26 source "drivers/ntb/hw/Kconfig" 16 27 17 28 source "drivers/ntb/test/Kconfig"
+2 -1
drivers/ntb/Makefile
··· 1 1 obj-$(CONFIG_NTB) += ntb.o hw/ test/ 2 2 obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o 3 3 4 - ntb-y := core.o 4 + ntb-y := core.o 5 + ntb-$(CONFIG_NTB_MSI) += msi.o
+415
drivers/ntb/msi.c
··· 1 + // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) 2 + 3 + #include <linux/irq.h> 4 + #include <linux/module.h> 5 + #include <linux/ntb.h> 6 + #include <linux/msi.h> 7 + #include <linux/pci.h> 8 + 9 + MODULE_LICENSE("Dual BSD/GPL"); 10 + MODULE_VERSION("0.1"); 11 + MODULE_AUTHOR("Logan Gunthorpe <logang@deltatee.com>"); 12 + MODULE_DESCRIPTION("NTB MSI Interrupt Library"); 13 + 14 + struct ntb_msi { 15 + u64 base_addr; 16 + u64 end_addr; 17 + 18 + void (*desc_changed)(void *ctx); 19 + 20 + u32 __iomem *peer_mws[]; 21 + }; 22 + 23 + /** 24 + * ntb_msi_init() - Initialize the MSI context 25 + * @ntb: NTB device context 26 + * 27 + * This function must be called before any other ntb_msi function. 28 + * It initializes the context for MSI operations and maps 29 + * the peer memory windows. 30 + * 31 + * This function reserves the last N outbound memory windows (where N 32 + * is the number of peers). 33 + * 34 + * Return: Zero on success, otherwise a negative error number. 35 + */ 36 + int ntb_msi_init(struct ntb_dev *ntb, 37 + void (*desc_changed)(void *ctx)) 38 + { 39 + phys_addr_t mw_phys_addr; 40 + resource_size_t mw_size; 41 + size_t struct_size; 42 + int peer_widx; 43 + int peers; 44 + int ret; 45 + int i; 46 + 47 + peers = ntb_peer_port_count(ntb); 48 + if (peers <= 0) 49 + return -EINVAL; 50 + 51 + struct_size = sizeof(*ntb->msi) + sizeof(*ntb->msi->peer_mws) * peers; 52 + 53 + ntb->msi = devm_kzalloc(&ntb->dev, struct_size, GFP_KERNEL); 54 + if (!ntb->msi) 55 + return -ENOMEM; 56 + 57 + ntb->msi->desc_changed = desc_changed; 58 + 59 + for (i = 0; i < peers; i++) { 60 + peer_widx = ntb_peer_mw_count(ntb) - 1 - i; 61 + 62 + ret = ntb_peer_mw_get_addr(ntb, peer_widx, &mw_phys_addr, 63 + &mw_size); 64 + if (ret) 65 + goto unroll; 66 + 67 + ntb->msi->peer_mws[i] = devm_ioremap(&ntb->dev, mw_phys_addr, 68 + mw_size); 69 + if (!ntb->msi->peer_mws[i]) { 70 + ret = -EFAULT; 71 + goto unroll; 72 + } 73 + } 74 + 75 + return 0; 76 + 77 + unroll: 78 + for (i = 
0; i < peers; i++) 79 + if (ntb->msi->peer_mws[i]) 80 + devm_iounmap(&ntb->dev, ntb->msi->peer_mws[i]); 81 + 82 + devm_kfree(&ntb->dev, ntb->msi); 83 + ntb->msi = NULL; 84 + return ret; 85 + } 86 + EXPORT_SYMBOL(ntb_msi_init); 87 + 88 + /** 89 + * ntb_msi_setup_mws() - Initialize the MSI inbound memory windows 90 + * @ntb: NTB device context 91 + * 92 + * This function sets up the required inbound memory windows. It should be 93 + * called from a work function after a link up event. 94 + * 95 + * Over the entire network, this function will reserves the last N 96 + * inbound memory windows for each peer (where N is the number of peers). 97 + * 98 + * ntb_msi_init() must be called before this function. 99 + * 100 + * Return: Zero on success, otherwise a negative error number. 101 + */ 102 + int ntb_msi_setup_mws(struct ntb_dev *ntb) 103 + { 104 + struct msi_desc *desc; 105 + u64 addr; 106 + int peer, peer_widx; 107 + resource_size_t addr_align, size_align, size_max; 108 + resource_size_t mw_size = SZ_32K; 109 + resource_size_t mw_min_size = mw_size; 110 + int i; 111 + int ret; 112 + 113 + if (!ntb->msi) 114 + return -EINVAL; 115 + 116 + desc = first_msi_entry(&ntb->pdev->dev); 117 + addr = desc->msg.address_lo + ((uint64_t)desc->msg.address_hi << 32); 118 + 119 + for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) { 120 + peer_widx = ntb_peer_highest_mw_idx(ntb, peer); 121 + if (peer_widx < 0) 122 + return peer_widx; 123 + 124 + ret = ntb_mw_get_align(ntb, peer, peer_widx, &addr_align, 125 + NULL, NULL); 126 + if (ret) 127 + return ret; 128 + 129 + addr &= ~(addr_align - 1); 130 + } 131 + 132 + for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) { 133 + peer_widx = ntb_peer_highest_mw_idx(ntb, peer); 134 + if (peer_widx < 0) { 135 + ret = peer_widx; 136 + goto error_out; 137 + } 138 + 139 + ret = ntb_mw_get_align(ntb, peer, peer_widx, NULL, 140 + &size_align, &size_max); 141 + if (ret) 142 + goto error_out; 143 + 144 + mw_size = round_up(mw_size, size_align); 
145 + mw_size = max(mw_size, size_max); 146 + if (mw_size < mw_min_size) 147 + mw_min_size = mw_size; 148 + 149 + ret = ntb_mw_set_trans(ntb, peer, peer_widx, 150 + addr, mw_size); 151 + if (ret) 152 + goto error_out; 153 + } 154 + 155 + ntb->msi->base_addr = addr; 156 + ntb->msi->end_addr = addr + mw_min_size; 157 + 158 + return 0; 159 + 160 + error_out: 161 + for (i = 0; i < peer; i++) { 162 + peer_widx = ntb_peer_highest_mw_idx(ntb, peer); 163 + if (peer_widx < 0) 164 + continue; 165 + 166 + ntb_mw_clear_trans(ntb, i, peer_widx); 167 + } 168 + 169 + return ret; 170 + } 171 + EXPORT_SYMBOL(ntb_msi_setup_mws); 172 + 173 + /** 174 + * ntb_msi_clear_mws() - Clear all inbound memory windows 175 + * @ntb: NTB device context 176 + * 177 + * This function tears down the resources used by ntb_msi_setup_mws(). 178 + */ 179 + void ntb_msi_clear_mws(struct ntb_dev *ntb) 180 + { 181 + int peer; 182 + int peer_widx; 183 + 184 + for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) { 185 + peer_widx = ntb_peer_highest_mw_idx(ntb, peer); 186 + if (peer_widx < 0) 187 + continue; 188 + 189 + ntb_mw_clear_trans(ntb, peer, peer_widx); 190 + } 191 + } 192 + EXPORT_SYMBOL(ntb_msi_clear_mws); 193 + 194 + struct ntb_msi_devres { 195 + struct ntb_dev *ntb; 196 + struct msi_desc *entry; 197 + struct ntb_msi_desc *msi_desc; 198 + }; 199 + 200 + static int ntb_msi_set_desc(struct ntb_dev *ntb, struct msi_desc *entry, 201 + struct ntb_msi_desc *msi_desc) 202 + { 203 + u64 addr; 204 + 205 + addr = entry->msg.address_lo + 206 + ((uint64_t)entry->msg.address_hi << 32); 207 + 208 + if (addr < ntb->msi->base_addr || addr >= ntb->msi->end_addr) { 209 + dev_warn_once(&ntb->dev, 210 + "IRQ %d: MSI Address not within the memory window (%llx, [%llx %llx])\n", 211 + entry->irq, addr, ntb->msi->base_addr, 212 + ntb->msi->end_addr); 213 + return -EFAULT; 214 + } 215 + 216 + msi_desc->addr_offset = addr - ntb->msi->base_addr; 217 + msi_desc->data = entry->msg.data; 218 + 219 + return 0; 220 + } 221 + 
222 + static void ntb_msi_write_msg(struct msi_desc *entry, void *data) 223 + { 224 + struct ntb_msi_devres *dr = data; 225 + 226 + WARN_ON(ntb_msi_set_desc(dr->ntb, entry, dr->msi_desc)); 227 + 228 + if (dr->ntb->msi->desc_changed) 229 + dr->ntb->msi->desc_changed(dr->ntb->ctx); 230 + } 231 + 232 + static void ntbm_msi_callback_release(struct device *dev, void *res) 233 + { 234 + struct ntb_msi_devres *dr = res; 235 + 236 + dr->entry->write_msi_msg = NULL; 237 + dr->entry->write_msi_msg_data = NULL; 238 + } 239 + 240 + static int ntbm_msi_setup_callback(struct ntb_dev *ntb, struct msi_desc *entry, 241 + struct ntb_msi_desc *msi_desc) 242 + { 243 + struct ntb_msi_devres *dr; 244 + 245 + dr = devres_alloc(ntbm_msi_callback_release, 246 + sizeof(struct ntb_msi_devres), GFP_KERNEL); 247 + if (!dr) 248 + return -ENOMEM; 249 + 250 + dr->ntb = ntb; 251 + dr->entry = entry; 252 + dr->msi_desc = msi_desc; 253 + 254 + devres_add(&ntb->dev, dr); 255 + 256 + dr->entry->write_msi_msg = ntb_msi_write_msg; 257 + dr->entry->write_msi_msg_data = dr; 258 + 259 + return 0; 260 + } 261 + 262 + /** 263 + * ntbm_msi_request_threaded_irq() - allocate an MSI interrupt 264 + * @ntb: NTB device context 265 + * @handler: Function to be called when the IRQ occurs 266 + * @thread_fn: Function to be called in a threaded interrupt context. NULL 267 + * for clients which handle everything in @handler 268 + * @devname: An ascii name for the claiming device, dev_name(dev) if NULL 269 + * @dev_id: A cookie passed back to the handler function 270 + * 271 + * This function assigns an interrupt handler to an unused 272 + * MSI interrupt and returns the descriptor used to trigger 273 + * it. The descriptor can then be sent to a peer to trigger 274 + * the interrupt. 275 + * 276 + * The interrupt resource is managed with devres so it will 277 + * be automatically freed when the NTB device is torn down. 
278 + * 279 + * If an IRQ allocated with this function needs to be freed 280 + * separately, ntbm_free_irq() must be used. 281 + * 282 + * Return: IRQ number assigned on success, otherwise a negative error number. 283 + */ 284 + int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler, 285 + irq_handler_t thread_fn, 286 + const char *name, void *dev_id, 287 + struct ntb_msi_desc *msi_desc) 288 + { 289 + struct msi_desc *entry; 290 + struct irq_desc *desc; 291 + int ret; 292 + 293 + if (!ntb->msi) 294 + return -EINVAL; 295 + 296 + for_each_pci_msi_entry(entry, ntb->pdev) { 297 + desc = irq_to_desc(entry->irq); 298 + if (desc->action) 299 + continue; 300 + 301 + ret = devm_request_threaded_irq(&ntb->dev, entry->irq, handler, 302 + thread_fn, 0, name, dev_id); 303 + if (ret) 304 + continue; 305 + 306 + if (ntb_msi_set_desc(ntb, entry, msi_desc)) { 307 + devm_free_irq(&ntb->dev, entry->irq, dev_id); 308 + continue; 309 + } 310 + 311 + ret = ntbm_msi_setup_callback(ntb, entry, msi_desc); 312 + if (ret) { 313 + devm_free_irq(&ntb->dev, entry->irq, dev_id); 314 + return ret; 315 + } 316 + 317 + 318 + return entry->irq; 319 + } 320 + 321 + return -ENODEV; 322 + } 323 + EXPORT_SYMBOL(ntbm_msi_request_threaded_irq); 324 + 325 + static int ntbm_msi_callback_match(struct device *dev, void *res, void *data) 326 + { 327 + struct ntb_dev *ntb = dev_ntb(dev); 328 + struct ntb_msi_devres *dr = res; 329 + 330 + return dr->ntb == ntb && dr->entry == data; 331 + } 332 + 333 + /** 334 + * ntbm_msi_free_irq() - free an interrupt 335 + * @ntb: NTB device context 336 + * @irq: Interrupt line to free 337 + * @dev_id: Device identity to free 338 + * 339 + * This function should be used to manually free IRQs allocated with 340 + * ntbm_request_[threaded_]irq(). 
341 + */ 342 + void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id) 343 + { 344 + struct msi_desc *entry = irq_get_msi_desc(irq); 345 + 346 + entry->write_msi_msg = NULL; 347 + entry->write_msi_msg_data = NULL; 348 + 349 + WARN_ON(devres_destroy(&ntb->dev, ntbm_msi_callback_release, 350 + ntbm_msi_callback_match, entry)); 351 + 352 + devm_free_irq(&ntb->dev, irq, dev_id); 353 + } 354 + EXPORT_SYMBOL(ntbm_msi_free_irq); 355 + 356 + /** 357 + * ntb_msi_peer_trigger() - Trigger an interrupt handler on a peer 358 + * @ntb: NTB device context 359 + * @peer: Peer index 360 + * @desc: MSI descriptor data which triggers the interrupt 361 + * 362 + * This function triggers an interrupt on a peer. It requires 363 + * the descriptor structure to have been passed from that peer 364 + * by some other means. 365 + * 366 + * Return: Zero on success, otherwise a negative error number. 367 + */ 368 + int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer, 369 + struct ntb_msi_desc *desc) 370 + { 371 + int idx; 372 + 373 + if (!ntb->msi) 374 + return -EINVAL; 375 + 376 + idx = desc->addr_offset / sizeof(*ntb->msi->peer_mws[peer]); 377 + 378 + iowrite32(desc->data, &ntb->msi->peer_mws[peer][idx]); 379 + 380 + return 0; 381 + } 382 + EXPORT_SYMBOL(ntb_msi_peer_trigger); 383 + 384 + /** 385 + * ntb_msi_peer_addr() - Get the DMA address to trigger a peer's MSI interrupt 386 + * @ntb: NTB device context 387 + * @peer: Peer index 388 + * @desc: MSI descriptor data which triggers the interrupt 389 + * @msi_addr: Physical address to trigger the interrupt 390 + * 391 + * This function allows using DMA engines to trigger an interrupt 392 + * (for example, trigger an interrupt to process the data after 393 + * sending it). To trigger the interrupt, write @desc.data to the address 394 + * returned in @msi_addr 395 + * 396 + * Return: Zero on success, otherwise a negative error number. 
397 + */ 398 + int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer, 399 + struct ntb_msi_desc *desc, 400 + phys_addr_t *msi_addr) 401 + { 402 + int peer_widx = ntb_peer_mw_count(ntb) - 1 - peer; 403 + phys_addr_t mw_phys_addr; 404 + int ret; 405 + 406 + ret = ntb_peer_mw_get_addr(ntb, peer_widx, &mw_phys_addr, NULL); 407 + if (ret) 408 + return ret; 409 + 410 + if (msi_addr) 411 + *msi_addr = mw_phys_addr + desc->addr_offset; 412 + 413 + return 0; 414 + } 415 + EXPORT_SYMBOL(ntb_msi_peer_addr);
+73
include/linux/ntb.h
··· 58 58 59 59 #include <linux/completion.h> 60 60 #include <linux/device.h> 61 + #include <linux/interrupt.h> 61 62 62 63 struct ntb_client; 63 64 struct ntb_dev; 65 + struct ntb_msi; 64 66 struct pci_dev; 65 67 66 68 /** ··· 428 426 spinlock_t ctx_lock; 429 427 /* block unregister until device is fully released */ 430 428 struct completion released; 429 + 430 + #ifdef CONFIG_NTB_MSI 431 + struct ntb_msi *msi; 432 + #endif 431 433 }; 432 434 #define dev_ntb(__dev) container_of((__dev), struct ntb_dev, dev) 433 435 ··· 1631 1625 return ret; 1632 1626 1633 1627 return ntb_mw_count(ntb, pidx) - ret - 1; 1628 + } 1629 + 1630 + struct ntb_msi_desc { 1631 + u32 addr_offset; 1632 + u32 data; 1633 + }; 1634 + 1635 + #ifdef CONFIG_NTB_MSI 1636 + 1637 + int ntb_msi_init(struct ntb_dev *ntb, void (*desc_changed)(void *ctx)); 1638 + int ntb_msi_setup_mws(struct ntb_dev *ntb); 1639 + void ntb_msi_clear_mws(struct ntb_dev *ntb); 1640 + int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler, 1641 + irq_handler_t thread_fn, 1642 + const char *name, void *dev_id, 1643 + struct ntb_msi_desc *msi_desc); 1644 + void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id); 1645 + int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer, 1646 + struct ntb_msi_desc *desc); 1647 + int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer, 1648 + struct ntb_msi_desc *desc, 1649 + phys_addr_t *msi_addr); 1650 + 1651 + #else /* not CONFIG_NTB_MSI */ 1652 + 1653 + static inline int ntb_msi_init(struct ntb_dev *ntb, 1654 + void (*desc_changed)(void *ctx)) 1655 + { 1656 + return -EOPNOTSUPP; 1657 + } 1658 + static inline int ntb_msi_setup_mws(struct ntb_dev *ntb) 1659 + { 1660 + return -EOPNOTSUPP; 1661 + } 1662 + static inline void ntb_msi_clear_mws(struct ntb_dev *ntb) {} 1663 + static inline int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, 1664 + irq_handler_t handler, 1665 + irq_handler_t thread_fn, 1666 + const char *name, void *dev_id, 1667 + struct 
ntb_msi_desc *msi_desc) 1668 + { 1669 + return -EOPNOTSUPP; 1670 + } 1671 + static inline void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, 1672 + void *dev_id) {} 1673 + static inline int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer, 1674 + struct ntb_msi_desc *desc) 1675 + { 1676 + return -EOPNOTSUPP; 1677 + } 1678 + static inline int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer, 1679 + struct ntb_msi_desc *desc, 1680 + phys_addr_t *msi_addr) 1681 + { 1682 + return -EOPNOTSUPP; 1683 + 1684 + } 1685 + 1686 + #endif /* CONFIG_NTB_MSI */ 1687 + 1688 + static inline int ntbm_msi_request_irq(struct ntb_dev *ntb, 1689 + irq_handler_t handler, 1690 + const char *name, void *dev_id, 1691 + struct ntb_msi_desc *msi_desc) 1692 + { 1693 + return ntbm_msi_request_threaded_irq(ntb, handler, NULL, name, 1694 + dev_id, msi_desc); 1634 1695 } 1635 1696 1636 1697 #endif