Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Viewing file at branch `master` — 1,094 lines (31 kB); raw view available.
1// SPDX-License-Identifier: GPL-2.0-only 2/* Copyright(c) 2020 Intel Corporation. All rights reserved. */ 3#include <linux/unaligned.h> 4#include <linux/io-64-nonatomic-lo-hi.h> 5#include <linux/moduleparam.h> 6#include <linux/module.h> 7#include <linux/delay.h> 8#include <linux/sizes.h> 9#include <linux/mutex.h> 10#include <linux/list.h> 11#include <linux/pci.h> 12#include <linux/aer.h> 13#include <linux/io.h> 14#include <cxl/mailbox.h> 15#include "cxlmem.h" 16#include "cxlpci.h" 17#include "cxl.h" 18#include "pmu.h" 19 20/** 21 * DOC: cxl pci 22 * 23 * This implements the PCI exclusive functionality for a CXL device as it is 24 * defined by the Compute Express Link specification. CXL devices may surface 25 * certain functionality even if it isn't CXL enabled. While this driver is 26 * focused around the PCI specific aspects of a CXL device, it binds to the 27 * specific CXL memory device class code, and therefore the implementation of 28 * cxl_pci is focused around CXL memory devices. 29 * 30 * The driver has several responsibilities, mainly: 31 * - Create the memX device and register on the CXL bus. 32 * - Enumerate device's register interface and map them. 33 * - Registers nvdimm bridge device with cxl_core. 34 * - Registers a CXL mailbox with cxl_core. 35 */ 36 37#define cxl_doorbell_busy(cxlds) \ 38 (readl((cxlds)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) & \ 39 CXLDEV_MBOX_CTRL_DOORBELL) 40 41/* CXL 2.0 - 8.2.8.4 */ 42#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ) 43 44/* 45 * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to 46 * dictate how long to wait for the mailbox to become ready. The new 47 * field allows the device to tell software the amount of time to wait 48 * before mailbox ready. This field per the spec theoretically allows 49 * for up to 255 seconds. 255 seconds is unreasonably long, its longer 50 * than the maximum SATA port link recovery wait. 
Default to 60 seconds 51 * until someone builds a CXL device that needs more time in practice. 52 */ 53static unsigned short mbox_ready_timeout = 60; 54module_param(mbox_ready_timeout, ushort, 0644); 55MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready"); 56 57static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) 58{ 59 const unsigned long start = jiffies; 60 unsigned long end = start; 61 62 while (cxl_doorbell_busy(cxlds)) { 63 end = jiffies; 64 65 if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) { 66 /* Check again in case preempted before timeout test */ 67 if (!cxl_doorbell_busy(cxlds)) 68 break; 69 return -ETIMEDOUT; 70 } 71 cpu_relax(); 72 } 73 74 dev_dbg(cxlds->dev, "Doorbell wait took %dms", 75 jiffies_to_msecs(end) - jiffies_to_msecs(start)); 76 return 0; 77} 78 79#define cxl_err(dev, status, msg) \ 80 dev_err_ratelimited(dev, msg ", device state %s%s\n", \ 81 status & CXLMDEV_DEV_FATAL ? " fatal" : "", \ 82 status & CXLMDEV_FW_HALT ? " firmware-halt" : "") 83 84#define cxl_cmd_err(dev, cmd, status, msg) \ 85 dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n", \ 86 (cmd)->opcode, \ 87 status & CXLMDEV_DEV_FATAL ? " fatal" : "", \ 88 status & CXLMDEV_FW_HALT ? " firmware-halt" : "") 89 90/* 91 * Threaded irq dev_id's must be globally unique. cxl_dev_id provides a unique 92 * wrapper object for each irq within the same cxlds. 
93 */ 94struct cxl_dev_id { 95 struct cxl_dev_state *cxlds; 96}; 97 98static int cxl_request_irq(struct cxl_dev_state *cxlds, int irq, 99 irq_handler_t thread_fn) 100{ 101 struct device *dev = cxlds->dev; 102 struct cxl_dev_id *dev_id; 103 104 dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL); 105 if (!dev_id) 106 return -ENOMEM; 107 dev_id->cxlds = cxlds; 108 109 return devm_request_threaded_irq(dev, irq, NULL, thread_fn, 110 IRQF_SHARED | IRQF_ONESHOT, NULL, 111 dev_id); 112} 113 114static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds) 115{ 116 u64 reg; 117 118 reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); 119 return FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg) == 100; 120} 121 122static irqreturn_t cxl_pci_mbox_irq(int irq, void *id) 123{ 124 u64 reg; 125 u16 opcode; 126 struct cxl_dev_id *dev_id = id; 127 struct cxl_dev_state *cxlds = dev_id->cxlds; 128 struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox; 129 struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); 130 131 if (!cxl_mbox_background_complete(cxlds)) 132 return IRQ_NONE; 133 134 reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); 135 opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg); 136 if (opcode == CXL_MBOX_OP_SANITIZE) { 137 mutex_lock(&cxl_mbox->mbox_mutex); 138 if (mds->security.sanitize_node) 139 mod_delayed_work(system_percpu_wq, &mds->security.poll_dwork, 0); 140 mutex_unlock(&cxl_mbox->mbox_mutex); 141 } else { 142 /* short-circuit the wait in __cxl_pci_mbox_send_cmd() */ 143 rcuwait_wake_up(&cxl_mbox->mbox_wait); 144 } 145 146 return IRQ_HANDLED; 147} 148 149/* 150 * Sanitization operation polling mode. 
151 */ 152static void cxl_mbox_sanitize_work(struct work_struct *work) 153{ 154 struct cxl_memdev_state *mds = 155 container_of(work, typeof(*mds), security.poll_dwork.work); 156 struct cxl_dev_state *cxlds = &mds->cxlds; 157 struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox; 158 159 mutex_lock(&cxl_mbox->mbox_mutex); 160 if (cxl_mbox_background_complete(cxlds)) { 161 mds->security.poll_tmo_secs = 0; 162 if (mds->security.sanitize_node) 163 sysfs_notify_dirent(mds->security.sanitize_node); 164 mds->security.sanitize_active = false; 165 166 dev_dbg(cxlds->dev, "Sanitization operation ended\n"); 167 } else { 168 int timeout = mds->security.poll_tmo_secs + 10; 169 170 mds->security.poll_tmo_secs = min(15 * 60, timeout); 171 schedule_delayed_work(&mds->security.poll_dwork, timeout * HZ); 172 } 173 mutex_unlock(&cxl_mbox->mbox_mutex); 174} 175 176/** 177 * __cxl_pci_mbox_send_cmd() - Execute a mailbox command 178 * @cxl_mbox: CXL mailbox context 179 * @mbox_cmd: Command to send to the memory device. 180 * 181 * Context: Any context. Expects mbox_mutex to be held. 182 * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success. 183 * Caller should check the return code in @mbox_cmd to make sure it 184 * succeeded. 185 * 186 * This is a generic form of the CXL mailbox send command thus only using the 187 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory 188 * devices, and perhaps other types of CXL devices may have further information 189 * available upon error conditions. Driver facilities wishing to send mailbox 190 * commands should use the wrapper command. 191 * 192 * The CXL spec allows for up to two mailboxes. The intention is for the primary 193 * mailbox to be OS controlled and the secondary mailbox to be used by system 194 * firmware. This allows the OS and firmware to communicate with the device and 195 * not need to coordinate with each other. The driver only uses the primary 196 * mailbox. 
197 */ 198static int __cxl_pci_mbox_send_cmd(struct cxl_mailbox *cxl_mbox, 199 struct cxl_mbox_cmd *mbox_cmd) 200{ 201 struct cxl_dev_state *cxlds = mbox_to_cxlds(cxl_mbox); 202 struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); 203 void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET; 204 struct device *dev = cxlds->dev; 205 u64 cmd_reg, status_reg; 206 size_t out_len; 207 int rc; 208 209 lockdep_assert_held(&cxl_mbox->mbox_mutex); 210 211 /* 212 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec. 213 * 1. Caller reads MB Control Register to verify doorbell is clear 214 * 2. Caller writes Command Register 215 * 3. Caller writes Command Payload Registers if input payload is non-empty 216 * 4. Caller writes MB Control Register to set doorbell 217 * 5. Caller either polls for doorbell to be clear or waits for interrupt if configured 218 * 6. Caller reads MB Status Register to fetch Return code 219 * 7. If command successful, Caller reads Command Register to get Payload Length 220 * 8. If output payload is non-empty, host reads Command Payload Registers 221 * 222 * Hardware is free to do whatever it wants before the doorbell is rung, 223 * and isn't allowed to change anything after it clears the doorbell. As 224 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can 225 * also happen in any order (though some orders might not make sense). 226 */ 227 228 /* #1 */ 229 if (cxl_doorbell_busy(cxlds)) { 230 u64 md_status = 231 readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); 232 233 cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, 234 "mailbox queue busy"); 235 return -EBUSY; 236 } 237 238 /* 239 * With sanitize polling, hardware might be done and the poller still 240 * not be in sync. Ensure no new command comes in until so. Keep the 241 * hardware semantics and only allow device health status. 
242 */ 243 if (mds->security.poll_tmo_secs > 0) { 244 if (mbox_cmd->opcode != CXL_MBOX_OP_GET_HEALTH_INFO) 245 return -EBUSY; 246 } 247 248 cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK, 249 mbox_cmd->opcode); 250 if (mbox_cmd->size_in) { 251 if (WARN_ON(!mbox_cmd->payload_in)) 252 return -EINVAL; 253 254 cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, 255 mbox_cmd->size_in); 256 memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in); 257 } 258 259 /* #2, #3 */ 260 writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET); 261 262 /* #4 */ 263 dev_dbg(dev, "Sending command: 0x%04x\n", mbox_cmd->opcode); 264 writel(CXLDEV_MBOX_CTRL_DOORBELL, 265 cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); 266 267 /* #5 */ 268 rc = cxl_pci_mbox_wait_for_doorbell(cxlds); 269 if (rc == -ETIMEDOUT) { 270 u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); 271 272 cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout"); 273 return rc; 274 } 275 276 /* #6 */ 277 status_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET); 278 mbox_cmd->return_code = 279 FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg); 280 281 /* 282 * Handle the background command in a synchronous manner. 283 * 284 * All other mailbox commands will serialize/queue on the mbox_mutex, 285 * which we currently hold. Furthermore this also guarantees that 286 * cxl_mbox_background_complete() checks are safe amongst each other, 287 * in that no new bg operation can occur in between. 288 * 289 * Background operations are timesliced in accordance with the nature 290 * of the command. In the event of timeout, the mailbox state is 291 * indeterminate until the next successful command submission and the 292 * driver can get back in sync with the hardware state. 
293 */ 294 if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) { 295 u64 bg_status_reg; 296 int i, timeout; 297 298 /* 299 * Sanitization is a special case which monopolizes the device 300 * and cannot be timesliced. Handle asynchronously instead, 301 * and allow userspace to poll(2) for completion. 302 */ 303 if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) { 304 if (mds->security.sanitize_active) 305 return -EBUSY; 306 307 /* give first timeout a second */ 308 timeout = 1; 309 mds->security.poll_tmo_secs = timeout; 310 mds->security.sanitize_active = true; 311 schedule_delayed_work(&mds->security.poll_dwork, 312 timeout * HZ); 313 dev_dbg(dev, "Sanitization operation started\n"); 314 goto success; 315 } 316 317 dev_dbg(dev, "Mailbox background operation (0x%04x) started\n", 318 mbox_cmd->opcode); 319 320 timeout = mbox_cmd->poll_interval_ms; 321 for (i = 0; i < mbox_cmd->poll_count; i++) { 322 if (rcuwait_wait_event_timeout(&cxl_mbox->mbox_wait, 323 cxl_mbox_background_complete(cxlds), 324 TASK_UNINTERRUPTIBLE, 325 msecs_to_jiffies(timeout)) > 0) 326 break; 327 } 328 329 if (!cxl_mbox_background_complete(cxlds)) { 330 dev_err(dev, "timeout waiting for background (%d ms)\n", 331 timeout * mbox_cmd->poll_count); 332 return -ETIMEDOUT; 333 } 334 335 bg_status_reg = readq(cxlds->regs.mbox + 336 CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); 337 mbox_cmd->return_code = 338 FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK, 339 bg_status_reg); 340 dev_dbg(dev, 341 "Mailbox background operation (0x%04x) completed\n", 342 mbox_cmd->opcode); 343 } 344 345 if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) { 346 dev_dbg(dev, "Mailbox operation had an error: %s\n", 347 cxl_mbox_cmd_rc2str(mbox_cmd)); 348 return 0; /* completed but caller must check return_code */ 349 } 350 351success: 352 /* #7 */ 353 cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET); 354 out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg); 355 356 /* #8 */ 357 if (out_len && 
mbox_cmd->payload_out) { 358 /* 359 * Sanitize the copy. If hardware misbehaves, out_len per the 360 * spec can actually be greater than the max allowed size (21 361 * bits available but spec defined 1M max). The caller also may 362 * have requested less data than the hardware supplied even 363 * within spec. 364 */ 365 size_t n; 366 367 n = min3(mbox_cmd->size_out, cxl_mbox->payload_size, out_len); 368 memcpy_fromio(mbox_cmd->payload_out, payload, n); 369 mbox_cmd->size_out = n; 370 } else { 371 mbox_cmd->size_out = 0; 372 } 373 374 return 0; 375} 376 377static int cxl_pci_mbox_send(struct cxl_mailbox *cxl_mbox, 378 struct cxl_mbox_cmd *cmd) 379{ 380 int rc; 381 382 mutex_lock(&cxl_mbox->mbox_mutex); 383 rc = __cxl_pci_mbox_send_cmd(cxl_mbox, cmd); 384 mutex_unlock(&cxl_mbox->mbox_mutex); 385 386 return rc; 387} 388 389static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail) 390{ 391 struct cxl_dev_state *cxlds = &mds->cxlds; 392 struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox; 393 const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET); 394 struct device *dev = cxlds->dev; 395 unsigned long timeout; 396 int irq, msgnum; 397 u64 md_status; 398 u32 ctrl; 399 400 timeout = jiffies + mbox_ready_timeout * HZ; 401 do { 402 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); 403 if (md_status & CXLMDEV_MBOX_IF_READY) 404 break; 405 if (msleep_interruptible(100)) 406 break; 407 } while (!time_after(jiffies, timeout)); 408 409 if (!(md_status & CXLMDEV_MBOX_IF_READY)) { 410 cxl_err(dev, md_status, "timeout awaiting mailbox ready"); 411 return -ETIMEDOUT; 412 } 413 414 /* 415 * A command may be in flight from a previous driver instance, 416 * think kexec, do one doorbell wait so that 417 * __cxl_pci_mbox_send_cmd() can assume that it is the only 418 * source for future doorbell busy events. 
419 */ 420 if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) { 421 cxl_err(dev, md_status, "timeout awaiting mailbox idle"); 422 return -ETIMEDOUT; 423 } 424 425 cxl_mbox->mbox_send = cxl_pci_mbox_send; 426 cxl_mbox->payload_size = 427 1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap); 428 429 /* 430 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register 431 * 432 * If the size is too small, mandatory commands will not work and so 433 * there's no point in going forward. If the size is too large, there's 434 * no harm is soft limiting it. 435 */ 436 cxl_mbox->payload_size = min_t(size_t, cxl_mbox->payload_size, SZ_1M); 437 if (cxl_mbox->payload_size < 256) { 438 dev_err(dev, "Mailbox is too small (%zub)", 439 cxl_mbox->payload_size); 440 return -ENXIO; 441 } 442 443 dev_dbg(dev, "Mailbox payload sized %zu", cxl_mbox->payload_size); 444 445 INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work); 446 447 /* background command interrupts are optional */ 448 if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) || !irq_avail) 449 return 0; 450 451 msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap); 452 irq = pci_irq_vector(to_pci_dev(cxlds->dev), msgnum); 453 if (irq < 0) 454 return 0; 455 456 if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq)) 457 return 0; 458 459 dev_dbg(cxlds->dev, "Mailbox interrupts enabled\n"); 460 /* enable background command mbox irq support */ 461 ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); 462 ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ; 463 writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); 464 465 return 0; 466} 467 468static void free_event_buf(void *buf) 469{ 470 kvfree(buf); 471} 472 473/* 474 * There is a single buffer for reading event logs from the mailbox. All logs 475 * share this buffer protected by the mds->event_log_lock. 
476 */ 477static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds) 478{ 479 struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; 480 struct cxl_get_event_payload *buf; 481 482 buf = kvmalloc(cxl_mbox->payload_size, GFP_KERNEL); 483 if (!buf) 484 return -ENOMEM; 485 mds->event.buf = buf; 486 487 return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf); 488} 489 490static bool cxl_alloc_irq_vectors(struct pci_dev *pdev) 491{ 492 int nvecs; 493 494 /* 495 * Per CXL 3.0 3.1.1 CXL.io Endpoint a function on a CXL device must 496 * not generate INTx messages if that function participates in 497 * CXL.cache or CXL.mem. 498 * 499 * Additionally pci_alloc_irq_vectors() handles calling 500 * pci_free_irq_vectors() automatically despite not being called 501 * pcim_*. See pci_setup_msi_context(). 502 */ 503 nvecs = pci_alloc_irq_vectors(pdev, 1, CXL_PCI_DEFAULT_MAX_VECTORS, 504 PCI_IRQ_MSIX | PCI_IRQ_MSI); 505 if (nvecs < 1) { 506 dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs); 507 return false; 508 } 509 return true; 510} 511 512static irqreturn_t cxl_event_thread(int irq, void *id) 513{ 514 struct cxl_dev_id *dev_id = id; 515 struct cxl_dev_state *cxlds = dev_id->cxlds; 516 struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); 517 u32 status; 518 519 do { 520 /* 521 * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status; 522 * ignore the reserved upper 32 bits 523 */ 524 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET); 525 /* Ignore logs unknown to the driver */ 526 status &= CXLDEV_EVENT_STATUS_ALL; 527 if (!status) 528 break; 529 cxl_mem_get_event_records(mds, status); 530 cond_resched(); 531 } while (status); 532 533 return IRQ_HANDLED; 534} 535 536static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting) 537{ 538 struct pci_dev *pdev = to_pci_dev(cxlds->dev); 539 int irq; 540 541 if (FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) != CXL_INT_MSI_MSIX) 542 return -ENXIO; 543 544 irq = 
pci_irq_vector(pdev, 545 FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting)); 546 if (irq < 0) 547 return irq; 548 549 return cxl_request_irq(cxlds, irq, cxl_event_thread); 550} 551 552static int cxl_event_get_int_policy(struct cxl_memdev_state *mds, 553 struct cxl_event_interrupt_policy *policy) 554{ 555 struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; 556 struct cxl_mbox_cmd mbox_cmd = { 557 .opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY, 558 .payload_out = policy, 559 .size_out = sizeof(*policy), 560 }; 561 int rc; 562 563 rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); 564 if (rc < 0) 565 dev_err(mds->cxlds.dev, 566 "Failed to get event interrupt policy : %d", rc); 567 568 return rc; 569} 570 571static int cxl_event_config_msgnums(struct cxl_memdev_state *mds, 572 struct cxl_event_interrupt_policy *policy) 573{ 574 struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; 575 struct cxl_mbox_cmd mbox_cmd; 576 int rc; 577 578 *policy = (struct cxl_event_interrupt_policy) { 579 .info_settings = CXL_INT_MSI_MSIX, 580 .warn_settings = CXL_INT_MSI_MSIX, 581 .failure_settings = CXL_INT_MSI_MSIX, 582 .fatal_settings = CXL_INT_MSI_MSIX, 583 }; 584 585 mbox_cmd = (struct cxl_mbox_cmd) { 586 .opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY, 587 .payload_in = policy, 588 .size_in = sizeof(*policy), 589 }; 590 591 rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); 592 if (rc < 0) { 593 dev_err(mds->cxlds.dev, "Failed to set event interrupt policy : %d", 594 rc); 595 return rc; 596 } 597 598 /* Retrieve final interrupt settings */ 599 return cxl_event_get_int_policy(mds, policy); 600} 601 602static int cxl_event_irqsetup(struct cxl_memdev_state *mds) 603{ 604 struct cxl_dev_state *cxlds = &mds->cxlds; 605 struct cxl_event_interrupt_policy policy; 606 int rc; 607 608 rc = cxl_event_config_msgnums(mds, &policy); 609 if (rc) 610 return rc; 611 612 rc = cxl_event_req_irq(cxlds, policy.info_settings); 613 if (rc) { 614 dev_err(cxlds->dev, "Failed to get interrupt for event Info log\n"); 615 
return rc; 616 } 617 618 rc = cxl_event_req_irq(cxlds, policy.warn_settings); 619 if (rc) { 620 dev_err(cxlds->dev, "Failed to get interrupt for event Warn log\n"); 621 return rc; 622 } 623 624 rc = cxl_event_req_irq(cxlds, policy.failure_settings); 625 if (rc) { 626 dev_err(cxlds->dev, "Failed to get interrupt for event Failure log\n"); 627 return rc; 628 } 629 630 rc = cxl_event_req_irq(cxlds, policy.fatal_settings); 631 if (rc) { 632 dev_err(cxlds->dev, "Failed to get interrupt for event Fatal log\n"); 633 return rc; 634 } 635 636 return 0; 637} 638 639static bool cxl_event_int_is_fw(u8 setting) 640{ 641 u8 mode = FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting); 642 643 return mode == CXL_INT_FW; 644} 645 646static int cxl_event_config(struct pci_host_bridge *host_bridge, 647 struct cxl_memdev_state *mds, bool irq_avail) 648{ 649 struct cxl_event_interrupt_policy policy; 650 int rc; 651 652 /* 653 * When BIOS maintains CXL error reporting control, it will process 654 * event records. Only one agent can do so. 655 */ 656 if (!host_bridge->native_cxl_error) 657 return 0; 658 659 if (!irq_avail) { 660 dev_info(mds->cxlds.dev, "No interrupt support, disable event processing.\n"); 661 return 0; 662 } 663 664 rc = cxl_event_get_int_policy(mds, &policy); 665 if (rc) 666 return rc; 667 668 if (cxl_event_int_is_fw(policy.info_settings) || 669 cxl_event_int_is_fw(policy.warn_settings) || 670 cxl_event_int_is_fw(policy.failure_settings) || 671 cxl_event_int_is_fw(policy.fatal_settings)) { 672 dev_err(mds->cxlds.dev, 673 "FW still in control of Event Logs despite _OSC settings\n"); 674 return -EBUSY; 675 } 676 677 rc = cxl_mem_alloc_event_buf(mds); 678 if (rc) 679 return rc; 680 681 rc = cxl_event_irqsetup(mds); 682 if (rc) 683 return rc; 684 685 cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL); 686 687 return 0; 688} 689 690static int cxl_pci_type3_init_mailbox(struct cxl_dev_state *cxlds) 691{ 692 int rc; 693 694 /* 695 * Fail the init if there's no mailbox. 
For a type3 this is out of spec. 696 */ 697 if (!cxlds->reg_map.device_map.mbox.valid) 698 return -ENODEV; 699 700 rc = cxl_mailbox_init(&cxlds->cxl_mbox, cxlds->dev); 701 if (rc) 702 return rc; 703 704 return 0; 705} 706 707static ssize_t rcd_pcie_cap_emit(struct device *dev, u16 offset, char *buf, size_t width) 708{ 709 struct cxl_dev_state *cxlds = dev_get_drvdata(dev); 710 struct cxl_memdev *cxlmd = cxlds->cxlmd; 711 struct device *root_dev; 712 struct cxl_dport *dport; 713 struct cxl_port *root __free(put_cxl_port) = 714 cxl_mem_find_port(cxlmd, &dport); 715 716 if (!root) 717 return -ENXIO; 718 719 root_dev = root->uport_dev; 720 if (!root_dev) 721 return -ENXIO; 722 723 if (!dport->regs.rcd_pcie_cap) 724 return -ENXIO; 725 726 guard(device)(root_dev); 727 if (!root_dev->driver) 728 return -ENXIO; 729 730 switch (width) { 731 case 2: 732 return sysfs_emit(buf, "%#x\n", 733 readw(dport->regs.rcd_pcie_cap + offset)); 734 case 4: 735 return sysfs_emit(buf, "%#x\n", 736 readl(dport->regs.rcd_pcie_cap + offset)); 737 default: 738 return -EINVAL; 739 } 740} 741 742static ssize_t rcd_link_cap_show(struct device *dev, 743 struct device_attribute *attr, char *buf) 744{ 745 return rcd_pcie_cap_emit(dev, PCI_EXP_LNKCAP, buf, sizeof(u32)); 746} 747static DEVICE_ATTR_RO(rcd_link_cap); 748 749static ssize_t rcd_link_ctrl_show(struct device *dev, 750 struct device_attribute *attr, char *buf) 751{ 752 return rcd_pcie_cap_emit(dev, PCI_EXP_LNKCTL, buf, sizeof(u16)); 753} 754static DEVICE_ATTR_RO(rcd_link_ctrl); 755 756static ssize_t rcd_link_status_show(struct device *dev, 757 struct device_attribute *attr, char *buf) 758{ 759 return rcd_pcie_cap_emit(dev, PCI_EXP_LNKSTA, buf, sizeof(u16)); 760} 761static DEVICE_ATTR_RO(rcd_link_status); 762 763static struct attribute *cxl_rcd_attrs[] = { 764 &dev_attr_rcd_link_cap.attr, 765 &dev_attr_rcd_link_ctrl.attr, 766 &dev_attr_rcd_link_status.attr, 767 NULL 768}; 769 770static umode_t cxl_rcd_visible(struct kobject *kobj, struct 
attribute *a, int n) 771{ 772 struct device *dev = kobj_to_dev(kobj); 773 struct pci_dev *pdev = to_pci_dev(dev); 774 775 if (is_cxl_restricted(pdev)) 776 return a->mode; 777 778 return 0; 779} 780 781static struct attribute_group cxl_rcd_group = { 782 .attrs = cxl_rcd_attrs, 783 .is_visible = cxl_rcd_visible, 784}; 785__ATTRIBUTE_GROUPS(cxl_rcd); 786 787static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) 788{ 789 struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); 790 struct cxl_dpa_info range_info = { 0 }; 791 struct cxl_memdev_state *mds; 792 struct cxl_dev_state *cxlds; 793 struct cxl_register_map map; 794 struct cxl_memdev *cxlmd; 795 int rc, pmu_count; 796 unsigned int i; 797 bool irq_avail; 798 u16 dvsec; 799 800 rc = pcim_enable_device(pdev); 801 if (rc) 802 return rc; 803 pci_set_master(pdev); 804 805 dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, 806 PCI_DVSEC_CXL_DEVICE); 807 if (!dvsec) 808 pci_warn(pdev, "Device DVSEC not present, skip CXL.mem init\n"); 809 810 mds = cxl_memdev_state_create(&pdev->dev, pci_get_dsn(pdev), dvsec); 811 if (IS_ERR(mds)) 812 return PTR_ERR(mds); 813 cxlds = &mds->cxlds; 814 pci_set_drvdata(pdev, cxlds); 815 816 cxlds->rcd = is_cxl_restricted(pdev); 817 818 rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); 819 if (rc) 820 return rc; 821 822 rc = cxl_map_device_regs(&map, &cxlds->regs); 823 if (rc) 824 return rc; 825 826 /* 827 * If the component registers can't be found, the cxl_pci driver may 828 * still be useful for management functions so don't return an error. 
829 */ 830 rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, 831 &cxlds->reg_map); 832 if (rc) 833 dev_warn(&pdev->dev, "No component registers (%d)\n", rc); 834 else if (!cxlds->reg_map.component_map.ras.valid) 835 dev_dbg(&pdev->dev, "RAS registers not found\n"); 836 837 rc = cxl_pci_type3_init_mailbox(cxlds); 838 if (rc) 839 return rc; 840 841 rc = cxl_await_media_ready(cxlds); 842 if (rc == 0) 843 cxlds->media_ready = true; 844 else 845 dev_warn(&pdev->dev, "Media not active (%d)\n", rc); 846 847 irq_avail = cxl_alloc_irq_vectors(pdev); 848 849 rc = cxl_pci_setup_mailbox(mds, irq_avail); 850 if (rc) 851 return rc; 852 853 rc = cxl_enumerate_cmds(mds); 854 if (rc) 855 return rc; 856 857 rc = cxl_set_timestamp(mds); 858 if (rc) 859 return rc; 860 861 rc = cxl_poison_state_init(mds); 862 if (rc) 863 return rc; 864 865 rc = cxl_dev_state_identify(mds); 866 if (rc) 867 return rc; 868 869 rc = cxl_mem_dpa_fetch(mds, &range_info); 870 if (rc) 871 return rc; 872 873 rc = cxl_dpa_setup(cxlds, &range_info); 874 if (rc) 875 return rc; 876 877 rc = devm_cxl_setup_features(cxlds); 878 if (rc) 879 dev_dbg(&pdev->dev, "No CXL Features discovered\n"); 880 881 cxlmd = devm_cxl_add_memdev(cxlds, NULL); 882 if (IS_ERR(cxlmd)) 883 return PTR_ERR(cxlmd); 884 885 rc = devm_cxl_setup_fw_upload(&pdev->dev, mds); 886 if (rc) 887 return rc; 888 889 rc = devm_cxl_sanitize_setup_notifier(&pdev->dev, cxlmd); 890 if (rc) 891 return rc; 892 893 rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd); 894 if (rc) 895 dev_dbg(&pdev->dev, "No CXL FWCTL setup\n"); 896 897 pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU); 898 if (pmu_count < 0) 899 return pmu_count; 900 901 for (i = 0; i < pmu_count; i++) { 902 struct cxl_pmu_regs pmu_regs; 903 904 rc = cxl_find_regblock_instance(pdev, CXL_REGLOC_RBI_PMU, &map, i); 905 if (rc) { 906 dev_dbg(&pdev->dev, "Could not find PMU regblock\n"); 907 break; 908 } 909 910 rc = cxl_map_pmu_regs(&map, &pmu_regs); 911 if (rc) { 912 dev_dbg(&pdev->dev, "Could 
not map PMU regs\n"); 913 break; 914 } 915 916 rc = devm_cxl_pmu_add(cxlds->dev, &pmu_regs, cxlmd->id, i, CXL_PMU_MEMDEV); 917 if (rc) { 918 dev_dbg(&pdev->dev, "Could not add PMU instance\n"); 919 break; 920 } 921 } 922 923 rc = cxl_event_config(host_bridge, mds, irq_avail); 924 if (rc) 925 return rc; 926 927 pci_save_state(pdev); 928 929 return rc; 930} 931 932static const struct pci_device_id cxl_mem_pci_tbl[] = { 933 /* PCI class code for CXL.mem Type-3 Devices */ 934 { PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)}, 935 { /* terminate list */ }, 936}; 937MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl); 938 939static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev) 940{ 941 struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); 942 struct cxl_memdev *cxlmd = cxlds->cxlmd; 943 struct device *dev = &cxlmd->dev; 944 945 dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n", 946 dev_name(dev)); 947 pci_restore_state(pdev); 948 if (device_attach(dev) <= 0) 949 return PCI_ERS_RESULT_DISCONNECT; 950 return PCI_ERS_RESULT_RECOVERED; 951} 952 953static void cxl_error_resume(struct pci_dev *pdev) 954{ 955 struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); 956 struct cxl_memdev *cxlmd = cxlds->cxlmd; 957 struct device *dev = &cxlmd->dev; 958 959 dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev), 960 dev->driver ? "successful" : "failed"); 961} 962 963static int cxl_endpoint_decoder_clear_reset_flags(struct device *dev, void *data) 964{ 965 struct cxl_endpoint_decoder *cxled; 966 967 if (!is_endpoint_decoder(dev)) 968 return 0; 969 970 cxled = to_cxl_endpoint_decoder(dev); 971 cxled->cxld.flags &= ~CXL_DECODER_F_RESET_MASK; 972 973 return 0; 974} 975 976static void cxl_reset_done(struct pci_dev *pdev) 977{ 978 struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); 979 struct cxl_memdev *cxlmd = cxlds->cxlmd; 980 struct device *dev = &pdev->dev; 981 982 /* 983 * FLR does not expect to touch the HDM decoders and related 984 * registers. 
SBR, however, will wipe all device configurations. 985 * Issue a warning if there was an active decoder before the reset 986 * that no longer exists. 987 */ 988 guard(device)(&cxlmd->dev); 989 if (!cxlmd->dev.driver) 990 return; 991 992 if (cxlmd->endpoint && 993 cxl_endpoint_decoder_reset_detected(cxlmd->endpoint)) { 994 device_for_each_child(&cxlmd->endpoint->dev, NULL, 995 cxl_endpoint_decoder_clear_reset_flags); 996 997 dev_crit(dev, "SBR happened without memory regions removal.\n"); 998 dev_crit(dev, "System may be unstable if regions hosted system memory.\n"); 999 add_taint(TAINT_USER, LOCKDEP_STILL_OK); 1000 } 1001} 1002 1003static const struct pci_error_handlers cxl_error_handlers = { 1004 .error_detected = cxl_error_detected, 1005 .slot_reset = cxl_slot_reset, 1006 .resume = cxl_error_resume, 1007 .cor_error_detected = cxl_cor_error_detected, 1008 .reset_done = cxl_reset_done, 1009}; 1010 1011static struct pci_driver cxl_pci_driver = { 1012 .name = KBUILD_MODNAME, 1013 .id_table = cxl_mem_pci_tbl, 1014 .probe = cxl_pci_probe, 1015 .err_handler = &cxl_error_handlers, 1016 .dev_groups = cxl_rcd_groups, 1017 .driver = { 1018 .probe_type = PROBE_PREFER_ASYNCHRONOUS, 1019 }, 1020}; 1021 1022#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0) 1023static void cxl_handle_cper_event(enum cxl_event_type ev_type, 1024 struct cxl_cper_event_rec *rec) 1025{ 1026 struct cper_cxl_event_devid *device_id = &rec->hdr.device_id; 1027 struct pci_dev *pdev __free(pci_dev_put) = NULL; 1028 enum cxl_event_log_type log_type; 1029 struct cxl_dev_state *cxlds; 1030 unsigned int devfn; 1031 u32 hdr_flags; 1032 1033 pr_debug("CPER event %d for device %u:%u:%u.%u\n", ev_type, 1034 device_id->segment_num, device_id->bus_num, 1035 device_id->device_num, device_id->func_num); 1036 1037 devfn = PCI_DEVFN(device_id->device_num, device_id->func_num); 1038 pdev = pci_get_domain_bus_and_slot(device_id->segment_num, 1039 device_id->bus_num, devfn); 1040 if (!pdev) 1041 return; 1042 1043 
guard(device)(&pdev->dev); 1044 if (pdev->driver != &cxl_pci_driver) 1045 return; 1046 1047 cxlds = pci_get_drvdata(pdev); 1048 if (!cxlds) 1049 return; 1050 1051 /* Fabricate a log type */ 1052 hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags); 1053 log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags); 1054 1055 cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type, 1056 &uuid_null, &rec->event); 1057} 1058 1059static void cxl_cper_work_fn(struct work_struct *work) 1060{ 1061 struct cxl_cper_work_data wd; 1062 1063 while (cxl_cper_kfifo_get(&wd)) 1064 cxl_handle_cper_event(wd.event_type, &wd.rec); 1065} 1066static DECLARE_WORK(cxl_cper_work, cxl_cper_work_fn); 1067 1068static int __init cxl_pci_driver_init(void) 1069{ 1070 int rc; 1071 1072 rc = pci_register_driver(&cxl_pci_driver); 1073 if (rc) 1074 return rc; 1075 1076 rc = cxl_cper_register_work(&cxl_cper_work); 1077 if (rc) 1078 pci_unregister_driver(&cxl_pci_driver); 1079 1080 return rc; 1081} 1082 1083static void __exit cxl_pci_driver_exit(void) 1084{ 1085 cxl_cper_unregister_work(&cxl_cper_work); 1086 cancel_work_sync(&cxl_cper_work); 1087 pci_unregister_driver(&cxl_pci_driver); 1088} 1089 1090module_init(cxl_pci_driver_init); 1091module_exit(cxl_pci_driver_exit); 1092MODULE_DESCRIPTION("CXL: PCI manageability"); 1093MODULE_LICENSE("GPL v2"); 1094MODULE_IMPORT_NS("CXL");