Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

accel/qaic: Expand DRM device lifecycle

Currently the QAIC DRM device registers itself when the MHI QAIC_CONTROL
channel becomes available. This is when the device is able to process
workloads. However, the DRM driver also provides the debugfs interface
bootlog for the device. If the device fails to boot to the QSM (which
brings up the MHI QAIC_CONTROL channel), the bootlog won't be available for
debugging why it failed to boot.

Change when the DRM device registers itself from when QAIC_CONTROL is
available to when the card is first probed on the PCI bus. Additionally,
make the DRM driver persist through reset/error cases so the driver
doesn't have to be reloaded to access the card again. Send
KOBJ_ONLINE/OFFLINE uevents so userspace can know when the DRM device is
ready to handle requests.

Signed-off-by: Carl Vanderlip <quic_carlv@quicinc.com>
Reviewed-by: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy@quicinc.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117174337.20174-3-quic_jhugo@quicinc.com

authored by

Carl Vanderlip and committed by
Jeffrey Hugo
5f0a0ebc 44df9a2a

+27 -30
+8 -1
Documentation/accel/qaic/qaic.rst
··· 93 93 uAPI 94 94 ==== 95 95 96 + QAIC creates an accel device per physical PCIe device. This accel device exists 97 + for as long as the PCIe device is known to Linux. 98 + 99 + The PCIe device may not be in the state to accept requests from userspace at 100 + all times. QAIC will trigger KOBJ_ONLINE/OFFLINE uevents to advertise when the 101 + device can accept requests (ONLINE) and when the device is no longer accepting 102 + requests (OFFLINE) because of a reset or other state transition. 103 + 96 104 QAIC defines a number of driver specific IOCTLs as part of the userspace API. 97 - This section describes those APIs. 98 105 99 106 DRM_IOCTL_QAIC_MANAGE 100 107 This IOCTL allows userspace to send a NNC request to the QSM. The call will
+1 -1
drivers/accel/qaic/mhi_controller.c
··· 469 469 pci_err(qdev->pdev, "Fatal error received from device. Attempting to recover\n"); 470 470 /* this event occurs in non-atomic context */ 471 471 if (reason == MHI_CB_SYS_ERROR) 472 - qaic_dev_reset_clean_local_state(qdev, true); 472 + qaic_dev_reset_clean_local_state(qdev); 473 473 } 474 474 475 475 static int mhi_reset_and_async_power_up(struct mhi_controller *mhi_cntrl)
+1 -1
drivers/accel/qaic/qaic.h
··· 283 283 void release_dbc(struct qaic_device *qdev, u32 dbc_id); 284 284 285 285 void wake_all_cntl(struct qaic_device *qdev); 286 - void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset); 286 + void qaic_dev_reset_clean_local_state(struct qaic_device *qdev); 287 287 288 288 struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); 289 289
+17 -27
drivers/accel/qaic/qaic_drv.c
··· 8 8 #include <linux/idr.h> 9 9 #include <linux/interrupt.h> 10 10 #include <linux/list.h> 11 + #include <linux/kobject.h> 11 12 #include <linux/kref.h> 12 13 #include <linux/mhi.h> 13 14 #include <linux/module.h> ··· 43 42 MODULE_PARM_DESC(datapath_polling, "Operate the datapath in polling mode"); 44 43 static bool link_up; 45 44 static DEFINE_IDA(qaic_usrs); 46 - 47 - static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id); 48 - static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id); 49 45 50 46 static void free_usr(struct kref *kref) 51 47 { ··· 181 183 182 184 qddev->partition_id = partition_id; 183 185 184 - /* 185 - * drm_dev_unregister() sets the driver data to NULL and 186 - * drm_dev_register() does not update the driver data. During a SOC 187 - * reset drm dev is unregistered and registered again leaving the 188 - * driver data to NULL. 189 - */ 190 - dev_set_drvdata(to_accel_kdev(qddev), drm->accel); 191 186 ret = drm_dev_register(drm, 0); 192 187 if (ret) 193 188 pci_dbg(qdev->pdev, "drm_dev_register failed %d\n", ret); ··· 194 203 struct drm_device *drm = to_drm(qddev); 195 204 struct qaic_user *usr; 196 205 197 - drm_dev_get(drm); 198 206 drm_dev_unregister(drm); 199 207 qddev->partition_id = 0; 200 208 /* ··· 222 232 mutex_lock(&qddev->users_mutex); 223 233 } 224 234 mutex_unlock(&qddev->users_mutex); 225 - drm_dev_put(drm); 226 235 } 227 236 228 237 static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) ··· 243 254 244 255 qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev)); 245 256 246 - qdev->dev_state = QAIC_ONLINE; 247 - 248 257 dev_set_drvdata(&mhi_dev->dev, qdev); 249 258 qdev->cntl_ch = mhi_dev; 250 259 ··· 252 265 return ret; 253 266 } 254 267 268 + qdev->dev_state = QAIC_BOOT; 255 269 ret = get_cntl_version(qdev, NULL, &major, &minor); 256 270 if (ret || major != CNTL_MAJOR || minor > CNTL_MINOR) { 257 271 pci_err(qdev->pdev, "%s: Control protocol 
version (%d.%d) not supported. Supported version is (%d.%d). Ret: %d\n", ··· 260 272 ret = -EINVAL; 261 273 goto close_control; 262 274 } 263 - 264 - ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION); 275 + qdev->dev_state = QAIC_ONLINE; 276 + kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_ONLINE); 265 277 266 278 return ret; 267 279 ··· 279 291 { 280 292 int i; 281 293 294 + kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_OFFLINE); 282 295 qdev->dev_state = QAIC_OFFLINE; 283 296 /* wake up any waiters to avoid waiting for timeouts at sync */ 284 297 wake_all_cntl(qdev); ··· 288 299 synchronize_srcu(&qdev->dev_lock); 289 300 } 290 301 291 - void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset) 302 + void qaic_dev_reset_clean_local_state(struct qaic_device *qdev) 292 303 { 293 304 int i; 294 305 295 306 qaic_notify_reset(qdev); 296 307 297 - /* remove drmdevs to prevent new users from coming in */ 298 - qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION); 299 - 300 308 /* start tearing things down */ 301 309 for (i = 0; i < qdev->num_dbc; ++i) 302 310 release_dbc(qdev, i); 303 - 304 - if (exit_reset) 305 - qdev->dev_state = QAIC_ONLINE; 306 311 } 307 312 308 313 static void cleanup_qdev(struct qaic_device *qdev) ··· 321 338 if (!qdev) 322 339 return NULL; 323 340 341 + qdev->dev_state = QAIC_OFFLINE; 324 342 if (id->device == PCI_DEV_AIC100) { 325 343 qdev->num_dbc = 16; 326 344 qdev->dbc = devm_kcalloc(&pdev->dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL); ··· 483 499 goto cleanup_qdev; 484 500 } 485 501 502 + ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION); 503 + if (ret) 504 + goto cleanup_qdev; 505 + 486 506 qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq, 487 507 qdev->single_msi); 488 508 if (IS_ERR(qdev->mhi_cntrl)) { 489 509 ret = PTR_ERR(qdev->mhi_cntrl); 490 - goto cleanup_qdev; 510 + goto cleanup_drm_dev; 491 511 } 492 512 493 513 return 0; 494 514 515 + cleanup_drm_dev: 
516 + qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION); 495 517 cleanup_qdev: 496 518 cleanup_qdev(qdev); 497 519 return ret; ··· 510 520 if (!qdev) 511 521 return; 512 522 513 - qaic_dev_reset_clean_local_state(qdev, false); 523 + qaic_dev_reset_clean_local_state(qdev); 524 + qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION); 514 525 qaic_mhi_free_controller(qdev->mhi_cntrl, link_up); 515 526 cleanup_qdev(qdev); 516 527 } ··· 534 543 535 544 qaic_notify_reset(qdev); 536 545 qaic_mhi_start_reset(qdev->mhi_cntrl); 537 - qaic_dev_reset_clean_local_state(qdev, false); 546 + qaic_dev_reset_clean_local_state(qdev); 538 547 } 539 548 540 549 static void qaic_pci_reset_done(struct pci_dev *pdev) 541 550 { 542 551 struct qaic_device *qdev = pci_get_drvdata(pdev); 543 552 544 - qdev->dev_state = QAIC_ONLINE; 545 553 qaic_mhi_reset_done(qdev->mhi_cntrl); 546 554 } 547 555