Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe: Fix missing runtime outer protection for ggtt_remove_node

Defer the ggtt node removal to a thread if runtime_pm is not active.

The ggtt node removal can be called from multiple places, including
places where we cannot protect with outer callers and places we are
within other locks. So, try to grab the runtime reference if the
device is already active, otherwise defer the removal to a separate
thread from where we are sure we can wake the device up.

v2: - use xe wq instead of system wq (Matt and CI)
- Avoid GFP_KERNEL to be future proof since this removal can
be called from outside our drivers and we don't want to block
if atomic is needed. (Brost)
v3: amend forgot chunk declaring xe_device.
v4: Use a xe_ggtt_region to encapsulate the node and remova info,
wihtout the need for any memory allocation at runtime.
v5: Actually fill the delayed_removal.invalidate (Brost)
v6: - Ensure that ggtt_region is not freed before work finishes (Auld)
- Own wq to ensures that the queued works are flushed before
ggtt_fini (Brost)
v7: also free ggtt_region on early !bound return (Auld)
v8: Address the null deref (CI)
v9: Based on the new xe_ggtt_node for the proper care of the lifetime
of the object.
v10: Redo the lost v5 change. (Brost)
v11: Simplify the invalidate_on_remove (Lucas)

Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-12-rodrigo.vivi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

+73 -39
+67 -39
drivers/gpu/drm/xe/xe_ggtt.c
··· 161 161 { 162 162 struct xe_ggtt *ggtt = arg; 163 163 164 + destroy_workqueue(ggtt->wq); 164 165 mutex_destroy(&ggtt->lock); 165 166 drm_mm_takedown(&ggtt->mm); 166 167 } ··· 243 242 else 244 243 ggtt->pt_ops = &xelp_pt_ops; 245 244 245 + ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, 0); 246 + 246 247 drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), 247 248 ggtt->size - xe_wopcm_size(xe)); 248 249 mutex_init(&ggtt->lock); ··· 277 274 278 275 xe_ggtt_invalidate(ggtt); 279 276 mutex_unlock(&ggtt->lock); 277 + } 278 + 279 + static void ggtt_node_remove(struct xe_ggtt_node *node) 280 + { 281 + struct xe_ggtt *ggtt = node->ggtt; 282 + struct xe_device *xe = tile_to_xe(ggtt->tile); 283 + bool bound; 284 + int idx; 285 + 286 + if (!node || !node->ggtt) 287 + return; 288 + 289 + bound = drm_dev_enter(&xe->drm, &idx); 290 + 291 + mutex_lock(&ggtt->lock); 292 + if (bound) 293 + xe_ggtt_clear(ggtt, node->base.start, node->base.size); 294 + drm_mm_remove_node(&node->base); 295 + node->base.size = 0; 296 + mutex_unlock(&ggtt->lock); 297 + 298 + if (!bound) 299 + goto free_node; 300 + 301 + if (node->invalidate_on_remove) 302 + xe_ggtt_invalidate(ggtt); 303 + 304 + drm_dev_exit(idx); 305 + 306 + free_node: 307 + xe_ggtt_node_fini(node); 308 + } 309 + 310 + static void ggtt_node_remove_work_func(struct work_struct *work) 311 + { 312 + struct xe_ggtt_node *node = container_of(work, typeof(*node), 313 + delayed_removal_work); 314 + struct xe_device *xe = tile_to_xe(node->ggtt->tile); 315 + 316 + xe_pm_runtime_get(xe); 317 + ggtt_node_remove(node); 318 + xe_pm_runtime_put(xe); 319 + } 320 + 321 + /** 322 + * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT 323 + * @node: the &xe_ggtt_node to be removed 324 + * @invalidate: if node needs invalidation upon removal 325 + */ 326 + void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate) 327 + { 328 + struct xe_ggtt *ggtt = node->ggtt; 329 + struct xe_device *xe = tile_to_xe(ggtt->tile); 330 + 331 + node->invalidate_on_remove = invalidate; 332 + 333 + if (xe_pm_runtime_get_if_active(xe)) { 334 + ggtt_node_remove(node); 335 + xe_pm_runtime_put(xe); 336 + } else { 337 + queue_work(ggtt->wq, &node->delayed_removal_work); 338 + } 280 339 } 281 340 282 341 /** ··· 536 471 if (!node) 537 472 return ERR_PTR(-ENOMEM); 538 473 474 + INIT_WORK(&node->delayed_removal_work, ggtt_node_remove_work_func); 539 475 node->ggtt = ggtt; 476 + 540 477 return node; 541 478 } 542 479 ··· 553 486 void xe_ggtt_node_fini(struct xe_ggtt_node *node) 554 487 { 555 488 kfree(node); 556 - } 557 - 558 - /** 559 - * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT 560 - * @node: the &xe_ggtt_node to be removed 561 - * @invalidate: if node needs invalidation upon removal 562 - */ 563 - void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate) 564 - { 565 - struct xe_ggtt *ggtt = node->ggtt; 566 - struct xe_device *xe = tile_to_xe(ggtt->tile); 567 - bool bound; 568 - int idx; 569 - 570 - if (!node || !node->ggtt) 571 - return; 572 - 573 - bound = drm_dev_enter(&xe->drm, &idx); 574 - if (bound) 575 - xe_pm_runtime_get_noresume(xe); 576 - 577 - mutex_lock(&ggtt->lock); 578 - if (bound) 579 - xe_ggtt_clear(ggtt, node->base.start, node->base.size); 580 - drm_mm_remove_node(&node->base); 581 - node->base.size = 0; 582 - mutex_unlock(&ggtt->lock); 583 - 584 - if (!bound) 585 - goto free_node; 586 - 587 - if (invalidate) 588 - xe_ggtt_invalidate(ggtt); 589 - 590 - xe_pm_runtime_put(xe); 591 - drm_dev_exit(idx); 592 - 593 - free_node: 594 - xe_ggtt_node_fini(node); 595 489 } 596 490 597 491 /**
+6
drivers/gpu/drm/xe/xe_ggtt_types.h
··· 47 47 struct drm_mm mm; 48 48 /** @access_count: counts GGTT writes */ 49 49 unsigned int access_count; 50 + /** @wq: Dedicated unordered work queue to process node removals */ 51 + struct workqueue_struct *wq; 50 52 }; 51 53 52 54 /** ··· 63 61 struct xe_ggtt *ggtt; 64 62 /** @base: A drm_mm_node */ 65 63 struct drm_mm_node base; 64 + /** @delayed_removal_work: The work struct for the delayed removal */ 65 + struct work_struct delayed_removal_work; 66 + /** @invalidate_on_remove: If it needs invalidation upon removal */ 67 + bool invalidate_on_remove; 66 68 }; 67 69 68 70 /**