Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'drm-next-2025-10-11-1' of https://gitlab.freedesktop.org/drm/kernel

Pull more drm fixes from Dave Airlie:
"Just the follow up fixes for rc1 from the next branch, amdgpu and xe
mostly with a single v3d fix in there.

amdgpu:
- DC DCE6 fixes
- GPU reset fixes
- Secure display messaging cleanup
- MES fix
- GPUVM locking fixes
- PMFW messaging cleanup
- PCI US/DS switch handling fix
- VCN queue reset fix
- DC FPU handling fix
- DCN 3.5 fix
- DC mirroring fix

amdkfd:
- Fix kfd process ref leak
- mmap write lock handling fix
- Fix comments in IOCTL

xe:
- Fix build with clang 16
- Fix handling of invalid configfs syntax usage and spell out the
expected syntax in the documentation
- Do not try late bind firmware when running as VF since it shouldn't
handle firmware loading
- Fix idle assertion for local BOs
- Fix uninitialized variable for late binding
- Do not require perfmon_capable to expose free memory at page
granularity. Handle it like other drm drivers do
- Fix lock handling on suspend error path
- Fix I2C controller resume after S3

v3d:
- fix fence locking"

* tag 'drm-next-2025-10-11-1' of https://gitlab.freedesktop.org/drm/kernel: (34 commits)
drm/amd/display: Incorrect Mirror Cositing
drm/amd/display: Enable Dynamic DTBCLK Switch
drm/amdgpu: Report individual reset error
drm/amdgpu: partially revert "revert to old status lock handling v3"
drm/amd/display: Fix unsafe uses of kernel mode FPU
drm/amd/pm: Disable VCN queue reset on SMU v13.0.6 due to regression
drm/amdgpu: Fix general protection fault in amdgpu_vm_bo_reset_state_machine
drm/amdgpu: Check swus/ds for switch state save
drm/amdkfd: Fix two comments in kfd_ioctl.h
drm/amd/pm: Avoid interface mismatch messaging
drm/amdgpu: Merge amdgpu_vm_set_pasid into amdgpu_vm_init
drm/amd/amdgpu: Fix the mes version that support inv_tlbs
drm/amd: Check whether secure display TA loaded successfully
drm/amdkfd: Fix mmap write lock not release
drm/amdkfd: Fix kfd process ref leaking when userptr unmapping
drm/amdgpu: Fix for GPU reset being blocked by KIQ I/O.
drm/amd/display: Disable scaling on DCE6 for now
drm/amd/display: Properly disable scaling on DCE6
drm/amd/display: Properly clear SCL_*_FILTER_CONTROL on DCE6
drm/amd/display: Add missing DCE6 SCL_HORZ_FILTER_INIT* SRIs
...

+391 -253
+7 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
··· 2586 2586 * from the KFD, trigger a segmentation fault in VM debug mode. 2587 2587 */ 2588 2588 if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) { 2589 + struct kfd_process *p; 2590 + 2589 2591 pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n", 2590 2592 pid_nr(process_info->pid), mem->va); 2591 2593 2592 2594 // Send GPU VM fault to user space 2593 - kfd_signal_vm_fault_event_with_userptr(kfd_lookup_process_by_pid(process_info->pid), 2594 - mem->va); 2595 + p = kfd_lookup_process_by_pid(process_info->pid); 2596 + if (p) { 2597 + kfd_signal_vm_fault_event_with_userptr(p, mem->va); 2598 + kfd_unref_process(p); 2599 + } 2595 2600 } 2596 2601 2597 2602 ret = 0;
+30 -18
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 6389 6389 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) 6390 6390 drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); 6391 6391 6392 - if (tmp_adev->asic_reset_res) 6393 - r = tmp_adev->asic_reset_res; 6394 - 6395 - tmp_adev->asic_reset_res = 0; 6396 - 6397 - if (r) { 6392 + if (tmp_adev->asic_reset_res) { 6398 6393 /* bad news, how to tell it to userspace ? 6399 6394 * for ras error, we should report GPU bad status instead of 6400 6395 * reset failure 6401 6396 */ 6402 6397 if (reset_context->src != AMDGPU_RESET_SRC_RAS || 6403 6398 !amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) 6404 - dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", 6405 - atomic_read(&tmp_adev->gpu_reset_counter)); 6406 - amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); 6399 + dev_info( 6400 + tmp_adev->dev, 6401 + "GPU reset(%d) failed with error %d \n", 6402 + atomic_read( 6403 + &tmp_adev->gpu_reset_counter), 6404 + tmp_adev->asic_reset_res); 6405 + amdgpu_vf_error_put(tmp_adev, 6406 + AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, 6407 + tmp_adev->asic_reset_res); 6408 + if (!r) 6409 + r = tmp_adev->asic_reset_res; 6410 + tmp_adev->asic_reset_res = 0; 6407 6411 } else { 6408 - dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); 6412 + dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", 6413 + atomic_read(&tmp_adev->gpu_reset_counter)); 6409 6414 if (amdgpu_acpi_smart_shift_update(tmp_adev, 6410 6415 AMDGPU_SS_DEV_D0)) 6411 6416 dev_warn(tmp_adev->dev, ··· 7162 7157 7163 7158 static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev) 7164 7159 { 7165 - struct pci_dev *parent = pci_upstream_bridge(adev->pdev); 7160 + struct pci_dev *swus, *swds; 7166 7161 int r; 7167 7162 7168 - if (!parent || parent->vendor != PCI_VENDOR_ID_ATI) 7163 + swds = pci_upstream_bridge(adev->pdev); 7164 + if (!swds || swds->vendor != PCI_VENDOR_ID_ATI || 7165 + pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM) 
7166 + return; 7167 + swus = pci_upstream_bridge(swds); 7168 + if (!swus || 7169 + (swus->vendor != PCI_VENDOR_ID_ATI && 7170 + swus->vendor != PCI_VENDOR_ID_AMD) || 7171 + pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM) 7169 7172 return; 7170 7173 7171 7174 /* If already saved, return */ 7172 7175 if (adev->pcie_reset_ctx.swus) 7173 7176 return; 7174 7177 /* Upstream bridge is ATI, assume it's SWUS/DS architecture */ 7175 - r = pci_save_state(parent); 7178 + r = pci_save_state(swds); 7176 7179 if (r) 7177 7180 return; 7178 - adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(parent); 7181 + adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds); 7179 7182 7180 - parent = pci_upstream_bridge(parent); 7181 - r = pci_save_state(parent); 7183 + r = pci_save_state(swus); 7182 7184 if (r) 7183 7185 return; 7184 - adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(parent); 7186 + adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus); 7185 7187 7186 - adev->pcie_reset_ctx.swus = parent; 7188 + adev->pcie_reset_ctx.swus = swus; 7187 7189 } 7188 7190 7189 7191 static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
+5
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
··· 1102 1102 1103 1103 might_sleep(); 1104 1104 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 1105 + if (amdgpu_in_reset(adev)) 1106 + goto failed_kiq_read; 1107 + 1105 1108 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 1106 1109 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 1107 1110 } ··· 1174 1171 1175 1172 might_sleep(); 1176 1173 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 1174 + if (amdgpu_in_reset(adev)) 1175 + goto failed_kiq_write; 1177 1176 1178 1177 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 1179 1178 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+2 -8
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
··· 1421 1421 1422 1422 amdgpu_debugfs_vm_init(file_priv); 1423 1423 1424 - r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id); 1424 + r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id, pasid); 1425 1425 if (r) 1426 1426 goto error_pasid; 1427 - 1428 - r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid); 1429 - if (r) 1430 - goto error_vm; 1431 1427 1432 1428 fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL); 1433 1429 if (!fpriv->prt_va) { ··· 1464 1468 amdgpu_vm_fini(adev, &fpriv->vm); 1465 1469 1466 1470 error_pasid: 1467 - if (pasid) { 1471 + if (pasid) 1468 1472 amdgpu_pasid_free(pasid); 1469 - amdgpu_vm_set_pasid(adev, &fpriv->vm, 0); 1470 - } 1471 1473 1472 1474 kfree(fpriv); 1473 1475
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
··· 2352 2352 } 2353 2353 2354 2354 ret = psp_ta_load(psp, &psp->securedisplay_context.context); 2355 - if (!ret) { 2355 + if (!ret && !psp->securedisplay_context.context.resp_status) { 2356 2356 psp->securedisplay_context.context.initialized = true; 2357 2357 mutex_init(&psp->securedisplay_context.mutex); 2358 2358 } else
+4 -4
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
··· 726 726 struct amdgpu_bo *bo; 727 727 int ret; 728 728 729 - spin_lock(&vm->invalidated_lock); 729 + spin_lock(&vm->status_lock); 730 730 while (!list_empty(&vm->invalidated)) { 731 731 bo_va = list_first_entry(&vm->invalidated, 732 732 struct amdgpu_bo_va, 733 733 base.vm_status); 734 - spin_unlock(&vm->invalidated_lock); 734 + spin_unlock(&vm->status_lock); 735 735 736 736 bo = bo_va->base.bo; 737 737 ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 2); ··· 748 748 if (ret) 749 749 return ret; 750 750 751 - spin_lock(&vm->invalidated_lock); 751 + spin_lock(&vm->status_lock); 752 752 } 753 - spin_unlock(&vm->invalidated_lock); 753 + spin_unlock(&vm->status_lock); 754 754 755 755 return 0; 756 756 }
+112 -99
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
··· 139 139 } 140 140 141 141 /** 142 - * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping 143 - * 144 - * @adev: amdgpu_device pointer 145 - * @vm: amdgpu_vm pointer 146 - * @pasid: the pasid the VM is using on this GPU 147 - * 148 - * Set the pasid this VM is using on this GPU, can also be used to remove the 149 - * pasid by passing in zero. 150 - * 151 - */ 152 - int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm, 153 - u32 pasid) 154 - { 155 - int r; 156 - 157 - amdgpu_vm_assert_locked(vm); 158 - 159 - if (vm->pasid == pasid) 160 - return 0; 161 - 162 - if (vm->pasid) { 163 - r = xa_err(xa_erase_irq(&adev->vm_manager.pasids, vm->pasid)); 164 - if (r < 0) 165 - return r; 166 - 167 - vm->pasid = 0; 168 - } 169 - 170 - if (pasid) { 171 - r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, 172 - GFP_KERNEL)); 173 - if (r < 0) 174 - return r; 175 - 176 - vm->pasid = pasid; 177 - } 178 - 179 - 180 - return 0; 181 - } 182 - 183 - /** 184 142 * amdgpu_vm_bo_evicted - vm_bo is evicted 185 143 * 186 144 * @vm_bo: vm_bo which is evicted ··· 153 195 154 196 vm_bo->moved = true; 155 197 amdgpu_vm_assert_locked(vm); 198 + spin_lock(&vm_bo->vm->status_lock); 156 199 if (bo->tbo.type == ttm_bo_type_kernel) 157 200 list_move(&vm_bo->vm_status, &vm->evicted); 158 201 else 159 202 list_move_tail(&vm_bo->vm_status, &vm->evicted); 203 + spin_unlock(&vm_bo->vm->status_lock); 160 204 } 161 205 /** 162 206 * amdgpu_vm_bo_moved - vm_bo is moved ··· 171 211 static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) 172 212 { 173 213 amdgpu_vm_assert_locked(vm_bo->vm); 214 + spin_lock(&vm_bo->vm->status_lock); 174 215 list_move(&vm_bo->vm_status, &vm_bo->vm->moved); 216 + spin_unlock(&vm_bo->vm->status_lock); 175 217 } 176 218 177 219 /** ··· 187 225 static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo) 188 226 { 189 227 amdgpu_vm_assert_locked(vm_bo->vm); 228 + spin_lock(&vm_bo->vm->status_lock); 190 229 list_move(&vm_bo->vm_status, 
&vm_bo->vm->idle); 230 + spin_unlock(&vm_bo->vm->status_lock); 191 231 vm_bo->moved = false; 192 232 } 193 233 ··· 203 239 */ 204 240 static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo) 205 241 { 206 - spin_lock(&vm_bo->vm->invalidated_lock); 242 + spin_lock(&vm_bo->vm->status_lock); 207 243 list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated); 208 - spin_unlock(&vm_bo->vm->invalidated_lock); 244 + spin_unlock(&vm_bo->vm->status_lock); 209 245 } 210 246 211 247 /** ··· 218 254 */ 219 255 static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo) 220 256 { 221 - amdgpu_vm_assert_locked(vm_bo->vm); 222 257 vm_bo->moved = true; 258 + spin_lock(&vm_bo->vm->status_lock); 223 259 list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user); 260 + spin_unlock(&vm_bo->vm->status_lock); 224 261 } 225 262 226 263 /** ··· 235 270 static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) 236 271 { 237 272 amdgpu_vm_assert_locked(vm_bo->vm); 238 - if (vm_bo->bo->parent) 273 + if (vm_bo->bo->parent) { 274 + spin_lock(&vm_bo->vm->status_lock); 239 275 list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); 240 - else 276 + spin_unlock(&vm_bo->vm->status_lock); 277 + } else { 241 278 amdgpu_vm_bo_idle(vm_bo); 279 + } 242 280 } 243 281 244 282 /** ··· 255 287 static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) 256 288 { 257 289 amdgpu_vm_assert_locked(vm_bo->vm); 290 + spin_lock(&vm_bo->vm->status_lock); 258 291 list_move(&vm_bo->vm_status, &vm_bo->vm->done); 292 + spin_unlock(&vm_bo->vm->status_lock); 259 293 } 260 294 261 295 /** ··· 271 301 { 272 302 struct amdgpu_vm_bo_base *vm_bo, *tmp; 273 303 274 - spin_lock(&vm->invalidated_lock); 304 + amdgpu_vm_assert_locked(vm); 305 + 306 + spin_lock(&vm->status_lock); 275 307 list_splice_init(&vm->done, &vm->invalidated); 276 308 list_for_each_entry(vm_bo, &vm->invalidated, vm_status) 277 309 vm_bo->moved = true; 278 - spin_unlock(&vm->invalidated_lock); 279 310 280 - 
amdgpu_vm_assert_locked(vm_bo->vm); 281 311 list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) { 282 312 struct amdgpu_bo *bo = vm_bo->bo; 283 313 ··· 287 317 else if (bo->parent) 288 318 list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); 289 319 } 320 + spin_unlock(&vm->status_lock); 290 321 } 291 322 292 323 /** 293 324 * amdgpu_vm_update_shared - helper to update shared memory stat 294 325 * @base: base structure for tracking BO usage in a VM 295 326 * 296 - * Takes the vm stats_lock and updates the shared memory stat. If the basic 327 + * Takes the vm status_lock and updates the shared memory stat. If the basic 297 328 * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats need to be called 298 329 * as well. 299 330 */ ··· 307 336 bool shared; 308 337 309 338 dma_resv_assert_held(bo->tbo.base.resv); 310 - spin_lock(&vm->stats_lock); 339 + spin_lock(&vm->status_lock); 311 340 shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base); 312 341 if (base->shared != shared) { 313 342 base->shared = shared; ··· 319 348 vm->stats[bo_memtype].drm.private += size; 320 349 } 321 350 } 322 - spin_unlock(&vm->stats_lock); 351 + spin_unlock(&vm->status_lock); 323 352 } 324 353 325 354 /** ··· 344 373 * be bo->tbo.resource 345 374 * @sign: if we should add (+1) or subtract (-1) from the stat 346 375 * 347 - * Caller need to have the vm stats_lock held. Useful for when multiple update 376 + * Caller need to have the vm status_lock held. Useful for when multiple update 348 377 * need to happen at the same time. 
349 378 */ 350 379 static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base, 351 - struct ttm_resource *res, int sign) 380 + struct ttm_resource *res, int sign) 352 381 { 353 382 struct amdgpu_vm *vm = base->vm; 354 383 struct amdgpu_bo *bo = base->bo; ··· 372 401 */ 373 402 if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) 374 403 vm->stats[res_memtype].drm.purgeable += size; 375 - if (!(bo->preferred_domains & 376 - amdgpu_mem_type_to_domain(res_memtype))) 404 + if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(res_memtype))) 377 405 vm->stats[bo_memtype].evicted += size; 378 406 } 379 407 } ··· 391 421 { 392 422 struct amdgpu_vm *vm = base->vm; 393 423 394 - spin_lock(&vm->stats_lock); 424 + spin_lock(&vm->status_lock); 395 425 amdgpu_vm_update_stats_locked(base, res, sign); 396 - spin_unlock(&vm->stats_lock); 426 + spin_unlock(&vm->status_lock); 397 427 } 398 428 399 429 /** ··· 419 449 base->next = bo->vm_bo; 420 450 bo->vm_bo = base; 421 451 422 - spin_lock(&vm->stats_lock); 452 + spin_lock(&vm->status_lock); 423 453 base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base); 424 454 amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1); 425 - spin_unlock(&vm->stats_lock); 455 + spin_unlock(&vm->status_lock); 426 456 427 457 if (!amdgpu_vm_is_bo_always_valid(vm, bo)) 428 458 return; ··· 481 511 int ret; 482 512 483 513 /* We can only trust prev->next while holding the lock */ 484 - spin_lock(&vm->invalidated_lock); 514 + spin_lock(&vm->status_lock); 485 515 while (!list_is_head(prev->next, &vm->done)) { 486 516 bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status); 487 - spin_unlock(&vm->invalidated_lock); 517 + spin_unlock(&vm->status_lock); 488 518 489 519 bo = bo_va->base.bo; 490 520 if (bo) { ··· 492 522 if (unlikely(ret)) 493 523 return ret; 494 524 } 495 - spin_lock(&vm->invalidated_lock); 525 + spin_lock(&vm->status_lock); 496 526 prev = prev->next; 497 527 } 498 - spin_unlock(&vm->invalidated_lock); 528 + 
spin_unlock(&vm->status_lock); 499 529 500 530 return 0; 501 531 } ··· 591 621 void *param) 592 622 { 593 623 uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); 594 - struct amdgpu_vm_bo_base *bo_base, *tmp; 624 + struct amdgpu_vm_bo_base *bo_base; 595 625 struct amdgpu_bo *bo; 596 626 int r; 597 627 ··· 604 634 return r; 605 635 } 606 636 607 - list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) { 637 + spin_lock(&vm->status_lock); 638 + while (!list_empty(&vm->evicted)) { 639 + bo_base = list_first_entry(&vm->evicted, 640 + struct amdgpu_vm_bo_base, 641 + vm_status); 642 + spin_unlock(&vm->status_lock); 643 + 608 644 bo = bo_base->bo; 609 645 610 646 r = validate(param, bo); ··· 623 647 vm->update_funcs->map_table(to_amdgpu_bo_vm(bo)); 624 648 amdgpu_vm_bo_relocated(bo_base); 625 649 } 650 + spin_lock(&vm->status_lock); 626 651 } 652 + while (ticket && !list_empty(&vm->evicted_user)) { 653 + bo_base = list_first_entry(&vm->evicted_user, 654 + struct amdgpu_vm_bo_base, 655 + vm_status); 656 + spin_unlock(&vm->status_lock); 627 657 628 - if (ticket) { 629 - list_for_each_entry_safe(bo_base, tmp, &vm->evicted_user, 630 - vm_status) { 631 - bo = bo_base->bo; 632 - dma_resv_assert_held(bo->tbo.base.resv); 658 + bo = bo_base->bo; 659 + dma_resv_assert_held(bo->tbo.base.resv); 633 660 634 - r = validate(param, bo); 635 - if (r) 636 - return r; 661 + r = validate(param, bo); 662 + if (r) 663 + return r; 637 664 638 - amdgpu_vm_bo_invalidated(bo_base); 639 - } 665 + amdgpu_vm_bo_invalidated(bo_base); 666 + 667 + spin_lock(&vm->status_lock); 640 668 } 669 + spin_unlock(&vm->status_lock); 641 670 642 671 amdgpu_vm_eviction_lock(vm); 643 672 vm->evicting = false; ··· 671 690 ret = !vm->evicting; 672 691 amdgpu_vm_eviction_unlock(vm); 673 692 693 + spin_lock(&vm->status_lock); 674 694 ret &= list_empty(&vm->evicted); 695 + spin_unlock(&vm->status_lock); 675 696 676 697 spin_lock(&vm->immediate.lock); 677 698 ret &= !vm->immediate.stopped; ··· 964 981 
struct amdgpu_vm *vm, bool immediate) 965 982 { 966 983 struct amdgpu_vm_update_params params; 967 - struct amdgpu_vm_bo_base *entry, *tmp; 984 + struct amdgpu_vm_bo_base *entry; 968 985 bool flush_tlb_needed = false; 986 + LIST_HEAD(relocated); 969 987 int r, idx; 970 988 971 989 amdgpu_vm_assert_locked(vm); 972 990 973 - if (list_empty(&vm->relocated)) 991 + spin_lock(&vm->status_lock); 992 + list_splice_init(&vm->relocated, &relocated); 993 + spin_unlock(&vm->status_lock); 994 + 995 + if (list_empty(&relocated)) 974 996 return 0; 975 997 976 998 if (!drm_dev_enter(adev_to_drm(adev), &idx)) ··· 991 1003 if (r) 992 1004 goto error; 993 1005 994 - list_for_each_entry(entry, &vm->relocated, vm_status) { 1006 + list_for_each_entry(entry, &relocated, vm_status) { 995 1007 /* vm_flush_needed after updating moved PDEs */ 996 1008 flush_tlb_needed |= entry->moved; 997 1009 ··· 1007 1019 if (flush_tlb_needed) 1008 1020 atomic64_inc(&vm->tlb_seq); 1009 1021 1010 - list_for_each_entry_safe(entry, tmp, &vm->relocated, vm_status) { 1022 + while (!list_empty(&relocated)) { 1023 + entry = list_first_entry(&relocated, struct amdgpu_vm_bo_base, 1024 + vm_status); 1011 1025 amdgpu_vm_bo_idle(entry); 1012 1026 } 1013 1027 ··· 1236 1246 void amdgpu_vm_get_memory(struct amdgpu_vm *vm, 1237 1247 struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]) 1238 1248 { 1239 - spin_lock(&vm->stats_lock); 1249 + spin_lock(&vm->status_lock); 1240 1250 memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM); 1241 - spin_unlock(&vm->stats_lock); 1251 + spin_unlock(&vm->status_lock); 1242 1252 } 1243 1253 1244 1254 /** ··· 1605 1615 struct amdgpu_vm *vm, 1606 1616 struct ww_acquire_ctx *ticket) 1607 1617 { 1608 - struct amdgpu_bo_va *bo_va, *tmp; 1618 + struct amdgpu_bo_va *bo_va; 1609 1619 struct dma_resv *resv; 1610 1620 bool clear, unlock; 1611 1621 int r; 1612 1622 1613 - list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { 1623 + spin_lock(&vm->status_lock); 1624 + while 
(!list_empty(&vm->moved)) { 1625 + bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, 1626 + base.vm_status); 1627 + spin_unlock(&vm->status_lock); 1628 + 1614 1629 /* Per VM BOs never need to bo cleared in the page tables */ 1615 1630 r = amdgpu_vm_bo_update(adev, bo_va, false); 1616 1631 if (r) 1617 1632 return r; 1633 + spin_lock(&vm->status_lock); 1618 1634 } 1619 1635 1620 - spin_lock(&vm->invalidated_lock); 1621 1636 while (!list_empty(&vm->invalidated)) { 1622 1637 bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, 1623 1638 base.vm_status); 1624 1639 resv = bo_va->base.bo->tbo.base.resv; 1625 - spin_unlock(&vm->invalidated_lock); 1640 + spin_unlock(&vm->status_lock); 1626 1641 1627 1642 /* Try to reserve the BO to avoid clearing its ptes */ 1628 1643 if (!adev->debug_vm && dma_resv_trylock(resv)) { ··· 1659 1664 bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM)) 1660 1665 amdgpu_vm_bo_evicted_user(&bo_va->base); 1661 1666 1662 - spin_lock(&vm->invalidated_lock); 1667 + spin_lock(&vm->status_lock); 1663 1668 } 1664 - spin_unlock(&vm->invalidated_lock); 1669 + spin_unlock(&vm->status_lock); 1665 1670 1666 1671 return 0; 1667 1672 } ··· 2190 2195 } 2191 2196 } 2192 2197 2193 - spin_lock(&vm->invalidated_lock); 2198 + spin_lock(&vm->status_lock); 2194 2199 list_del(&bo_va->base.vm_status); 2195 - spin_unlock(&vm->invalidated_lock); 2200 + spin_unlock(&vm->status_lock); 2196 2201 2197 2202 list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { 2198 2203 list_del(&mapping->list); ··· 2300 2305 for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { 2301 2306 struct amdgpu_vm *vm = bo_base->vm; 2302 2307 2303 - spin_lock(&vm->stats_lock); 2308 + spin_lock(&vm->status_lock); 2304 2309 amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1); 2305 2310 amdgpu_vm_update_stats_locked(bo_base, new_mem, +1); 2306 - spin_unlock(&vm->stats_lock); 2311 + spin_unlock(&vm->status_lock); 2307 2312 } 2308 2313 2309 2314 
amdgpu_vm_bo_invalidate(bo, evicted); ··· 2549 2554 * @adev: amdgpu_device pointer 2550 2555 * @vm: requested vm 2551 2556 * @xcp_id: GPU partition selection id 2557 + * @pasid: the pasid the VM is using on this GPU 2552 2558 * 2553 2559 * Init @vm fields. 2554 2560 * ··· 2557 2561 * 0 for success, error for failure. 2558 2562 */ 2559 2563 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, 2560 - int32_t xcp_id) 2564 + int32_t xcp_id, uint32_t pasid) 2561 2565 { 2562 2566 struct amdgpu_bo *root_bo; 2563 2567 struct amdgpu_bo_vm *root; ··· 2571 2575 INIT_LIST_HEAD(&vm->relocated); 2572 2576 INIT_LIST_HEAD(&vm->moved); 2573 2577 INIT_LIST_HEAD(&vm->idle); 2574 - spin_lock_init(&vm->invalidated_lock); 2575 2578 INIT_LIST_HEAD(&vm->invalidated); 2579 + spin_lock_init(&vm->status_lock); 2576 2580 INIT_LIST_HEAD(&vm->freed); 2577 2581 INIT_LIST_HEAD(&vm->done); 2578 2582 INIT_KFIFO(vm->faults); 2579 - spin_lock_init(&vm->stats_lock); 2580 2583 2581 2584 r = amdgpu_vm_init_entities(adev, vm); 2582 2585 if (r) ··· 2633 2638 if (r) 2634 2639 dev_dbg(adev->dev, "Failed to create task info for VM\n"); 2635 2640 2641 + /* Store new PASID in XArray (if non-zero) */ 2642 + if (pasid != 0) { 2643 + r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, GFP_KERNEL)); 2644 + if (r < 0) 2645 + goto error_free_root; 2646 + 2647 + vm->pasid = pasid; 2648 + } 2649 + 2636 2650 amdgpu_bo_unreserve(vm->root.bo); 2637 2651 amdgpu_bo_unref(&root_bo); 2638 2652 2639 2653 return 0; 2640 2654 2641 2655 error_free_root: 2656 + /* If PASID was partially set, erase it from XArray before failing */ 2657 + if (vm->pasid != 0) { 2658 + xa_erase_irq(&adev->vm_manager.pasids, vm->pasid); 2659 + vm->pasid = 0; 2660 + } 2642 2661 amdgpu_vm_pt_free_root(adev, vm); 2643 2662 amdgpu_bo_unreserve(vm->root.bo); 2644 2663 amdgpu_bo_unref(&root_bo); ··· 2758 2749 2759 2750 root = amdgpu_bo_ref(vm->root.bo); 2760 2751 amdgpu_bo_reserve(root, true); 2761 - amdgpu_vm_set_pasid(adev, vm, 
0); 2752 + /* Remove PASID mapping before destroying VM */ 2753 + if (vm->pasid != 0) { 2754 + xa_erase_irq(&adev->vm_manager.pasids, vm->pasid); 2755 + vm->pasid = 0; 2756 + } 2762 2757 dma_fence_wait(vm->last_unlocked, false); 2763 2758 dma_fence_put(vm->last_unlocked); 2764 2759 dma_fence_wait(vm->last_tlb_flush, false); ··· 3051 3038 3052 3039 amdgpu_vm_assert_locked(vm); 3053 3040 3041 + spin_lock(&vm->status_lock); 3054 3042 seq_puts(m, "\tIdle BOs:\n"); 3055 3043 list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) { 3056 3044 if (!bo_va->base.bo) ··· 3089 3075 id = 0; 3090 3076 3091 3077 seq_puts(m, "\tInvalidated BOs:\n"); 3092 - spin_lock(&vm->invalidated_lock); 3093 3078 list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) { 3094 3079 if (!bo_va->base.bo) 3095 3080 continue; 3096 3081 total_invalidated += amdgpu_bo_print_info(id++, bo_va->base.bo, m); 3097 3082 } 3098 - spin_unlock(&vm->invalidated_lock); 3099 3083 total_invalidated_objs = id; 3100 3084 id = 0; 3101 3085 ··· 3103 3091 continue; 3104 3092 total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m); 3105 3093 } 3094 + spin_unlock(&vm->status_lock); 3106 3095 total_done_objs = id; 3107 3096 3108 3097 seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle,
+7 -13
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
··· 203 203 /* protected by bo being reserved */ 204 204 struct amdgpu_vm_bo_base *next; 205 205 206 - /* protected by vm reservation and invalidated_lock */ 206 + /* protected by vm status_lock */ 207 207 struct list_head vm_status; 208 208 209 209 /* if the bo is counted as shared in mem stats 210 - * protected by vm BO being reserved */ 210 + * protected by vm status_lock */ 211 211 bool shared; 212 212 213 213 /* protected by the BO being reserved */ ··· 343 343 bool evicting; 344 344 unsigned int saved_flags; 345 345 346 - /* Memory statistics for this vm, protected by stats_lock */ 347 - spinlock_t stats_lock; 346 + /* Lock to protect vm_bo add/del/move on all lists of vm */ 347 + spinlock_t status_lock; 348 + 349 + /* Memory statistics for this vm, protected by status_lock */ 348 350 struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]; 349 351 350 352 /* ··· 354 352 * PDs, PTs or per VM BOs. The state transits are: 355 353 * 356 354 * evicted -> relocated (PDs, PTs) or moved (per VM BOs) -> idle 357 - * 358 - * Lists are protected by the root PD dma_resv lock. 359 355 */ 360 356 361 357 /* Per-VM and PT BOs who needs a validation */ ··· 374 374 * state transits are: 375 375 * 376 376 * evicted_user or invalidated -> done 377 - * 378 - * Lists are protected by the invalidated_lock. 
379 377 */ 380 - spinlock_t invalidated_lock; 381 378 382 379 /* BOs for user mode queues that need a validation */ 383 380 struct list_head evicted_user; ··· 500 503 void amdgpu_vm_manager_init(struct amdgpu_device *adev); 501 504 void amdgpu_vm_manager_fini(struct amdgpu_device *adev); 502 505 503 - int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm, 504 - u32 pasid); 505 - 506 506 long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout); 507 - int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id); 507 + int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id, uint32_t pasid); 508 508 int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); 509 509 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); 510 510 int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
+4
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
··· 543 543 entry->bo->vm_bo = NULL; 544 544 ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); 545 545 546 + spin_lock(&entry->vm->status_lock); 546 547 list_del(&entry->vm_status); 548 + spin_unlock(&entry->vm->status_lock); 547 549 amdgpu_bo_unref(&entry->bo); 548 550 } 549 551 ··· 589 587 struct amdgpu_vm_pt_cursor seek; 590 588 struct amdgpu_vm_bo_base *entry; 591 589 590 + spin_lock(&params->vm->status_lock); 592 591 for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) { 593 592 if (entry && entry->bo) 594 593 list_move(&entry->vm_status, &params->tlb_flush_waitlist); ··· 597 594 598 595 /* enter start node now */ 599 596 list_move(&cursor->entry->vm_status, &params->tlb_flush_waitlist); 597 + spin_unlock(&params->vm->status_lock); 600 598 } 601 599 602 600 /**
+1 -1
drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
··· 337 337 int vmid, i; 338 338 339 339 if (adev->enable_uni_mes && adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready && 340 - (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x83) { 340 + (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x84) { 341 341 struct mes_inv_tlbs_pasid_input input = {0}; 342 342 input.pasid = pasid; 343 343 input.flush_type = flush_type;
+2
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
··· 3045 3045 if (svms->checkpoint_ts[gpuidx] != 0) { 3046 3046 if (amdgpu_ih_ts_after_or_equal(ts, svms->checkpoint_ts[gpuidx])) { 3047 3047 pr_debug("draining retry fault, drop fault 0x%llx\n", addr); 3048 + if (write_locked) 3049 + mmap_write_downgrade(mm); 3048 3050 r = -EAGAIN; 3049 3051 goto out_unlock_svms; 3050 3052 } else {
+4
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
··· 2000 2000 2001 2001 init_data.flags.disable_ips_in_vpb = 0; 2002 2002 2003 + /* DCN35 and above supports dynamic DTBCLK switch */ 2004 + if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 5, 0)) 2005 + init_data.flags.allow_0_dtb_clk = true; 2006 + 2003 2007 /* Enable DWB for tested platforms only */ 2004 2008 if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) 2005 2009 init_data.num_virtual_links = 1;
+13 -8
drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
··· 154 154 REG_SET(SCL_BYPASS_CONTROL, 0, SCL_BYPASS_MODE, 0); 155 155 156 156 if (data->taps.h_taps + data->taps.v_taps <= 2) { 157 - /* Set bypass */ 157 + /* Disable scaler functionality */ 158 + REG_WRITE(SCL_SCALER_ENABLE, 0); 158 159 159 - /* DCE6 has no SCL_MODE register, skip scale mode programming */ 160 - 160 + /* Clear registers that can cause glitches even when the scaler is off */ 161 + REG_WRITE(SCL_TAP_CONTROL, 0); 162 + REG_WRITE(SCL_AUTOMATIC_MODE_CONTROL, 0); 163 + REG_WRITE(SCL_F_SHARP_CONTROL, 0); 161 164 return false; 162 165 } 163 166 ··· 168 165 SCL_H_NUM_OF_TAPS, data->taps.h_taps - 1, 169 166 SCL_V_NUM_OF_TAPS, data->taps.v_taps - 1); 170 167 171 - /* DCE6 has no SCL_MODE register, skip scale mode programming */ 168 + REG_WRITE(SCL_SCALER_ENABLE, 1); 172 169 173 170 /* DCE6 has no SCL_BOUNDARY_MODE bit, skip replace out of bound pixels */ 174 171 ··· 505 502 REG_SET(DC_LB_MEM_SIZE, 0, 506 503 DC_LB_MEM_SIZE, xfm_dce->lb_memory_size); 507 504 505 + REG_WRITE(SCL_UPDATE, 0x00010000); 506 + 508 507 /* Clear SCL_F_SHARP_CONTROL value to 0 */ 509 508 REG_WRITE(SCL_F_SHARP_CONTROL, 0); 510 509 ··· 532 527 if (coeffs_v != xfm_dce->filter_v || coeffs_h != xfm_dce->filter_h) { 533 528 /* 4. Program vertical filters */ 534 529 if (xfm_dce->filter_v == NULL) 535 - REG_SET(SCL_VERT_FILTER_CONTROL, 0, 536 - SCL_V_2TAP_HARDCODE_COEF_EN, 0); 530 + REG_WRITE(SCL_VERT_FILTER_CONTROL, 0); 537 531 program_multi_taps_filter( 538 532 xfm_dce, 539 533 data->taps.v_taps, ··· 546 542 547 543 /* 5. 
Program horizontal filters */ 548 544 if (xfm_dce->filter_h == NULL) 549 - REG_SET(SCL_HORZ_FILTER_CONTROL, 0, 550 - SCL_H_2TAP_HARDCODE_COEF_EN, 0); 545 + REG_WRITE(SCL_HORZ_FILTER_CONTROL, 0); 551 546 program_multi_taps_filter( 552 547 xfm_dce, 553 548 data->taps.h_taps, ··· 569 566 /* DCE6 has no SCL_COEF_UPDATE_COMPLETE bit to flip to new coefficient memory */ 570 567 571 568 /* DCE6 DATA_FORMAT register does not support ALPHA_EN */ 569 + 570 + REG_WRITE(SCL_UPDATE, 0); 572 571 } 573 572 #endif 574 573
+4
drivers/gpu/drm/amd/display/dc/dce/dce_transform.h
··· 155 155 SRI(SCL_COEF_RAM_TAP_DATA, SCL, id), \ 156 156 SRI(VIEWPORT_START, SCL, id), \ 157 157 SRI(VIEWPORT_SIZE, SCL, id), \ 158 + SRI(SCL_SCALER_ENABLE, SCL, id), \ 159 + SRI(SCL_HORZ_FILTER_INIT_RGB_LUMA, SCL, id), \ 160 + SRI(SCL_HORZ_FILTER_INIT_CHROMA, SCL, id), \ 158 161 SRI(SCL_HORZ_FILTER_SCALE_RATIO, SCL, id), \ 159 162 SRI(SCL_VERT_FILTER_SCALE_RATIO, SCL, id), \ 160 163 SRI(SCL_VERT_FILTER_INIT, SCL, id), \ ··· 593 590 uint32_t SCL_VERT_FILTER_SCALE_RATIO; 594 591 uint32_t SCL_HORZ_FILTER_INIT; 595 592 #if defined(CONFIG_DRM_AMD_DC_SI) 593 + uint32_t SCL_SCALER_ENABLE; 596 594 uint32_t SCL_HORZ_FILTER_INIT_RGB_LUMA; 597 595 uint32_t SCL_HORZ_FILTER_INIT_CHROMA; 598 596 #endif
+4
drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
··· 808 808 809 809 int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc) 810 810 { 811 + dc_assert_fp_enabled(); 812 + 811 813 return soc->clock_limits[0].dispclk_mhz * 10000.0 / (1.0 + soc->dcn_downspread_percent / 100.0); 812 814 } 813 815 ··· 817 815 struct _vcs_dpi_soc_bounding_box_st *soc, 818 816 int pix_clk_100hz, int bpp, int seg_size_kb) 819 817 { 818 + dc_assert_fp_enabled(); 819 + 820 820 /* Roughly calculate required crb to hide latency. In practice there is slightly 821 821 * more buffer available for latency hiding 822 822 */
+4 -2
drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
··· 445 445 bool upscaled = false; 446 446 const unsigned int max_allowed_vblank_nom = 1023; 447 447 448 + dc_assert_fp_enabled(); 449 + 448 450 dcn31_populate_dml_pipes_from_context(dc, context, pipes, 449 451 validate_mode); 450 452 ··· 500 498 501 499 pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; 502 500 503 - DC_FP_START(); 504 501 dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); 505 - DC_FP_END(); 506 502 507 503 pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; 508 504 pipes[pipe_cnt].pipe.src.dcc_rate = 3; ··· 580 580 enum dcn_zstate_support_state support = DCN_ZSTATE_SUPPORT_DISALLOW; 581 581 unsigned int i, plane_count = 0; 582 582 DC_LOGGER_INIT(dc->ctx->logger); 583 + 584 + dc_assert_fp_enabled(); 583 585 584 586 for (i = 0; i < dc->res_pool->pipe_count; i++) { 585 587 if (context->res_ctx.pipe_ctx[i].plane_state)
+2 -2
drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
··· 478 478 bool upscaled = false; 479 479 const unsigned int max_allowed_vblank_nom = 1023; 480 480 481 + dc_assert_fp_enabled(); 482 + 481 483 dcn31_populate_dml_pipes_from_context(dc, context, pipes, 482 484 validate_mode); 483 485 ··· 533 531 534 532 pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; 535 533 536 - DC_FP_START(); 537 534 dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); 538 - DC_FP_END(); 539 535 540 536 pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; 541 537 pipes[pipe_cnt].pipe.src.dcc_rate = 3;
+2 -2
drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c
··· 404 404 }, 405 405 406 406 .max_upscale_factor = { 407 - .argb8888 = 16000, 407 + .argb8888 = 1, 408 408 .nv12 = 1, 409 409 .fp16 = 1 410 410 }, 411 411 412 412 .max_downscale_factor = { 413 - .argb8888 = 250, 413 + .argb8888 = 1, 414 414 .nv12 = 1, 415 415 .fp16 = 1 416 416 }
+15 -1
drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
··· 1760 1760 } 1761 1761 1762 1762 1763 + static int populate_dml_pipes_from_context_fpu(struct dc *dc, 1764 + struct dc_state *context, 1765 + display_e2e_pipe_params_st *pipes, 1766 + enum dc_validate_mode validate_mode) 1767 + { 1768 + int ret; 1769 + 1770 + DC_FP_START(); 1771 + ret = dcn35_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode); 1772 + DC_FP_END(); 1773 + 1774 + return ret; 1775 + } 1776 + 1763 1777 static struct resource_funcs dcn35_res_pool_funcs = { 1764 1778 .destroy = dcn35_destroy_resource_pool, 1765 1779 .link_enc_create = dcn35_link_encoder_create, ··· 1784 1770 .validate_bandwidth = dcn35_validate_bandwidth, 1785 1771 .calculate_wm_and_dlg = NULL, 1786 1772 .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, 1787 - .populate_dml_pipes = dcn35_populate_dml_pipes_from_context_fpu, 1773 + .populate_dml_pipes = populate_dml_pipes_from_context_fpu, 1788 1774 .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, 1789 1775 .release_pipe = dcn20_release_pipe, 1790 1776 .add_stream_to_ctx = dcn30_add_stream_to_ctx,
+16 -1
drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
··· 1732 1732 return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; 1733 1733 } 1734 1734 1735 + static int populate_dml_pipes_from_context_fpu(struct dc *dc, 1736 + struct dc_state *context, 1737 + display_e2e_pipe_params_st *pipes, 1738 + enum dc_validate_mode validate_mode) 1739 + { 1740 + int ret; 1741 + 1742 + DC_FP_START(); 1743 + ret = dcn351_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode); 1744 + DC_FP_END(); 1745 + 1746 + return ret; 1747 + 1748 + } 1749 + 1735 1750 static struct resource_funcs dcn351_res_pool_funcs = { 1736 1751 .destroy = dcn351_destroy_resource_pool, 1737 1752 .link_enc_create = dcn35_link_encoder_create, ··· 1757 1742 .validate_bandwidth = dcn351_validate_bandwidth, 1758 1743 .calculate_wm_and_dlg = NULL, 1759 1744 .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, 1760 - .populate_dml_pipes = dcn351_populate_dml_pipes_from_context_fpu, 1745 + .populate_dml_pipes = populate_dml_pipes_from_context_fpu, 1761 1746 .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, 1762 1747 .release_pipe = dcn20_release_pipe, 1763 1748 .add_stream_to_ctx = dcn30_add_stream_to_ctx,
+15 -1
drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c
··· 1734 1734 } 1735 1735 1736 1736 1737 + static int populate_dml_pipes_from_context_fpu(struct dc *dc, 1738 + struct dc_state *context, 1739 + display_e2e_pipe_params_st *pipes, 1740 + enum dc_validate_mode validate_mode) 1741 + { 1742 + int ret; 1743 + 1744 + DC_FP_START(); 1745 + ret = dcn35_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode); 1746 + DC_FP_END(); 1747 + 1748 + return ret; 1749 + } 1750 + 1737 1751 static struct resource_funcs dcn36_res_pool_funcs = { 1738 1752 .destroy = dcn36_destroy_resource_pool, 1739 1753 .link_enc_create = dcn35_link_encoder_create, ··· 1758 1744 .validate_bandwidth = dcn35_validate_bandwidth, 1759 1745 .calculate_wm_and_dlg = NULL, 1760 1746 .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, 1761 - .populate_dml_pipes = dcn35_populate_dml_pipes_from_context_fpu, 1747 + .populate_dml_pipes = populate_dml_pipes_from_context_fpu, 1762 1748 .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, 1763 1749 .release_pipe = dcn20_release_pipe, 1764 1750 .add_stream_to_ctx = dcn30_add_stream_to_ctx,
+5 -5
drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c
··· 641 641 /* this gives the direction of the cositing (negative will move 642 642 * left, right otherwise) 643 643 */ 644 - int sign = 1; 644 + int h_sign = flip_horz_scan_dir ? -1 : 1; 645 + int v_sign = flip_vert_scan_dir ? -1 : 1; 645 646 646 647 switch (spl_in->basic_in.cositing) { 647 - 648 648 case CHROMA_COSITING_TOPLEFT: 649 - init_adj_h = spl_fixpt_from_fraction(sign, 4); 650 - init_adj_v = spl_fixpt_from_fraction(sign, 4); 649 + init_adj_h = spl_fixpt_from_fraction(h_sign, 4); 650 + init_adj_v = spl_fixpt_from_fraction(v_sign, 4); 651 651 break; 652 652 case CHROMA_COSITING_LEFT: 653 - init_adj_h = spl_fixpt_from_fraction(sign, 4); 653 + init_adj_h = spl_fixpt_from_fraction(h_sign, 4); 654 654 init_adj_v = spl_fixpt_zero; 655 655 break; 656 656 case CHROMA_COSITING_NONE:
+7
drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h
··· 4115 4115 #define mmSCL0_SCL_COEF_RAM_CONFLICT_STATUS 0x1B55 4116 4116 #define mmSCL0_SCL_COEF_RAM_SELECT 0x1B40 4117 4117 #define mmSCL0_SCL_COEF_RAM_TAP_DATA 0x1B41 4118 + #define mmSCL0_SCL_SCALER_ENABLE 0x1B42 4118 4119 #define mmSCL0_SCL_CONTROL 0x1B44 4119 4120 #define mmSCL0_SCL_DEBUG 0x1B6A 4120 4121 #define mmSCL0_SCL_DEBUG2 0x1B69 ··· 4145 4144 #define mmSCL1_SCL_COEF_RAM_CONFLICT_STATUS 0x1E55 4146 4145 #define mmSCL1_SCL_COEF_RAM_SELECT 0x1E40 4147 4146 #define mmSCL1_SCL_COEF_RAM_TAP_DATA 0x1E41 4147 + #define mmSCL1_SCL_SCALER_ENABLE 0x1E42 4148 4148 #define mmSCL1_SCL_CONTROL 0x1E44 4149 4149 #define mmSCL1_SCL_DEBUG 0x1E6A 4150 4150 #define mmSCL1_SCL_DEBUG2 0x1E69 ··· 4175 4173 #define mmSCL2_SCL_COEF_RAM_CONFLICT_STATUS 0x4155 4176 4174 #define mmSCL2_SCL_COEF_RAM_SELECT 0x4140 4177 4175 #define mmSCL2_SCL_COEF_RAM_TAP_DATA 0x4141 4176 + #define mmSCL2_SCL_SCALER_ENABLE 0x4142 4178 4177 #define mmSCL2_SCL_CONTROL 0x4144 4179 4178 #define mmSCL2_SCL_DEBUG 0x416A 4180 4179 #define mmSCL2_SCL_DEBUG2 0x4169 ··· 4205 4202 #define mmSCL3_SCL_COEF_RAM_CONFLICT_STATUS 0x4455 4206 4203 #define mmSCL3_SCL_COEF_RAM_SELECT 0x4440 4207 4204 #define mmSCL3_SCL_COEF_RAM_TAP_DATA 0x4441 4205 + #define mmSCL3_SCL_SCALER_ENABLE 0x4442 4208 4206 #define mmSCL3_SCL_CONTROL 0x4444 4209 4207 #define mmSCL3_SCL_DEBUG 0x446A 4210 4208 #define mmSCL3_SCL_DEBUG2 0x4469 ··· 4235 4231 #define mmSCL4_SCL_COEF_RAM_CONFLICT_STATUS 0x4755 4236 4232 #define mmSCL4_SCL_COEF_RAM_SELECT 0x4740 4237 4233 #define mmSCL4_SCL_COEF_RAM_TAP_DATA 0x4741 4234 + #define mmSCL4_SCL_SCALER_ENABLE 0x4742 4238 4235 #define mmSCL4_SCL_CONTROL 0x4744 4239 4236 #define mmSCL4_SCL_DEBUG 0x476A 4240 4237 #define mmSCL4_SCL_DEBUG2 0x4769 ··· 4265 4260 #define mmSCL5_SCL_COEF_RAM_CONFLICT_STATUS 0x4A55 4266 4261 #define mmSCL5_SCL_COEF_RAM_SELECT 0x4A40 4267 4262 #define mmSCL5_SCL_COEF_RAM_TAP_DATA 0x4A41 4263 + #define mmSCL5_SCL_SCALER_ENABLE 0x4A42 4268 4264 #define mmSCL5_SCL_CONTROL 0x4A44 
4269 4265 #define mmSCL5_SCL_DEBUG 0x4A6A 4270 4266 #define mmSCL5_SCL_DEBUG2 0x4A69 ··· 4293 4287 #define mmSCL_COEF_RAM_CONFLICT_STATUS 0x1B55 4294 4288 #define mmSCL_COEF_RAM_SELECT 0x1B40 4295 4289 #define mmSCL_COEF_RAM_TAP_DATA 0x1B41 4290 + #define mmSCL_SCALER_ENABLE 0x1B42 4296 4291 #define mmSCL_CONTROL 0x1B44 4297 4292 #define mmSCL_DEBUG 0x1B6A 4298 4293 #define mmSCL_DEBUG2 0x1B69
+2
drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h
··· 8650 8650 #define REGAMMA_LUT_INDEX__REGAMMA_LUT_INDEX__SHIFT 0x00000000 8651 8651 #define REGAMMA_LUT_WRITE_EN_MASK__REGAMMA_LUT_WRITE_EN_MASK_MASK 0x00000007L 8652 8652 #define REGAMMA_LUT_WRITE_EN_MASK__REGAMMA_LUT_WRITE_EN_MASK__SHIFT 0x00000000 8653 + #define SCL_SCALER_ENABLE__SCL_SCALE_EN_MASK 0x00000001L 8654 + #define SCL_SCALER_ENABLE__SCL_SCALE_EN__SHIFT 0x00000000 8653 8655 #define SCL_ALU_CONTROL__SCL_ALU_DISABLE_MASK 0x00000001L 8654 8656 #define SCL_ALU_CONTROL__SCL_ALU_DISABLE__SHIFT 0x00000000 8655 8657 #define SCL_BYPASS_CONTROL__SCL_BYPASS_MODE_MASK 0x00000003L
+2 -1
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
··· 288 288 * Considering above, we just leave user a verbal message instead 289 289 * of halt driver loading. 290 290 */ 291 - if (if_version != smu->smc_driver_if_version) { 291 + if (smu->smc_driver_if_version != SMU_IGNORE_IF_VERSION && 292 + if_version != smu->smc_driver_if_version) { 292 293 dev_info(adev->dev, "smu driver if version = 0x%08x, smu fw if version = 0x%08x, " 293 294 "smu fw program = %d, smu fw version = 0x%08x (%d.%d.%d)\n", 294 295 smu->smc_driver_if_version, if_version,
+2 -3
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
··· 450 450 ((pgm == 4) && (fw_ver >= 0x4557000))) 451 451 smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET)); 452 452 453 - if (((pgm == 0) && (fw_ver >= 0x00558200)) || 454 - ((pgm == 4) && (fw_ver >= 0x04557100))) 453 + if ((pgm == 0) && (fw_ver >= 0x00558200)) 455 454 smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET)); 456 455 } 457 456 ··· 3932 3933 smu->feature_map = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) ? 3933 3934 smu_v13_0_12_feature_mask_map : smu_v13_0_6_feature_mask_map; 3934 3935 smu->table_map = smu_v13_0_6_table_map; 3935 - smu->smc_driver_if_version = SMU13_0_6_DRIVER_IF_VERSION; 3936 + smu->smc_driver_if_version = SMU_IGNORE_IF_VERSION; 3936 3937 smu->smc_fw_caps |= SMU_FW_CAP_RAS_PRI; 3937 3938 smu_v13_0_set_smu_mailbox_registers(smu); 3938 3939 smu_v13_0_6_set_temp_funcs(smu);
+2
drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
··· 40 40 #define SMU_IH_INTERRUPT_CONTEXT_ID_FAN_ABNORMAL 0x8 41 41 #define SMU_IH_INTERRUPT_CONTEXT_ID_FAN_RECOVERY 0x9 42 42 43 + #define SMU_IGNORE_IF_VERSION 0xFFFFFFFF 44 + 43 45 #define smu_cmn_init_soft_gpu_metrics(ptr, frev, crev) \ 44 46 do { \ 45 47 typecheck(struct gpu_metrics_v##frev##_##crev *, (ptr)); \
+13 -11
drivers/gpu/drm/drm_gpusvm.c
··· 361 361 * @name: Name of the GPU SVM. 362 362 * @drm: Pointer to the DRM device structure. 363 363 * @mm: Pointer to the mm_struct for the address space. 364 - * @device_private_page_owner: Device private pages owner. 365 364 * @mm_start: Start address of GPU SVM. 366 365 * @mm_range: Range of the GPU SVM. 367 366 * @notifier_size: Size of individual notifiers. ··· 382 383 */ 383 384 int drm_gpusvm_init(struct drm_gpusvm *gpusvm, 384 385 const char *name, struct drm_device *drm, 385 - struct mm_struct *mm, void *device_private_page_owner, 386 + struct mm_struct *mm, 386 387 unsigned long mm_start, unsigned long mm_range, 387 388 unsigned long notifier_size, 388 389 const struct drm_gpusvm_ops *ops, ··· 394 395 mmgrab(mm); 395 396 } else { 396 397 /* No full SVM mode, only core drm_gpusvm_pages API. */ 397 - if (ops || num_chunks || mm_range || notifier_size || 398 - device_private_page_owner) 398 + if (ops || num_chunks || mm_range || notifier_size) 399 399 return -EINVAL; 400 400 } 401 401 402 402 gpusvm->name = name; 403 403 gpusvm->drm = drm; 404 404 gpusvm->mm = mm; 405 - gpusvm->device_private_page_owner = device_private_page_owner; 406 405 gpusvm->mm_start = mm_start; 407 406 gpusvm->mm_range = mm_range; 408 407 gpusvm->notifier_size = notifier_size; ··· 681 684 * @notifier: Pointer to the GPU SVM notifier structure 682 685 * @start: Start address 683 686 * @end: End address 687 + * @dev_private_owner: The device private page owner 684 688 * 685 689 * Check if pages between start and end have been faulted in on the CPU. Use to 686 690 * prevent migration of pages without CPU backing store. 
··· 690 692 */ 691 693 static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm, 692 694 struct drm_gpusvm_notifier *notifier, 693 - unsigned long start, unsigned long end) 695 + unsigned long start, unsigned long end, 696 + void *dev_private_owner) 694 697 { 695 698 struct hmm_range hmm_range = { 696 699 .default_flags = 0, 697 700 .notifier = &notifier->notifier, 698 701 .start = start, 699 702 .end = end, 700 - .dev_private_owner = gpusvm->device_private_page_owner, 703 + .dev_private_owner = dev_private_owner, 701 704 }; 702 705 unsigned long timeout = 703 706 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); ··· 752 753 * @gpuva_start: Start address of GPUVA which mirrors CPU 753 754 * @gpuva_end: End address of GPUVA which mirrors CPU 754 755 * @check_pages_threshold: Check CPU pages for present threshold 756 + * @dev_private_owner: The device private page owner 755 757 * 756 758 * This function determines the chunk size for the GPU SVM range based on the 757 759 * fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual ··· 767 767 unsigned long fault_addr, 768 768 unsigned long gpuva_start, 769 769 unsigned long gpuva_end, 770 - unsigned long check_pages_threshold) 770 + unsigned long check_pages_threshold, 771 + void *dev_private_owner) 771 772 { 772 773 unsigned long start, end; 773 774 int i = 0; ··· 815 814 * process-many-malloc' mallocs at least 64k at a time. 
816 815 */ 817 816 if (end - start <= check_pages_threshold && 818 - !drm_gpusvm_check_pages(gpusvm, notifier, start, end)) { 817 + !drm_gpusvm_check_pages(gpusvm, notifier, start, end, dev_private_owner)) { 819 818 ++i; 820 819 goto retry; 821 820 } ··· 958 957 chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas, 959 958 fault_addr, gpuva_start, 960 959 gpuva_end, 961 - ctx->check_pages_threshold); 960 + ctx->check_pages_threshold, 961 + ctx->device_private_page_owner); 962 962 if (chunk_size == LONG_MAX) { 963 963 err = -EINVAL; 964 964 goto err_notifier_remove; ··· 1270 1268 .notifier = notifier, 1271 1269 .start = pages_start, 1272 1270 .end = pages_end, 1273 - .dev_private_owner = gpusvm->device_private_page_owner, 1271 + .dev_private_owner = ctx->device_private_page_owner, 1274 1272 }; 1275 1273 void *zdd; 1276 1274 unsigned long timeout =
+2
drivers/gpu/drm/v3d/v3d_drv.h
··· 62 62 /* Currently active job for this queue */ 63 63 struct v3d_job *active_job; 64 64 spinlock_t queue_lock; 65 + /* Protect dma fence for signalling job completion */ 66 + spinlock_t fence_lock; 65 67 }; 66 68 67 69 /* Performance monitor object. The perform lifetime is controlled by userspace
+1 -1
drivers/gpu/drm/v3d/v3d_fence.c
··· 15 15 fence->dev = &v3d->drm; 16 16 fence->queue = q; 17 17 fence->seqno = ++queue->emit_seqno; 18 - dma_fence_init(&fence->base, &v3d_fence_ops, &queue->queue_lock, 18 + dma_fence_init(&fence->base, &v3d_fence_ops, &queue->fence_lock, 19 19 queue->fence_context, fence->seqno); 20 20 21 21 return &fence->base;
+1
drivers/gpu/drm/v3d/v3d_gem.c
··· 273 273 seqcount_init(&queue->stats.lock); 274 274 275 275 spin_lock_init(&queue->queue_lock); 276 + spin_lock_init(&queue->fence_lock); 276 277 } 277 278 278 279 spin_lock_init(&v3d->mm_lock);
+6 -6
drivers/gpu/drm/xe/tests/xe_pci.c
··· 211 211 * param generator can be used for both 212 212 */ 213 213 static const struct xe_ip pre_gmdid_graphics_ips[] = { 214 - graphics_ip_xelp, 215 - graphics_ip_xelpp, 216 - graphics_ip_xehpg, 217 - graphics_ip_xehpc, 214 + { 1200, "Xe_LP", &graphics_xelp }, 215 + { 1210, "Xe_LP+", &graphics_xelp }, 216 + { 1255, "Xe_HPG", &graphics_xehpg }, 217 + { 1260, "Xe_HPC", &graphics_xehpc }, 218 218 }; 219 219 220 220 static const struct xe_ip pre_gmdid_media_ips[] = { 221 - media_ip_xem, 222 - media_ip_xehpm, 221 + { 1200, "Xe_M", &media_xem }, 222 + { 1255, "Xe_HPM", &media_xem }, 223 223 }; 224 224 225 225 KUNIT_ARRAY_PARAM(pre_gmdid_graphics_ip, pre_gmdid_graphics_ips, xe_ip_kunit_desc);
+23 -11
drivers/gpu/drm/xe/xe_bo.c
··· 1737 1737 bo->attr.atomic_access == DRM_XE_ATOMIC_CPU; 1738 1738 } 1739 1739 1740 + static int xe_bo_wait_usage_kernel(struct xe_bo *bo, struct ttm_operation_ctx *ctx) 1741 + { 1742 + long lerr; 1743 + 1744 + if (ctx->no_wait_gpu) 1745 + return dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL) ? 1746 + 0 : -EBUSY; 1747 + 1748 + lerr = dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, 1749 + ctx->interruptible, MAX_SCHEDULE_TIMEOUT); 1750 + if (lerr < 0) 1751 + return lerr; 1752 + if (lerr == 0) 1753 + return -EBUSY; 1754 + 1755 + return 0; 1756 + } 1757 + 1740 1758 /* Populate the bo if swapped out, or migrate if the access mode requires that. */ 1741 1759 static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx, 1742 1760 struct drm_exec *exec) ··· 1763 1745 int err = 0; 1764 1746 1765 1747 if (ttm_manager_type(tbo->bdev, tbo->resource->mem_type)->use_tt) { 1766 - xe_assert(xe_bo_device(bo), 1767 - dma_resv_test_signaled(tbo->base.resv, DMA_RESV_USAGE_KERNEL) || 1768 - (tbo->ttm && ttm_tt_is_populated(tbo->ttm))); 1769 - err = ttm_bo_populate(&bo->ttm, ctx); 1748 + err = xe_bo_wait_usage_kernel(bo, ctx); 1749 + if (!err) 1750 + err = ttm_bo_populate(&bo->ttm, ctx); 1770 1751 } else if (should_migrate_to_smem(bo)) { 1771 1752 xe_assert(xe_bo_device(bo), bo->flags & XE_BO_FLAG_SYSTEM); 1772 1753 err = xe_bo_migrate(bo, XE_PL_TT, ctx, exec); ··· 1939 1922 .no_wait_gpu = false, 1940 1923 .gfp_retry_mayfail = retry_after_wait, 1941 1924 }; 1942 - long lerr; 1943 1925 1944 1926 err = drm_exec_lock_obj(&exec, &tbo->base); 1945 1927 drm_exec_retry_on_contention(&exec); ··· 1958 1942 break; 1959 1943 } 1960 1944 1961 - lerr = dma_resv_wait_timeout(tbo->base.resv, 1962 - DMA_RESV_USAGE_KERNEL, true, 1963 - MAX_SCHEDULE_TIMEOUT); 1964 - if (lerr < 0) { 1965 - err = lerr; 1945 + err = xe_bo_wait_usage_kernel(bo, &tctx); 1946 + if (err) 1966 1947 break; 1967 - } 1968 1948 1969 1949 if (!retry_after_wait) 1970 1950 ret = 
__xe_bo_cpu_fault(vmf, xe, bo);
+18 -5
drivers/gpu/drm/xe/xe_configfs.c
··· 126 126 * not intended for normal execution and will taint the kernel with TAINT_TEST 127 127 * when used. 128 128 * 129 - * Currently this is implemented only for post and mid context restore. 130 - * Examples: 129 + * The syntax allows to pass straight instructions to be executed by the engine 130 + * in a batch buffer or set specific registers. 131 + * 132 + * #. Generic instruction:: 133 + * 134 + * <engine-class> cmd <instr> [[dword0] [dword1] [...]] 135 + * 136 + * #. Simple register setting:: 137 + * 138 + * <engine-class> reg <address> <value> 139 + * 140 + * Commands are saved per engine class: all instances of that class will execute 141 + * those commands during context switch. The instruction, dword arguments, 142 + * addresses and values are in hex format like in the examples below. 131 143 * 132 144 * #. Execute a LRI command to write 0xDEADBEEF to register 0x4f10 after the 133 145 * normal context restore:: ··· 166 154 * When using multiple lines, make sure to use a command that is 167 155 * implemented with a single write syscall, like HEREDOC. 168 156 * 169 - * These attributes can only be set before binding to the device. 157 + * Currently this is implemented only for post and mid context restore and 158 + * these attributes can only be set before binding to the device. 170 159 * 171 160 * Remove devices 172 161 * ============== ··· 337 324 continue; 338 325 339 326 pattern += strlen(engine_info[i].cls); 340 - if (!mask && !*pattern) 341 - return &engine_info[i]; 327 + if (!mask) 328 + return *pattern ? NULL : &engine_info[i]; 342 329 343 330 if (!strcmp(pattern, "*")) { 344 331 *mask = engine_info[i].mask;
+10 -9
drivers/gpu/drm/xe/xe_device.c
··· 685 685 } 686 686 ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */ 687 687 688 - static void sriov_update_device_info(struct xe_device *xe) 688 + static void vf_update_device_info(struct xe_device *xe) 689 689 { 690 + xe_assert(xe, IS_SRIOV_VF(xe)); 690 691 /* disable features that are not available/applicable to VFs */ 691 - if (IS_SRIOV_VF(xe)) { 692 - xe->info.probe_display = 0; 693 - xe->info.has_heci_cscfi = 0; 694 - xe->info.has_heci_gscfi = 0; 695 - xe->info.skip_guc_pc = 1; 696 - xe->info.skip_pcode = 1; 697 - } 692 + xe->info.probe_display = 0; 693 + xe->info.has_heci_cscfi = 0; 694 + xe->info.has_heci_gscfi = 0; 695 + xe->info.has_late_bind = 0; 696 + xe->info.skip_guc_pc = 1; 697 + xe->info.skip_pcode = 1; 698 698 } 699 699 700 700 static int xe_device_vram_alloc(struct xe_device *xe) ··· 735 735 736 736 xe_sriov_probe_early(xe); 737 737 738 - sriov_update_device_info(xe); 738 + if (IS_SRIOV_VF(xe)) 739 + vf_update_device_info(xe); 739 740 740 741 err = xe_pcode_probe_early(xe); 741 742 if (err || xe_survivability_mode_is_requested(xe)) {
+1 -5
drivers/gpu/drm/xe/xe_hw_engine_group.c
··· 213 213 214 214 err = q->ops->suspend_wait(q); 215 215 if (err) 216 - goto err_suspend; 216 + return err; 217 217 } 218 218 219 219 if (need_resume) 220 220 xe_hw_engine_group_resume_faulting_lr_jobs(group); 221 221 222 222 return 0; 223 - 224 - err_suspend: 225 - up_write(&group->mode_sem); 226 - return err; 227 223 } 228 224 229 225 /**
+10 -10
drivers/gpu/drm/xe/xe_late_bind_fw.c
··· 60 60 const struct gsc_manifest_header *manifest; 61 61 const struct gsc_cpd_entry *entry; 62 62 size_t min_size = sizeof(*header); 63 - u32 offset; 63 + u32 offset = 0; 64 64 int i; 65 65 66 66 /* manifest_entry is mandatory */ ··· 116 116 const struct csc_fpt_header *header = data; 117 117 const struct csc_fpt_entry *entry; 118 118 size_t min_size = sizeof(*header); 119 - u32 offset; 119 + u32 offset = 0; 120 120 int i; 121 121 122 122 /* fpt_entry is mandatory */ ··· 184 184 } 185 185 } 186 186 187 - static int xe_late_bind_fw_num_fans(struct xe_late_bind *late_bind) 187 + static int xe_late_bind_fw_num_fans(struct xe_late_bind *late_bind, u32 *num_fans) 188 188 { 189 189 struct xe_device *xe = late_bind_to_xe(late_bind); 190 190 struct xe_tile *root_tile = xe_device_get_root_tile(xe); 191 - u32 uval; 192 191 193 - if (!xe_pcode_read(root_tile, 194 - PCODE_MBOX(FAN_SPEED_CONTROL, FSC_READ_NUM_FANS, 0), &uval, NULL)) 195 - return uval; 196 - else 197 - return 0; 192 + return xe_pcode_read(root_tile, 193 + PCODE_MBOX(FAN_SPEED_CONTROL, FSC_READ_NUM_FANS, 0), num_fans, NULL); 198 194 } 199 195 200 196 void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind) ··· 310 314 lb_fw->flags &= ~INTEL_LB_FLAG_IS_PERSISTENT; 311 315 312 316 if (lb_fw->type == INTEL_LB_TYPE_FAN_CONTROL) { 313 - num_fans = xe_late_bind_fw_num_fans(late_bind); 317 + ret = xe_late_bind_fw_num_fans(late_bind, &num_fans); 318 + if (ret) { 319 + drm_dbg(&xe->drm, "Failed to read number of fans: %d\n", ret); 320 + return 0; /* Not a fatal error, continue without fan control */ 321 + } 314 322 drm_dbg(&xe->drm, "Number of Fans: %d\n", num_fans); 315 323 if (!num_fans) 316 324 return 0;
+1 -1
drivers/gpu/drm/xe/xe_pm.c
··· 201 201 if (err) 202 202 goto err; 203 203 204 - xe_i2c_pm_resume(xe, xe->d3cold.allowed); 204 + xe_i2c_pm_resume(xe, true); 205 205 206 206 xe_irq_resume(xe); 207 207
+6 -9
drivers/gpu/drm/xe/xe_query.c
··· 276 276 mem_regions->mem_regions[0].instance = 0; 277 277 mem_regions->mem_regions[0].min_page_size = PAGE_SIZE; 278 278 mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT; 279 - if (perfmon_capable()) 280 - mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man); 279 + mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man); 281 280 mem_regions->num_mem_regions = 1; 282 281 283 282 for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { ··· 292 293 mem_regions->mem_regions[mem_regions->num_mem_regions].total_size = 293 294 man->size; 294 295 295 - if (perfmon_capable()) { 296 - xe_ttm_vram_get_used(man, 297 - &mem_regions->mem_regions 298 - [mem_regions->num_mem_regions].used, 299 - &mem_regions->mem_regions 300 - [mem_regions->num_mem_regions].cpu_visible_used); 301 - } 296 + xe_ttm_vram_get_used(man, 297 + &mem_regions->mem_regions 298 + [mem_regions->num_mem_regions].used, 299 + &mem_regions->mem_regions 300 + [mem_regions->num_mem_regions].cpu_visible_used); 302 301 303 302 mem_regions->mem_regions[mem_regions->num_mem_regions].cpu_visible_size = 304 303 xe_ttm_vram_get_cpu_visible_size(man);
+3 -8
drivers/gpu/drm/xe/xe_svm.c
··· 67 67 range_debug(range, operation); 68 68 } 69 69 70 - static void *xe_svm_devm_owner(struct xe_device *xe) 71 - { 72 - return xe; 73 - } 74 - 75 70 static struct drm_gpusvm_range * 76 71 xe_svm_range_alloc(struct drm_gpusvm *gpusvm) 77 72 { ··· 739 744 xe_svm_garbage_collector_work_func); 740 745 741 746 err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm, 742 - current->mm, xe_svm_devm_owner(vm->xe), 0, 743 - vm->size, 747 + current->mm, 0, vm->size, 744 748 xe_modparam.svm_notifier_size * SZ_1M, 745 749 &gpusvm_ops, fault_chunk_sizes, 746 750 ARRAY_SIZE(fault_chunk_sizes)); 747 751 drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock); 748 752 } else { 749 753 err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)", 750 - &vm->xe->drm, NULL, NULL, 0, 0, 0, NULL, 754 + &vm->xe->drm, NULL, 0, 0, 0, NULL, 751 755 NULL, 0); 752 756 } 753 757 ··· 1011 1017 .devmem_only = need_vram && devmem_possible, 1012 1018 .timeslice_ms = need_vram && devmem_possible ? 1013 1019 vm->xe->atomic_svm_timeslice_ms : 0, 1020 + .device_private_page_owner = xe_svm_devm_owner(vm->xe), 1014 1021 }; 1015 1022 struct xe_validation_ctx vctx; 1016 1023 struct drm_exec exec;
+14
drivers/gpu/drm/xe/xe_svm.h
··· 6 6 #ifndef _XE_SVM_H_ 7 7 #define _XE_SVM_H_ 8 8 9 + struct xe_device; 10 + 11 + /** 12 + * xe_svm_devm_owner() - Return the owner of device private memory 13 + * @xe: The xe device. 14 + * 15 + * Return: The owner of this device's device private memory to use in 16 + * hmm_range_fault()- 17 + */ 18 + static inline void *xe_svm_devm_owner(struct xe_device *xe) 19 + { 20 + return xe; 21 + } 22 + 9 23 #if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) 10 24 11 25 #include <drm/drm_pagemap.h>
+1
drivers/gpu/drm/xe/xe_userptr.c
··· 54 54 struct xe_device *xe = vm->xe; 55 55 struct drm_gpusvm_ctx ctx = { 56 56 .read_only = xe_vma_read_only(vma), 57 + .device_private_page_owner = NULL, 57 58 }; 58 59 59 60 lockdep_assert_held(&vm->lock);
+1
drivers/gpu/drm/xe/xe_vm.c
··· 2881 2881 ctx.read_only = xe_vma_read_only(vma); 2882 2882 ctx.devmem_possible = devmem_possible; 2883 2883 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; 2884 + ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe); 2884 2885 2885 2886 /* TODO: Threading the migration */ 2886 2887 xa_for_each(&op->prefetch_range.range, i, svm_range) {
+4 -3
include/drm/drm_gpusvm.h
··· 179 179 * @name: Name of the GPU SVM 180 180 * @drm: Pointer to the DRM device structure 181 181 * @mm: Pointer to the mm_struct for the address space 182 - * @device_private_page_owner: Device private pages owner 183 182 * @mm_start: Start address of GPU SVM 184 183 * @mm_range: Range of the GPU SVM 185 184 * @notifier_size: Size of individual notifiers ··· 203 204 const char *name; 204 205 struct drm_device *drm; 205 206 struct mm_struct *mm; 206 - void *device_private_page_owner; 207 207 unsigned long mm_start; 208 208 unsigned long mm_range; 209 209 unsigned long notifier_size; ··· 224 226 /** 225 227 * struct drm_gpusvm_ctx - DRM GPU SVM context 226 228 * 229 + * @device_private_page_owner: The device-private page owner to use for 230 + * this operation 227 231 * @check_pages_threshold: Check CPU pages for present if chunk is less than or 228 232 * equal to threshold. If not present, reduce chunk 229 233 * size. ··· 239 239 * Context that is DRM GPUSVM is operating in (i.e. user arguments). 240 240 */ 241 241 struct drm_gpusvm_ctx { 242 + void *device_private_page_owner; 242 243 unsigned long check_pages_threshold; 243 244 unsigned long timeslice_ms; 244 245 unsigned int in_notifier :1; ··· 250 249 251 250 int drm_gpusvm_init(struct drm_gpusvm *gpusvm, 252 251 const char *name, struct drm_device *drm, 253 - struct mm_struct *mm, void *device_private_page_owner, 252 + struct mm_struct *mm, 254 253 unsigned long mm_start, unsigned long mm_range, 255 254 unsigned long notifier_size, 256 255 const struct drm_gpusvm_ops *ops,
+2 -2
include/uapi/linux/kfd_ioctl.h
··· 67 67 68 68 struct kfd_ioctl_create_queue_args { 69 69 __u64 ring_base_address; /* to KFD */ 70 - __u64 write_pointer_address; /* from KFD */ 71 - __u64 read_pointer_address; /* from KFD */ 70 + __u64 write_pointer_address; /* to KFD */ 71 + __u64 read_pointer_address; /* to KFD */ 72 72 __u64 doorbell_offset; /* from KFD */ 73 73 74 74 __u32 ring_size; /* to KFD */