Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/radeon/cik: add support for doing async VM pt updates (v5)

Async page table updates using the sDMA engine. sDMA has a
special packet for updating entries for contiguous pages
that reduces overhead.

v2: add support for the CP and use it for now.
v3: update for 2 level PTs
v4: rebase, fix DMA packet
v5: switch to using an IB

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

+109
+109
drivers/gpu/drm/radeon/cik.c
··· 3408 3408 } 3409 3409 3410 3410 /** 3411 + * cik_vm_set_page - update the page tables using sDMA 3412 + * 3413 + * @rdev: radeon_device pointer 3414 + * @ib: indirect buffer to fill with commands 3415 + * @pe: addr of the page entry 3416 + * @addr: dst addr to write into pe 3417 + * @count: number of page entries to update 3418 + * @incr: increase next addr by incr bytes 3419 + * @flags: access flags 3420 + * 3421 + * Update the page tables using CP or sDMA (CIK). 3422 + */ 3423 + void cik_vm_set_page(struct radeon_device *rdev, 3424 + struct radeon_ib *ib, 3425 + uint64_t pe, 3426 + uint64_t addr, unsigned count, 3427 + uint32_t incr, uint32_t flags) 3428 + { 3429 + uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); 3430 + uint64_t value; 3431 + unsigned ndw; 3432 + 3433 + if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) { 3434 + /* CP */ 3435 + while (count) { 3436 + ndw = 2 + count * 2; 3437 + if (ndw > 0x3FFE) 3438 + ndw = 0x3FFE; 3439 + 3440 + ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw); 3441 + ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) | 3442 + WRITE_DATA_DST_SEL(1)); 3443 + ib->ptr[ib->length_dw++] = pe; 3444 + ib->ptr[ib->length_dw++] = upper_32_bits(pe); 3445 + for (; ndw > 2; ndw -= 2, --count, pe += 8) { 3446 + if (flags & RADEON_VM_PAGE_SYSTEM) { 3447 + value = radeon_vm_map_gart(rdev, addr); 3448 + value &= 0xFFFFFFFFFFFFF000ULL; 3449 + } else if (flags & RADEON_VM_PAGE_VALID) { 3450 + value = addr; 3451 + } else { 3452 + value = 0; 3453 + } 3454 + addr += incr; 3455 + value |= r600_flags; 3456 + ib->ptr[ib->length_dw++] = value; 3457 + ib->ptr[ib->length_dw++] = upper_32_bits(value); 3458 + } 3459 + } 3460 + } else { 3461 + /* DMA */ 3462 + if (flags & RADEON_VM_PAGE_SYSTEM) { 3463 + while (count) { 3464 + ndw = count * 2; 3465 + if (ndw > 0xFFFFE) 3466 + ndw = 0xFFFFE; 3467 + 3468 + /* for non-physically contiguous pages (system) */ 3469 + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 
SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 3470 + ib->ptr[ib->length_dw++] = pe; 3471 + ib->ptr[ib->length_dw++] = upper_32_bits(pe); 3472 + ib->ptr[ib->length_dw++] = ndw; 3473 + for (; ndw > 0; ndw -= 2, --count, pe += 8) { 3474 + if (flags & RADEON_VM_PAGE_SYSTEM) { 3475 + value = radeon_vm_map_gart(rdev, addr); 3476 + value &= 0xFFFFFFFFFFFFF000ULL; 3477 + } else if (flags & RADEON_VM_PAGE_VALID) { 3478 + value = addr; 3479 + } else { 3480 + value = 0; 3481 + } 3482 + addr += incr; 3483 + value |= r600_flags; 3484 + ib->ptr[ib->length_dw++] = value; 3485 + ib->ptr[ib->length_dw++] = upper_32_bits(value); 3486 + } 3487 + } 3488 + } else { 3489 + while (count) { 3490 + ndw = count; 3491 + if (ndw > 0x7FFFF) 3492 + ndw = 0x7FFFF; 3493 + 3494 + if (flags & RADEON_VM_PAGE_VALID) 3495 + value = addr; 3496 + else 3497 + value = 0; 3498 + /* for physically contiguous pages (vram) */ 3499 + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0); 3500 + ib->ptr[ib->length_dw++] = pe; /* dst addr */ 3501 + ib->ptr[ib->length_dw++] = upper_32_bits(pe); 3502 + ib->ptr[ib->length_dw++] = r600_flags; /* mask */ 3503 + ib->ptr[ib->length_dw++] = 0; 3504 + ib->ptr[ib->length_dw++] = value; /* value */ 3505 + ib->ptr[ib->length_dw++] = upper_32_bits(value); 3506 + ib->ptr[ib->length_dw++] = incr; /* increment size */ 3507 + ib->ptr[ib->length_dw++] = 0; 3508 + ib->ptr[ib->length_dw++] = ndw; /* number of entries */ 3509 + pe += ndw * 8; 3510 + addr += ndw * incr; 3511 + count -= ndw; 3512 + } 3513 + } 3514 + while (ib->length_dw & 0x7) 3515 + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0); 3516 + } 3517 + } 3518 + 3519 + /** 3411 3520 * cik_dma_vm_flush - cik vm flush using sDMA 3412 3521 * 3413 3522 * @rdev: radeon_device pointer