Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/radeon: split PT setup in more functions

Move the decision about which function to use into the common VM code.

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Christian König and committed by Alex Deucher
03f62abd 5a341be2

+496 -251
+124 -69
drivers/gpu/drm/radeon/cik_sdma.c
··· 749 749 } 750 750 751 751 /** 752 - * cik_sdma_vm_set_page - update the page tables using sDMA 752 + * cik_sdma_vm_copy_pages - update PTEs by copying them from the GART 753 + * 754 + * @rdev: radeon_device pointer 755 + * @ib: indirect buffer to fill with commands 756 + * @pe: addr of the page entry 757 + * @src: src addr to copy from 758 + * @count: number of page entries to update 759 + * 760 + * Update PTEs by copying them from the GART using sDMA (CIK). 761 + */ 762 + void cik_sdma_vm_copy_pages(struct radeon_device *rdev, 763 + struct radeon_ib *ib, 764 + uint64_t pe, uint64_t src, 765 + unsigned count) 766 + { 767 + while (count) { 768 + unsigned bytes = count * 8; 769 + if (bytes > 0x1FFFF8) 770 + bytes = 0x1FFFF8; 771 + 772 + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, 773 + SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 774 + ib->ptr[ib->length_dw++] = bytes; 775 + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ 776 + ib->ptr[ib->length_dw++] = lower_32_bits(src); 777 + ib->ptr[ib->length_dw++] = upper_32_bits(src); 778 + ib->ptr[ib->length_dw++] = lower_32_bits(pe); 779 + ib->ptr[ib->length_dw++] = upper_32_bits(pe); 780 + 781 + pe += bytes; 782 + src += bytes; 783 + count -= bytes / 8; 784 + } 785 + } 786 + 787 + /** 788 + * cik_sdma_vm_write_pages - update PTEs by writing them manually 789 + * 790 + * @rdev: radeon_device pointer 791 + * @ib: indirect buffer to fill with commands 792 + * @pe: addr of the page entry 793 + * @addr: dst addr to write into pe 794 + * @count: number of page entries to update 795 + * @incr: increase next addr by incr bytes 796 + * @flags: access flags 797 + * 798 + * Update PTEs by writing them manually using sDMA (CIK). 
799 + */ 800 + void cik_sdma_vm_write_pages(struct radeon_device *rdev, 801 + struct radeon_ib *ib, 802 + uint64_t pe, 803 + uint64_t addr, unsigned count, 804 + uint32_t incr, uint32_t flags) 805 + { 806 + uint64_t value; 807 + unsigned ndw; 808 + 809 + while (count) { 810 + ndw = count * 2; 811 + if (ndw > 0xFFFFE) 812 + ndw = 0xFFFFE; 813 + 814 + /* for non-physically contiguous pages (system) */ 815 + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 816 + SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 817 + ib->ptr[ib->length_dw++] = pe; 818 + ib->ptr[ib->length_dw++] = upper_32_bits(pe); 819 + ib->ptr[ib->length_dw++] = ndw; 820 + for (; ndw > 0; ndw -= 2, --count, pe += 8) { 821 + if (flags & R600_PTE_SYSTEM) { 822 + value = radeon_vm_map_gart(rdev, addr); 823 + value &= 0xFFFFFFFFFFFFF000ULL; 824 + } else if (flags & R600_PTE_VALID) { 825 + value = addr; 826 + } else { 827 + value = 0; 828 + } 829 + addr += incr; 830 + value |= flags; 831 + ib->ptr[ib->length_dw++] = value; 832 + ib->ptr[ib->length_dw++] = upper_32_bits(value); 833 + } 834 + } 835 + } 836 + 837 + /** 838 + * cik_sdma_vm_set_pages - update the page tables using sDMA 753 839 * 754 840 * @rdev: radeon_device pointer 755 841 * @ib: indirect buffer to fill with commands ··· 847 761 * 848 762 * Update the page tables using sDMA (CIK). 
849 763 */ 850 - void cik_sdma_vm_set_page(struct radeon_device *rdev, 851 - struct radeon_ib *ib, 852 - uint64_t pe, 853 - uint64_t addr, unsigned count, 854 - uint32_t incr, uint32_t flags) 764 + void cik_sdma_vm_set_pages(struct radeon_device *rdev, 765 + struct radeon_ib *ib, 766 + uint64_t pe, 767 + uint64_t addr, unsigned count, 768 + uint32_t incr, uint32_t flags) 855 769 { 856 770 uint64_t value; 857 771 unsigned ndw; 858 772 859 - trace_radeon_vm_set_page(pe, addr, count, incr, flags); 773 + while (count) { 774 + ndw = count; 775 + if (ndw > 0x7FFFF) 776 + ndw = 0x7FFFF; 860 777 861 - if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) { 862 - uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8; 863 - while (count) { 864 - unsigned bytes = count * 8; 865 - if (bytes > 0x1FFFF8) 866 - bytes = 0x1FFFF8; 778 + if (flags & R600_PTE_VALID) 779 + value = addr; 780 + else 781 + value = 0; 867 782 868 - ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 869 - ib->ptr[ib->length_dw++] = bytes; 870 - ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ 871 - ib->ptr[ib->length_dw++] = lower_32_bits(src); 872 - ib->ptr[ib->length_dw++] = upper_32_bits(src); 873 - ib->ptr[ib->length_dw++] = lower_32_bits(pe); 874 - ib->ptr[ib->length_dw++] = upper_32_bits(pe); 783 + /* for physically contiguous pages (vram) */ 784 + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0); 785 + ib->ptr[ib->length_dw++] = pe; /* dst addr */ 786 + ib->ptr[ib->length_dw++] = upper_32_bits(pe); 787 + ib->ptr[ib->length_dw++] = flags; /* mask */ 788 + ib->ptr[ib->length_dw++] = 0; 789 + ib->ptr[ib->length_dw++] = value; /* value */ 790 + ib->ptr[ib->length_dw++] = upper_32_bits(value); 791 + ib->ptr[ib->length_dw++] = incr; /* increment size */ 792 + ib->ptr[ib->length_dw++] = 0; 793 + ib->ptr[ib->length_dw++] = ndw; /* number of entries */ 875 794 876 - pe += bytes; 877 - src += bytes; 878 - count -= bytes / 8; 879 - } 
880 - } else if (flags & R600_PTE_SYSTEM) { 881 - while (count) { 882 - ndw = count * 2; 883 - if (ndw > 0xFFFFE) 884 - ndw = 0xFFFFE; 885 - 886 - /* for non-physically contiguous pages (system) */ 887 - ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 888 - ib->ptr[ib->length_dw++] = pe; 889 - ib->ptr[ib->length_dw++] = upper_32_bits(pe); 890 - ib->ptr[ib->length_dw++] = ndw; 891 - for (; ndw > 0; ndw -= 2, --count, pe += 8) { 892 - value = radeon_vm_map_gart(rdev, addr); 893 - value &= 0xFFFFFFFFFFFFF000ULL; 894 - addr += incr; 895 - value |= flags; 896 - ib->ptr[ib->length_dw++] = value; 897 - ib->ptr[ib->length_dw++] = upper_32_bits(value); 898 - } 899 - } 900 - } else { 901 - while (count) { 902 - ndw = count; 903 - if (ndw > 0x7FFFF) 904 - ndw = 0x7FFFF; 905 - 906 - if (flags & R600_PTE_VALID) 907 - value = addr; 908 - else 909 - value = 0; 910 - /* for physically contiguous pages (vram) */ 911 - ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0); 912 - ib->ptr[ib->length_dw++] = pe; /* dst addr */ 913 - ib->ptr[ib->length_dw++] = upper_32_bits(pe); 914 - ib->ptr[ib->length_dw++] = flags; /* mask */ 915 - ib->ptr[ib->length_dw++] = 0; 916 - ib->ptr[ib->length_dw++] = value; /* value */ 917 - ib->ptr[ib->length_dw++] = upper_32_bits(value); 918 - ib->ptr[ib->length_dw++] = incr; /* increment size */ 919 - ib->ptr[ib->length_dw++] = 0; 920 - ib->ptr[ib->length_dw++] = ndw; /* number of entries */ 921 - pe += ndw * 8; 922 - addr += ndw * incr; 923 - count -= ndw; 924 - } 795 + pe += ndw * 8; 796 + addr += ndw * incr; 797 + count -= ndw; 925 798 } 799 + } 800 + 801 + /** 802 + * cik_sdma_vm_pad_ib - pad the IB to the required number of dw 803 + * 804 + * @ib: indirect buffer to fill with padding 805 + * 806 + */ 807 + void cik_sdma_vm_pad_ib(struct radeon_ib *ib) 808 + { 926 809 while (ib->length_dw & 0x7) 927 810 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0); 928 811 }
+122 -73
drivers/gpu/drm/radeon/ni_dma.c
··· 307 307 } 308 308 309 309 /** 310 - * cayman_dma_vm_set_page - update the page tables using the DMA 310 + * cayman_dma_vm_copy_pages - update PTEs by copying them from the GART 311 + * 312 + * @rdev: radeon_device pointer 313 + * @ib: indirect buffer to fill with commands 314 + * @pe: addr of the page entry 315 + * @src: src addr where to copy from 316 + * @count: number of page entries to update 317 + * 318 + * Update PTEs by copying them from the GART using the DMA (cayman/TN). 319 + */ 320 + void cayman_dma_vm_copy_pages(struct radeon_device *rdev, 321 + struct radeon_ib *ib, 322 + uint64_t pe, uint64_t src, 323 + unsigned count) 324 + { 325 + unsigned ndw; 326 + 327 + while (count) { 328 + ndw = count * 2; 329 + if (ndw > 0xFFFFE) 330 + ndw = 0xFFFFE; 331 + 332 + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 333 + 0, 0, ndw); 334 + ib->ptr[ib->length_dw++] = lower_32_bits(pe); 335 + ib->ptr[ib->length_dw++] = lower_32_bits(src); 336 + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 337 + ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff; 338 + 339 + pe += ndw * 4; 340 + src += ndw * 4; 341 + count -= ndw / 2; 342 + } 343 + } 344 + 345 + /** 346 + * cayman_dma_vm_write_pages - update PTEs by writing them manually 311 347 * 312 348 * @rdev: radeon_device pointer 313 349 * @ib: indirect buffer to fill with commands ··· 351 315 * @addr: dst addr to write into pe 352 316 * @count: number of page entries to update 353 317 * @incr: increase next addr by incr bytes 354 - * @flags: hw access flags 318 + * @flags: hw access flags 355 319 * 356 - * Update the page tables using the DMA (cayman/TN). 320 + * Update PTEs by writing them manually using the DMA (cayman/TN). 
357 321 */ 358 - void cayman_dma_vm_set_page(struct radeon_device *rdev, 359 - struct radeon_ib *ib, 360 - uint64_t pe, 361 - uint64_t addr, unsigned count, 362 - uint32_t incr, uint32_t flags) 322 + void cayman_dma_vm_write_pages(struct radeon_device *rdev, 323 + struct radeon_ib *ib, 324 + uint64_t pe, 325 + uint64_t addr, unsigned count, 326 + uint32_t incr, uint32_t flags) 363 327 { 364 328 uint64_t value; 365 329 unsigned ndw; 366 330 367 - trace_radeon_vm_set_page(pe, addr, count, incr, flags); 331 + while (count) { 332 + ndw = count * 2; 333 + if (ndw > 0xFFFFE) 334 + ndw = 0xFFFFE; 368 335 369 - if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) { 370 - uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8; 371 - while (count) { 372 - ndw = count * 2; 373 - if (ndw > 0xFFFFE) 374 - ndw = 0xFFFFE; 375 - 376 - ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 377 - 0, 0, ndw); 378 - ib->ptr[ib->length_dw++] = lower_32_bits(pe); 379 - ib->ptr[ib->length_dw++] = lower_32_bits(src); 380 - ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 381 - ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff; 382 - 383 - pe += ndw * 4; 384 - src += ndw * 4; 385 - count -= ndw / 2; 386 - } 387 - 388 - } else if ((flags & R600_PTE_SYSTEM) || (count == 1)) { 389 - while (count) { 390 - ndw = count * 2; 391 - if (ndw > 0xFFFFE) 392 - ndw = 0xFFFFE; 393 - 394 - /* for non-physically contiguous pages (system) */ 395 - ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw); 396 - ib->ptr[ib->length_dw++] = pe; 397 - ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 398 - for (; ndw > 0; ndw -= 2, --count, pe += 8) { 399 - if (flags & R600_PTE_SYSTEM) { 400 - value = radeon_vm_map_gart(rdev, addr); 401 - value &= 0xFFFFFFFFFFFFF000ULL; 402 - } else if (flags & R600_PTE_VALID) { 403 - value = addr; 404 - } else { 405 - value = 0; 406 - } 407 - addr += incr; 408 - value |= flags; 409 - ib->ptr[ib->length_dw++] = value; 410 - ib->ptr[ib->length_dw++] = 
upper_32_bits(value); 411 - } 412 - } 413 - } else { 414 - while (count) { 415 - ndw = count * 2; 416 - if (ndw > 0xFFFFE) 417 - ndw = 0xFFFFE; 418 - 419 - if (flags & R600_PTE_VALID) 336 + /* for non-physically contiguous pages (system) */ 337 + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 338 + 0, 0, ndw); 339 + ib->ptr[ib->length_dw++] = pe; 340 + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 341 + for (; ndw > 0; ndw -= 2, --count, pe += 8) { 342 + if (flags & R600_PTE_SYSTEM) { 343 + value = radeon_vm_map_gart(rdev, addr); 344 + value &= 0xFFFFFFFFFFFFF000ULL; 345 + } else if (flags & R600_PTE_VALID) { 420 346 value = addr; 421 - else 347 + } else { 422 348 value = 0; 423 - /* for physically contiguous pages (vram) */ 424 - ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); 425 - ib->ptr[ib->length_dw++] = pe; /* dst addr */ 426 - ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 427 - ib->ptr[ib->length_dw++] = flags; /* mask */ 428 - ib->ptr[ib->length_dw++] = 0; 429 - ib->ptr[ib->length_dw++] = value; /* value */ 349 + } 350 + addr += incr; 351 + value |= flags; 352 + ib->ptr[ib->length_dw++] = value; 430 353 ib->ptr[ib->length_dw++] = upper_32_bits(value); 431 - ib->ptr[ib->length_dw++] = incr; /* increment size */ 432 - ib->ptr[ib->length_dw++] = 0; 433 - pe += ndw * 4; 434 - addr += (ndw / 2) * incr; 435 - count -= ndw / 2; 436 354 } 437 355 } 356 + } 357 + 358 + /** 359 + * cayman_dma_vm_set_pages - update the page tables using the DMA 360 + * 361 + * @rdev: radeon_device pointer 362 + * @ib: indirect buffer to fill with commands 363 + * @pe: addr of the page entry 364 + * @addr: dst addr to write into pe 365 + * @count: number of page entries to update 366 + * @incr: increase next addr by incr bytes 367 + * @flags: hw access flags 368 + * 369 + * Update the page tables using the DMA (cayman/TN). 
370 + */ 371 + void cayman_dma_vm_set_pages(struct radeon_device *rdev, 372 + struct radeon_ib *ib, 373 + uint64_t pe, 374 + uint64_t addr, unsigned count, 375 + uint32_t incr, uint32_t flags) 376 + { 377 + uint64_t value; 378 + unsigned ndw; 379 + 380 + while (count) { 381 + ndw = count * 2; 382 + if (ndw > 0xFFFFE) 383 + ndw = 0xFFFFE; 384 + 385 + if (flags & R600_PTE_VALID) 386 + value = addr; 387 + else 388 + value = 0; 389 + 390 + /* for physically contiguous pages (vram) */ 391 + ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); 392 + ib->ptr[ib->length_dw++] = pe; /* dst addr */ 393 + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 394 + ib->ptr[ib->length_dw++] = flags; /* mask */ 395 + ib->ptr[ib->length_dw++] = 0; 396 + ib->ptr[ib->length_dw++] = value; /* value */ 397 + ib->ptr[ib->length_dw++] = upper_32_bits(value); 398 + ib->ptr[ib->length_dw++] = incr; /* increment size */ 399 + ib->ptr[ib->length_dw++] = 0; 400 + 401 + pe += ndw * 4; 402 + addr += (ndw / 2) * incr; 403 + count -= ndw / 2; 404 + } 405 + } 406 + 407 + /** 408 + * cayman_dma_vm_pad_ib - pad the IB to the required number of dw 409 + * 410 + * @ib: indirect buffer to fill with padding 411 + * 412 + */ 413 + void cayman_dma_vm_pad_ib(struct radeon_ib *ib) 414 + { 438 415 while (ib->length_dw & 0x7) 439 416 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); 440 417 }
+19 -6
drivers/gpu/drm/radeon/radeon.h
··· 1797 1797 struct { 1798 1798 int (*init)(struct radeon_device *rdev); 1799 1799 void (*fini)(struct radeon_device *rdev); 1800 - void (*set_page)(struct radeon_device *rdev, 1801 - struct radeon_ib *ib, 1802 - uint64_t pe, 1803 - uint64_t addr, unsigned count, 1804 - uint32_t incr, uint32_t flags); 1800 + void (*copy_pages)(struct radeon_device *rdev, 1801 + struct radeon_ib *ib, 1802 + uint64_t pe, uint64_t src, 1803 + unsigned count); 1804 + void (*write_pages)(struct radeon_device *rdev, 1805 + struct radeon_ib *ib, 1806 + uint64_t pe, 1807 + uint64_t addr, unsigned count, 1808 + uint32_t incr, uint32_t flags); 1809 + void (*set_pages)(struct radeon_device *rdev, 1810 + struct radeon_ib *ib, 1811 + uint64_t pe, 1812 + uint64_t addr, unsigned count, 1813 + uint32_t incr, uint32_t flags); 1814 + void (*pad_ib)(struct radeon_ib *ib); 1805 1815 } vm; 1806 1816 /* ring specific callbacks */ 1807 1817 struct radeon_asic_ring *ring[RADEON_NUM_RINGS]; ··· 2771 2761 #define radeon_gart_set_page(rdev, i, p, f) (rdev)->asic->gart.set_page((rdev), (i), (p), (f)) 2772 2762 #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev)) 2773 2763 #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev)) 2774 - #define radeon_asic_vm_set_page(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (ib), (pe), (addr), (count), (incr), (flags))) 2764 + #define radeon_asic_vm_copy_pages(rdev, ib, pe, src, count) ((rdev)->asic->vm.copy_pages((rdev), (ib), (pe), (src), (count))) 2765 + #define radeon_asic_vm_write_pages(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.write_pages((rdev), (ib), (pe), (addr), (count), (incr), (flags))) 2766 + #define radeon_asic_vm_set_pages(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_pages((rdev), (ib), (pe), (addr), (count), (incr), (flags))) 2767 + #define radeon_asic_vm_pad_ib(rdev, ib) ((rdev)->asic->vm.pad_ib((ib))) 2775 2768 #define radeon_ring_start(rdev, r, cp) 
(rdev)->asic->ring[(r)]->ring_start((rdev), (cp)) 2776 2769 #define radeon_ring_test(rdev, r, cp) (rdev)->asic->ring[(r)]->ring_test((rdev), (cp)) 2777 2770 #define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)]->ib_test((rdev), (cp))
+20 -5
drivers/gpu/drm/radeon/radeon_asic.c
··· 1613 1613 .vm = { 1614 1614 .init = &cayman_vm_init, 1615 1615 .fini = &cayman_vm_fini, 1616 - .set_page = &cayman_dma_vm_set_page, 1616 + .copy_pages = &cayman_dma_vm_copy_pages, 1617 + .write_pages = &cayman_dma_vm_write_pages, 1618 + .set_pages = &cayman_dma_vm_set_pages, 1619 + .pad_ib = &cayman_dma_vm_pad_ib, 1617 1620 }, 1618 1621 .ring = { 1619 1622 [RADEON_RING_TYPE_GFX_INDEX] = &cayman_gfx_ring, ··· 1716 1713 .vm = { 1717 1714 .init = &cayman_vm_init, 1718 1715 .fini = &cayman_vm_fini, 1719 - .set_page = &cayman_dma_vm_set_page, 1716 + .copy_pages = &cayman_dma_vm_copy_pages, 1717 + .write_pages = &cayman_dma_vm_write_pages, 1718 + .set_pages = &cayman_dma_vm_set_pages, 1719 + .pad_ib = &cayman_dma_vm_pad_ib, 1720 1720 }, 1721 1721 .ring = { 1722 1722 [RADEON_RING_TYPE_GFX_INDEX] = &cayman_gfx_ring, ··· 1849 1843 .vm = { 1850 1844 .init = &si_vm_init, 1851 1845 .fini = &si_vm_fini, 1852 - .set_page = &si_dma_vm_set_page, 1846 + .copy_pages = &si_dma_vm_copy_pages, 1847 + .write_pages = &si_dma_vm_write_pages, 1848 + .set_pages = &si_dma_vm_set_pages, 1849 + .pad_ib = &cayman_dma_vm_pad_ib, 1853 1850 }, 1854 1851 .ring = { 1855 1852 [RADEON_RING_TYPE_GFX_INDEX] = &si_gfx_ring, ··· 2010 2001 .vm = { 2011 2002 .init = &cik_vm_init, 2012 2003 .fini = &cik_vm_fini, 2013 - .set_page = &cik_sdma_vm_set_page, 2004 + .copy_pages = &cik_sdma_vm_copy_pages, 2005 + .write_pages = &cik_sdma_vm_write_pages, 2006 + .set_pages = &cik_sdma_vm_set_pages, 2007 + .pad_ib = &cik_sdma_vm_pad_ib, 2014 2008 }, 2015 2009 .ring = { 2016 2010 [RADEON_RING_TYPE_GFX_INDEX] = &ci_gfx_ring, ··· 2117 2105 .vm = { 2118 2106 .init = &cik_vm_init, 2119 2107 .fini = &cik_vm_fini, 2120 - .set_page = &cik_sdma_vm_set_page, 2108 + .copy_pages = &cik_sdma_vm_copy_pages, 2109 + .write_pages = &cik_sdma_vm_write_pages, 2110 + .set_pages = &cik_sdma_vm_set_pages, 2111 + .pad_ib = &cik_sdma_vm_pad_ib, 2121 2112 }, 2122 2113 .ring = { 2123 2114 [RADEON_RING_TYPE_GFX_INDEX] = &ci_gfx_ring,
+49 -15
drivers/gpu/drm/radeon/radeon_asic.h
··· 607 607 struct radeon_ib *ib); 608 608 bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring); 609 609 bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring); 610 - void cayman_dma_vm_set_page(struct radeon_device *rdev, 611 - struct radeon_ib *ib, 612 - uint64_t pe, 613 - uint64_t addr, unsigned count, 614 - uint32_t incr, uint32_t flags); 610 + 611 + void cayman_dma_vm_copy_pages(struct radeon_device *rdev, 612 + struct radeon_ib *ib, 613 + uint64_t pe, uint64_t src, 614 + unsigned count); 615 + void cayman_dma_vm_write_pages(struct radeon_device *rdev, 616 + struct radeon_ib *ib, 617 + uint64_t pe, 618 + uint64_t addr, unsigned count, 619 + uint32_t incr, uint32_t flags); 620 + void cayman_dma_vm_set_pages(struct radeon_device *rdev, 621 + struct radeon_ib *ib, 622 + uint64_t pe, 623 + uint64_t addr, unsigned count, 624 + uint32_t incr, uint32_t flags); 625 + void cayman_dma_vm_pad_ib(struct radeon_ib *ib); 615 626 616 627 void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); 617 628 ··· 705 694 uint64_t src_offset, uint64_t dst_offset, 706 695 unsigned num_gpu_pages, 707 696 struct radeon_fence **fence); 708 - void si_dma_vm_set_page(struct radeon_device *rdev, 709 - struct radeon_ib *ib, 710 - uint64_t pe, 711 - uint64_t addr, unsigned count, 712 - uint32_t incr, uint32_t flags); 697 + 698 + void si_dma_vm_copy_pages(struct radeon_device *rdev, 699 + struct radeon_ib *ib, 700 + uint64_t pe, uint64_t src, 701 + unsigned count); 702 + void si_dma_vm_write_pages(struct radeon_device *rdev, 703 + struct radeon_ib *ib, 704 + uint64_t pe, 705 + uint64_t addr, unsigned count, 706 + uint32_t incr, uint32_t flags); 707 + void si_dma_vm_set_pages(struct radeon_device *rdev, 708 + struct radeon_ib *ib, 709 + uint64_t pe, 710 + uint64_t addr, unsigned count, 711 + uint32_t incr, uint32_t flags); 712 + 713 713 void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm 
*vm); 714 714 u32 si_get_xclk(struct radeon_device *rdev); 715 715 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev); ··· 794 772 int cik_vm_init(struct radeon_device *rdev); 795 773 void cik_vm_fini(struct radeon_device *rdev); 796 774 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); 797 - void cik_sdma_vm_set_page(struct radeon_device *rdev, 798 - struct radeon_ib *ib, 799 - uint64_t pe, 800 - uint64_t addr, unsigned count, 801 - uint32_t incr, uint32_t flags); 775 + 776 + void cik_sdma_vm_copy_pages(struct radeon_device *rdev, 777 + struct radeon_ib *ib, 778 + uint64_t pe, uint64_t src, 779 + unsigned count); 780 + void cik_sdma_vm_write_pages(struct radeon_device *rdev, 781 + struct radeon_ib *ib, 782 + uint64_t pe, 783 + uint64_t addr, unsigned count, 784 + uint32_t incr, uint32_t flags); 785 + void cik_sdma_vm_set_pages(struct radeon_device *rdev, 786 + struct radeon_ib *ib, 787 + uint64_t pe, 788 + uint64_t addr, unsigned count, 789 + uint32_t incr, uint32_t flags); 790 + void cik_sdma_vm_pad_ib(struct radeon_ib *ib); 791 + 802 792 void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); 803 793 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); 804 794 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
+53 -14
drivers/gpu/drm/radeon/radeon_vm.c
··· 341 341 } 342 342 343 343 /** 344 + * radeon_vm_set_pages - helper to call the right asic function 345 + * 346 + * @rdev: radeon_device pointer 347 + * @ib: indirect buffer to fill with commands 348 + * @pe: addr of the page entry 349 + * @addr: dst addr to write into pe 350 + * @count: number of page entries to update 351 + * @incr: increase next addr by incr bytes 352 + * @flags: hw access flags 353 + * 354 + * Traces the parameters and calls the right asic functions 355 + * to setup the page table using the DMA. 356 + */ 357 + static void radeon_vm_set_pages(struct radeon_device *rdev, 358 + struct radeon_ib *ib, 359 + uint64_t pe, 360 + uint64_t addr, unsigned count, 361 + uint32_t incr, uint32_t flags) 362 + { 363 + trace_radeon_vm_set_page(pe, addr, count, incr, flags); 364 + 365 + if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) { 366 + uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8; 367 + radeon_asic_vm_copy_pages(rdev, ib, pe, src, count); 368 + 369 + } else if ((flags & R600_PTE_SYSTEM) || (count < 3)) { 370 + radeon_asic_vm_write_pages(rdev, ib, pe, addr, 371 + count, incr, flags); 372 + 373 + } else { 374 + radeon_asic_vm_set_pages(rdev, ib, pe, addr, 375 + count, incr, flags); 376 + } 377 + } 378 + 379 + /** 344 380 * radeon_vm_clear_bo - initially clear the page dir/table 345 381 * 346 382 * @rdev: radeon_device pointer ··· 417 381 418 382 ib.length_dw = 0; 419 383 420 - radeon_asic_vm_set_page(rdev, &ib, addr, 0, entries, 0, 0); 384 + radeon_vm_set_pages(rdev, &ib, addr, 0, entries, 0, 0); 385 + radeon_asic_vm_pad_ib(rdev, &ib); 421 386 422 387 r = radeon_ib_schedule(rdev, &ib, NULL); 423 388 if (r) ··· 671 634 ((last_pt + incr * count) != pt)) { 672 635 673 636 if (count) { 674 - radeon_asic_vm_set_page(rdev, &ib, last_pde, 675 - last_pt, count, incr, 676 - R600_PTE_VALID); 637 + radeon_vm_set_pages(rdev, &ib, last_pde, 638 + last_pt, count, incr, 639 + R600_PTE_VALID); 677 640 } 678 641 679 642 count = 1; ··· 685 648 } 686 649 
687 650 if (count) 688 - radeon_asic_vm_set_page(rdev, &ib, last_pde, last_pt, count, 689 - incr, R600_PTE_VALID); 651 + radeon_vm_set_pages(rdev, &ib, last_pde, last_pt, count, 652 + incr, R600_PTE_VALID); 690 653 691 654 if (ib.length_dw != 0) { 655 + radeon_asic_vm_pad_ib(rdev, &ib); 692 656 radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj); 693 657 radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use); 694 658 r = radeon_ib_schedule(rdev, &ib, NULL); ··· 757 719 (frag_start >= frag_end)) { 758 720 759 721 count = (pe_end - pe_start) / 8; 760 - radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count, 761 - RADEON_GPU_PAGE_SIZE, flags); 722 + radeon_vm_set_pages(rdev, ib, pe_start, addr, count, 723 + RADEON_GPU_PAGE_SIZE, flags); 762 724 return; 763 725 } 764 726 765 727 /* handle the 4K area at the beginning */ 766 728 if (pe_start != frag_start) { 767 729 count = (frag_start - pe_start) / 8; 768 - radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count, 769 - RADEON_GPU_PAGE_SIZE, flags); 730 + radeon_vm_set_pages(rdev, ib, pe_start, addr, count, 731 + RADEON_GPU_PAGE_SIZE, flags); 770 732 addr += RADEON_GPU_PAGE_SIZE * count; 771 733 } 772 734 773 735 /* handle the area in the middle */ 774 736 count = (frag_end - frag_start) / 8; 775 - radeon_asic_vm_set_page(rdev, ib, frag_start, addr, count, 776 - RADEON_GPU_PAGE_SIZE, flags | frag_flags); 737 + radeon_vm_set_pages(rdev, ib, frag_start, addr, count, 738 + RADEON_GPU_PAGE_SIZE, flags | frag_flags); 777 739 778 740 /* handle the 4K area at the end */ 779 741 if (frag_end != pe_end) { 780 742 addr += RADEON_GPU_PAGE_SIZE * count; 781 743 count = (pe_end - frag_end) / 8; 782 - radeon_asic_vm_set_page(rdev, ib, frag_end, addr, count, 783 - RADEON_GPU_PAGE_SIZE, flags); 744 + radeon_vm_set_pages(rdev, ib, frag_end, addr, count, 745 + RADEON_GPU_PAGE_SIZE, flags); 784 746 } 785 747 } 786 748 ··· 938 900 bo_va->it.last + 1, addr, 939 901 radeon_vm_page_flags(bo_va->flags)); 940 902 903 + 
radeon_asic_vm_pad_ib(rdev, &ib); 941 904 radeon_semaphore_sync_to(ib.semaphore, vm->fence); 942 905 r = radeon_ib_schedule(rdev, &ib, NULL); 943 906 if (r) {
+109 -69
drivers/gpu/drm/radeon/si_dma.c
··· 56 56 } 57 57 58 58 /** 59 - * si_dma_vm_set_page - update the page tables using the DMA 59 + * si_dma_vm_copy_pages - update PTEs by copying them from the GART 60 + * 61 + * @rdev: radeon_device pointer 62 + * @ib: indirect buffer to fill with commands 63 + * @pe: addr of the page entry 64 + * @src: src addr where to copy from 65 + * @count: number of page entries to update 66 + * 67 + * Update PTEs by copying them from the GART using the DMA (SI). 68 + */ 69 + void si_dma_vm_copy_pages(struct radeon_device *rdev, 70 + struct radeon_ib *ib, 71 + uint64_t pe, uint64_t src, 72 + unsigned count) 73 + { 74 + while (count) { 75 + unsigned bytes = count * 8; 76 + if (bytes > 0xFFFF8) 77 + bytes = 0xFFFF8; 78 + 79 + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 80 + 1, 0, 0, bytes); 81 + ib->ptr[ib->length_dw++] = lower_32_bits(pe); 82 + ib->ptr[ib->length_dw++] = lower_32_bits(src); 83 + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 84 + ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff; 85 + 86 + pe += bytes; 87 + src += bytes; 88 + count -= bytes / 8; 89 + } 90 + } 91 + 92 + /** 93 + * si_dma_vm_write_pages - update PTEs by writing them manually 94 + * 95 + * @rdev: radeon_device pointer 96 + * @ib: indirect buffer to fill with commands 97 + * @pe: addr of the page entry 98 + * @addr: dst addr to write into pe 99 + * @count: number of page entries to update 100 + * @incr: increase next addr by incr bytes 101 + * @flags: access flags 102 + * 103 + * Update PTEs by writing them manually using the DMA (SI). 
104 + */ 105 + void si_dma_vm_write_pages(struct radeon_device *rdev, 106 + struct radeon_ib *ib, 107 + uint64_t pe, 108 + uint64_t addr, unsigned count, 109 + uint32_t incr, uint32_t flags) 110 + { 111 + uint64_t value; 112 + unsigned ndw; 113 + 114 + while (count) { 115 + ndw = count * 2; 116 + if (ndw > 0xFFFFE) 117 + ndw = 0xFFFFE; 118 + 119 + /* for non-physically contiguous pages (system) */ 120 + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw); 121 + ib->ptr[ib->length_dw++] = pe; 122 + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 123 + for (; ndw > 0; ndw -= 2, --count, pe += 8) { 124 + if (flags & R600_PTE_SYSTEM) { 125 + value = radeon_vm_map_gart(rdev, addr); 126 + value &= 0xFFFFFFFFFFFFF000ULL; 127 + } else if (flags & R600_PTE_VALID) { 128 + value = addr; 129 + } else { 130 + value = 0; 131 + } 132 + addr += incr; 133 + value |= flags; 134 + ib->ptr[ib->length_dw++] = value; 135 + ib->ptr[ib->length_dw++] = upper_32_bits(value); 136 + } 137 + } 138 + } 139 + 140 + /** 141 + * si_dma_vm_set_pages - update the page tables using the DMA 60 142 * 61 143 * @rdev: radeon_device pointer 62 144 * @ib: indirect buffer to fill with commands ··· 150 68 * 151 69 * Update the page tables using the DMA (SI). 
152 70 */ 153 - void si_dma_vm_set_page(struct radeon_device *rdev, 154 - struct radeon_ib *ib, 155 - uint64_t pe, 156 - uint64_t addr, unsigned count, 157 - uint32_t incr, uint32_t flags) 71 + void si_dma_vm_set_pages(struct radeon_device *rdev, 72 + struct radeon_ib *ib, 73 + uint64_t pe, 74 + uint64_t addr, unsigned count, 75 + uint32_t incr, uint32_t flags) 158 76 { 159 77 uint64_t value; 160 78 unsigned ndw; 161 79 162 - trace_radeon_vm_set_page(pe, addr, count, incr, flags); 80 + while (count) { 81 + ndw = count * 2; 82 + if (ndw > 0xFFFFE) 83 + ndw = 0xFFFFE; 163 84 164 - if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) { 165 - uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8; 166 - while (count) { 167 - unsigned bytes = count * 8; 168 - if (bytes > 0xFFFF8) 169 - bytes = 0xFFFF8; 85 + if (flags & R600_PTE_VALID) 86 + value = addr; 87 + else 88 + value = 0; 170 89 171 - ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 172 - 1, 0, 0, bytes); 173 - ib->ptr[ib->length_dw++] = lower_32_bits(pe); 174 - ib->ptr[ib->length_dw++] = lower_32_bits(src); 175 - ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 176 - ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff; 177 - 178 - pe += bytes; 179 - src += bytes; 180 - count -= bytes / 8; 181 - } 182 - } else if (flags & R600_PTE_SYSTEM) { 183 - while (count) { 184 - ndw = count * 2; 185 - if (ndw > 0xFFFFE) 186 - ndw = 0xFFFFE; 187 - 188 - /* for non-physically contiguous pages (system) */ 189 - ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw); 190 - ib->ptr[ib->length_dw++] = pe; 191 - ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 192 - for (; ndw > 0; ndw -= 2, --count, pe += 8) { 193 - value = radeon_vm_map_gart(rdev, addr); 194 - value &= 0xFFFFFFFFFFFFF000ULL; 195 - addr += incr; 196 - value |= flags; 197 - ib->ptr[ib->length_dw++] = value; 198 - ib->ptr[ib->length_dw++] = upper_32_bits(value); 199 - } 200 - } 201 - } else { 202 - while (count) { 203 - ndw = count 
* 2; 204 - if (ndw > 0xFFFFE) 205 - ndw = 0xFFFFE; 206 - 207 - if (flags & R600_PTE_VALID) 208 - value = addr; 209 - else 210 - value = 0; 211 - /* for physically contiguous pages (vram) */ 212 - ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); 213 - ib->ptr[ib->length_dw++] = pe; /* dst addr */ 214 - ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 215 - ib->ptr[ib->length_dw++] = flags; /* mask */ 216 - ib->ptr[ib->length_dw++] = 0; 217 - ib->ptr[ib->length_dw++] = value; /* value */ 218 - ib->ptr[ib->length_dw++] = upper_32_bits(value); 219 - ib->ptr[ib->length_dw++] = incr; /* increment size */ 220 - ib->ptr[ib->length_dw++] = 0; 221 - pe += ndw * 4; 222 - addr += (ndw / 2) * incr; 223 - count -= ndw / 2; 224 - } 90 + /* for physically contiguous pages (vram) */ 91 + ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); 92 + ib->ptr[ib->length_dw++] = pe; /* dst addr */ 93 + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 94 + ib->ptr[ib->length_dw++] = flags; /* mask */ 95 + ib->ptr[ib->length_dw++] = 0; 96 + ib->ptr[ib->length_dw++] = value; /* value */ 97 + ib->ptr[ib->length_dw++] = upper_32_bits(value); 98 + ib->ptr[ib->length_dw++] = incr; /* increment size */ 99 + ib->ptr[ib->length_dw++] = 0; 100 + pe += ndw * 4; 101 + addr += (ndw / 2) * incr; 102 + count -= ndw / 2; 225 103 } 226 - while (ib->length_dw & 0x7) 227 - ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0); 228 104 } 229 105 230 106 void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)