Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: simplify padding calculations (v2)

Simplify padding calculations.

v2: Comment update and spacing.

Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Luben Tuikov and committed by Alex Deucher
ce73516d f4feb9fa

+20 -13
+2 -2
drivers/gpu/drm/amd/amdgpu/cik_sdma.c
··· 228 228 u32 extra_bits = vmid & 0xf; 229 229 230 230 /* IB packet must end on a 8 DW boundary */ 231 - cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8); 231 + cik_sdma_ring_insert_nop(ring, (4 - lower_32_bits(ring->wptr)) & 7); 232 232 233 233 amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits)); 234 234 amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */ ··· 811 811 u32 pad_count; 812 812 int i; 813 813 814 - pad_count = (8 - (ib->length_dw & 0x7)) % 8; 814 + pad_count = (-ib->length_dw) & 7; 815 815 for (i = 0; i < pad_count; i++) 816 816 if (sdma && sdma->burst_nop && (i == 0)) 817 817 ib->ptr[ib->length_dw++] =
+2 -2
drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
··· 255 255 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 256 256 257 257 /* IB packet must end on a 8 DW boundary */ 258 - sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); 258 + sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7); 259 259 260 260 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | 261 261 SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); ··· 750 750 u32 pad_count; 751 751 int i; 752 752 753 - pad_count = (8 - (ib->length_dw & 0x7)) % 8; 753 + pad_count = (-ib->length_dw) & 7; 754 754 for (i = 0; i < pad_count; i++) 755 755 if (sdma && sdma->burst_nop && (i == 0)) 756 756 ib->ptr[ib->length_dw++] =
+2 -2
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
··· 429 429 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 430 430 431 431 /* IB packet must end on a 8 DW boundary */ 432 - sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); 432 + sdma_v3_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7); 433 433 434 434 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | 435 435 SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); ··· 1021 1021 u32 pad_count; 1022 1022 int i; 1023 1023 1024 - pad_count = (8 - (ib->length_dw & 0x7)) % 8; 1024 + pad_count = (-ib->length_dw) & 7; 1025 1025 for (i = 0; i < pad_count; i++) 1026 1026 if (sdma && sdma->burst_nop && (i == 0)) 1027 1027 ib->ptr[ib->length_dw++] =
+2 -2
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
··· 698 698 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 699 699 700 700 /* IB packet must end on a 8 DW boundary */ 701 - sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); 701 + sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7); 702 702 703 703 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | 704 704 SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); ··· 1579 1579 u32 pad_count; 1580 1580 int i; 1581 1581 1582 - pad_count = (8 - (ib->length_dw & 0x7)) % 8; 1582 + pad_count = (-ib->length_dw) & 7; 1583 1583 for (i = 0; i < pad_count; i++) 1584 1584 if (sdma && sdma->burst_nop && (i == 0)) 1585 1585 ib->ptr[ib->length_dw++] =
+12 -5
drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
··· 382 382 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 383 383 uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid); 384 384 385 - /* IB packet must end on a 8 DW boundary */ 386 - sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); 385 + /* An IB packet must end on a 8 DW boundary--the next dword 386 + * must be on a 8-dword boundary. Our IB packet below is 6 387 + * dwords long, thus add x number of NOPs, such that, in 388 + * modular arithmetic, 389 + * wptr + 6 + x = 8k, k >= 0, which in C is, 390 + * (wptr + 6 + x) % 8 = 0. 391 + * The expression below, is a solution of x. 392 + */ 393 + sdma_v5_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7); 387 394 388 395 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | 389 396 SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); ··· 1083 1076 } 1084 1077 1085 1078 /** 1086 - * sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw 1087 - * 1079 + * sdma_v5_0_ring_pad_ib - pad the IB 1088 1080 * @ib: indirect buffer to fill with padding 1089 1081 * 1082 + * Pad the IB with NOPs to a boundary multiple of 8. 1090 1083 */ 1091 1084 static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) 1092 1085 { ··· 1094 1087 u32 pad_count; 1095 1088 int i; 1096 1089 1097 - pad_count = (8 - (ib->length_dw & 0x7)) % 8; 1090 + pad_count = (-ib->length_dw) & 0x7; 1098 1091 for (i = 0; i < pad_count; i++) 1099 1092 if (sdma && sdma->burst_nop && (i == 0)) 1100 1093 ib->ptr[ib->length_dw++] =