Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ioat: cleanup completion status reads

The cleanup path makes an effort to perform only an atomic read of the
64-bit completion address. However, in the 32-bit case it does not
matter if we read the upper-32 and lower-32 bits non-atomically, because
the upper-32 bits will always be zero.

Signed-off-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

+46 -75
+29 -49
drivers/dma/ioat/dma.c
··· 201 201 spin_lock_bh(&chan->cleanup_lock); 202 202 spin_lock_bh(&ioat->desc_lock); 203 203 204 - chan->completion_virt->low = 0; 205 - chan->completion_virt->high = 0; 204 + *chan->completion = 0; 206 205 ioat->pending = 0; 207 206 208 207 /* count the descriptors waiting */ ··· 255 256 256 257 dev_dbg(to_dev(chan), "%s\n", __func__); 257 258 chanerr = readl(reg_base + IOAT_CHANERR_OFFSET); 258 - chansts = (chan->completion_virt->low 259 - & IOAT_CHANSTS_DMA_TRANSFER_STATUS); 259 + chansts = *chan->completion & IOAT_CHANSTS_DMA_TRANSFER_STATUS; 260 260 if (chanerr) { 261 261 dev_err(to_dev(chan), 262 262 "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", ··· 291 293 struct ioat_dma_chan *ioat; 292 294 struct ioat_chan_common *chan; 293 295 int i; 294 - 295 - union { 296 - u64 full; 297 - struct { 298 - u32 low; 299 - u32 high; 300 - }; 301 - } completion_hw; 296 + u64 completion; 297 + u32 completion_low; 302 298 unsigned long compl_desc_addr_hw; 303 299 304 300 for (i = 0; i < device->common.chancnt; i++) { ··· 326 334 * try resetting the channel 327 335 */ 328 336 329 - completion_hw.low = readl(chan->reg_base + 337 + /* we need to read the low address first as this 338 + * causes the chipset to latch the upper bits 339 + * for the subsequent read 340 + */ 341 + completion_low = readl(chan->reg_base + 330 342 IOAT_CHANSTS_OFFSET_LOW(chan->device->version)); 331 - completion_hw.high = readl(chan->reg_base + 343 + completion = readl(chan->reg_base + 332 344 IOAT_CHANSTS_OFFSET_HIGH(chan->device->version)); 333 - #if (BITS_PER_LONG == 64) 334 - compl_desc_addr_hw = 335 - completion_hw.full 336 - & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; 337 - #else 338 - compl_desc_addr_hw = 339 - completion_hw.low & IOAT_LOW_COMPLETION_MASK; 340 - #endif 345 + completion <<= 32; 346 + completion |= completion_low; 347 + compl_desc_addr_hw = completion & 348 + IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; 341 349 342 350 if ((compl_desc_addr_hw != 0) 343 351 && 
(compl_desc_addr_hw != chan->watchdog_completion) 344 352 && (compl_desc_addr_hw != chan->last_compl_desc_addr_hw)) { 345 353 chan->last_compl_desc_addr_hw = compl_desc_addr_hw; 346 - chan->completion_virt->low = completion_hw.low; 347 - chan->completion_virt->high = completion_hw.high; 354 + *chan->completion = completion; 348 355 } else { 349 356 ioat1_reset_channel(ioat); 350 357 chan->watchdog_completion = 0; ··· 483 492 484 493 /* allocate a completion writeback area */ 485 494 /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ 486 - chan->completion_virt = pci_pool_alloc(chan->device->completion_pool, 487 - GFP_KERNEL, 488 - &chan->completion_addr); 489 - memset(chan->completion_virt, 0, 490 - sizeof(*chan->completion_virt)); 491 - writel(((u64) chan->completion_addr) & 0x00000000FFFFFFFF, 495 + chan->completion = pci_pool_alloc(chan->device->completion_pool, 496 + GFP_KERNEL, &chan->completion_dma); 497 + memset(chan->completion, 0, sizeof(*chan->completion)); 498 + writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, 492 499 chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); 493 - writel(((u64) chan->completion_addr) >> 32, 500 + writel(((u64) chan->completion_dma) >> 32, 494 501 chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); 495 502 496 503 tasklet_enable(&chan->cleanup_task); ··· 547 558 spin_unlock_bh(&ioat->desc_lock); 548 559 549 560 pci_pool_free(ioatdma_device->completion_pool, 550 - chan->completion_virt, 551 - chan->completion_addr); 561 + chan->completion, 562 + chan->completion_dma); 552 563 553 564 /* one is ok since we left it on there on purpose */ 554 565 if (in_use_descs > 1) 555 566 dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", 556 567 in_use_descs - 1); 557 568 558 - chan->last_completion = chan->completion_addr = 0; 569 + chan->last_completion = 0; 570 + chan->completion_dma = 0; 559 571 chan->watchdog_completion = 0; 560 572 chan->last_compl_desc_addr_hw = 0; 561 573 chan->watchdog_tcp_cookie = 
chan->watchdog_last_tcp_cookie = 0; ··· 699 709 unsigned long ioat_get_current_completion(struct ioat_chan_common *chan) 700 710 { 701 711 unsigned long phys_complete; 712 + u64 completion; 702 713 703 - /* The completion writeback can happen at any time, 704 - so reads by the driver need to be atomic operations 705 - The descriptor physical addresses are limited to 32-bits 706 - when the CPU can only do a 32-bit mov */ 707 - 708 - #if (BITS_PER_LONG == 64) 709 - phys_complete = 710 - chan->completion_virt->full 711 - & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; 712 - #else 713 - phys_complete = chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK; 714 - #endif 714 + completion = *chan->completion; 715 + phys_complete = completion & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; 715 716 716 717 dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, 717 718 (unsigned long long) phys_complete); 718 719 719 - if ((chan->completion_virt->full 720 - & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == 720 + if ((completion & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == 721 721 IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) { 722 722 dev_err(to_dev(chan), "Channel halted, chanerr = %x\n", 723 723 readl(chan->reg_base + IOAT_CHANERR_OFFSET)); ··· 730 750 dma_cookie_t cookie = 0; 731 751 struct dma_async_tx_descriptor *tx; 732 752 733 - prefetch(chan->completion_virt); 753 + prefetch(chan->completion); 734 754 735 755 if (!spin_trylock_bh(&chan->cleanup_lock)) 736 756 return;
+2 -8
drivers/dma/ioat/dma.h
··· 96 96 struct ioatdma_device *device; 97 97 struct dma_chan common; 98 98 99 - dma_addr_t completion_addr; 100 - union { 101 - u64 full; /* HW completion writeback */ 102 - struct { 103 - u32 low; 104 - u32 high; 105 - }; 106 - } *completion_virt; 99 + dma_addr_t completion_dma; 100 + u64 *completion; 107 101 unsigned long last_compl_desc_addr_hw; 108 102 struct tasklet_struct cleanup_task; 109 103 };
+11 -14
drivers/dma/ioat/dma_v2.c
··· 200 200 return; 201 201 202 202 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); 203 - chansts = (chan->completion_virt->low 204 - & IOAT_CHANSTS_DMA_TRANSFER_STATUS); 203 + chansts = *chan->completion & IOAT_CHANSTS_DMA_TRANSFER_STATUS; 205 204 if (chanerr) { 206 205 dev_err(to_dev(chan), 207 206 "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", ··· 280 281 int i; 281 282 struct dma_async_tx_descriptor *tx; 282 283 283 - prefetch(chan->completion_virt); 284 + prefetch(chan->completion); 284 285 285 286 spin_lock_bh(&chan->cleanup_lock); 286 287 phys_complete = ioat_get_current_completion(chan); ··· 469 470 470 471 /* allocate a completion writeback area */ 471 472 /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ 472 - chan->completion_virt = pci_pool_alloc(chan->device->completion_pool, 473 - GFP_KERNEL, 474 - &chan->completion_addr); 475 - if (!chan->completion_virt) 473 + chan->completion = pci_pool_alloc(chan->device->completion_pool, 474 + GFP_KERNEL, &chan->completion_dma); 475 + if (!chan->completion) 476 476 return -ENOMEM; 477 477 478 - memset(chan->completion_virt, 0, 479 - sizeof(*chan->completion_virt)); 480 - writel(((u64) chan->completion_addr) & 0x00000000FFFFFFFF, 478 + memset(chan->completion, 0, sizeof(*chan->completion)); 479 + writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, 481 480 chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); 482 - writel(((u64) chan->completion_addr) >> 32, 481 + writel(((u64) chan->completion_dma) >> 32, 483 482 chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); 484 483 485 484 ioat->alloc_order = ioat_get_alloc_order(); ··· 652 655 ioat->ring = NULL; 653 656 ioat->alloc_order = 0; 654 657 pci_pool_free(ioatdma_device->completion_pool, 655 - chan->completion_virt, 656 - chan->completion_addr); 658 + chan->completion, 659 + chan->completion_dma); 657 660 spin_unlock_bh(&ioat->ring_lock); 658 661 659 662 chan->last_completion = 0; 660 - chan->completion_addr = 0; 663 + chan->completion_dma = 0; 
661 664 ioat->pending = 0; 662 665 ioat->dmacount = 0; 663 666 chan->watchdog_completion = 0;
+4 -4
drivers/dma/ioat/registers.h
··· 94 94 #define IOAT2_CHANSTS_OFFSET_HIGH 0x0C 95 95 #define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ 96 96 ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH) 97 - #define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F 98 - #define IOAT_CHANSTS_SOFT_ERR 0x0000000000000010 99 - #define IOAT_CHANSTS_UNAFFILIATED_ERR 0x0000000000000008 100 - #define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x0000000000000007 97 + #define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL) 98 + #define IOAT_CHANSTS_SOFT_ERR 0x10ULL 99 + #define IOAT_CHANSTS_UNAFFILIATED_ERR 0x8ULL 100 + #define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x7ULL 101 101 #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0 102 102 #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1 103 103 #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2