Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at v2.6.27-rc3, 1708 lines, 48 kB

/*
 * Intel I/OAT DMA Linux driver
 * Copyright(c) 2004 - 2007 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 */

/*
 * This driver supports an Intel I/OAT DMA engine, which does asynchronous
 * copy operations.
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/workqueue.h>
#include "ioatdma.h"
#include "ioatdma_registers.h"
#include "ioatdma_hw.h"

#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)

#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
static int ioat_pending_level = 4;
module_param(ioat_pending_level, int, 0644);
MODULE_PARM_DESC(ioat_pending_level,
		 "high-water mark for pushing ioat descriptors (default: 4)");
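
/*
 * Example (hypothetical values, assuming the driver is built as the
 * usual ioatdma module): raise the high-water mark so more descriptors
 * are batched per doorbell write, either at load time
 *
 *	modprobe ioatdma ioat_pending_level=8
 *
 * or, since the parameter is 0644, at runtime
 *
 *	echo 8 > /sys/module/ioatdma/parameters/ioat_pending_level
 */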

#define RESET_DELAY  msecs_to_jiffies(100)
#define WATCHDOG_DELAY  round_jiffies(msecs_to_jiffies(2000))
static void ioat_dma_chan_reset_part2(struct work_struct *work);
static void ioat_dma_chan_watchdog(struct work_struct *work);

/*
 * workaround for IOAT ver.3.0 null descriptor issue
 * (channel returns error when size is 0)
 */
#define NULL_DESC_BUFFER_SIZE 1

/* internal functions */
static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);

static struct ioat_desc_sw *
ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
static struct ioat_desc_sw *
ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);

static inline struct ioat_dma_chan *ioat_lookup_chan_by_index(
						struct ioatdma_device *device,
						int index)
{
	return device->idx[index];
}

/**
 * ioat_dma_do_interrupt - handler used for single vector interrupt mode
 * @irq: interrupt id
 * @data: interrupt data
 */
static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
{
	struct ioatdma_device *instance = data;
	struct ioat_dma_chan *ioat_chan;
	unsigned long attnstatus;
	int bit;
	u8 intrctrl;

	intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);

	if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
		return IRQ_NONE;

	if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
		writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
		return IRQ_NONE;
	}

	attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
	for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
		ioat_chan = ioat_lookup_chan_by_index(instance, bit);
		tasklet_schedule(&ioat_chan->cleanup_task);
	}

	writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
	return IRQ_HANDLED;
}

/**
 * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
 * @irq: interrupt id
 * @data: interrupt data
 */
static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
{
	struct ioat_dma_chan *ioat_chan = data;

	tasklet_schedule(&ioat_chan->cleanup_task);

	return IRQ_HANDLED;
}

static void ioat_dma_cleanup_tasklet(unsigned long data);

/**
 * ioat_dma_enumerate_channels - find and initialize the device's channels
 * @device: the device to be enumerated
 */
static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
{
	u8 xfercap_scale;
	u32 xfercap;
	int i;
	struct ioat_dma_chan *ioat_chan;

	/*
	 * IOAT ver.3 workarounds
	 */
	if (device->version == IOAT_VER_3_0) {
		u32 chan_err_mask;
		u16 dev_id;
		u32 dmauncerrsts;

		/*
		 * Write CHANERRMSK_INT with 3E07h to mask out the errors
		 * that can cause stability issues for IOAT ver.3
		 */
		chan_err_mask = 0x3E07;
		pci_write_config_dword(device->pdev,
			IOAT_PCI_CHANERRMASK_INT_OFFSET,
			chan_err_mask);

		/*
		 * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
		 * (workaround for spurious config parity error after restart)
		 */
		pci_read_config_word(device->pdev,
			IOAT_PCI_DEVICE_ID_OFFSET,
			&dev_id);
		if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
			dmauncerrsts = 0x10;
			pci_write_config_dword(device->pdev,
				IOAT_PCI_DMAUNCERRSTS_OFFSET,
				dmauncerrsts);
		}
	}

	device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
	xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
	xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
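
	/*
	 * A zero XFERCAP scale apparently encodes "no per-descriptor
	 * limit"; the all-ones substitute (~4 GiB in the u32 xfercap
	 * field) makes the min_t() in the submit paths a no-op for any
	 * realistic transfer.
	 */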

	for (i = 0; i < device->common.chancnt; i++) {
		ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL);
		if (!ioat_chan) {
			device->common.chancnt = i;
			break;
		}

		ioat_chan->device = device;
		ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
		ioat_chan->xfercap = xfercap;
		ioat_chan->desccount = 0;
		INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2);
		if (ioat_chan->device->version != IOAT_VER_1_2) {
			writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE
					| IOAT_DMA_DCA_ANY_CPU,
				ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
		}
		spin_lock_init(&ioat_chan->cleanup_lock);
		spin_lock_init(&ioat_chan->desc_lock);
		INIT_LIST_HEAD(&ioat_chan->free_desc);
		INIT_LIST_HEAD(&ioat_chan->used_desc);
		/* This should be made common somewhere in dmaengine.c */
		ioat_chan->common.device = &device->common;
		list_add_tail(&ioat_chan->common.device_node,
			      &device->common.channels);
		device->idx[i] = ioat_chan;
		tasklet_init(&ioat_chan->cleanup_task,
			     ioat_dma_cleanup_tasklet,
			     (unsigned long) ioat_chan);
		tasklet_disable(&ioat_chan->cleanup_task);
	}
	return device->common.chancnt;
}

/**
 * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
 *                                 descriptors to hw
 * @chan: DMA channel handle
 */
static inline void __ioat1_dma_memcpy_issue_pending(
						struct ioat_dma_chan *ioat_chan)
{
	ioat_chan->pending = 0;
	writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET);
}

static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);

	if (ioat_chan->pending > 0) {
		spin_lock_bh(&ioat_chan->desc_lock);
		__ioat1_dma_memcpy_issue_pending(ioat_chan);
		spin_unlock_bh(&ioat_chan->desc_lock);
	}
}

static inline void __ioat2_dma_memcpy_issue_pending(
						struct ioat_dma_chan *ioat_chan)
{
	ioat_chan->pending = 0;
	writew(ioat_chan->dmacount,
	       ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
}

static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);

	if (ioat_chan->pending > 0) {
		spin_lock_bh(&ioat_chan->desc_lock);
		__ioat2_dma_memcpy_issue_pending(ioat_chan);
		spin_unlock_bh(&ioat_chan->desc_lock);
	}
}
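
/*
 * The two issue_pending flavours above mirror a hardware difference:
 * a CB1 ("version 1.2") channel is told to continue with an explicit
 * APPEND command, while a CB2 ("version 2.0") channel is kicked simply
 * by writing the updated descriptor count to its DMACOUNT register.
 */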

/**
 * ioat_dma_chan_reset_part2 - reinit the channel after a reset
 */
static void ioat_dma_chan_reset_part2(struct work_struct *work)
{
	struct ioat_dma_chan *ioat_chan =
		container_of(work, struct ioat_dma_chan, work.work);
	struct ioat_desc_sw *desc;

	spin_lock_bh(&ioat_chan->cleanup_lock);
	spin_lock_bh(&ioat_chan->desc_lock);

	ioat_chan->completion_virt->low = 0;
	ioat_chan->completion_virt->high = 0;
	ioat_chan->pending = 0;

	/*
	 * count the descriptors waiting, and be sure to do it
	 * right for both the CB1 line and the CB2 ring
	 */
	ioat_chan->dmacount = 0;
	if (ioat_chan->used_desc.prev) {
		desc = to_ioat_desc(ioat_chan->used_desc.prev);
		do {
			ioat_chan->dmacount++;
			desc = to_ioat_desc(desc->node.next);
		} while (&desc->node != ioat_chan->used_desc.next);
	}

	/*
	 * write the new starting descriptor address
	 * this puts channel engine into ARMED state
	 */
	desc = to_ioat_desc(ioat_chan->used_desc.prev);
	switch (ioat_chan->device->version) {
	case IOAT_VER_1_2:
		writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
		       ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
		writel(((u64) desc->async_tx.phys) >> 32,
		       ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);

		writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
			+ IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
		break;
	case IOAT_VER_2_0:
		writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
		       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
		writel(((u64) desc->async_tx.phys) >> 32,
		       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);

		/* tell the engine to go with what's left to be done */
		writew(ioat_chan->dmacount,
		       ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);

		break;
	}
	dev_err(&ioat_chan->device->pdev->dev,
		"chan%d reset - %d descs waiting, %d total desc\n",
		chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);

	spin_unlock_bh(&ioat_chan->desc_lock);
	spin_unlock_bh(&ioat_chan->cleanup_lock);
}

/**
 * ioat_dma_reset_channel - restart a channel
 * @ioat_chan: IOAT DMA channel handle
 */
static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan)
{
	u32 chansts, chanerr;

	if (!ioat_chan->used_desc.prev)
		return;

	chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
	chansts = (ioat_chan->completion_virt->low
					& IOAT_CHANSTS_DMA_TRANSFER_STATUS);
	if (chanerr) {
		dev_err(&ioat_chan->device->pdev->dev,
			"chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
			chan_num(ioat_chan), chansts, chanerr);
		writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
	}

	/*
	 * whack it upside the head with a reset
	 * and wait for things to settle out.
	 * force the pending count to a really big negative
	 * to make sure no one forces an issue_pending
	 * while we're waiting.
	 */

	spin_lock_bh(&ioat_chan->desc_lock);
	ioat_chan->pending = INT_MIN;
	writeb(IOAT_CHANCMD_RESET,
	       ioat_chan->reg_base
	       + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
	spin_unlock_bh(&ioat_chan->desc_lock);

	/* schedule the 2nd half instead of sleeping a long time */
	schedule_delayed_work(&ioat_chan->work, RESET_DELAY);
}
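
/*
 * Watchdog strategy, as implemented below: for a CB1 channel whose
 * completion writeback has not moved in a whole watchdog period, trust
 * CHANSTS if it is non-zero and still advancing and resync the
 * writeback from it; otherwise reset the channel.  For a CB2 channel
 * with a stalled ring, ring the doorbell if the pending count is still
 * under the high-water mark; otherwise reset.
 */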

/**
 * ioat_dma_chan_watchdog - watch for stuck channels
 */
static void ioat_dma_chan_watchdog(struct work_struct *work)
{
	struct ioatdma_device *device =
		container_of(work, struct ioatdma_device, work.work);
	struct ioat_dma_chan *ioat_chan;
	int i;

	union {
		u64 full;
		struct {
			u32 low;
			u32 high;
		};
	} completion_hw;
	unsigned long compl_desc_addr_hw;

	for (i = 0; i < device->common.chancnt; i++) {
		ioat_chan = ioat_lookup_chan_by_index(device, i);

		if (ioat_chan->device->version == IOAT_VER_1_2
			/* have we started processing anything yet? */
		    && ioat_chan->last_completion
			/* have we completed any since last watchdog cycle? */
		    && (ioat_chan->last_completion ==
				ioat_chan->watchdog_completion)
			/* has TCP stuck on one cookie since last watchdog? */
		    && (ioat_chan->watchdog_tcp_cookie ==
				ioat_chan->watchdog_last_tcp_cookie)
		    && (ioat_chan->watchdog_tcp_cookie !=
				ioat_chan->completed_cookie)
			/* is there something in the chain to be processed? */
			/* CB1 chain always has at least the last one processed */
		    && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next)
		    && ioat_chan->pending == 0) {

			/*
			 * Check the CHANSTS register for the completed
			 * descriptor address: if it differs from the
			 * completion writeback, is not zero, and has
			 * changed since the last watchdog pass, the
			 * channel is still working correctly and only
			 * the completion writeback is stale, so update
			 * the writeback with the actual CHANSTS value;
			 * else try resetting the channel.
			 */

			completion_hw.low = readl(ioat_chan->reg_base +
				IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version));
			completion_hw.high = readl(ioat_chan->reg_base +
				IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version));
#if (BITS_PER_LONG == 64)
			compl_desc_addr_hw =
				completion_hw.full
				& IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
#else
			compl_desc_addr_hw =
				completion_hw.low & IOAT_LOW_COMPLETION_MASK;
#endif

			if ((compl_desc_addr_hw != 0)
			    && (compl_desc_addr_hw != ioat_chan->watchdog_completion)
			    && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) {
				ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw;
				ioat_chan->completion_virt->low = completion_hw.low;
				ioat_chan->completion_virt->high = completion_hw.high;
			} else {
				ioat_dma_reset_channel(ioat_chan);
				ioat_chan->watchdog_completion = 0;
				ioat_chan->last_compl_desc_addr_hw = 0;
			}

		/*
		 * for version 2.0: if there are descriptors yet to be
		 * processed and the last completed hasn't changed since
		 * the last watchdog, issue the pending descriptors if
		 * they haven't hit the pending level, to push them
		 * through; else try resetting the channel.
		 */
		} else if (ioat_chan->device->version == IOAT_VER_2_0
		    && ioat_chan->used_desc.prev
		    && ioat_chan->last_completion
		    && ioat_chan->last_completion == ioat_chan->watchdog_completion) {

			if (ioat_chan->pending < ioat_pending_level)
				ioat2_dma_memcpy_issue_pending(&ioat_chan->common);
			else {
				ioat_dma_reset_channel(ioat_chan);
				ioat_chan->watchdog_completion = 0;
			}
		} else {
			ioat_chan->last_compl_desc_addr_hw = 0;
			ioat_chan->watchdog_completion
					= ioat_chan->last_completion;
		}

		ioat_chan->watchdog_last_tcp_cookie =
			ioat_chan->watchdog_tcp_cookie;
	}

	schedule_delayed_work(&device->work, WATCHDOG_DELAY);
}
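
/*
 * tx_submit, CB1 flavour: the prep routine only recorded src, dst and
 * len in the first software descriptor.  Here the request is split
 * into hardware descriptors of at most ->xfercap bytes, chained by
 * physical address, and spliced onto used_desc under desc_lock; only
 * the final descriptor carries the cookie, the completion-status
 * write and any callback.
 */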

static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
	struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
	struct ioat_desc_sw *prev, *new;
	struct ioat_dma_descriptor *hw;
	dma_cookie_t cookie;
	LIST_HEAD(new_chain);
	u32 copy;
	size_t len;
	dma_addr_t src, dst;
	unsigned long orig_flags;
	unsigned int desc_count = 0;

	/* src and dest and len are stored in the initial descriptor */
	len = first->len;
	src = first->src;
	dst = first->dst;
	orig_flags = first->async_tx.flags;
	new = first;

	spin_lock_bh(&ioat_chan->desc_lock);
	prev = to_ioat_desc(ioat_chan->used_desc.prev);
	prefetch(prev->hw);
	do {
		copy = min_t(size_t, len, ioat_chan->xfercap);

		async_tx_ack(&new->async_tx);

		hw = new->hw;
		hw->size = copy;
		hw->ctl = 0;
		hw->src_addr = src;
		hw->dst_addr = dst;
		hw->next = 0;

		/* chain together the physical address list for the HW */
		wmb();
		prev->hw->next = (u64) new->async_tx.phys;

		len -= copy;
		dst += copy;
		src += copy;

		list_add_tail(&new->node, &new_chain);
		desc_count++;
		prev = new;
	} while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan)));

	if (!new) {
		dev_err(&ioat_chan->device->pdev->dev,
			"tx submit failed\n");
		spin_unlock_bh(&ioat_chan->desc_lock);
		return -ENOMEM;
	}

	hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
	if (new->async_tx.callback) {
		hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
		if (first != new) {
			/* move callback into the last desc */
			new->async_tx.callback = first->async_tx.callback;
			new->async_tx.callback_param
					= first->async_tx.callback_param;
			first->async_tx.callback = NULL;
			first->async_tx.callback_param = NULL;
		}
	}

	new->tx_cnt = desc_count;
	new->async_tx.flags = orig_flags; /* client is in control of this ack */

	/* store the original values for use in later cleanup */
	if (new != first) {
		new->src = first->src;
		new->dst = first->dst;
		new->len = first->len;
	}

	/* cookie incr and addition to used_list must be atomic */
	cookie = ioat_chan->common.cookie;
	cookie++;
	if (cookie < 0)
		cookie = 1;
	ioat_chan->common.cookie = new->async_tx.cookie = cookie;

	/* write address into NextDescriptor field of last desc in chain */
	to_ioat_desc(ioat_chan->used_desc.prev)->hw->next =
							first->async_tx.phys;
	list_splice_tail(&new_chain, &ioat_chan->used_desc);

	ioat_chan->dmacount += desc_count;
	ioat_chan->pending += desc_count;
	if (ioat_chan->pending >= ioat_pending_level)
		__ioat1_dma_memcpy_issue_pending(ioat_chan);
	spin_unlock_bh(&ioat_chan->desc_lock);

	return cookie;
}
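
/*
 * The CB2 variant below differs mainly in locking: it is entered with
 * desc_lock already held (taken in ioat2_dma_prep_memcpy()) and drops
 * it before returning, and the ring descriptors are pre-linked, so no
 * per-descriptor next-pointer chaining is needed here.
 */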

static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
	struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
	struct ioat_desc_sw *new;
	struct ioat_dma_descriptor *hw;
	dma_cookie_t cookie;
	u32 copy;
	size_t len;
	dma_addr_t src, dst;
	unsigned long orig_flags;
	unsigned int desc_count = 0;

	/* src and dest and len are stored in the initial descriptor */
	len = first->len;
	src = first->src;
	dst = first->dst;
	orig_flags = first->async_tx.flags;
	new = first;

	/*
	 * ioat_chan->desc_lock is still in force in version 2 path
	 * it gets unlocked at end of this function
	 */
	do {
		copy = min_t(size_t, len, ioat_chan->xfercap);

		async_tx_ack(&new->async_tx);

		hw = new->hw;
		hw->size = copy;
		hw->ctl = 0;
		hw->src_addr = src;
		hw->dst_addr = dst;

		len -= copy;
		dst += copy;
		src += copy;
		desc_count++;
	} while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan)));

	if (!new) {
		dev_err(&ioat_chan->device->pdev->dev,
			"tx submit failed\n");
		spin_unlock_bh(&ioat_chan->desc_lock);
		return -ENOMEM;
	}

	hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
	if (new->async_tx.callback) {
		hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
		if (first != new) {
			/* move callback into the last desc */
			new->async_tx.callback = first->async_tx.callback;
			new->async_tx.callback_param
					= first->async_tx.callback_param;
			first->async_tx.callback = NULL;
			first->async_tx.callback_param = NULL;
		}
	}

	new->tx_cnt = desc_count;
	new->async_tx.flags = orig_flags; /* client is in control of this ack */

	/* store the original values for use in later cleanup */
	if (new != first) {
		new->src = first->src;
		new->dst = first->dst;
		new->len = first->len;
	}

	/* cookie incr and addition to used_list must be atomic */
	cookie = ioat_chan->common.cookie;
	cookie++;
	if (cookie < 0)
		cookie = 1;
	ioat_chan->common.cookie = new->async_tx.cookie = cookie;

	ioat_chan->dmacount += desc_count;
	ioat_chan->pending += desc_count;
	if (ioat_chan->pending >= ioat_pending_level)
		__ioat2_dma_memcpy_issue_pending(ioat_chan);
	spin_unlock_bh(&ioat_chan->desc_lock);

	return cookie;
}
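
/*
 * In both submit paths above the cookie is drawn from a per-channel
 * monotonically increasing counter; on signed wrap it restarts at 1,
 * keeping negative values reserved for error returns.
 */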

/**
 * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
 * @ioat_chan: the channel supplying the memory pool for the descriptors
 * @flags: allocation flags
 */
static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
					struct ioat_dma_chan *ioat_chan,
					gfp_t flags)
{
	struct ioat_dma_descriptor *desc;
	struct ioat_desc_sw *desc_sw;
	struct ioatdma_device *ioatdma_device;
	dma_addr_t phys;

	ioatdma_device = to_ioatdma_device(ioat_chan->common.device);
	desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
	if (unlikely(!desc))
		return NULL;

	desc_sw = kzalloc(sizeof(*desc_sw), flags);
	if (unlikely(!desc_sw)) {
		pci_pool_free(ioatdma_device->dma_pool, desc, phys);
		return NULL;
	}

	memset(desc, 0, sizeof(*desc));
	dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
	switch (ioat_chan->device->version) {
	case IOAT_VER_1_2:
		desc_sw->async_tx.tx_submit = ioat1_tx_submit;
		break;
	case IOAT_VER_2_0:
	case IOAT_VER_3_0:
		desc_sw->async_tx.tx_submit = ioat2_tx_submit;
		break;
	}
	INIT_LIST_HEAD(&desc_sw->async_tx.tx_list);

	desc_sw->hw = desc;
	desc_sw->async_tx.phys = phys;

	return desc_sw;
}

static int ioat_initial_desc_count = 256;
module_param(ioat_initial_desc_count, int, 0644);
MODULE_PARM_DESC(ioat_initial_desc_count,
		 "initial descriptors per channel (default: 256)");

/**
 * ioat2_dma_massage_chan_desc - link the descriptors into a circle
 * @ioat_chan: the channel to be massaged
 */
static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan)
{
	struct ioat_desc_sw *desc, *_desc;

	/* setup used_desc */
	ioat_chan->used_desc.next = ioat_chan->free_desc.next;
	ioat_chan->used_desc.prev = NULL;

	/* pull free_desc out of the circle so that every node is a hw
	 * descriptor, but leave it pointing to the list
	 */
	ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next;
	ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev;

	/* circle link the hw descriptors */
	desc = to_ioat_desc(ioat_chan->free_desc.next);
	desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
	list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) {
		desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
	}
}

/**
 * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors
 * @chan: the channel to be filled out
 */
static int ioat_dma_alloc_chan_resources(struct dma_chan *chan,
					 struct dma_client *client)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	struct ioat_desc_sw *desc;
	u16 chanctrl;
	u32 chanerr;
	int i;
	LIST_HEAD(tmp_list);

	/* have we already been set up? */
	if (!list_empty(&ioat_chan->free_desc))
		return ioat_chan->desccount;

	/* Setup register to interrupt and write completion status on error */
	chanctrl = IOAT_CHANCTRL_ERR_INT_EN |
		IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
		IOAT_CHANCTRL_ERR_COMPLETION_EN;
	writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);

	chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
	if (chanerr) {
		dev_err(&ioat_chan->device->pdev->dev,
			"CHANERR = %x, clearing\n", chanerr);
		writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
	}

	/* Allocate descriptors */
	for (i = 0; i < ioat_initial_desc_count; i++) {
		desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
		if (!desc) {
			dev_err(&ioat_chan->device->pdev->dev,
				"Only %d initial descriptors\n", i);
			break;
		}
		list_add_tail(&desc->node, &tmp_list);
	}
	spin_lock_bh(&ioat_chan->desc_lock);
	ioat_chan->desccount = i;
	list_splice(&tmp_list, &ioat_chan->free_desc);
	if (ioat_chan->device->version != IOAT_VER_1_2)
		ioat2_dma_massage_chan_desc(ioat_chan);
	spin_unlock_bh(&ioat_chan->desc_lock);

	/* allocate a completion writeback area */
	/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
	ioat_chan->completion_virt =
		pci_pool_alloc(ioat_chan->device->completion_pool,
			       GFP_KERNEL,
			       &ioat_chan->completion_addr);
	memset(ioat_chan->completion_virt, 0,
	       sizeof(*ioat_chan->completion_virt));
	writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
	writel(((u64) ioat_chan->completion_addr) >> 32,
	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);

	tasklet_enable(&ioat_chan->cleanup_task);
	ioat_dma_start_null_desc(ioat_chan); /* give chain to dma device */
	return ioat_chan->desccount;
}
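
/*
 * The null descriptor pushed by ioat_dma_start_null_desc() above both
 * arms the channel with a valid chain address and, because it sets the
 * completion-status bit, produces a first completion writeback before
 * any real copy is submitted.
 */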

/**
 * ioat_dma_free_chan_resources - release all the descriptors
 * @chan: the channel to be cleaned
 */
static void ioat_dma_free_chan_resources(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device);
	struct ioat_desc_sw *desc, *_desc;
	int in_use_descs = 0;

	tasklet_disable(&ioat_chan->cleanup_task);
	ioat_dma_memcpy_cleanup(ioat_chan);

	/* Delay 100ms after reset to allow internal DMA logic to quiesce
	 * before removing DMA descriptor resources.
	 */
	writeb(IOAT_CHANCMD_RESET,
	       ioat_chan->reg_base
	       + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
	mdelay(100);

	spin_lock_bh(&ioat_chan->desc_lock);
	switch (ioat_chan->device->version) {
	case IOAT_VER_1_2:
		list_for_each_entry_safe(desc, _desc,
					 &ioat_chan->used_desc, node) {
			in_use_descs++;
			list_del(&desc->node);
			pci_pool_free(ioatdma_device->dma_pool, desc->hw,
				      desc->async_tx.phys);
			kfree(desc);
		}
		list_for_each_entry_safe(desc, _desc,
					 &ioat_chan->free_desc, node) {
			list_del(&desc->node);
			pci_pool_free(ioatdma_device->dma_pool, desc->hw,
				      desc->async_tx.phys);
			kfree(desc);
		}
		break;
	case IOAT_VER_2_0:
	case IOAT_VER_3_0:
		list_for_each_entry_safe(desc, _desc,
					 ioat_chan->free_desc.next, node) {
			list_del(&desc->node);
			pci_pool_free(ioatdma_device->dma_pool, desc->hw,
				      desc->async_tx.phys);
			kfree(desc);
		}
		desc = to_ioat_desc(ioat_chan->free_desc.next);
		pci_pool_free(ioatdma_device->dma_pool, desc->hw,
			      desc->async_tx.phys);
		kfree(desc);
		INIT_LIST_HEAD(&ioat_chan->free_desc);
		INIT_LIST_HEAD(&ioat_chan->used_desc);
		break;
	}
	spin_unlock_bh(&ioat_chan->desc_lock);

	pci_pool_free(ioatdma_device->completion_pool,
		      ioat_chan->completion_virt,
		      ioat_chan->completion_addr);

	/* one is ok since we left it on there on purpose */
	if (in_use_descs > 1)
		dev_err(&ioat_chan->device->pdev->dev,
			"Freeing %d in use descriptors!\n",
			in_use_descs - 1);

	ioat_chan->last_completion = ioat_chan->completion_addr = 0;
	ioat_chan->pending = 0;
	ioat_chan->dmacount = 0;
	ioat_chan->watchdog_completion = 0;
	ioat_chan->last_compl_desc_addr_hw = 0;
	ioat_chan->watchdog_tcp_cookie =
		ioat_chan->watchdog_last_tcp_cookie = 0;
}

/**
 * ioat1_dma_get_next_descriptor - return the next available descriptor
 * @ioat_chan: IOAT DMA channel handle
 *
 * Gets the next descriptor from the chain, and must be called with the
 * channel's desc_lock held.  Allocates more descriptors if the channel
 * has run out.
 */
static struct ioat_desc_sw *
ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
{
	struct ioat_desc_sw *new;

	if (!list_empty(&ioat_chan->free_desc)) {
		new = to_ioat_desc(ioat_chan->free_desc.next);
		list_del(&new->node);
	} else {
		/* try to get another desc */
		new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
		if (!new) {
			dev_err(&ioat_chan->device->pdev->dev,
				"alloc failed\n");
			return NULL;
		}
	}

	prefetch(new->hw);
	return new;
}

static struct ioat_desc_sw *
ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
{
	struct ioat_desc_sw *new;

	/*
	 * used.prev points to where to start processing
	 * used.next points to next free descriptor
	 * if used.prev == NULL, there are none waiting to be processed
	 * if used.next == used.prev.prev, there is only one free descriptor,
	 *	and we need to use it as a noop descriptor before
	 *	linking in a new set of descriptors, since the device
	 *	has probably already read the pointer to it
	 */
	if (ioat_chan->used_desc.prev &&
	    ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) {

		struct ioat_desc_sw *desc;
		struct ioat_desc_sw *noop_desc;
		int i;

		/* set up the noop descriptor */
		noop_desc = to_ioat_desc(ioat_chan->used_desc.next);
		/* set size to non-zero value (channel returns error when size is 0) */
		noop_desc->hw->size = NULL_DESC_BUFFER_SIZE;
		noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
		noop_desc->hw->src_addr = 0;
		noop_desc->hw->dst_addr = 0;

		ioat_chan->used_desc.next = ioat_chan->used_desc.next->next;
		ioat_chan->pending++;
		ioat_chan->dmacount++;

		/* try to get a few more descriptors */
		for (i = 16; i; i--) {
			desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
			if (!desc) {
				dev_err(&ioat_chan->device->pdev->dev,
					"alloc failed\n");
				break;
			}
			list_add_tail(&desc->node, ioat_chan->used_desc.next);

			desc->hw->next
				= to_ioat_desc(desc->node.next)->async_tx.phys;
			to_ioat_desc(desc->node.prev)->hw->next
				= desc->async_tx.phys;
			ioat_chan->desccount++;
		}

		ioat_chan->used_desc.next = noop_desc->node.next;
	}
	new = to_ioat_desc(ioat_chan->used_desc.next);
	prefetch(new);
	ioat_chan->used_desc.next = new->node.next;

	if (ioat_chan->used_desc.prev == NULL)
		ioat_chan->used_desc.prev = &new->node;

	prefetch(new->hw);
	return new;
}

static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
						struct ioat_dma_chan *ioat_chan)
{
	if (!ioat_chan)
		return NULL;

	switch (ioat_chan->device->version) {
	case IOAT_VER_1_2:
		return ioat1_dma_get_next_descriptor(ioat_chan);
	case IOAT_VER_2_0:
	case IOAT_VER_3_0:
		return ioat2_dma_get_next_descriptor(ioat_chan);
	}
	return NULL;
}

static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy(
						struct dma_chan *chan,
						dma_addr_t dma_dest,
						dma_addr_t dma_src,
						size_t len,
						unsigned long flags)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	struct ioat_desc_sw *new;

	spin_lock_bh(&ioat_chan->desc_lock);
	new = ioat_dma_get_next_descriptor(ioat_chan);
	spin_unlock_bh(&ioat_chan->desc_lock);

	if (new) {
		new->len = len;
		new->dst = dma_dest;
		new->src = dma_src;
		new->async_tx.flags = flags;
		return &new->async_tx;
	} else {
		dev_err(&ioat_chan->device->pdev->dev,
			"chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
			chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
		return NULL;
	}
}

static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
						struct dma_chan *chan,
						dma_addr_t dma_dest,
						dma_addr_t dma_src,
						size_t len,
						unsigned long flags)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	struct ioat_desc_sw *new;

	spin_lock_bh(&ioat_chan->desc_lock);
	new = ioat2_dma_get_next_descriptor(ioat_chan);

	/*
	 * leave ioat_chan->desc_lock set in ioat 2 path
	 * it will get unlocked at end of tx_submit
	 */

	if (new) {
		new->len = len;
		new->dst = dma_dest;
		new->src = dma_src;
		new->async_tx.flags = flags;
		return &new->async_tx;
	} else {
		spin_unlock_bh(&ioat_chan->desc_lock);
		dev_err(&ioat_chan->device->pdev->dev,
			"chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
			chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
		return NULL;
	}
}

static void ioat_dma_cleanup_tasklet(unsigned long data)
{
	struct ioat_dma_chan *chan = (void *)data;
	ioat_dma_memcpy_cleanup(chan);
	writew(IOAT_CHANCTRL_INT_DISABLE,
	       chan->reg_base + IOAT_CHANCTRL_OFFSET);
}

static void
ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc)
{
	/*
	 * yes we are unmapping both _page and _single
	 * alloc'd regions with unmap_page. Is this
	 * *really* that bad?
	 */
	if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP))
		pci_unmap_page(ioat_chan->device->pdev,
			       pci_unmap_addr(desc, dst),
			       pci_unmap_len(desc, len),
			       PCI_DMA_FROMDEVICE);

	if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP))
		pci_unmap_page(ioat_chan->device->pdev,
			       pci_unmap_addr(desc, src),
			       pci_unmap_len(desc, len),
			       PCI_DMA_TODEVICE);
}

/**
 * ioat_dma_memcpy_cleanup - clean up finished descriptors
 * @chan: ioat channel to be cleaned up
 */
static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
{
	unsigned long phys_complete;
	struct ioat_desc_sw *desc, *_desc;
	dma_cookie_t cookie = 0;
	unsigned long desc_phys;
	struct ioat_desc_sw *latest_desc;

	prefetch(ioat_chan->completion_virt);

	if (!spin_trylock_bh(&ioat_chan->cleanup_lock))
		return;

	/* The completion writeback can happen at any time,
	   so reads by the driver need to be atomic operations.
	   The descriptor physical addresses are limited to 32-bits
	   when the CPU can only do a 32-bit mov. */

#if (BITS_PER_LONG == 64)
	phys_complete =
		ioat_chan->completion_virt->full
		& IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
#else
	phys_complete =
		ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
#endif

	if ((ioat_chan->completion_virt->full
		& IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
				IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
		dev_err(&ioat_chan->device->pdev->dev,
			"Channel halted, chanerr = %x\n",
			readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET));

		/* TODO do something to salvage the situation */
	}

	if (phys_complete == ioat_chan->last_completion) {
		spin_unlock_bh(&ioat_chan->cleanup_lock);
		/*
		 * perhaps we're stuck so hard that the watchdog can't go off?
		 * try to catch it after 2 seconds
		 */
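		/*
		 * (WATCHDOG_DELAY is already expressed in jiffies, so it
		 * is added to the timestamp directly to approximate that
		 * two-second window.)
		 */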
		if (ioat_chan->device->version != IOAT_VER_3_0) {
			if (time_after(jiffies,
				       ioat_chan->last_completion_time + WATCHDOG_DELAY)) {
				ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
				ioat_chan->last_completion_time = jiffies;
			}
		}
		return;
	}
	ioat_chan->last_completion_time = jiffies;

	cookie = 0;
	if (!spin_trylock_bh(&ioat_chan->desc_lock)) {
		spin_unlock_bh(&ioat_chan->cleanup_lock);
		return;
	}

	switch (ioat_chan->device->version) {
	case IOAT_VER_1_2:
		list_for_each_entry_safe(desc, _desc,
					 &ioat_chan->used_desc, node) {

			/*
			 * Incoming DMA requests may use multiple descriptors,
			 * due to exceeding xfercap, perhaps. If so, only the
			 * last one will have a cookie, and require unmapping.
			 */
			if (desc->async_tx.cookie) {
				cookie = desc->async_tx.cookie;
				ioat_dma_unmap(ioat_chan, desc);
				if (desc->async_tx.callback) {
					desc->async_tx.callback(desc->async_tx.callback_param);
					desc->async_tx.callback = NULL;
				}
			}

			if (desc->async_tx.phys != phys_complete) {
				/*
				 * a completed entry, but not the last, so clean
				 * up if the client is done with the descriptor
				 */
				if (async_tx_test_ack(&desc->async_tx)) {
					list_del(&desc->node);
					list_add_tail(&desc->node,
						      &ioat_chan->free_desc);
				} else
					desc->async_tx.cookie = 0;
			} else {
				/*
				 * last used desc. Do not remove, so we can
				 * append from it, but don't look at it next
				 * time, either
				 */
				desc->async_tx.cookie = 0;

				/* TODO check status bits? */
				break;
			}
		}
		break;
	case IOAT_VER_2_0:
	case IOAT_VER_3_0:
		/* has some other thread already cleaned up? */
		if (ioat_chan->used_desc.prev == NULL)
			break;

		/* work backwards to find latest finished desc */
		desc = to_ioat_desc(ioat_chan->used_desc.next);
		latest_desc = NULL;
		do {
			desc = to_ioat_desc(desc->node.prev);
			desc_phys = (unsigned long)desc->async_tx.phys
				& IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
			if (desc_phys == phys_complete) {
				latest_desc = desc;
				break;
			}
		} while (&desc->node != ioat_chan->used_desc.prev);

		if (latest_desc != NULL) {

			/* work forwards to clear finished descriptors */
			for (desc = to_ioat_desc(ioat_chan->used_desc.prev);
			     &desc->node != latest_desc->node.next &&
			     &desc->node != ioat_chan->used_desc.next;
			     desc = to_ioat_desc(desc->node.next)) {
				if (desc->async_tx.cookie) {
					cookie = desc->async_tx.cookie;
					desc->async_tx.cookie = 0;
					ioat_dma_unmap(ioat_chan, desc);
					if (desc->async_tx.callback) {
						desc->async_tx.callback(desc->async_tx.callback_param);
						desc->async_tx.callback = NULL;
					}
				}
			}

			/* move used.prev up beyond those that are finished */
			if (&desc->node == ioat_chan->used_desc.next)
				ioat_chan->used_desc.prev = NULL;
			else
				ioat_chan->used_desc.prev = &desc->node;
		}
		break;
	}

	spin_unlock_bh(&ioat_chan->desc_lock);

	ioat_chan->last_completion = phys_complete;
	if (cookie != 0)
		ioat_chan->completed_cookie = cookie;

	spin_unlock_bh(&ioat_chan->cleanup_lock);
}

/**
 * ioat_dma_is_complete - poll the status of an IOAT DMA transaction
 * @chan: IOAT DMA channel handle
 * @cookie: DMA transaction identifier
 * @done: if not %NULL, updated with last completed transaction
 * @used: if not %NULL, updated with last used transaction
 */
static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
					    dma_cookie_t cookie,
					    dma_cookie_t *done,
					    dma_cookie_t *used)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	dma_cookie_t last_used;
	dma_cookie_t last_complete;
	enum dma_status ret;

	last_used = chan->cookie;
	last_complete = ioat_chan->completed_cookie;
	ioat_chan->watchdog_tcp_cookie = cookie;

	if (done)
		*done = last_complete;
	if (used)
		*used = last_used;

	ret = dma_async_is_complete(cookie, last_complete, last_used);
	if (ret == DMA_SUCCESS)
		return ret;

	ioat_dma_memcpy_cleanup(ioat_chan);

	last_used = chan->cookie;
	last_complete = ioat_chan->completed_cookie;

	if (done)
		*done = last_complete;
	if (used)
		*used = last_used;

	return dma_async_is_complete(cookie, last_complete, last_used);
}
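
/*
 * Push a NULL (no-op) descriptor: it moves no data but sets the
 * completion-status and interrupt bits, giving the engine a valid
 * chain address and an initial completion writeback.
 */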

static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
{
	struct ioat_desc_sw *desc;

	spin_lock_bh(&ioat_chan->desc_lock);

	desc = ioat_dma_get_next_descriptor(ioat_chan);

	if (!desc) {
		dev_err(&ioat_chan->device->pdev->dev,
			"Unable to start null desc - get next desc failed\n");
		spin_unlock_bh(&ioat_chan->desc_lock);
		return;
	}

	desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
				| IOAT_DMA_DESCRIPTOR_CTL_INT_GN
				| IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
	/* set size to non-zero value (channel returns error when size is 0) */
	desc->hw->size = NULL_DESC_BUFFER_SIZE;
	desc->hw->src_addr = 0;
	desc->hw->dst_addr = 0;
	async_tx_ack(&desc->async_tx);
	switch (ioat_chan->device->version) {
	case IOAT_VER_1_2:
		desc->hw->next = 0;
		list_add_tail(&desc->node, &ioat_chan->used_desc);

		writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
		       ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
		writel(((u64) desc->async_tx.phys) >> 32,
		       ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);

		writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
			+ IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
		break;
	case IOAT_VER_2_0:
	case IOAT_VER_3_0:
		writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
		       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
		writel(((u64) desc->async_tx.phys) >> 32,
		       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);

		ioat_chan->dmacount++;
		__ioat2_dma_memcpy_issue_pending(ioat_chan);
		break;
	}
	spin_unlock_bh(&ioat_chan->desc_lock);
}

/*
 * Perform an IOAT transaction to verify the HW works.
 */
#define IOAT_TEST_SIZE 2000

static void ioat_dma_test_callback(void *dma_async_param)
{
	printk(KERN_ERR "ioatdma: ioat_dma_test_callback(%p)\n",
	       dma_async_param);
}

/**
 * ioat_dma_self_test - Perform an IOAT transaction to verify the HW works.
 * @device: device to be tested
 */
static int ioat_dma_self_test(struct ioatdma_device *device)
{
	int i;
	u8 *src;
	u8 *dest;
	struct dma_chan *dma_chan;
	struct dma_async_tx_descriptor *tx;
	dma_addr_t dma_dest, dma_src;
	dma_cookie_t cookie;
	int err = 0;

	src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
	if (!src)
		return -ENOMEM;
	dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
	if (!dest) {
		kfree(src);
		return -ENOMEM;
	}

	/* Fill in src buffer */
	for (i = 0; i < IOAT_TEST_SIZE; i++)
		src[i] = (u8)i;

	/* Start copy, using first DMA channel */
	dma_chan = container_of(device->common.channels.next,
				struct dma_chan,
				device_node);
	if (device->common.device_alloc_chan_resources(dma_chan, NULL) < 1) {
		dev_err(&device->pdev->dev,
			"selftest cannot allocate chan resource\n");
		err = -ENODEV;
		goto out;
	}

	dma_src = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
				 DMA_TO_DEVICE);
	dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
				  DMA_FROM_DEVICE);
	tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
						   IOAT_TEST_SIZE, 0);
	if (!tx) {
		dev_err(&device->pdev->dev,
			"Self-test prep failed, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

	async_tx_ack(tx);
	tx->callback = ioat_dma_test_callback;
	tx->callback_param = (void *)0x8086;
	cookie = tx->tx_submit(tx);
	if (cookie < 0) {
		dev_err(&device->pdev->dev,
			"Self-test setup failed, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}
	device->common.device_issue_pending(dma_chan);
	msleep(1);

	if (device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL)
					!= DMA_SUCCESS) {
		dev_err(&device->pdev->dev,
			"Self-test copy timed out, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}
	if (memcmp(src, dest, IOAT_TEST_SIZE)) {
		dev_err(&device->pdev->dev,
			"Self-test copy failed compare, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

free_resources:
	device->common.device_free_chan_resources(dma_chan);
out:
	kfree(src);
	kfree(dest);
	return err;
}
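
/*
 * Example (hypothetical scenario, assuming the usual ioatdma module
 * name): forcing plain MSI on a platform with unreliable MSI-X
 * routing:
 *
 *	modprobe ioatdma ioat_interrupt_style=msi
 */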

static char ioat_interrupt_style[32] = "msix";
module_param_string(ioat_interrupt_style, ioat_interrupt_style,
		    sizeof(ioat_interrupt_style), 0644);
MODULE_PARM_DESC(ioat_interrupt_style,
		 "set ioat interrupt style: msix (default), "
		 "msix-single-vector, msi, intx");

/**
 * ioat_dma_setup_interrupts - setup interrupt handler
 * @device: ioat device
 */
static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
{
	struct ioat_dma_chan *ioat_chan;
	int err, i, j, msixcnt;
	u8 intrctrl = 0;

	if (!strcmp(ioat_interrupt_style, "msix"))
		goto msix;
	if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
		goto msix_single_vector;
	if (!strcmp(ioat_interrupt_style, "msi"))
		goto msi;
	if (!strcmp(ioat_interrupt_style, "intx"))
		goto intx;
	dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n",
		ioat_interrupt_style);
	goto err_no_irq;

msix:
	/* The number of MSI-X vectors should equal the number of channels */
	msixcnt = device->common.chancnt;
	for (i = 0; i < msixcnt; i++)
		device->msix_entries[i].entry = i;

	err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt);
	if (err < 0)
		goto msi;
	if (err > 0)
		goto msix_single_vector;

	for (i = 0; i < msixcnt; i++) {
		ioat_chan = ioat_lookup_chan_by_index(device, i);
		err = request_irq(device->msix_entries[i].vector,
				  ioat_dma_do_interrupt_msix,
				  0, "ioat-msix", ioat_chan);
		if (err) {
			for (j = 0; j < i; j++) {
				ioat_chan =
					ioat_lookup_chan_by_index(device, j);
				free_irq(device->msix_entries[j].vector,
					 ioat_chan);
			}
			goto msix_single_vector;
		}
	}
	intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
	device->irq_mode = msix_multi_vector;
	goto done;

msix_single_vector:
	device->msix_entries[0].entry = 0;
	err = pci_enable_msix(device->pdev, device->msix_entries, 1);
	if (err)
		goto msi;

	err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt,
			  0, "ioat-msix", device);
	if (err) {
		pci_disable_msix(device->pdev);
		goto msi;
	}
	device->irq_mode = msix_single_vector;
	goto done;

msi:
	err = pci_enable_msi(device->pdev);
	if (err)
		goto intx;

	err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
			  0, "ioat-msi", device);
	if (err) {
		pci_disable_msi(device->pdev);
		goto intx;
	}
	/*
	 * CB 1.2 devices need a bit set in configuration space to enable MSI
	 */
	if (device->version == IOAT_VER_1_2) {
		u32 dmactrl;
		pci_read_config_dword(device->pdev,
				      IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
		dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
		pci_write_config_dword(device->pdev,
				       IOAT_PCI_DMACTRL_OFFSET, dmactrl);
	}
	device->irq_mode = msi;
	goto done;

intx:
	err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
			  IRQF_SHARED, "ioat-intx", device);
	if (err)
		goto err_no_irq;
	device->irq_mode = intx;

done:
	intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
	writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
	return 0;

err_no_irq:
	/* Disable all interrupt generation */
	writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
	dev_err(&device->pdev->dev, "no usable interrupts\n");
	device->irq_mode = none;
	return -1;
}
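
/*
 * The goto chain above forms a fallback ladder: per-channel MSI-X,
 * then single-vector MSI-X, then MSI, then legacy INTx; a failure at
 * any rung drops to the next-simpler mode.
 */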

/**
 * ioat_dma_remove_interrupts - remove whatever interrupts were set
 * @device: ioat device
 */
static void ioat_dma_remove_interrupts(struct ioatdma_device *device)
{
	struct ioat_dma_chan *ioat_chan;
	int i;

	/* Disable all interrupt generation */
	writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);

	switch (device->irq_mode) {
	case msix_multi_vector:
		for (i = 0; i < device->common.chancnt; i++) {
			ioat_chan = ioat_lookup_chan_by_index(device, i);
			free_irq(device->msix_entries[i].vector, ioat_chan);
		}
		pci_disable_msix(device->pdev);
		break;
	case msix_single_vector:
		free_irq(device->msix_entries[0].vector, device);
		pci_disable_msix(device->pdev);
		break;
	case msi:
		free_irq(device->pdev->irq, device);
		pci_disable_msi(device->pdev);
		break;
	case intx:
		free_irq(device->pdev->irq, device);
		break;
	case none:
		dev_warn(&device->pdev->dev,
			 "call to %s without interrupts setup\n", __func__);
	}
	device->irq_mode = none;
}

struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
				      void __iomem *iobase)
{
	int err;
	struct ioatdma_device *device;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device) {
		err = -ENOMEM;
		goto err_kzalloc;
	}
	device->pdev = pdev;
	device->reg_base = iobase;
	device->version = readb(device->reg_base + IOAT_VER_OFFSET);

	/* DMA coherent memory pool for DMA descriptor allocations */
	device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
					   sizeof(struct ioat_dma_descriptor),
					   64, 0);
	if (!device->dma_pool) {
		err = -ENOMEM;
		goto err_dma_pool;
	}

	device->completion_pool = pci_pool_create("completion_pool", pdev,
						  sizeof(u64), SMP_CACHE_BYTES,
						  SMP_CACHE_BYTES);
	if (!device->completion_pool) {
		err = -ENOMEM;
		goto err_completion_pool;
	}

	INIT_LIST_HEAD(&device->common.channels);
	ioat_dma_enumerate_channels(device);

	device->common.device_alloc_chan_resources =
						ioat_dma_alloc_chan_resources;
	device->common.device_free_chan_resources =
						ioat_dma_free_chan_resources;
	device->common.dev = &pdev->dev;

	dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
	device->common.device_is_tx_complete = ioat_dma_is_complete;
	switch (device->version) {
	case IOAT_VER_1_2:
		device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
		device->common.device_issue_pending =
						ioat1_dma_memcpy_issue_pending;
		break;
	case IOAT_VER_2_0:
	case IOAT_VER_3_0:
		device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy;
		device->common.device_issue_pending =
						ioat2_dma_memcpy_issue_pending;
		break;
	}

	dev_err(&device->pdev->dev,
		"Intel(R) I/OAT DMA Engine found,"
		" %d channels, device version 0x%02x, driver version %s\n",
		device->common.chancnt, device->version, IOAT_DMA_VERSION);

	err = ioat_dma_setup_interrupts(device);
	if (err)
		goto err_setup_interrupts;

	err = ioat_dma_self_test(device);
	if (err)
		goto err_self_test;

	ioat_set_tcp_copy_break(device);

	dma_async_device_register(&device->common);

	if (device->version != IOAT_VER_3_0) {
		INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
		schedule_delayed_work(&device->work, WATCHDOG_DELAY);
	}

	return device;

err_self_test:
	ioat_dma_remove_interrupts(device);
err_setup_interrupts:
	pci_pool_destroy(device->completion_pool);
err_completion_pool:
	pci_pool_destroy(device->dma_pool);
err_dma_pool:
	kfree(device);
err_kzalloc:
	dev_err(&pdev->dev,
		"Intel(R) I/OAT DMA Engine initialization failed\n");
	return NULL;
}

void ioat_dma_remove(struct ioatdma_device *device)
{
	struct dma_chan *chan, *_chan;
	struct ioat_dma_chan *ioat_chan;

	ioat_dma_remove_interrupts(device);

	dma_async_device_unregister(&device->common);

	pci_pool_destroy(device->dma_pool);
	pci_pool_destroy(device->completion_pool);

	iounmap(device->reg_base);
	pci_release_regions(device->pdev);
	pci_disable_device(device->pdev);

	if (device->version != IOAT_VER_3_0)
		cancel_delayed_work(&device->work);

	list_for_each_entry_safe(chan, _chan,
				 &device->common.channels, device_node) {
		ioat_chan = to_ioat_chan(chan);
		list_del(&chan->device_node);
		kfree(ioat_chan);
	}
	kfree(device);
}