Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
Intel I/OAT DMA driver source at v2.6.30-rc6 (1741 lines, 49 kB)
/*
 * Intel I/OAT DMA Linux driver
 * Copyright(c) 2004 - 2009 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 */

/*
 * This driver supports an Intel I/OAT DMA engine, which does asynchronous
 * copy operations.
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/workqueue.h>
#include <linux/i7300_idle.h>
#include "ioatdma.h"
#include "ioatdma_registers.h"
#include "ioatdma_hw.h"

#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)

#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
static int ioat_pending_level = 4;
module_param(ioat_pending_level, int, 0644);
MODULE_PARM_DESC(ioat_pending_level,
		 "high-water mark for pushing ioat descriptors (default: 4)");

#define RESET_DELAY msecs_to_jiffies(100)
#define WATCHDOG_DELAY round_jiffies(msecs_to_jiffies(2000))
static void ioat_dma_chan_reset_part2(struct work_struct *work);
static void ioat_dma_chan_watchdog(struct work_struct *work);

/*
 * workaround for IOAT ver.3.0 null descriptor issue
 * (channel returns error when size is 0)
 */
#define NULL_DESC_BUFFER_SIZE 1

/* internal functions */
static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);

static struct ioat_desc_sw *
ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
static struct ioat_desc_sw *
ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);

static inline struct ioat_dma_chan *ioat_lookup_chan_by_index(
						struct ioatdma_device *device,
						int index)
{
	return device->idx[index];
}

/**
 * ioat_dma_do_interrupt - handler used for single vector interrupt mode
 * @irq: interrupt id
 * @data: interrupt data
 */
static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
{
	struct ioatdma_device *instance = data;
	struct ioat_dma_chan *ioat_chan;
	unsigned long attnstatus;
	int bit;
	u8 intrctrl;

	intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);

	if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
		return IRQ_NONE;

	if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
		writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
		return IRQ_NONE;
	}

	attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
	for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
		ioat_chan = ioat_lookup_chan_by_index(instance, bit);
		tasklet_schedule(&ioat_chan->cleanup_task);
	}

	writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
	return IRQ_HANDLED;
}
/**
 * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
 * @irq: interrupt id
 * @data: interrupt data
 */
static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
{
	struct ioat_dma_chan *ioat_chan = data;

	tasklet_schedule(&ioat_chan->cleanup_task);

	return IRQ_HANDLED;
}

static void ioat_dma_cleanup_tasklet(unsigned long data);

/**
 * ioat_dma_enumerate_channels - find and initialize the device's channels
 * @device: the device to be enumerated
 */
static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
{
	u8 xfercap_scale;
	u32 xfercap;
	int i;
	struct ioat_dma_chan *ioat_chan;

	/*
	 * IOAT ver.3 workarounds
	 */
	if (device->version == IOAT_VER_3_0) {
		u32 chan_err_mask;
		u16 dev_id;
		u32 dmauncerrsts;

		/*
		 * Write CHANERRMSK_INT with 3E07h to mask out the errors
		 * that can cause stability issues for IOAT ver.3
		 */
		chan_err_mask = 0x3E07;
		pci_write_config_dword(device->pdev,
			IOAT_PCI_CHANERRMASK_INT_OFFSET,
			chan_err_mask);

		/*
		 * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
		 * (workaround for spurious config parity error after restart)
		 */
		pci_read_config_word(device->pdev,
			IOAT_PCI_DEVICE_ID_OFFSET,
			&dev_id);
		if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
			dmauncerrsts = 0x10;
			pci_write_config_dword(device->pdev,
				IOAT_PCI_DMAUNCERRSTS_OFFSET,
				dmauncerrsts);
		}
	}

	device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
	xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
	xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));

#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
	if (i7300_idle_platform_probe(NULL, NULL) == 0) {
		device->common.chancnt--;
	}
#endif
	for (i = 0; i < device->common.chancnt; i++) {
		ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL);
		if (!ioat_chan) {
			device->common.chancnt = i;
			break;
		}

		ioat_chan->device = device;
		ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
		ioat_chan->xfercap = xfercap;
		ioat_chan->desccount = 0;
		INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2);
		if (ioat_chan->device->version == IOAT_VER_2_0)
			writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE |
			       IOAT_DMA_DCA_ANY_CPU,
			       ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
		else if (ioat_chan->device->version == IOAT_VER_3_0)
			writel(IOAT_DMA_DCA_ANY_CPU,
			       ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
		spin_lock_init(&ioat_chan->cleanup_lock);
		spin_lock_init(&ioat_chan->desc_lock);
		INIT_LIST_HEAD(&ioat_chan->free_desc);
		INIT_LIST_HEAD(&ioat_chan->used_desc);
		/* This should be made common somewhere in dmaengine.c */
		ioat_chan->common.device = &device->common;
		list_add_tail(&ioat_chan->common.device_node,
			      &device->common.channels);
		device->idx[i] = ioat_chan;
		tasklet_init(&ioat_chan->cleanup_task,
			     ioat_dma_cleanup_tasklet,
			     (unsigned long) ioat_chan);
		tasklet_disable(&ioat_chan->cleanup_task);
	}
	return device->common.chancnt;
}
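/*
 * Note on the channel register layout used above: device-global registers
 * sit at reg_base, and each channel's registers occupy an 0x80-byte window
 * at reg_base + 0x80 * (channel index + 1); the chan_num() macro near the
 * top of the file simply inverts that mapping to recover the index.
 */
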
/**
 * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
 *                                 descriptors to hw
 * @chan: DMA channel handle
 */
static inline void __ioat1_dma_memcpy_issue_pending(
						struct ioat_dma_chan *ioat_chan)
{
	ioat_chan->pending = 0;
	writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET);
}

static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);

	if (ioat_chan->pending > 0) {
		spin_lock_bh(&ioat_chan->desc_lock);
		__ioat1_dma_memcpy_issue_pending(ioat_chan);
		spin_unlock_bh(&ioat_chan->desc_lock);
	}
}

static inline void __ioat2_dma_memcpy_issue_pending(
						struct ioat_dma_chan *ioat_chan)
{
	ioat_chan->pending = 0;
	writew(ioat_chan->dmacount,
	       ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
}

static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);

	if (ioat_chan->pending > 0) {
		spin_lock_bh(&ioat_chan->desc_lock);
		__ioat2_dma_memcpy_issue_pending(ioat_chan);
		spin_unlock_bh(&ioat_chan->desc_lock);
	}
}

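/*
 * Note on the two issue_pending flavors above: CB1 (IOAT v1.2) hardware is
 * kicked with an APPEND channel command to pick up newly chained
 * descriptors, while CB2/CB3 (IOAT v2.0/3.0) hardware is instead told the
 * new total descriptor count through the DMACOUNT register. In both cases
 * submissions are batched until ioat_chan->pending reaches
 * ioat_pending_level, so that not every transaction costs an MMIO write.
 */
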
/**
 * ioat_dma_chan_reset_part2 - reinit the channel after a reset
 */
static void ioat_dma_chan_reset_part2(struct work_struct *work)
{
	struct ioat_dma_chan *ioat_chan =
		container_of(work, struct ioat_dma_chan, work.work);
	struct ioat_desc_sw *desc;

	spin_lock_bh(&ioat_chan->cleanup_lock);
	spin_lock_bh(&ioat_chan->desc_lock);

	ioat_chan->completion_virt->low = 0;
	ioat_chan->completion_virt->high = 0;
	ioat_chan->pending = 0;

	/*
	 * count the descriptors waiting, and be sure to do it
	 * right for both the CB1 line and the CB2 ring
	 */
	ioat_chan->dmacount = 0;
	if (ioat_chan->used_desc.prev) {
		desc = to_ioat_desc(ioat_chan->used_desc.prev);
		do {
			ioat_chan->dmacount++;
			desc = to_ioat_desc(desc->node.next);
		} while (&desc->node != ioat_chan->used_desc.next);
	}

	/*
	 * write the new starting descriptor address
	 * this puts channel engine into ARMED state
	 */
	desc = to_ioat_desc(ioat_chan->used_desc.prev);
	switch (ioat_chan->device->version) {
	case IOAT_VER_1_2:
		writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
		       ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
		writel(((u64) desc->async_tx.phys) >> 32,
		       ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);

		writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
			+ IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
		break;
	case IOAT_VER_2_0:
		writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
		       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
		writel(((u64) desc->async_tx.phys) >> 32,
		       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);

		/* tell the engine to go with what's left to be done */
		writew(ioat_chan->dmacount,
		       ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);

		break;
	}
	dev_err(&ioat_chan->device->pdev->dev,
		"chan%d reset - %d descs waiting, %d total desc\n",
		chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);

	spin_unlock_bh(&ioat_chan->desc_lock);
	spin_unlock_bh(&ioat_chan->cleanup_lock);
}

/**
 * ioat_dma_reset_channel - restart a channel
 * @ioat_chan: IOAT DMA channel handle
 */
static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan)
{
	u32 chansts, chanerr;

	if (!ioat_chan->used_desc.prev)
		return;

	chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
	chansts = (ioat_chan->completion_virt->low
			& IOAT_CHANSTS_DMA_TRANSFER_STATUS);
	if (chanerr) {
		dev_err(&ioat_chan->device->pdev->dev,
			"chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
			chan_num(ioat_chan), chansts, chanerr);
		writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
	}

	/*
	 * whack it upside the head with a reset
	 * and wait for things to settle out.
	 * force the pending count to a really big negative
	 * to make sure no one forces an issue_pending
	 * while we're waiting.
	 */

	spin_lock_bh(&ioat_chan->desc_lock);
	ioat_chan->pending = INT_MIN;
	writeb(IOAT_CHANCMD_RESET,
	       ioat_chan->reg_base
	       + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
	spin_unlock_bh(&ioat_chan->desc_lock);

	/* schedule the 2nd half instead of sleeping a long time */
	schedule_delayed_work(&ioat_chan->work, RESET_DELAY);
}
/**
 * ioat_dma_chan_watchdog - watch for stuck channels
 */
static void ioat_dma_chan_watchdog(struct work_struct *work)
{
	struct ioatdma_device *device =
		container_of(work, struct ioatdma_device, work.work);
	struct ioat_dma_chan *ioat_chan;
	int i;

	union {
		u64 full;
		struct {
			u32 low;
			u32 high;
		};
	} completion_hw;
	unsigned long compl_desc_addr_hw;

	for (i = 0; i < device->common.chancnt; i++) {
		ioat_chan = ioat_lookup_chan_by_index(device, i);

		if (ioat_chan->device->version == IOAT_VER_1_2
			/* have we started processing anything yet */
		    && ioat_chan->last_completion
			/* have we completed any since last watchdog cycle? */
		    && (ioat_chan->last_completion ==
				ioat_chan->watchdog_completion)
			/* has TCP stuck on one cookie since last watchdog? */
		    && (ioat_chan->watchdog_tcp_cookie ==
				ioat_chan->watchdog_last_tcp_cookie)
		    && (ioat_chan->watchdog_tcp_cookie !=
				ioat_chan->completed_cookie)
			/* is there something in the chain to be processed? */
			/* CB1 chain always has at least the last one processed */
		    && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next)
		    && ioat_chan->pending == 0) {

			/*
			 * check CHANSTS register for completed
			 * descriptor address.
			 * if it is different than completion writeback,
			 * it is not zero
			 * and it has changed since the last watchdog
			 * we can assume that channel
			 * is still working correctly
			 * and the problem is in completion writeback.
			 * update completion writeback
			 * with actual CHANSTS value
			 * else
			 * try resetting the channel
			 */

			completion_hw.low = readl(ioat_chan->reg_base +
				IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version));
			completion_hw.high = readl(ioat_chan->reg_base +
				IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version));
#if (BITS_PER_LONG == 64)
			compl_desc_addr_hw =
				completion_hw.full
				& IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
#else
			compl_desc_addr_hw =
				completion_hw.low & IOAT_LOW_COMPLETION_MASK;
#endif

			if ((compl_desc_addr_hw != 0)
			    && (compl_desc_addr_hw != ioat_chan->watchdog_completion)
			    && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) {
				ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw;
				ioat_chan->completion_virt->low = completion_hw.low;
				ioat_chan->completion_virt->high = completion_hw.high;
			} else {
				ioat_dma_reset_channel(ioat_chan);
				ioat_chan->watchdog_completion = 0;
				ioat_chan->last_compl_desc_addr_hw = 0;
			}

		/*
		 * for version 2.0 if there are descriptors yet to be processed
		 * and the last completed hasn't changed since the last watchdog
		 * if they haven't hit the pending level
		 * issue the pending to push them through
		 * else
		 * try resetting the channel
		 */
		} else if (ioat_chan->device->version == IOAT_VER_2_0
		    && ioat_chan->used_desc.prev
		    && ioat_chan->last_completion
		    && ioat_chan->last_completion == ioat_chan->watchdog_completion) {

			if (ioat_chan->pending < ioat_pending_level)
				ioat2_dma_memcpy_issue_pending(&ioat_chan->common);
			else {
				ioat_dma_reset_channel(ioat_chan);
				ioat_chan->watchdog_completion = 0;
			}
		} else {
			ioat_chan->last_compl_desc_addr_hw = 0;
			ioat_chan->watchdog_completion
					= ioat_chan->last_completion;
		}

		ioat_chan->watchdog_last_tcp_cookie =
			ioat_chan->watchdog_tcp_cookie;
	}

	schedule_delayed_work(&device->work, WATCHDOG_DELAY);
}
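/*
 * Note on cookies (assigned by the tx_submit implementations below):
 * dmaengine cookies are positive, monotonically increasing transaction
 * ids, so the submit paths wrap back to 1 on signed overflow; zero and
 * negative values are reserved for "not submitted" and error returns.
 */
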
static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
	struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
	struct ioat_desc_sw *prev, *new;
	struct ioat_dma_descriptor *hw;
	dma_cookie_t cookie;
	LIST_HEAD(new_chain);
	u32 copy;
	size_t len;
	dma_addr_t src, dst;
	unsigned long orig_flags;
	unsigned int desc_count = 0;

	/* src and dest and len are stored in the initial descriptor */
	len = first->len;
	src = first->src;
	dst = first->dst;
	orig_flags = first->async_tx.flags;
	new = first;

	spin_lock_bh(&ioat_chan->desc_lock);
	prev = to_ioat_desc(ioat_chan->used_desc.prev);
	prefetch(prev->hw);
	do {
		copy = min_t(size_t, len, ioat_chan->xfercap);

		async_tx_ack(&new->async_tx);

		hw = new->hw;
		hw->size = copy;
		hw->ctl = 0;
		hw->src_addr = src;
		hw->dst_addr = dst;
		hw->next = 0;

		/* chain together the physical address list for the HW */
		wmb();
		prev->hw->next = (u64) new->async_tx.phys;

		len -= copy;
		dst += copy;
		src += copy;

		list_add_tail(&new->node, &new_chain);
		desc_count++;
		prev = new;
	} while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan)));

	if (!new) {
		dev_err(&ioat_chan->device->pdev->dev,
			"tx submit failed\n");
		spin_unlock_bh(&ioat_chan->desc_lock);
		return -ENOMEM;
	}

	hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
	if (first->async_tx.callback) {
		hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
		if (first != new) {
			/* move callback to the last desc */
			new->async_tx.callback = first->async_tx.callback;
			new->async_tx.callback_param
					= first->async_tx.callback_param;
			first->async_tx.callback = NULL;
			first->async_tx.callback_param = NULL;
		}
	}

	new->tx_cnt = desc_count;
	new->async_tx.flags = orig_flags; /* client is in control of this ack */

	/* store the original values for use in later cleanup */
	if (new != first) {
		new->src = first->src;
		new->dst = first->dst;
		new->len = first->len;
	}

	/* cookie incr and addition to used_list must be atomic */
	cookie = ioat_chan->common.cookie;
	cookie++;
	if (cookie < 0)
		cookie = 1;
	ioat_chan->common.cookie = new->async_tx.cookie = cookie;

	/* write address into NextDescriptor field of last desc in chain */
	to_ioat_desc(ioat_chan->used_desc.prev)->hw->next =
							first->async_tx.phys;
	list_splice_tail(&new_chain, &ioat_chan->used_desc);

	ioat_chan->dmacount += desc_count;
	ioat_chan->pending += desc_count;
	if (ioat_chan->pending >= ioat_pending_level)
		__ioat1_dma_memcpy_issue_pending(ioat_chan);
	spin_unlock_bh(&ioat_chan->desc_lock);

	return cookie;
}
static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
	struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
	struct ioat_desc_sw *new;
	struct ioat_dma_descriptor *hw;
	dma_cookie_t cookie;
	u32 copy;
	size_t len;
	dma_addr_t src, dst;
	unsigned long orig_flags;
	unsigned int desc_count = 0;

	/* src and dest and len are stored in the initial descriptor */
	len = first->len;
	src = first->src;
	dst = first->dst;
	orig_flags = first->async_tx.flags;
	new = first;

	/*
	 * ioat_chan->desc_lock is still in force in version 2 path
	 * it gets unlocked at end of this function
	 */
	do {
		copy = min_t(size_t, len, ioat_chan->xfercap);

		async_tx_ack(&new->async_tx);

		hw = new->hw;
		hw->size = copy;
		hw->ctl = 0;
		hw->src_addr = src;
		hw->dst_addr = dst;

		len -= copy;
		dst += copy;
		src += copy;
		desc_count++;
	} while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan)));

	if (!new) {
		dev_err(&ioat_chan->device->pdev->dev,
			"tx submit failed\n");
		spin_unlock_bh(&ioat_chan->desc_lock);
		return -ENOMEM;
	}

	hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
	if (first->async_tx.callback) {
		hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
		if (first != new) {
			/* move callback to the last desc */
			new->async_tx.callback = first->async_tx.callback;
			new->async_tx.callback_param
					= first->async_tx.callback_param;
			first->async_tx.callback = NULL;
			first->async_tx.callback_param = NULL;
		}
	}

	new->tx_cnt = desc_count;
	new->async_tx.flags = orig_flags; /* client is in control of this ack */

	/* store the original values for use in later cleanup */
	if (new != first) {
		new->src = first->src;
		new->dst = first->dst;
		new->len = first->len;
	}

	/* cookie incr and addition to used_list must be atomic */
	cookie = ioat_chan->common.cookie;
	cookie++;
	if (cookie < 0)
		cookie = 1;
	ioat_chan->common.cookie = new->async_tx.cookie = cookie;

	ioat_chan->dmacount += desc_count;
	ioat_chan->pending += desc_count;
	if (ioat_chan->pending >= ioat_pending_level)
		__ioat2_dma_memcpy_issue_pending(ioat_chan);
	spin_unlock_bh(&ioat_chan->desc_lock);

	return cookie;
}
/**
 * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
 * @ioat_chan: the channel supplying the memory pool for the descriptors
 * @flags: allocation flags
 */
static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
					struct ioat_dma_chan *ioat_chan,
					gfp_t flags)
{
	struct ioat_dma_descriptor *desc;
	struct ioat_desc_sw *desc_sw;
	struct ioatdma_device *ioatdma_device;
	dma_addr_t phys;

	ioatdma_device = to_ioatdma_device(ioat_chan->common.device);
	desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
	if (unlikely(!desc))
		return NULL;

	desc_sw = kzalloc(sizeof(*desc_sw), flags);
	if (unlikely(!desc_sw)) {
		pci_pool_free(ioatdma_device->dma_pool, desc, phys);
		return NULL;
	}

	memset(desc, 0, sizeof(*desc));
	dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
	switch (ioat_chan->device->version) {
	case IOAT_VER_1_2:
		desc_sw->async_tx.tx_submit = ioat1_tx_submit;
		break;
	case IOAT_VER_2_0:
	case IOAT_VER_3_0:
		desc_sw->async_tx.tx_submit = ioat2_tx_submit;
		break;
	}

	desc_sw->hw = desc;
	desc_sw->async_tx.phys = phys;

	return desc_sw;
}

static int ioat_initial_desc_count = 256;
module_param(ioat_initial_desc_count, int, 0644);
MODULE_PARM_DESC(ioat_initial_desc_count,
		 "initial descriptors per channel (default: 256)");

/**
 * ioat2_dma_massage_chan_desc - link the descriptors into a circle
 * @ioat_chan: the channel to be massaged
 */
static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan)
{
	struct ioat_desc_sw *desc, *_desc;

	/* setup used_desc */
	ioat_chan->used_desc.next = ioat_chan->free_desc.next;
	ioat_chan->used_desc.prev = NULL;

	/* pull free_desc out of the circle so that every node is a hw
	 * descriptor, but leave it pointing to the list
	 */
	ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next;
	ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev;

	/* circle link the hw descriptors */
	desc = to_ioat_desc(ioat_chan->free_desc.next);
	desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
	list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) {
		desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
	}
}

/**
 * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors
 * @chan: the channel to be filled out
 */
static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	struct ioat_desc_sw *desc;
	u16 chanctrl;
	u32 chanerr;
	int i;
	LIST_HEAD(tmp_list);

	/* have we already been set up? */
	if (!list_empty(&ioat_chan->free_desc))
		return ioat_chan->desccount;

	/* Setup register to interrupt and write completion status on error */
	chanctrl = IOAT_CHANCTRL_ERR_INT_EN |
		IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
		IOAT_CHANCTRL_ERR_COMPLETION_EN;
	writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);

	chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
	if (chanerr) {
		dev_err(&ioat_chan->device->pdev->dev,
			"CHANERR = %x, clearing\n", chanerr);
		writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
	}

	/* Allocate descriptors */
	for (i = 0; i < ioat_initial_desc_count; i++) {
		desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
		if (!desc) {
			dev_err(&ioat_chan->device->pdev->dev,
				"Only %d initial descriptors\n", i);
			break;
		}
		list_add_tail(&desc->node, &tmp_list);
	}
	spin_lock_bh(&ioat_chan->desc_lock);
	ioat_chan->desccount = i;
	list_splice(&tmp_list, &ioat_chan->free_desc);
	if (ioat_chan->device->version != IOAT_VER_1_2)
		ioat2_dma_massage_chan_desc(ioat_chan);
	spin_unlock_bh(&ioat_chan->desc_lock);

	/* allocate a completion writeback area */
	/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
	ioat_chan->completion_virt =
		pci_pool_alloc(ioat_chan->device->completion_pool,
			       GFP_KERNEL,
			       &ioat_chan->completion_addr);
	memset(ioat_chan->completion_virt, 0,
	       sizeof(*ioat_chan->completion_virt));
	writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
	writel(((u64) ioat_chan->completion_addr) >> 32,
	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);

	tasklet_enable(&ioat_chan->cleanup_task);
	ioat_dma_start_null_desc(ioat_chan); /* give chain to dma device */
	return ioat_chan->desccount;
}
/**
 * ioat_dma_free_chan_resources - release all the descriptors
 * @chan: the channel to be cleaned
 */
static void ioat_dma_free_chan_resources(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device);
	struct ioat_desc_sw *desc, *_desc;
	int in_use_descs = 0;

	/* Before freeing channel resources first check
	 * if they have been previously allocated for this channel.
	 */
	if (ioat_chan->desccount == 0)
		return;

	tasklet_disable(&ioat_chan->cleanup_task);
	ioat_dma_memcpy_cleanup(ioat_chan);

	/* Delay 100ms after reset to allow internal DMA logic to quiesce
	 * before removing DMA descriptor resources.
	 */
	writeb(IOAT_CHANCMD_RESET,
	       ioat_chan->reg_base
	       + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
	mdelay(100);

	spin_lock_bh(&ioat_chan->desc_lock);
	switch (ioat_chan->device->version) {
	case IOAT_VER_1_2:
		list_for_each_entry_safe(desc, _desc,
					 &ioat_chan->used_desc, node) {
			in_use_descs++;
			list_del(&desc->node);
			pci_pool_free(ioatdma_device->dma_pool, desc->hw,
				      desc->async_tx.phys);
			kfree(desc);
		}
		list_for_each_entry_safe(desc, _desc,
					 &ioat_chan->free_desc, node) {
			list_del(&desc->node);
			pci_pool_free(ioatdma_device->dma_pool, desc->hw,
				      desc->async_tx.phys);
			kfree(desc);
		}
		break;
	case IOAT_VER_2_0:
	case IOAT_VER_3_0:
		list_for_each_entry_safe(desc, _desc,
					 ioat_chan->free_desc.next, node) {
			list_del(&desc->node);
			pci_pool_free(ioatdma_device->dma_pool, desc->hw,
				      desc->async_tx.phys);
			kfree(desc);
		}
		desc = to_ioat_desc(ioat_chan->free_desc.next);
		pci_pool_free(ioatdma_device->dma_pool, desc->hw,
			      desc->async_tx.phys);
		kfree(desc);
		INIT_LIST_HEAD(&ioat_chan->free_desc);
		INIT_LIST_HEAD(&ioat_chan->used_desc);
		break;
	}
	spin_unlock_bh(&ioat_chan->desc_lock);

	pci_pool_free(ioatdma_device->completion_pool,
		      ioat_chan->completion_virt,
		      ioat_chan->completion_addr);

	/* one is ok since we left it on there on purpose */
	if (in_use_descs > 1)
		dev_err(&ioat_chan->device->pdev->dev,
			"Freeing %d in use descriptors!\n",
			in_use_descs - 1);

	ioat_chan->last_completion = ioat_chan->completion_addr = 0;
	ioat_chan->pending = 0;
	ioat_chan->dmacount = 0;
	ioat_chan->desccount = 0;
	ioat_chan->watchdog_completion = 0;
	ioat_chan->last_compl_desc_addr_hw = 0;
	ioat_chan->watchdog_tcp_cookie =
		ioat_chan->watchdog_last_tcp_cookie = 0;
}
/**
 * ioat_dma_get_next_descriptor - return the next available descriptor
 * @ioat_chan: IOAT DMA channel handle
 *
 * Gets the next descriptor from the chain, and must be called with the
 * channel's desc_lock held. Allocates more descriptors if the channel
 * has run out.
 */
static struct ioat_desc_sw *
ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
{
	struct ioat_desc_sw *new;

	if (!list_empty(&ioat_chan->free_desc)) {
		new = to_ioat_desc(ioat_chan->free_desc.next);
		list_del(&new->node);
	} else {
		/* try to get another desc */
		new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
		if (!new) {
			dev_err(&ioat_chan->device->pdev->dev,
				"alloc failed\n");
			return NULL;
		}
	}

	prefetch(new->hw);
	return new;
}

static struct ioat_desc_sw *
ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
{
	struct ioat_desc_sw *new;

	/*
	 * used.prev points to where to start processing
	 * used.next points to next free descriptor
	 * if used.prev == NULL, there are none waiting to be processed
	 * if used.next == used.prev.prev, there is only one free descriptor,
	 * and we need to use it as a noop descriptor before
	 * linking in a new set of descriptors, since the device
	 * has probably already read the pointer to it
	 */
	if (ioat_chan->used_desc.prev &&
	    ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) {

		struct ioat_desc_sw *desc;
		struct ioat_desc_sw *noop_desc;
		int i;

		/* set up the noop descriptor */
		noop_desc = to_ioat_desc(ioat_chan->used_desc.next);
		/* set size to non-zero value (channel returns error when size is 0) */
		noop_desc->hw->size = NULL_DESC_BUFFER_SIZE;
		noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
		noop_desc->hw->src_addr = 0;
		noop_desc->hw->dst_addr = 0;

		ioat_chan->used_desc.next = ioat_chan->used_desc.next->next;
		ioat_chan->pending++;
		ioat_chan->dmacount++;

		/* try to get a few more descriptors */
		for (i = 16; i; i--) {
			desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
			if (!desc) {
				dev_err(&ioat_chan->device->pdev->dev,
					"alloc failed\n");
				break;
			}
			list_add_tail(&desc->node, ioat_chan->used_desc.next);

			desc->hw->next
				= to_ioat_desc(desc->node.next)->async_tx.phys;
			to_ioat_desc(desc->node.prev)->hw->next
				= desc->async_tx.phys;
			ioat_chan->desccount++;
		}

		ioat_chan->used_desc.next = noop_desc->node.next;
	}
	new = to_ioat_desc(ioat_chan->used_desc.next);
	prefetch(new);
	ioat_chan->used_desc.next = new->node.next;

	if (ioat_chan->used_desc.prev == NULL)
		ioat_chan->used_desc.prev = &new->node;

	prefetch(new->hw);
	return new;
}

static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
						struct ioat_dma_chan *ioat_chan)
{
	if (!ioat_chan)
		return NULL;

	switch (ioat_chan->device->version) {
	case IOAT_VER_1_2:
		return ioat1_dma_get_next_descriptor(ioat_chan);
	case IOAT_VER_2_0:
	case IOAT_VER_3_0:
		return ioat2_dma_get_next_descriptor(ioat_chan);
	}
	return NULL;
}
static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy(
						struct dma_chan *chan,
						dma_addr_t dma_dest,
						dma_addr_t dma_src,
						size_t len,
						unsigned long flags)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	struct ioat_desc_sw *new;

	spin_lock_bh(&ioat_chan->desc_lock);
	new = ioat_dma_get_next_descriptor(ioat_chan);
	spin_unlock_bh(&ioat_chan->desc_lock);

	if (new) {
		new->len = len;
		new->dst = dma_dest;
		new->src = dma_src;
		new->async_tx.flags = flags;
		return &new->async_tx;
	} else {
		dev_err(&ioat_chan->device->pdev->dev,
			"chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
			chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
		return NULL;
	}
}

static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
						struct dma_chan *chan,
						dma_addr_t dma_dest,
						dma_addr_t dma_src,
						size_t len,
						unsigned long flags)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	struct ioat_desc_sw *new;

	spin_lock_bh(&ioat_chan->desc_lock);
	new = ioat2_dma_get_next_descriptor(ioat_chan);

	/*
	 * leave ioat_chan->desc_lock set in ioat 2 path
	 * it will get unlocked at end of tx_submit
	 */

	if (new) {
		new->len = len;
		new->dst = dma_dest;
		new->src = dma_src;
		new->async_tx.flags = flags;
		return &new->async_tx;
	} else {
		spin_unlock_bh(&ioat_chan->desc_lock);
		dev_err(&ioat_chan->device->pdev->dev,
			"chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
			chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
		return NULL;
	}
}

static void ioat_dma_cleanup_tasklet(unsigned long data)
{
	struct ioat_dma_chan *chan = (void *)data;
	ioat_dma_memcpy_cleanup(chan);
	writew(IOAT_CHANCTRL_INT_DISABLE,
	       chan->reg_base + IOAT_CHANCTRL_OFFSET);
}

static void
ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc)
{
	if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
		if (desc->async_tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
			pci_unmap_single(ioat_chan->device->pdev,
					 pci_unmap_addr(desc, dst),
					 pci_unmap_len(desc, len),
					 PCI_DMA_FROMDEVICE);
		else
			pci_unmap_page(ioat_chan->device->pdev,
				       pci_unmap_addr(desc, dst),
				       pci_unmap_len(desc, len),
				       PCI_DMA_FROMDEVICE);
	}

	if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
		if (desc->async_tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
			pci_unmap_single(ioat_chan->device->pdev,
					 pci_unmap_addr(desc, src),
					 pci_unmap_len(desc, len),
					 PCI_DMA_TODEVICE);
		else
			pci_unmap_page(ioat_chan->device->pdev,
				       pci_unmap_addr(desc, src),
				       pci_unmap_len(desc, len),
				       PCI_DMA_TODEVICE);
	}
}
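/*
 * Note on ioat_dma_unmap() above: clients that manage their own DMA
 * mappings submit with DMA_COMPL_SKIP_SRC_UNMAP/DMA_COMPL_SKIP_DEST_UNMAP
 * set, and the DMA_COMPL_*_UNMAP_SINGLE flags record whether a buffer was
 * mapped with pci_map_single() rather than pci_map_page(), so the cleanup
 * path can undo it with the matching unmap call.
 */
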
/**
 * ioat_dma_memcpy_cleanup - clean up finished descriptors
 * @ioat_chan: ioat channel to be cleaned up
 */
static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
{
	unsigned long phys_complete;
	struct ioat_desc_sw *desc, *_desc;
	dma_cookie_t cookie = 0;
	unsigned long desc_phys;
	struct ioat_desc_sw *latest_desc;

	prefetch(ioat_chan->completion_virt);

	if (!spin_trylock_bh(&ioat_chan->cleanup_lock))
		return;

	/* The completion writeback can happen at any time,
	   so reads by the driver need to be atomic operations
	   The descriptor physical addresses are limited to 32-bits
	   when the CPU can only do a 32-bit mov */

#if (BITS_PER_LONG == 64)
	phys_complete =
		ioat_chan->completion_virt->full
		& IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
#else
	phys_complete =
		ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
#endif

	if ((ioat_chan->completion_virt->full
		& IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
				IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
		dev_err(&ioat_chan->device->pdev->dev,
			"Channel halted, chanerr = %x\n",
			readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET));

		/* TODO do something to salvage the situation */
	}

	if (phys_complete == ioat_chan->last_completion) {
		spin_unlock_bh(&ioat_chan->cleanup_lock);
		/*
		 * perhaps we're stuck so hard that the watchdog can't go off?
		 * try to catch it after 2 seconds
		 */
		if (ioat_chan->device->version != IOAT_VER_3_0) {
			if (time_after(jiffies,
				       ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
				ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
				ioat_chan->last_completion_time = jiffies;
			}
		}
		return;
	}
	ioat_chan->last_completion_time = jiffies;

	cookie = 0;
	if (!spin_trylock_bh(&ioat_chan->desc_lock)) {
		spin_unlock_bh(&ioat_chan->cleanup_lock);
		return;
	}

	switch (ioat_chan->device->version) {
	case IOAT_VER_1_2:
		list_for_each_entry_safe(desc, _desc,
					 &ioat_chan->used_desc, node) {

			/*
			 * Incoming DMA requests may use multiple descriptors,
			 * due to exceeding xfercap, perhaps. If so, only the
			 * last one will have a cookie, and require unmapping.
			 */
			if (desc->async_tx.cookie) {
				cookie = desc->async_tx.cookie;
				ioat_dma_unmap(ioat_chan, desc);
				if (desc->async_tx.callback) {
					desc->async_tx.callback(desc->async_tx.callback_param);
					desc->async_tx.callback = NULL;
				}
			}

			if (desc->async_tx.phys != phys_complete) {
				/*
				 * a completed entry, but not the last, so clean
				 * up if the client is done with the descriptor
				 */
				if (async_tx_test_ack(&desc->async_tx)) {
					list_move_tail(&desc->node,
						       &ioat_chan->free_desc);
				} else
					desc->async_tx.cookie = 0;
			} else {
				/*
				 * last used desc. Do not remove, so we can
				 * append from it, but don't look at it next
				 * time, either
				 */
				desc->async_tx.cookie = 0;

				/* TODO check status bits? */
				break;
			}
		}
		break;
	case IOAT_VER_2_0:
	case IOAT_VER_3_0:
		/* has some other thread already cleaned up? */
		if (ioat_chan->used_desc.prev == NULL)
			break;

		/* work backwards to find latest finished desc */
		desc = to_ioat_desc(ioat_chan->used_desc.next);
		latest_desc = NULL;
		do {
			desc = to_ioat_desc(desc->node.prev);
			desc_phys = (unsigned long)desc->async_tx.phys
				    & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
			if (desc_phys == phys_complete) {
				latest_desc = desc;
				break;
			}
		} while (&desc->node != ioat_chan->used_desc.prev);

		if (latest_desc != NULL) {

			/* work forwards to clear finished descriptors */
			for (desc = to_ioat_desc(ioat_chan->used_desc.prev);
			     &desc->node != latest_desc->node.next &&
			     &desc->node != ioat_chan->used_desc.next;
			     desc = to_ioat_desc(desc->node.next)) {
				if (desc->async_tx.cookie) {
					cookie = desc->async_tx.cookie;
					desc->async_tx.cookie = 0;
					ioat_dma_unmap(ioat_chan, desc);
					if (desc->async_tx.callback) {
						desc->async_tx.callback(desc->async_tx.callback_param);
						desc->async_tx.callback = NULL;
					}
				}
			}

			/* move used.prev up beyond those that are finished */
			if (&desc->node == ioat_chan->used_desc.next)
				ioat_chan->used_desc.prev = NULL;
			else
				ioat_chan->used_desc.prev = &desc->node;
		}
		break;
	}

	spin_unlock_bh(&ioat_chan->desc_lock);

	ioat_chan->last_completion = phys_complete;
	if (cookie != 0)
		ioat_chan->completed_cookie = cookie;

	spin_unlock_bh(&ioat_chan->cleanup_lock);
}
/**
 * ioat_dma_is_complete - poll the status of an IOAT DMA transaction
 * @chan: IOAT DMA channel handle
 * @cookie: DMA transaction identifier
 * @done: if not %NULL, updated with last completed transaction
 * @used: if not %NULL, updated with last used transaction
 */
static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
					    dma_cookie_t cookie,
					    dma_cookie_t *done,
					    dma_cookie_t *used)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	dma_cookie_t last_used;
	dma_cookie_t last_complete;
	enum dma_status ret;

	last_used = chan->cookie;
	last_complete = ioat_chan->completed_cookie;
	ioat_chan->watchdog_tcp_cookie = cookie;

	if (done)
		*done = last_complete;
	if (used)
		*used = last_used;

	ret = dma_async_is_complete(cookie, last_complete, last_used);
	if (ret == DMA_SUCCESS)
		return ret;

	ioat_dma_memcpy_cleanup(ioat_chan);

	last_used = chan->cookie;
	last_complete = ioat_chan->completed_cookie;

	if (done)
		*done = last_complete;
	if (used)
		*used = last_used;

	return dma_async_is_complete(cookie, last_complete, last_used);
}

static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
{
	struct ioat_desc_sw *desc;

	spin_lock_bh(&ioat_chan->desc_lock);

	desc = ioat_dma_get_next_descriptor(ioat_chan);

	if (!desc) {
		dev_err(&ioat_chan->device->pdev->dev,
			"Unable to start null desc - get next desc failed\n");
		spin_unlock_bh(&ioat_chan->desc_lock);
		return;
	}

	desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
				| IOAT_DMA_DESCRIPTOR_CTL_INT_GN
				| IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
	/* set size to non-zero value (channel returns error when size is 0) */
	desc->hw->size = NULL_DESC_BUFFER_SIZE;
	desc->hw->src_addr = 0;
	desc->hw->dst_addr = 0;
	async_tx_ack(&desc->async_tx);
	switch (ioat_chan->device->version) {
	case IOAT_VER_1_2:
		desc->hw->next = 0;
		list_add_tail(&desc->node, &ioat_chan->used_desc);

		writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
		       ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
		writel(((u64) desc->async_tx.phys) >> 32,
		       ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);

		writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
			+ IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
		break;
	case IOAT_VER_2_0:
	case IOAT_VER_3_0:
		writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
		       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
		writel(((u64) desc->async_tx.phys) >> 32,
		       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);

		ioat_chan->dmacount++;
		__ioat2_dma_memcpy_issue_pending(ioat_chan);
		break;
	}
	spin_unlock_bh(&ioat_chan->desc_lock);
}
/*
 * Perform an IOAT transaction to verify the HW works.
 */
#define IOAT_TEST_SIZE 2000

static void ioat_dma_test_callback(void *dma_async_param)
{
	struct completion *cmp = dma_async_param;

	complete(cmp);
}

/**
 * ioat_dma_self_test - Perform an IOAT transaction to verify the HW works.
 * @device: device to be tested
 */
static int ioat_dma_self_test(struct ioatdma_device *device)
{
	int i;
	u8 *src;
	u8 *dest;
	struct dma_chan *dma_chan;
	struct dma_async_tx_descriptor *tx;
	dma_addr_t dma_dest, dma_src;
	dma_cookie_t cookie;
	int err = 0;
	struct completion cmp;
	unsigned long tmo;
	unsigned long flags;

	src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
	if (!src)
		return -ENOMEM;
	dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
	if (!dest) {
		kfree(src);
		return -ENOMEM;
	}

	/* Fill in src buffer */
	for (i = 0; i < IOAT_TEST_SIZE; i++)
		src[i] = (u8)i;

	/* Start copy, using first DMA channel */
	dma_chan = container_of(device->common.channels.next,
				struct dma_chan,
				device_node);
	if (device->common.device_alloc_chan_resources(dma_chan) < 1) {
		dev_err(&device->pdev->dev,
			"selftest cannot allocate chan resource\n");
		err = -ENODEV;
		goto out;
	}

	dma_src = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
				 DMA_TO_DEVICE);
	dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
				  DMA_FROM_DEVICE);
	flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE;
	tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
						   IOAT_TEST_SIZE, flags);
	if (!tx) {
		dev_err(&device->pdev->dev,
			"Self-test prep failed, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

	async_tx_ack(tx);
	init_completion(&cmp);
	tx->callback = ioat_dma_test_callback;
	tx->callback_param = &cmp;
	cookie = tx->tx_submit(tx);
	if (cookie < 0) {
		dev_err(&device->pdev->dev,
			"Self-test setup failed, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}
	device->common.device_issue_pending(dma_chan);

	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));

	if (tmo == 0 ||
	    device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL)
					!= DMA_SUCCESS) {
		dev_err(&device->pdev->dev,
			"Self-test copy timed out, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}
	if (memcmp(src, dest, IOAT_TEST_SIZE)) {
		dev_err(&device->pdev->dev,
			"Self-test copy failed compare, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

free_resources:
	device->common.device_free_chan_resources(dma_chan);
out:
	kfree(src);
	kfree(dest);
	return err;
}
static char ioat_interrupt_style[32] = "msix";
module_param_string(ioat_interrupt_style, ioat_interrupt_style,
		    sizeof(ioat_interrupt_style), 0644);
MODULE_PARM_DESC(ioat_interrupt_style,
		 "set ioat interrupt style: msix (default), "
		 "msix-single-vector, msi, intx");

/**
 * ioat_dma_setup_interrupts - setup interrupt handler
 * @device: ioat device
 */
static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
{
	struct ioat_dma_chan *ioat_chan;
	int err, i, j, msixcnt;
	u8 intrctrl = 0;

	if (!strcmp(ioat_interrupt_style, "msix"))
		goto msix;
	if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
		goto msix_single_vector;
	if (!strcmp(ioat_interrupt_style, "msi"))
		goto msi;
	if (!strcmp(ioat_interrupt_style, "intx"))
		goto intx;
	dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n",
		ioat_interrupt_style);
	goto err_no_irq;

msix:
	/* The number of MSI-X vectors should equal the number of channels */
	msixcnt = device->common.chancnt;
	for (i = 0; i < msixcnt; i++)
		device->msix_entries[i].entry = i;

	err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt);
	if (err < 0)
		goto msi;
	if (err > 0)
		goto msix_single_vector;

	for (i = 0; i < msixcnt; i++) {
		ioat_chan = ioat_lookup_chan_by_index(device, i);
		err = request_irq(device->msix_entries[i].vector,
				  ioat_dma_do_interrupt_msix,
				  0, "ioat-msix", ioat_chan);
		if (err) {
			for (j = 0; j < i; j++) {
				ioat_chan =
					ioat_lookup_chan_by_index(device, j);
				free_irq(device->msix_entries[j].vector,
					 ioat_chan);
			}
			goto msix_single_vector;
		}
	}
	intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
	device->irq_mode = msix_multi_vector;
	goto done;

msix_single_vector:
	device->msix_entries[0].entry = 0;
	err = pci_enable_msix(device->pdev, device->msix_entries, 1);
	if (err)
		goto msi;

	err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt,
			  0, "ioat-msix", device);
	if (err) {
		pci_disable_msix(device->pdev);
		goto msi;
	}
	device->irq_mode = msix_single_vector;
	goto done;

msi:
	err = pci_enable_msi(device->pdev);
	if (err)
		goto intx;

	err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
			  0, "ioat-msi", device);
	if (err) {
		pci_disable_msi(device->pdev);
		goto intx;
	}
	/*
	 * CB 1.2 devices need a bit set in configuration space to enable MSI
	 */
	if (device->version == IOAT_VER_1_2) {
		u32 dmactrl;
		pci_read_config_dword(device->pdev,
				      IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
		dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
		pci_write_config_dword(device->pdev,
				       IOAT_PCI_DMACTRL_OFFSET, dmactrl);
	}
	device->irq_mode = msi;
	goto done;

intx:
	err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
			  IRQF_SHARED, "ioat-intx", device);
	if (err)
		goto err_no_irq;
	device->irq_mode = intx;

done:
	intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
	writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
	return 0;

err_no_irq:
	/* Disable all interrupt generation */
	writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
	dev_err(&device->pdev->dev, "no usable interrupts\n");
	device->irq_mode = none;
	return -1;
}
/**
 * ioat_dma_remove_interrupts - remove whatever interrupts were set
 * @device: ioat device
 */
static void ioat_dma_remove_interrupts(struct ioatdma_device *device)
{
	struct ioat_dma_chan *ioat_chan;
	int i;

	/* Disable all interrupt generation */
	writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);

	switch (device->irq_mode) {
	case msix_multi_vector:
		for (i = 0; i < device->common.chancnt; i++) {
			ioat_chan = ioat_lookup_chan_by_index(device, i);
			free_irq(device->msix_entries[i].vector, ioat_chan);
		}
		pci_disable_msix(device->pdev);
		break;
	case msix_single_vector:
		free_irq(device->msix_entries[0].vector, device);
		pci_disable_msix(device->pdev);
		break;
	case msi:
		free_irq(device->pdev->irq, device);
		pci_disable_msi(device->pdev);
		break;
	case intx:
		free_irq(device->pdev->irq, device);
		break;
	case none:
		dev_warn(&device->pdev->dev,
			 "call to %s without interrupts setup\n", __func__);
	}
	device->irq_mode = none;
}

struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
				      void __iomem *iobase)
{
	int err;
	struct ioatdma_device *device;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device) {
		err = -ENOMEM;
		goto err_kzalloc;
	}
	device->pdev = pdev;
	device->reg_base = iobase;
	device->version = readb(device->reg_base + IOAT_VER_OFFSET);

	/* DMA coherent memory pool for DMA descriptor allocations */
	device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
					   sizeof(struct ioat_dma_descriptor),
					   64, 0);
	if (!device->dma_pool) {
		err = -ENOMEM;
		goto err_dma_pool;
	}

	device->completion_pool = pci_pool_create("completion_pool", pdev,
						  sizeof(u64), SMP_CACHE_BYTES,
						  SMP_CACHE_BYTES);
	if (!device->completion_pool) {
		err = -ENOMEM;
		goto err_completion_pool;
	}

	INIT_LIST_HEAD(&device->common.channels);
	ioat_dma_enumerate_channels(device);

	device->common.device_alloc_chan_resources =
						ioat_dma_alloc_chan_resources;
	device->common.device_free_chan_resources =
						ioat_dma_free_chan_resources;
	device->common.dev = &pdev->dev;

	dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
	device->common.device_is_tx_complete = ioat_dma_is_complete;
	switch (device->version) {
	case IOAT_VER_1_2:
		device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
		device->common.device_issue_pending =
						ioat1_dma_memcpy_issue_pending;
		break;
	case IOAT_VER_2_0:
	case IOAT_VER_3_0:
		device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy;
		device->common.device_issue_pending =
						ioat2_dma_memcpy_issue_pending;
		break;
	}

	dev_err(&device->pdev->dev,
		"Intel(R) I/OAT DMA Engine found,"
		" %d channels, device version 0x%02x, driver version %s\n",
		device->common.chancnt, device->version, IOAT_DMA_VERSION);

	if (!device->common.chancnt) {
		dev_err(&device->pdev->dev,
			"Intel(R) I/OAT DMA Engine problem found: "
			"zero channels detected\n");
		goto err_setup_interrupts;
	}

	err = ioat_dma_setup_interrupts(device);
	if (err)
		goto err_setup_interrupts;

	err = ioat_dma_self_test(device);
	if (err)
		goto err_self_test;

	ioat_set_tcp_copy_break(device);

	dma_async_device_register(&device->common);

	if (device->version != IOAT_VER_3_0) {
		INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
		schedule_delayed_work(&device->work,
				      WATCHDOG_DELAY);
	}

	return device;

err_self_test:
	ioat_dma_remove_interrupts(device);
err_setup_interrupts:
	pci_pool_destroy(device->completion_pool);
err_completion_pool:
	pci_pool_destroy(device->dma_pool);
err_dma_pool:
	kfree(device);
err_kzalloc:
	dev_err(&pdev->dev,
		"Intel(R) I/OAT DMA Engine initialization failed\n");
	return NULL;
}
void ioat_dma_remove(struct ioatdma_device *device)
{
	struct dma_chan *chan, *_chan;
	struct ioat_dma_chan *ioat_chan;

	if (device->version != IOAT_VER_3_0)
		cancel_delayed_work(&device->work);

	ioat_dma_remove_interrupts(device);

	dma_async_device_unregister(&device->common);

	pci_pool_destroy(device->dma_pool);
	pci_pool_destroy(device->completion_pool);

	iounmap(device->reg_base);
	pci_release_regions(device->pdev);
	pci_disable_device(device->pdev);

	list_for_each_entry_safe(chan, _chan,
				 &device->common.channels, device_node) {
		ioat_chan = to_ioat_chan(chan);
		list_del(&chan->device_node);
		kfree(ioat_chan);
	}
	kfree(device);
}
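
For context: clients never call the functions above directly. They reach them through the generic dmaengine operations that ioat_dma_probe() registers (device_prep_dma_memcpy, device_issue_pending, device_is_tx_complete). Below is a minimal sketch of a polling client, modeled on ioat_dma_self_test() and the 2.6.30-era dmaengine client helpers (dmaengine_get(), dma_find_channel(), dma_async_memcpy_buf_to_buf()); example_ioat_copy() is a hypothetical helper, not part of this file, and assumes both buffers are kmalloc'd and DMA-able.

#include <linux/dmaengine.h>

static int example_ioat_copy(void *dest, void *src, size_t len)
{
	struct dma_chan *chan;
	dma_cookie_t cookie;

	dmaengine_get();	/* pin dmaengine client state */

	/* any memcpy-capable channel; ioat advertises DMA_MEMCPY in probe */
	chan = dma_find_channel(DMA_MEMCPY);
	if (!chan) {
		dmaengine_put();
		return -ENODEV;
	}

	/* maps src/dest, preps a descriptor chain, and submits it */
	cookie = dma_async_memcpy_buf_to_buf(chan, dest, src, len);
	if (cookie < 0) {
		dmaengine_put();
		return (int)cookie;
	}

	/* push past the ioat_pending_level batching noted earlier */
	dma_async_memcpy_issue_pending(chan);

	/* poll; a real client would sleep or use a completion callback */
	while (dma_async_memcpy_complete(chan, cookie, NULL, NULL)
			!= DMA_SUCCESS)
		cpu_relax();

	dmaengine_put();
	return 0;
}

dma_async_memcpy_buf_to_buf() acks the descriptor and marks the buffers for unmapping on completion, so the driver's cleanup path (ioat_dma_unmap() above) undoes the mappings and the caller has nothing to release besides the dmaengine reference.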