Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v3.17-rc1 1676 lines 47 kB view raw
1/* 2 * This file is provided under a dual BSD/GPLv2 license. When using or 3 * redistributing this file, you may do so under either license. 4 * 5 * GPL LICENSE SUMMARY 6 * 7 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. 8 * 9 * This program is free software; you can redistribute it and/or modify it 10 * under the terms and conditions of the GNU General Public License, 11 * version 2, as published by the Free Software Foundation. 12 * 13 * This program is distributed in the hope that it will be useful, but WITHOUT 14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 16 * more details. 17 * 18 * You should have received a copy of the GNU General Public License along with 19 * this program; if not, write to the Free Software Foundation, Inc., 20 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 21 * 22 * The full GNU General Public License is included in this distribution in 23 * the file called "COPYING". 24 * 25 * BSD LICENSE 26 * 27 * Copyright(c) 2004-2009 Intel Corporation. All rights reserved. 28 * 29 * Redistribution and use in source and binary forms, with or without 30 * modification, are permitted provided that the following conditions are met: 31 * 32 * * Redistributions of source code must retain the above copyright 33 * notice, this list of conditions and the following disclaimer. 34 * * Redistributions in binary form must reproduce the above copyright 35 * notice, this list of conditions and the following disclaimer in 36 * the documentation and/or other materials provided with the 37 * distribution. 38 * * Neither the name of Intel Corporation nor the names of its 39 * contributors may be used to endorse or promote products derived 40 * from this software without specific prior written permission. 
41 * 42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 43 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 45 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 46 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 47 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 48 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 49 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 50 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 51 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 52 * POSSIBILITY OF SUCH DAMAGE. 53 */ 54 55/* 56 * Support routines for v3+ hardware 57 */ 58#include <linux/module.h> 59#include <linux/pci.h> 60#include <linux/gfp.h> 61#include <linux/dmaengine.h> 62#include <linux/dma-mapping.h> 63#include <linux/prefetch.h> 64#include "../dmaengine.h" 65#include "registers.h" 66#include "hw.h" 67#include "dma.h" 68#include "dma_v2.h" 69 70extern struct kmem_cache *ioat3_sed_cache; 71 72/* ioat hardware assumes at least two sources for raid operations */ 73#define src_cnt_to_sw(x) ((x) + 2) 74#define src_cnt_to_hw(x) ((x) - 2) 75#define ndest_to_sw(x) ((x) + 1) 76#define ndest_to_hw(x) ((x) - 1) 77#define src16_cnt_to_sw(x) ((x) + 9) 78#define src16_cnt_to_hw(x) ((x) - 9) 79 80/* provide a lookup table for setting the source address in the base or 81 * extended descriptor of an xor or pq descriptor 82 */ 83static const u8 xor_idx_to_desc = 0xe0; 84static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; 85static const u8 pq_idx_to_desc = 0xf8; 86static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1, 87 2, 2, 2, 2, 2, 2, 2 }; 88static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; 89static const u8 pq16_idx_to_field[] = { 1, 4, 1, 
2, 3, 4, 5, 6, 7, 90 0, 1, 2, 3, 4, 5, 6 }; 91 92static void ioat3_eh(struct ioat2_dma_chan *ioat); 93 94static void xor_set_src(struct ioat_raw_descriptor *descs[2], 95 dma_addr_t addr, u32 offset, int idx) 96{ 97 struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; 98 99 raw->field[xor_idx_to_field[idx]] = addr + offset; 100} 101 102static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) 103{ 104 struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; 105 106 return raw->field[pq_idx_to_field[idx]]; 107} 108 109static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx) 110{ 111 struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; 112 113 return raw->field[pq16_idx_to_field[idx]]; 114} 115 116static void pq_set_src(struct ioat_raw_descriptor *descs[2], 117 dma_addr_t addr, u32 offset, u8 coef, int idx) 118{ 119 struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0]; 120 struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; 121 122 raw->field[pq_idx_to_field[idx]] = addr + offset; 123 pq->coef[idx] = coef; 124} 125 126static bool is_jf_ioat(struct pci_dev *pdev) 127{ 128 switch (pdev->device) { 129 case PCI_DEVICE_ID_INTEL_IOAT_JSF0: 130 case PCI_DEVICE_ID_INTEL_IOAT_JSF1: 131 case PCI_DEVICE_ID_INTEL_IOAT_JSF2: 132 case PCI_DEVICE_ID_INTEL_IOAT_JSF3: 133 case PCI_DEVICE_ID_INTEL_IOAT_JSF4: 134 case PCI_DEVICE_ID_INTEL_IOAT_JSF5: 135 case PCI_DEVICE_ID_INTEL_IOAT_JSF6: 136 case PCI_DEVICE_ID_INTEL_IOAT_JSF7: 137 case PCI_DEVICE_ID_INTEL_IOAT_JSF8: 138 case PCI_DEVICE_ID_INTEL_IOAT_JSF9: 139 return true; 140 default: 141 return false; 142 } 143} 144 145static bool is_snb_ioat(struct pci_dev *pdev) 146{ 147 switch (pdev->device) { 148 case PCI_DEVICE_ID_INTEL_IOAT_SNB0: 149 case PCI_DEVICE_ID_INTEL_IOAT_SNB1: 150 case PCI_DEVICE_ID_INTEL_IOAT_SNB2: 151 case PCI_DEVICE_ID_INTEL_IOAT_SNB3: 152 case PCI_DEVICE_ID_INTEL_IOAT_SNB4: 153 case 
PCI_DEVICE_ID_INTEL_IOAT_SNB5: 154 case PCI_DEVICE_ID_INTEL_IOAT_SNB6: 155 case PCI_DEVICE_ID_INTEL_IOAT_SNB7: 156 case PCI_DEVICE_ID_INTEL_IOAT_SNB8: 157 case PCI_DEVICE_ID_INTEL_IOAT_SNB9: 158 return true; 159 default: 160 return false; 161 } 162} 163 164static bool is_ivb_ioat(struct pci_dev *pdev) 165{ 166 switch (pdev->device) { 167 case PCI_DEVICE_ID_INTEL_IOAT_IVB0: 168 case PCI_DEVICE_ID_INTEL_IOAT_IVB1: 169 case PCI_DEVICE_ID_INTEL_IOAT_IVB2: 170 case PCI_DEVICE_ID_INTEL_IOAT_IVB3: 171 case PCI_DEVICE_ID_INTEL_IOAT_IVB4: 172 case PCI_DEVICE_ID_INTEL_IOAT_IVB5: 173 case PCI_DEVICE_ID_INTEL_IOAT_IVB6: 174 case PCI_DEVICE_ID_INTEL_IOAT_IVB7: 175 case PCI_DEVICE_ID_INTEL_IOAT_IVB8: 176 case PCI_DEVICE_ID_INTEL_IOAT_IVB9: 177 return true; 178 default: 179 return false; 180 } 181 182} 183 184static bool is_hsw_ioat(struct pci_dev *pdev) 185{ 186 switch (pdev->device) { 187 case PCI_DEVICE_ID_INTEL_IOAT_HSW0: 188 case PCI_DEVICE_ID_INTEL_IOAT_HSW1: 189 case PCI_DEVICE_ID_INTEL_IOAT_HSW2: 190 case PCI_DEVICE_ID_INTEL_IOAT_HSW3: 191 case PCI_DEVICE_ID_INTEL_IOAT_HSW4: 192 case PCI_DEVICE_ID_INTEL_IOAT_HSW5: 193 case PCI_DEVICE_ID_INTEL_IOAT_HSW6: 194 case PCI_DEVICE_ID_INTEL_IOAT_HSW7: 195 case PCI_DEVICE_ID_INTEL_IOAT_HSW8: 196 case PCI_DEVICE_ID_INTEL_IOAT_HSW9: 197 return true; 198 default: 199 return false; 200 } 201 202} 203 204static bool is_xeon_cb32(struct pci_dev *pdev) 205{ 206 return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) || 207 is_hsw_ioat(pdev); 208} 209 210static bool is_bwd_ioat(struct pci_dev *pdev) 211{ 212 switch (pdev->device) { 213 case PCI_DEVICE_ID_INTEL_IOAT_BWD0: 214 case PCI_DEVICE_ID_INTEL_IOAT_BWD1: 215 case PCI_DEVICE_ID_INTEL_IOAT_BWD2: 216 case PCI_DEVICE_ID_INTEL_IOAT_BWD3: 217 return true; 218 default: 219 return false; 220 } 221} 222 223static bool is_bwd_noraid(struct pci_dev *pdev) 224{ 225 switch (pdev->device) { 226 case PCI_DEVICE_ID_INTEL_IOAT_BWD2: 227 case PCI_DEVICE_ID_INTEL_IOAT_BWD3: 228 return true; 
229 default: 230 return false; 231 } 232 233} 234 235static void pq16_set_src(struct ioat_raw_descriptor *desc[3], 236 dma_addr_t addr, u32 offset, u8 coef, unsigned idx) 237{ 238 struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0]; 239 struct ioat_pq16a_descriptor *pq16 = 240 (struct ioat_pq16a_descriptor *)desc[1]; 241 struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; 242 243 raw->field[pq16_idx_to_field[idx]] = addr + offset; 244 245 if (idx < 8) 246 pq->coef[idx] = coef; 247 else 248 pq16->coef[idx - 8] = coef; 249} 250 251static struct ioat_sed_ent * 252ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool) 253{ 254 struct ioat_sed_ent *sed; 255 gfp_t flags = __GFP_ZERO | GFP_ATOMIC; 256 257 sed = kmem_cache_alloc(ioat3_sed_cache, flags); 258 if (!sed) 259 return NULL; 260 261 sed->hw_pool = hw_pool; 262 sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool], 263 flags, &sed->dma); 264 if (!sed->hw) { 265 kmem_cache_free(ioat3_sed_cache, sed); 266 return NULL; 267 } 268 269 return sed; 270} 271 272static void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *sed) 273{ 274 if (!sed) 275 return; 276 277 dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma); 278 kmem_cache_free(ioat3_sed_cache, sed); 279} 280 281static bool desc_has_ext(struct ioat_ring_ent *desc) 282{ 283 struct ioat_dma_descriptor *hw = desc->hw; 284 285 if (hw->ctl_f.op == IOAT_OP_XOR || 286 hw->ctl_f.op == IOAT_OP_XOR_VAL) { 287 struct ioat_xor_descriptor *xor = desc->xor; 288 289 if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5) 290 return true; 291 } else if (hw->ctl_f.op == IOAT_OP_PQ || 292 hw->ctl_f.op == IOAT_OP_PQ_VAL) { 293 struct ioat_pq_descriptor *pq = desc->pq; 294 295 if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3) 296 return true; 297 } 298 299 return false; 300} 301 302static u64 ioat3_get_current_completion(struct ioat_chan_common *chan) 303{ 304 u64 phys_complete; 305 u64 completion; 306 307 completion = 
*chan->completion; 308 phys_complete = ioat_chansts_to_addr(completion); 309 310 dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, 311 (unsigned long long) phys_complete); 312 313 return phys_complete; 314} 315 316static bool ioat3_cleanup_preamble(struct ioat_chan_common *chan, 317 u64 *phys_complete) 318{ 319 *phys_complete = ioat3_get_current_completion(chan); 320 if (*phys_complete == chan->last_completion) 321 return false; 322 323 clear_bit(IOAT_COMPLETION_ACK, &chan->state); 324 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); 325 326 return true; 327} 328 329static void 330desc_get_errstat(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc) 331{ 332 struct ioat_dma_descriptor *hw = desc->hw; 333 334 switch (hw->ctl_f.op) { 335 case IOAT_OP_PQ_VAL: 336 case IOAT_OP_PQ_VAL_16S: 337 { 338 struct ioat_pq_descriptor *pq = desc->pq; 339 340 /* check if there's error written */ 341 if (!pq->dwbes_f.wbes) 342 return; 343 344 /* need to set a chanerr var for checking to clear later */ 345 346 if (pq->dwbes_f.p_val_err) 347 *desc->result |= SUM_CHECK_P_RESULT; 348 349 if (pq->dwbes_f.q_val_err) 350 *desc->result |= SUM_CHECK_Q_RESULT; 351 352 return; 353 } 354 default: 355 return; 356 } 357} 358 359/** 360 * __cleanup - reclaim used descriptors 361 * @ioat: channel (ring) to clean 362 * 363 * The difference from the dma_v2.c __cleanup() is that this routine 364 * handles extended descriptors and dma-unmapping raid operations. 
365 */ 366static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) 367{ 368 struct ioat_chan_common *chan = &ioat->base; 369 struct ioatdma_device *device = chan->device; 370 struct ioat_ring_ent *desc; 371 bool seen_current = false; 372 int idx = ioat->tail, i; 373 u16 active; 374 375 dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", 376 __func__, ioat->head, ioat->tail, ioat->issued); 377 378 /* 379 * At restart of the channel, the completion address and the 380 * channel status will be 0 due to starting a new chain. Since 381 * it's new chain and the first descriptor "fails", there is 382 * nothing to clean up. We do not want to reap the entire submitted 383 * chain due to this 0 address value and then BUG. 384 */ 385 if (!phys_complete) 386 return; 387 388 active = ioat2_ring_active(ioat); 389 for (i = 0; i < active && !seen_current; i++) { 390 struct dma_async_tx_descriptor *tx; 391 392 smp_read_barrier_depends(); 393 prefetch(ioat2_get_ring_ent(ioat, idx + i + 1)); 394 desc = ioat2_get_ring_ent(ioat, idx + i); 395 dump_desc_dbg(ioat, desc); 396 397 /* set err stat if we are using dwbes */ 398 if (device->cap & IOAT_CAP_DWBES) 399 desc_get_errstat(ioat, desc); 400 401 tx = &desc->txd; 402 if (tx->cookie) { 403 dma_cookie_complete(tx); 404 dma_descriptor_unmap(tx); 405 if (tx->callback) { 406 tx->callback(tx->callback_param); 407 tx->callback = NULL; 408 } 409 } 410 411 if (tx->phys == phys_complete) 412 seen_current = true; 413 414 /* skip extended descriptors */ 415 if (desc_has_ext(desc)) { 416 BUG_ON(i + 1 >= active); 417 i++; 418 } 419 420 /* cleanup super extended descriptors */ 421 if (desc->sed) { 422 ioat3_free_sed(device, desc->sed); 423 desc->sed = NULL; 424 } 425 } 426 smp_mb(); /* finish all descriptor reads before incrementing tail */ 427 ioat->tail = idx + i; 428 BUG_ON(active && !seen_current); /* no active descs have written a completion? 
*/ 429 chan->last_completion = phys_complete; 430 431 if (active - i == 0) { 432 dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", 433 __func__); 434 clear_bit(IOAT_COMPLETION_PENDING, &chan->state); 435 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); 436 } 437 /* 5 microsecond delay per pending descriptor */ 438 writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK), 439 chan->device->reg_base + IOAT_INTRDELAY_OFFSET); 440} 441 442static void ioat3_cleanup(struct ioat2_dma_chan *ioat) 443{ 444 struct ioat_chan_common *chan = &ioat->base; 445 u64 phys_complete; 446 447 spin_lock_bh(&chan->cleanup_lock); 448 449 if (ioat3_cleanup_preamble(chan, &phys_complete)) 450 __cleanup(ioat, phys_complete); 451 452 if (is_ioat_halted(*chan->completion)) { 453 u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); 454 455 if (chanerr & IOAT_CHANERR_HANDLE_MASK) { 456 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); 457 ioat3_eh(ioat); 458 } 459 } 460 461 spin_unlock_bh(&chan->cleanup_lock); 462} 463 464static void ioat3_cleanup_event(unsigned long data) 465{ 466 struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); 467 struct ioat_chan_common *chan = &ioat->base; 468 469 ioat3_cleanup(ioat); 470 if (!test_bit(IOAT_RUN, &chan->state)) 471 return; 472 writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); 473} 474 475static void ioat3_restart_channel(struct ioat2_dma_chan *ioat) 476{ 477 struct ioat_chan_common *chan = &ioat->base; 478 u64 phys_complete; 479 480 ioat2_quiesce(chan, 0); 481 if (ioat3_cleanup_preamble(chan, &phys_complete)) 482 __cleanup(ioat, phys_complete); 483 484 __ioat2_restart_chan(ioat); 485} 486 487static void ioat3_eh(struct ioat2_dma_chan *ioat) 488{ 489 struct ioat_chan_common *chan = &ioat->base; 490 struct pci_dev *pdev = to_pdev(chan); 491 struct ioat_dma_descriptor *hw; 492 u64 phys_complete; 493 struct ioat_ring_ent *desc; 494 u32 err_handled = 0; 495 u32 chanerr_int; 496 u32 chanerr; 497 498 /* cleanup so tail points 
to descriptor that caused the error */ 499 if (ioat3_cleanup_preamble(chan, &phys_complete)) 500 __cleanup(ioat, phys_complete); 501 502 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); 503 pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int); 504 505 dev_dbg(to_dev(chan), "%s: error = %x:%x\n", 506 __func__, chanerr, chanerr_int); 507 508 desc = ioat2_get_ring_ent(ioat, ioat->tail); 509 hw = desc->hw; 510 dump_desc_dbg(ioat, desc); 511 512 switch (hw->ctl_f.op) { 513 case IOAT_OP_XOR_VAL: 514 if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { 515 *desc->result |= SUM_CHECK_P_RESULT; 516 err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; 517 } 518 break; 519 case IOAT_OP_PQ_VAL: 520 case IOAT_OP_PQ_VAL_16S: 521 if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { 522 *desc->result |= SUM_CHECK_P_RESULT; 523 err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; 524 } 525 if (chanerr & IOAT_CHANERR_XOR_Q_ERR) { 526 *desc->result |= SUM_CHECK_Q_RESULT; 527 err_handled |= IOAT_CHANERR_XOR_Q_ERR; 528 } 529 break; 530 } 531 532 /* fault on unhandled error or spurious halt */ 533 if (chanerr ^ err_handled || chanerr == 0) { 534 dev_err(to_dev(chan), "%s: fatal error (%x:%x)\n", 535 __func__, chanerr, err_handled); 536 BUG(); 537 } 538 539 writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); 540 pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int); 541 542 /* mark faulting descriptor as complete */ 543 *chan->completion = desc->txd.phys; 544 545 spin_lock_bh(&ioat->prep_lock); 546 ioat3_restart_channel(ioat); 547 spin_unlock_bh(&ioat->prep_lock); 548} 549 550static void check_active(struct ioat2_dma_chan *ioat) 551{ 552 struct ioat_chan_common *chan = &ioat->base; 553 554 if (ioat2_ring_active(ioat)) { 555 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); 556 return; 557 } 558 559 if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &chan->state)) 560 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); 561 else if (ioat->alloc_order > ioat_get_alloc_order()) { 
562 /* if the ring is idle, empty, and oversized try to step 563 * down the size 564 */ 565 reshape_ring(ioat, ioat->alloc_order - 1); 566 567 /* keep shrinking until we get back to our minimum 568 * default size 569 */ 570 if (ioat->alloc_order > ioat_get_alloc_order()) 571 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); 572 } 573 574} 575 576static void ioat3_timer_event(unsigned long data) 577{ 578 struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); 579 struct ioat_chan_common *chan = &ioat->base; 580 dma_addr_t phys_complete; 581 u64 status; 582 583 status = ioat_chansts(chan); 584 585 /* when halted due to errors check for channel 586 * programming errors before advancing the completion state 587 */ 588 if (is_ioat_halted(status)) { 589 u32 chanerr; 590 591 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); 592 dev_err(to_dev(chan), "%s: Channel halted (%x)\n", 593 __func__, chanerr); 594 if (test_bit(IOAT_RUN, &chan->state)) 595 BUG_ON(is_ioat_bug(chanerr)); 596 else /* we never got off the ground */ 597 return; 598 } 599 600 /* if we haven't made progress and we have already 601 * acknowledged a pending completion once, then be more 602 * forceful with a restart 603 */ 604 spin_lock_bh(&chan->cleanup_lock); 605 if (ioat_cleanup_preamble(chan, &phys_complete)) 606 __cleanup(ioat, phys_complete); 607 else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) { 608 spin_lock_bh(&ioat->prep_lock); 609 ioat3_restart_channel(ioat); 610 spin_unlock_bh(&ioat->prep_lock); 611 spin_unlock_bh(&chan->cleanup_lock); 612 return; 613 } else { 614 set_bit(IOAT_COMPLETION_ACK, &chan->state); 615 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); 616 } 617 618 619 if (ioat2_ring_active(ioat)) 620 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); 621 else { 622 spin_lock_bh(&ioat->prep_lock); 623 check_active(ioat); 624 spin_unlock_bh(&ioat->prep_lock); 625 } 626 spin_unlock_bh(&chan->cleanup_lock); 627} 628 629static enum dma_status 630ioat3_tx_status(struct 
dma_chan *c, dma_cookie_t cookie, 631 struct dma_tx_state *txstate) 632{ 633 struct ioat2_dma_chan *ioat = to_ioat2_chan(c); 634 enum dma_status ret; 635 636 ret = dma_cookie_status(c, cookie, txstate); 637 if (ret == DMA_COMPLETE) 638 return ret; 639 640 ioat3_cleanup(ioat); 641 642 return dma_cookie_status(c, cookie, txstate); 643} 644 645static struct dma_async_tx_descriptor * 646__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, 647 dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, 648 size_t len, unsigned long flags) 649{ 650 struct ioat2_dma_chan *ioat = to_ioat2_chan(c); 651 struct ioat_ring_ent *compl_desc; 652 struct ioat_ring_ent *desc; 653 struct ioat_ring_ent *ext; 654 size_t total_len = len; 655 struct ioat_xor_descriptor *xor; 656 struct ioat_xor_ext_descriptor *xor_ex = NULL; 657 struct ioat_dma_descriptor *hw; 658 int num_descs, with_ext, idx, i; 659 u32 offset = 0; 660 u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; 661 662 BUG_ON(src_cnt < 2); 663 664 num_descs = ioat2_xferlen_to_descs(ioat, len); 665 /* we need 2x the number of descriptors to cover greater than 5 666 * sources 667 */ 668 if (src_cnt > 5) { 669 with_ext = 1; 670 num_descs *= 2; 671 } else 672 with_ext = 0; 673 674 /* completion writes from the raid engine may pass completion 675 * writes from the legacy engine, so we need one extra null 676 * (legacy) descriptor to ensure all completion writes arrive in 677 * order. 
678 */ 679 if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0) 680 idx = ioat->head; 681 else 682 return NULL; 683 i = 0; 684 do { 685 struct ioat_raw_descriptor *descs[2]; 686 size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); 687 int s; 688 689 desc = ioat2_get_ring_ent(ioat, idx + i); 690 xor = desc->xor; 691 692 /* save a branch by unconditionally retrieving the 693 * extended descriptor xor_set_src() knows to not write 694 * to it in the single descriptor case 695 */ 696 ext = ioat2_get_ring_ent(ioat, idx + i + 1); 697 xor_ex = ext->xor_ex; 698 699 descs[0] = (struct ioat_raw_descriptor *) xor; 700 descs[1] = (struct ioat_raw_descriptor *) xor_ex; 701 for (s = 0; s < src_cnt; s++) 702 xor_set_src(descs, src[s], offset, s); 703 xor->size = xfer_size; 704 xor->dst_addr = dest + offset; 705 xor->ctl = 0; 706 xor->ctl_f.op = op; 707 xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt); 708 709 len -= xfer_size; 710 offset += xfer_size; 711 dump_desc_dbg(ioat, desc); 712 } while ((i += 1 + with_ext) < num_descs); 713 714 /* last xor descriptor carries the unmap parameters and fence bit */ 715 desc->txd.flags = flags; 716 desc->len = total_len; 717 if (result) 718 desc->result = result; 719 xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE); 720 721 /* completion descriptor carries interrupt bit */ 722 compl_desc = ioat2_get_ring_ent(ioat, idx + i); 723 compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; 724 hw = compl_desc->hw; 725 hw->ctl = 0; 726 hw->ctl_f.null = 1; 727 hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); 728 hw->ctl_f.compl_write = 1; 729 hw->size = NULL_DESC_BUFFER_SIZE; 730 dump_desc_dbg(ioat, compl_desc); 731 732 /* we leave the channel locked to ensure in order submission */ 733 return &compl_desc->txd; 734} 735 736static struct dma_async_tx_descriptor * 737ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, 738 unsigned int src_cnt, size_t len, unsigned long flags) 739{ 740 return 
__ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); 741} 742 743struct dma_async_tx_descriptor * 744ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, 745 unsigned int src_cnt, size_t len, 746 enum sum_check_flags *result, unsigned long flags) 747{ 748 /* the cleanup routine only sets bits on validate failure, it 749 * does not clear bits on validate success... so clear it here 750 */ 751 *result = 0; 752 753 return __ioat3_prep_xor_lock(chan, result, src[0], &src[1], 754 src_cnt - 1, len, flags); 755} 756 757static void 758dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext) 759{ 760 struct device *dev = to_dev(&ioat->base); 761 struct ioat_pq_descriptor *pq = desc->pq; 762 struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL; 763 struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex }; 764 int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); 765 int i; 766 767 dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" 768 " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" 769 " src_cnt: %d)\n", 770 desc_id(desc), (unsigned long long) desc->txd.phys, 771 (unsigned long long) (pq_ex ? pq_ex->next : pq->next), 772 desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, 773 pq->ctl_f.compl_write, 774 pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? 
"" : "q", 775 pq->ctl_f.src_cnt); 776 for (i = 0; i < src_cnt; i++) 777 dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, 778 (unsigned long long) pq_get_src(descs, i), pq->coef[i]); 779 dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); 780 dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); 781 dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); 782} 783 784static void dump_pq16_desc_dbg(struct ioat2_dma_chan *ioat, 785 struct ioat_ring_ent *desc) 786{ 787 struct device *dev = to_dev(&ioat->base); 788 struct ioat_pq_descriptor *pq = desc->pq; 789 struct ioat_raw_descriptor *descs[] = { (void *)pq, 790 (void *)pq, 791 (void *)pq }; 792 int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); 793 int i; 794 795 if (desc->sed) { 796 descs[1] = (void *)desc->sed->hw; 797 descs[2] = (void *)desc->sed->hw + 64; 798 } 799 800 dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" 801 " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" 802 " src_cnt: %d)\n", 803 desc_id(desc), (unsigned long long) desc->txd.phys, 804 (unsigned long long) pq->next, 805 desc->txd.flags, pq->size, pq->ctl, 806 pq->ctl_f.op, pq->ctl_f.int_en, 807 pq->ctl_f.compl_write, 808 pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? 
"" : "q", 809 pq->ctl_f.src_cnt); 810 for (i = 0; i < src_cnt; i++) { 811 dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, 812 (unsigned long long) pq16_get_src(descs, i), 813 pq->coef[i]); 814 } 815 dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); 816 dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); 817} 818 819static struct dma_async_tx_descriptor * 820__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, 821 const dma_addr_t *dst, const dma_addr_t *src, 822 unsigned int src_cnt, const unsigned char *scf, 823 size_t len, unsigned long flags) 824{ 825 struct ioat2_dma_chan *ioat = to_ioat2_chan(c); 826 struct ioat_chan_common *chan = &ioat->base; 827 struct ioatdma_device *device = chan->device; 828 struct ioat_ring_ent *compl_desc; 829 struct ioat_ring_ent *desc; 830 struct ioat_ring_ent *ext; 831 size_t total_len = len; 832 struct ioat_pq_descriptor *pq; 833 struct ioat_pq_ext_descriptor *pq_ex = NULL; 834 struct ioat_dma_descriptor *hw; 835 u32 offset = 0; 836 u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; 837 int i, s, idx, with_ext, num_descs; 838 int cb32 = (device->version < IOAT_VER_3_3) ? 1 : 0; 839 840 dev_dbg(to_dev(chan), "%s\n", __func__); 841 /* the engine requires at least two sources (we provide 842 * at least 1 implied source in the DMA_PREP_CONTINUE case) 843 */ 844 BUG_ON(src_cnt + dmaf_continue(flags) < 2); 845 846 num_descs = ioat2_xferlen_to_descs(ioat, len); 847 /* we need 2x the number of descriptors to cover greater than 3 848 * sources (we need 1 extra source in the q-only continuation 849 * case and 3 extra sources in the p+q continuation case. 
850 */ 851 if (src_cnt + dmaf_p_disabled_continue(flags) > 3 || 852 (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) { 853 with_ext = 1; 854 num_descs *= 2; 855 } else 856 with_ext = 0; 857 858 /* completion writes from the raid engine may pass completion 859 * writes from the legacy engine, so we need one extra null 860 * (legacy) descriptor to ensure all completion writes arrive in 861 * order. 862 */ 863 if (likely(num_descs) && 864 ioat2_check_space_lock(ioat, num_descs + cb32) == 0) 865 idx = ioat->head; 866 else 867 return NULL; 868 i = 0; 869 do { 870 struct ioat_raw_descriptor *descs[2]; 871 size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); 872 873 desc = ioat2_get_ring_ent(ioat, idx + i); 874 pq = desc->pq; 875 876 /* save a branch by unconditionally retrieving the 877 * extended descriptor pq_set_src() knows to not write 878 * to it in the single descriptor case 879 */ 880 ext = ioat2_get_ring_ent(ioat, idx + i + with_ext); 881 pq_ex = ext->pq_ex; 882 883 descs[0] = (struct ioat_raw_descriptor *) pq; 884 descs[1] = (struct ioat_raw_descriptor *) pq_ex; 885 886 for (s = 0; s < src_cnt; s++) 887 pq_set_src(descs, src[s], offset, scf[s], s); 888 889 /* see the comment for dma_maxpq in include/linux/dmaengine.h */ 890 if (dmaf_p_disabled_continue(flags)) 891 pq_set_src(descs, dst[1], offset, 1, s++); 892 else if (dmaf_continue(flags)) { 893 pq_set_src(descs, dst[0], offset, 0, s++); 894 pq_set_src(descs, dst[1], offset, 1, s++); 895 pq_set_src(descs, dst[1], offset, 0, s++); 896 } 897 pq->size = xfer_size; 898 pq->p_addr = dst[0] + offset; 899 pq->q_addr = dst[1] + offset; 900 pq->ctl = 0; 901 pq->ctl_f.op = op; 902 /* we turn on descriptor write back error status */ 903 if (device->cap & IOAT_CAP_DWBES) 904 pq->ctl_f.wb_en = result ? 
/*
 * Tail of __ioat3_prep_pq_lock() — the head of this function precedes this
 * chunk of the file, so only its closing half is documented here.  The open
 * ternary below completes "pq->ctl_f.wb_en = result ?" from the prior line.
 */
			1 : 0;
		pq->ctl_f.src_cnt = src_cnt_to_hw(s);
		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);

		len -= xfer_size;
		offset += xfer_size;
	} while ((i += 1 + with_ext) < num_descs);

	/* last pq descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	dump_pq_desc_dbg(ioat, desc, ext);

	if (!cb32) {
		/* hw can write completion directly from the pq descriptor */
		pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
		pq->ctl_f.compl_write = 1;
		compl_desc = desc;
	} else {
		/* completion descriptor carries interrupt bit */
		compl_desc = ioat2_get_ring_ent(ioat, idx + i);
		compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
		hw = compl_desc->hw;
		hw->ctl = 0;
		hw->ctl_f.null = 1;
		hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
		hw->ctl_f.compl_write = 1;
		hw->size = NULL_DESC_BUFFER_SIZE;
		dump_desc_dbg(ioat, compl_desc);
	}


	/* we leave the channel locked to ensure in order submission */
	return &compl_desc->txd;
}

/*
 * __ioat3_prep_pq16_lock - build a 9-16 source P+Q descriptor chain
 * @c: dma channel
 * @result: set for a validate (PQ_VAL) operation, NULL for a generate
 * @dst: two destinations, P at dst[0] and Q at dst[1]
 * @src: source addresses
 * @src_cnt: number of entries in @src
 * @scf: per-source GF coefficients
 * @len: total transfer length
 * @flags: DMA_PREP_* control flags
 *
 * The extra sources beyond what fits in the base descriptor live in a
 * "super extended descriptor" (SED) allocated from a dma pool.  Returns
 * with the channel lock held (released at submit) or NULL on failure.
 *
 * NOTE(review): on the ioat3_alloc_sed() failure path the ring slots
 * reserved by ioat2_check_space_lock() and the SEDs allocated by earlier
 * loop iterations do not appear to be released here — presumably cleaned
 * up elsewhere; verify against the completion/cleanup path.
 */
static struct dma_async_tx_descriptor *
__ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
		       const dma_addr_t *dst, const dma_addr_t *src,
		       unsigned int src_cnt, const unsigned char *scf,
		       size_t len, unsigned long flags)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
	struct ioat_chan_common *chan = &ioat->base;
	struct ioatdma_device *device = chan->device;
	struct ioat_ring_ent *desc;
	size_t total_len = len;
	struct ioat_pq_descriptor *pq;
	u32 offset = 0;
	u8 op;
	int i, s, idx, num_descs;

	/* this function is only called with 9-16 sources */
	op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;

	dev_dbg(to_dev(chan), "%s\n", __func__);

	num_descs = ioat2_xferlen_to_descs(ioat, len);

	/*
	 * 16 source pq is only available on cb3.3 and has no completion
	 * write hw bug.
	 */
	if (num_descs && ioat2_check_space_lock(ioat, num_descs) == 0)
		idx = ioat->head;
	else
		return NULL;

	i = 0;

	do {
		struct ioat_raw_descriptor *descs[4];
		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);

		desc = ioat2_get_ring_ent(ioat, idx + i);
		pq = desc->pq;

		descs[0] = (struct ioat_raw_descriptor *) pq;

		/* pool index scales with how many extra sources we carry */
		desc->sed = ioat3_alloc_sed(device, (src_cnt-2) >> 3);
		if (!desc->sed) {
			dev_err(to_dev(chan),
				"%s: no free sed entries\n", __func__);
			return NULL;
		}

		pq->sed_addr = desc->sed->dma;
		desc->sed->parent = desc;

		/* descs[1]/[2] address the two 64-byte halves of the sed */
		descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw;
		descs[2] = (void *)descs[1] + 64;

		for (s = 0; s < src_cnt; s++)
			pq16_set_src(descs, src[s], offset, scf[s], s);

		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
		if (dmaf_p_disabled_continue(flags))
			pq16_set_src(descs, dst[1], offset, 1, s++);
		else if (dmaf_continue(flags)) {
			pq16_set_src(descs, dst[0], offset, 0, s++);
			pq16_set_src(descs, dst[1], offset, 1, s++);
			pq16_set_src(descs, dst[1], offset, 0, s++);
		}

		pq->size = xfer_size;
		pq->p_addr = dst[0] + offset;
		pq->q_addr = dst[1] + offset;
		pq->ctl = 0;
		pq->ctl_f.op = op;
		pq->ctl_f.src_cnt = src16_cnt_to_hw(s);
		/* we turn on descriptor write back error status */
		if (device->cap & IOAT_CAP_DWBES)
			pq->ctl_f.wb_en = result ? 1 : 0;
		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);

		len -= xfer_size;
		offset += xfer_size;
	} while (++i < num_descs);

	/* last pq descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);

	/* with cb3.3 we should be able to do completion w/o a null desc */
	pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	pq->ctl_f.compl_write = 1;

	dump_pq16_desc_dbg(ioat, desc);

	/* we leave the channel locked to ensure in order submission */
	return &desc->txd;
}

/*
 * src_cnt_flags - effective source count once continuation flags are
 * accounted for; DMA_PREP_CONTINUE variants consume extra source slots
 * (see dma_maxpq in include/linux/dmaengine.h).
 */
static int src_cnt_flags(unsigned int src_cnt, unsigned long flags)
{
	if (dmaf_p_disabled_continue(flags))
		return src_cnt + 1;
	else if (dmaf_continue(flags))
		return src_cnt + 3;
	else
		return src_cnt;
}

/*
 * ioat3_prep_pq - dmaengine ->device_prep_dma_pq entry point; routes to
 * the 8-source or 16-source descriptor builder based on effective source
 * count.  May write dst[0]/dst[1] to substitute a valid address for a
 * disabled result.
 */
static struct dma_async_tx_descriptor *
ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
	      unsigned int src_cnt, const unsigned char *scf, size_t len,
	      unsigned long flags)
{
	/* specify valid address for disabled result */
	if (flags & DMA_PREP_PQ_DISABLE_P)
		dst[0] = dst[1];
	if (flags & DMA_PREP_PQ_DISABLE_Q)
		dst[1] = dst[0];

	/* handle the single source multiply case from the raid6
	 * recovery path
	 */
	if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
		dma_addr_t single_source[2];
		unsigned char single_source_coef[2];

		/* P and Q cannot both be disabled */
		BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
		single_source[0] = src[0];
		single_source[1] = src[0];
		single_source_coef[0] = scf[0];
		single_source_coef[1] = 0;

		return src_cnt_flags(src_cnt, flags) > 8 ?
			__ioat3_prep_pq16_lock(chan, NULL, dst, single_source,
					       2, single_source_coef, len,
					       flags) :
			__ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
					     single_source_coef, len, flags);

	} else {
		return src_cnt_flags(src_cnt, flags) > 8 ?
			__ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
					       scf, len, flags) :
			__ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt,
					     scf, len, flags);
	}
}

/*
 * ioat3_prep_pq_val - dmaengine ->device_prep_dma_pq_val entry point;
 * validates P/Q parity, reporting mismatches through @pqres.
 */
struct dma_async_tx_descriptor *
ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
		  unsigned int src_cnt, const unsigned char *scf, size_t len,
		  enum sum_check_flags *pqres, unsigned long flags)
{
	/* specify valid address for disabled result */
	if (flags & DMA_PREP_PQ_DISABLE_P)
		pq[0] = pq[1];
	if (flags & DMA_PREP_PQ_DISABLE_Q)
		pq[1] = pq[0];

	/* the cleanup routine only sets bits on validate failure, it
	 * does not clear bits on validate success... so clear it here
	 */
	*pqres = 0;

	return src_cnt_flags(src_cnt, flags) > 8 ?
		__ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
				       flags) :
		__ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
				     flags);
}

/*
 * ioat3_prep_pqxor - emulate xor with a Q-disabled pq operation (all
 * coefficients zero so Q degenerates to plain xor into P).
 *
 * NOTE(review): scf[src_cnt] is a variable-length array on the kernel
 * stack; src_cnt is bounded by dma->max_xor at the dmaengine layer —
 * confirm against callers.
 */
static struct dma_async_tx_descriptor *
ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
		 unsigned int src_cnt, size_t len, unsigned long flags)
{
	unsigned char scf[src_cnt];
	dma_addr_t pq[2];

	memset(scf, 0, src_cnt);
	pq[0] = dst;
	flags |= DMA_PREP_PQ_DISABLE_Q;
	pq[1] = dst; /* specify valid address for disabled result */

	return src_cnt_flags(src_cnt, flags) > 8 ?
		__ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
				       flags) :
		__ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
				     flags);
}

/*
 * ioat3_prep_pqxor_val - emulate xor-validate via pq-validate: src[0]
 * becomes the P result to check against the remaining src_cnt-1 sources.
 */
struct dma_async_tx_descriptor *
ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
		     unsigned int src_cnt, size_t len,
		     enum sum_check_flags *result, unsigned long flags)
{
	unsigned char scf[src_cnt];
	dma_addr_t pq[2];

	/* the cleanup routine only sets bits on validate failure, it
	 * does not clear bits on validate success... so clear it here
	 */
	*result = 0;

	memset(scf, 0, src_cnt);
	pq[0] = src[0];
	flags |= DMA_PREP_PQ_DISABLE_Q;
	pq[1] = pq[0]; /* specify valid address for disabled result */

	return src_cnt_flags(src_cnt, flags) > 8 ?
		__ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
				       scf, len, flags) :
		__ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
				     scf, len, flags);
}

/*
 * ioat3_prep_interrupt_lock - build a NULL descriptor whose only job is
 * to raise a completion interrupt (dmaengine ->device_prep_dma_interrupt).
 */
static struct dma_async_tx_descriptor *
ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
	struct ioat_ring_ent *desc;
	struct ioat_dma_descriptor *hw;

	if (ioat2_check_space_lock(ioat, 1) == 0)
		desc = ioat2_get_ring_ent(ioat, ioat->head);
	else
		return NULL;

	hw = desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = 1;
	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	hw->src_addr = 0;
	hw->dst_addr = 0;

	desc->txd.flags = flags;
	desc->len = 1;

	dump_desc_dbg(ioat, desc);

	/* we leave the channel locked to ensure in order submission */
	return &desc->txd;
}

/* self-test completion callback: just wake the waiter */
static void ioat3_dma_test_callback(void *dma_async_param)
{
	struct completion *cmp = dma_async_param;

	complete(cmp);
}

#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */

/*
 * ioat_xor_val_self_test - exercise the xor and xor-validate paths once
 * at probe time: one xor of IOAT_NUM_SRC_TEST pages into a zeroed dest,
 * then a zero-sum validate, then a validate that is expected to fail.
 *
 * @op tracks which mapping set is live so the dma_unmap error path can
 * undo exactly what was mapped.  Returns 0 on success or -errno.
 */
static int ioat_xor_val_self_test(struct ioatdma_device *device)
{
	int i, src_idx;
	struct page *dest;
	struct page *xor_srcs[IOAT_NUM_SRC_TEST];
	struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
	dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
	dma_addr_t dest_dma;
	struct dma_async_tx_descriptor *tx;
	struct dma_chan *dma_chan;
	dma_cookie_t cookie;
	u8 cmp_byte = 0;
	u32 cmp_word;
	u32 xor_val_result;
	int err = 0;
	struct completion cmp;
	unsigned long tmo;
	struct device *dev = &device->pdev->dev;
	struct dma_device *dma = &device->common;
	u8 op = 0;

	dev_dbg(dev, "%s\n", __func__);

	if (!dma_has_cap(DMA_XOR, dma->cap_mask))
		return 0;

	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
		if (!xor_srcs[src_idx]) {
			/* unwind the pages allocated so far */
			while (src_idx--)
				__free_page(xor_srcs[src_idx]);
			return -ENOMEM;
		}
	}

	dest = alloc_page(GFP_KERNEL);
	if (!dest) {
		while (src_idx--)
			__free_page(xor_srcs[src_idx]);
		return -ENOMEM;
	}

	/* Fill in src buffers: page N holds repeated (1 << N) */
	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
		u8 *ptr = page_address(xor_srcs[src_idx]);
		for (i = 0; i < PAGE_SIZE; i++)
			ptr[i] = (1 << src_idx);
	}

	/* expected xor of all source bytes */
	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
		cmp_byte ^= (u8) (1 << src_idx);

	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
			(cmp_byte << 8) | cmp_byte;

	memset(page_address(dest), 0, PAGE_SIZE);

	/* use the first channel on the device's list */
	dma_chan = container_of(dma->channels.next, struct dma_chan,
				device_node);
	if (dma->device_alloc_chan_resources(dma_chan) < 1) {
		err = -ENODEV;
		goto out;
	}

	/* test xor */
	op = IOAT_OP_XOR;

	dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
		dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
					   DMA_TO_DEVICE);
	tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
				      IOAT_NUM_SRC_TEST, PAGE_SIZE,
				      DMA_PREP_INTERRUPT);

	if (!tx) {
		dev_err(dev, "Self-test xor prep failed\n");
		err = -ENODEV;
		goto dma_unmap;
	}

	async_tx_ack(tx);
	init_completion(&cmp);
	tx->callback = ioat3_dma_test_callback;
	tx->callback_param = &cmp;
	cookie = tx->tx_submit(tx);
	if (cookie < 0) {
		dev_err(dev, "Self-test xor setup failed\n");
		err = -ENODEV;
		goto dma_unmap;
	}
	dma->device_issue_pending(dma_chan);

	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));

	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
		dev_err(dev, "Self-test xor timed out\n");
		err = -ENODEV;
		goto dma_unmap;
	}

	dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
		dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);

	/*
	 * NOTE(review): dest_dma was just unmapped above, yet it is passed
	 * to dma_sync_single_for_cpu/for_device here.  Per the DMA-API the
	 * handle is no longer valid after unmap — confirm whether the sync
	 * pair is redundant (unmap already transferred ownership to CPU).
	 */
	dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
		u32 *ptr = page_address(dest);
		if (ptr[i] != cmp_word) {
			dev_err(dev, "Self-test xor failed compare\n");
			err = -ENODEV;
			goto free_resources;
		}
	}
	dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);

	/* skip validate if the capability is not present */
	if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
		goto free_resources;

	op = IOAT_OP_XOR_VAL;

	/* validate the sources with the destintation page */
	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
		xor_val_srcs[i] = xor_srcs[i];
	xor_val_srcs[i] = dest;

	/* expect hw to clear this on a successful zero-sum check */
	xor_val_result = 1;

	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
					   DMA_TO_DEVICE);
	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
					  &xor_val_result, DMA_PREP_INTERRUPT);
	if (!tx) {
		dev_err(dev, "Self-test zero prep failed\n");
		err = -ENODEV;
		goto dma_unmap;
	}

	async_tx_ack(tx);
	init_completion(&cmp);
	tx->callback = ioat3_dma_test_callback;
	tx->callback_param = &cmp;
	cookie = tx->tx_submit(tx);
	if (cookie < 0) {
		dev_err(dev, "Self-test zero setup failed\n");
		err = -ENODEV;
		goto dma_unmap;
	}
	dma->device_issue_pending(dma_chan);

	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));

	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
		dev_err(dev, "Self-test validate timed out\n");
		err = -ENODEV;
		goto dma_unmap;
	}

	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
		dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);

	if (xor_val_result != 0) {
		dev_err(dev, "Self-test validate failed compare\n");
		err = -ENODEV;
		goto free_resources;
	}

	/* corrupt the parity by zeroing dest, then expect a P mismatch */
	memset(page_address(dest), 0, PAGE_SIZE);

	/* test for non-zero parity sum */
	op = IOAT_OP_XOR_VAL;

	xor_val_result = 0;
	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
					   DMA_TO_DEVICE);
	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
					  &xor_val_result, DMA_PREP_INTERRUPT);
	if (!tx) {
		dev_err(dev, "Self-test 2nd zero prep failed\n");
		err = -ENODEV;
		goto dma_unmap;
	}

	async_tx_ack(tx);
	init_completion(&cmp);
	tx->callback = ioat3_dma_test_callback;
	tx->callback_param = &cmp;
	cookie = tx->tx_submit(tx);
	if (cookie < 0) {
		dev_err(dev, "Self-test 2nd zero setup failed\n");
		err = -ENODEV;
		goto dma_unmap;
	}
	dma->device_issue_pending(dma_chan);

	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));

	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
		dev_err(dev, "Self-test 2nd validate timed out\n");
		err = -ENODEV;
		goto dma_unmap;
	}

	if (xor_val_result != SUM_CHECK_P_RESULT) {
		dev_err(dev, "Self-test validate failed compare\n");
		err = -ENODEV;
		goto dma_unmap;
	}

	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
		dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);

	goto free_resources;
dma_unmap:
	/* error path: unmap whatever the current operation had mapped */
	if (op == IOAT_OP_XOR) {
		dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
		for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
			dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
				       DMA_TO_DEVICE);
	} else if (op == IOAT_OP_XOR_VAL) {
		for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
			dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
				       DMA_TO_DEVICE);
	}
free_resources:
	dma->device_free_chan_resources(dma_chan);
out:
	src_idx = IOAT_NUM_SRC_TEST;
	while (src_idx--)
		__free_page(xor_srcs[src_idx]);
	__free_page(dest);
	return err;
}

/* run the generic ioat self test, then the xor/validate self test */
static int ioat3_dma_self_test(struct ioatdma_device *device)
{
	int rc = ioat_dma_self_test(device);

	if (rc)
		return rc;

	rc = ioat_xor_val_self_test(device);
	if (rc)
		return rc;

	return 0;
}

/*
 * ioat3_irq_reinit - tear down and re-establish interrupts (BWD parts
 * only); leaves irq_mode at IOAT_NOIRQ before re-running setup.
 */
static int ioat3_irq_reinit(struct ioatdma_device *device)
{
	struct pci_dev *pdev = device->pdev;
	int irq = pdev->irq, i;

	if (!is_bwd_ioat(pdev))
		return 0;

	switch (device->irq_mode) {
	case IOAT_MSIX:
		for (i = 0; i < device->common.chancnt; i++) {
			struct msix_entry *msix = &device->msix_entries[i];
			struct ioat_chan_common *chan;

			chan = ioat_chan_by_index(device, i);
			devm_free_irq(&pdev->dev, msix->vector, chan);
		}

		pci_disable_msix(pdev);
		break;
	case IOAT_MSI:
		pci_disable_msi(pdev);
		/* fall through */
	case IOAT_INTX:
		devm_free_irq(&pdev->dev, irq, device);
		break;
	default:
		return 0;
	}
	device->irq_mode = IOAT_NOIRQ;

	return ioat_dma_setup_interrupts(device);
}

static int ioat3_reset_hw(struct ioat_chan_common *chan)
{
	/* throw away whatever the channel was doing and get it
	 * initialized, with ioat3 specific workarounds
	 */
	struct ioatdma_device *device = chan->device;
	struct pci_dev *pdev = device->pdev;
	u32 chanerr;
	u16 dev_id;
	int err;

	ioat2_quiesce(chan, msecs_to_jiffies(100));

	/* write-1-to-clear any latched channel errors */
	chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
	writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);

	if (device->version < IOAT_VER_3_3) {
		/* clear any pending errors */
		err = pci_read_config_dword(pdev,
				IOAT_PCI_CHANERR_INT_OFFSET, &chanerr);
		if (err) {
			dev_err(&pdev->dev,
				"channel error register unreachable\n");
			return err;
		}
		pci_write_config_dword(pdev,
				IOAT_PCI_CHANERR_INT_OFFSET, chanerr);

		/* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
		 * (workaround for spurious config parity error after restart)
		 */
		pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
		if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
			pci_write_config_dword(pdev,
					       IOAT_PCI_DMAUNCERRSTS_OFFSET,
					       0x10);
		}
	}

	err = ioat2_reset_sync(chan, msecs_to_jiffies(200));
	if (!err)
		err = ioat3_irq_reinit(device);

	if (err)
		dev_err(&pdev->dev, "Failed to reset: %d\n", err);

	return err;
}

/*
 * ioat3_intr_quirk - mask raid-related error interrupts on parts that
 * report errors through descriptor write back instead.
 */
static void ioat3_intr_quirk(struct ioatdma_device *device)
{
	struct dma_device *dma;
	struct dma_chan *c;
	struct ioat_chan_common *chan;
	u32 errmask;

	dma = &device->common;

	/*
	 * if we have descriptor write back error status, we mask the
	 * error interrupts
	 */
	if (device->cap & IOAT_CAP_DWBES) {
		list_for_each_entry(c, &dma->channels, device_node) {
			chan = to_chan_common(c);
			errmask = readl(chan->reg_base +
					IOAT_CHANERR_MASK_OFFSET);
			errmask |= IOAT_CHANERR_XOR_P_OR_CRC_ERR |
				   IOAT_CHANERR_XOR_Q_ERR;
			writel(errmask, chan->reg_base +
					IOAT_CHANERR_MASK_OFFSET);
		}
	}
}

/*
 * ioat3_dma_probe - wire up the v3-specific ops, advertise capabilities
 * based on the hardware CAP register, create SED pools for CB3.3 parts,
 * and register with the dmaengine core.  Returns 0 or -errno.
 */
int ioat3_dma_probe(struct ioatdma_device *device, int dca)
{
	struct pci_dev *pdev = device->pdev;
	int dca_en = system_has_dca_enabled(pdev);
	struct dma_device *dma;
	struct dma_chan *c;
	struct ioat_chan_common *chan;
	bool is_raid_device = false;
	int err;

	device->enumerate_channels = ioat2_enumerate_channels;
	device->reset_hw = ioat3_reset_hw;
	device->self_test = ioat3_dma_self_test;
	device->intr_quirk = ioat3_intr_quirk;
	dma = &device->common;
	dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
	dma->device_issue_pending = ioat2_issue_pending;
	dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
	dma->device_free_chan_resources = ioat2_free_chan_resources;

	dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
	dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;

	device->cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);

	/* some parts must not advertise raid ops despite the CAP bits */
	if (is_xeon_cb32(pdev) || is_bwd_noraid(pdev))
		device->cap &= ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS);

	/* dca is incompatible with raid operations */
	if (dca_en && (device->cap & (IOAT_CAP_XOR|IOAT_CAP_PQ)))
		device->cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ);

	if (device->cap & IOAT_CAP_XOR) {
		is_raid_device = true;
		dma->max_xor = 8;

		dma_cap_set(DMA_XOR, dma->cap_mask);
		dma->device_prep_dma_xor = ioat3_prep_xor;

		dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
		dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
	}

	if (device->cap & IOAT_CAP_PQ) {
		is_raid_device = true;

		dma->device_prep_dma_pq = ioat3_prep_pq;
		dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
		dma_cap_set(DMA_PQ, dma->cap_mask);
		dma_cap_set(DMA_PQ_VAL, dma->cap_mask);

		if (device->cap & IOAT_CAP_RAID16SS) {
			dma_set_maxpq(dma, 16, 0);
		} else {
			dma_set_maxpq(dma, 8, 0);
		}

		/* no native xor? emulate it on top of pq */
		if (!(device->cap & IOAT_CAP_XOR)) {
			dma->device_prep_dma_xor = ioat3_prep_pqxor;
			dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
			dma_cap_set(DMA_XOR, dma->cap_mask);
			dma_cap_set(DMA_XOR_VAL, dma->cap_mask);

			if (device->cap & IOAT_CAP_RAID16SS) {
				dma->max_xor = 16;
			} else {
				dma->max_xor = 8;
			}
		}
	}

	dma->device_tx_status = ioat3_tx_status;
	device->cleanup_fn = ioat3_cleanup_event;
	device->timer_fn = ioat3_timer_event;

	/* starting with CB3.3 super extended descriptors are supported */
	if (device->cap & IOAT_CAP_RAID16SS) {
		char pool_name[14];
		int i;

		for (i = 0; i < MAX_SED_POOLS; i++) {
			snprintf(pool_name, 14, "ioat_hw%d_sed", i);

			/* allocate SED DMA pool */
			device->sed_hw_pool[i] = dmam_pool_create(pool_name,
								  &pdev->dev,
								  SED_SIZE * (i + 1),
								  64, 0);
			if (!device->sed_hw_pool[i])
				return -ENOMEM;

		}
	}

	err = ioat_probe(device);
	if (err)
		return err;
	ioat_set_tcp_copy_break(262144);

	list_for_each_entry(c, &dma->channels, device_node) {
		chan = to_chan_common(c);
		writel(IOAT_DMA_DCA_ANY_CPU,
		       chan->reg_base + IOAT_DCACTRL_OFFSET);
	}

	err = ioat_register(device);
	if (err)
		return err;

	ioat_kobject_add(device, &ioat2_ktype);

	if (dca)
		device->dca = ioat3_dca_init(pdev, device->reg_base);

	return 0;
}