[SPARC64]: Add SG merging support back into IOMMU code.

This mimics the powerpc IOMMU code almost exactly, except that it
lacks the IOMMU_PAGE_SIZE != PAGE_SIZE handling and the device DMA
mask support bits.

I'll add those later as time permits, but this at least gets us back
to where we were before.

Signed-off-by: David S. Miller <davem@davemloft.net>

3 files changed: +294 -151

arch/sparc64/kernel/iommu.c (+157 -72)
Before:

···
 static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
                          int nelems, enum dma_data_direction direction)
 {
-        unsigned long flags, ctx, i, npages, iopte_protection;
-        struct scatterlist *sg;
         struct strbuf *strbuf;
         struct iommu *iommu;
-        iopte_t *base;
-        u32 dma_base;

-        /* Fast path single entry scatterlists. */
-        if (nelems == 1) {
-                sglist->dma_address =
-                        dma_4u_map_single(dev, sg_virt(sglist),
-                                          sglist->length, direction);
-                if (unlikely(sglist->dma_address == DMA_ERROR_CODE))
-                        return 0;
-                sglist->dma_length = sglist->length;
-                return 1;
-        }

         iommu = dev->archdata.iommu;
         strbuf = dev->archdata.stc;
-
-        if (unlikely(direction == DMA_NONE))
-                goto bad_no_ctx;
-
-        npages = calc_npages(sglist, nelems);

         spin_lock_irqsave(&iommu->lock, flags);

-        base = alloc_npages(dev, iommu, npages);
         ctx = 0;
         if (iommu->iommu_ctxflush)
                 ctx = iommu_alloc_ctx(iommu);

-        spin_unlock_irqrestore(&iommu->lock, flags);
-
-        if (base == NULL)
-                goto bad;
-
-        dma_base = iommu->page_table_map_base +
-                ((base - iommu->page_table) << IO_PAGE_SHIFT);
-
         if (strbuf->strbuf_enabled)
-                iopte_protection = IOPTE_STREAMING(ctx);
         else
-                iopte_protection = IOPTE_CONSISTENT(ctx);
         if (direction != DMA_TO_DEVICE)
-                iopte_protection |= IOPTE_WRITE;

-        for_each_sg(sglist, sg, nelems, i) {
-                unsigned long paddr = SG_ENT_PHYS_ADDRESS(sg);
-                unsigned long slen = sg->length;
-                unsigned long this_npages;

-                this_npages = iommu_num_pages(paddr, slen);

-                sg->dma_address = dma_base | (paddr & ~IO_PAGE_MASK);
-                sg->dma_length = slen;

                 paddr &= IO_PAGE_MASK;
-                while (this_npages--) {
-                        iopte_val(*base) = iopte_protection | paddr;
-
                         base++;
                         paddr += IO_PAGE_SIZE;
-                        dma_base += IO_PAGE_SIZE;
                 }
         }

-        return nelems;

-bad:
-        iommu_free_ctx(iommu, ctx);
-bad_no_ctx:
-        if (printk_ratelimit())
-                WARN_ON(1);
         return 0;
 }

 static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
                             int nelems, enum dma_data_direction direction)
 {
-        unsigned long flags, ctx, i, npages;
         struct strbuf *strbuf;
         struct iommu *iommu;
-        iopte_t *base;
-        u32 bus_addr;

-        if (unlikely(direction == DMA_NONE)) {
-                if (printk_ratelimit())
-                        WARN_ON(1);
-        }

         iommu = dev->archdata.iommu;
         strbuf = dev->archdata.stc;

-        bus_addr = sglist->dma_address & IO_PAGE_MASK;
-
-        npages = calc_npages(sglist, nelems);
-
-        base = iommu->page_table +
-                ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);

         spin_lock_irqsave(&iommu->lock, flags);

-        /* Record the context, if any. */
-        ctx = 0;
-        if (iommu->iommu_ctxflush)
-                ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;

-        /* Step 1: Kick data out of streaming buffers if necessary. */
-        if (strbuf->strbuf_enabled)
-                strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);

-        /* Step 2: Clear out the TSB entries. */
-        for (i = 0; i < npages; i++)
-                iopte_make_dummy(iommu, base + i);

-        iommu_range_free(iommu, bus_addr, npages);

         iommu_free_ctx(iommu, ctx);
After:

···
 static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
                          int nelems, enum dma_data_direction direction)
 {
+        struct scatterlist *s, *outs, *segstart;
+        unsigned long flags, handle, prot, ctx;
+        dma_addr_t dma_next = 0, dma_addr;
+        unsigned int max_seg_size;
+        int outcount, incount, i;
         struct strbuf *strbuf;
         struct iommu *iommu;

+        BUG_ON(direction == DMA_NONE);

         iommu = dev->archdata.iommu;
         strbuf = dev->archdata.stc;
+        if (nelems == 0 || !iommu)
+                return 0;

         spin_lock_irqsave(&iommu->lock, flags);

         ctx = 0;
         if (iommu->iommu_ctxflush)
                 ctx = iommu_alloc_ctx(iommu);

         if (strbuf->strbuf_enabled)
+                prot = IOPTE_STREAMING(ctx);
         else
+                prot = IOPTE_CONSISTENT(ctx);
         if (direction != DMA_TO_DEVICE)
+                prot |= IOPTE_WRITE;

+        outs = s = segstart = &sglist[0];
+        outcount = 1;
+        incount = nelems;
+        handle = 0;

+        /* Init first segment length for backout at failure */
+        outs->dma_length = 0;

+        max_seg_size = dma_get_max_seg_size(dev);
+        for_each_sg(sglist, s, nelems, i) {
+                unsigned long paddr, npages, entry, slen;
+                iopte_t *base;

+                slen = s->length;
+                /* Sanity check */
+                if (slen == 0) {
+                        dma_next = 0;
+                        continue;
+                }
+                /* Allocate iommu entries for that segment */
+                paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
+                npages = iommu_num_pages(paddr, slen);
+                entry = iommu_range_alloc(dev, iommu, npages, &handle);
+
+                /* Handle failure */
+                if (unlikely(entry == DMA_ERROR_CODE)) {
+                        if (printk_ratelimit())
+                                printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
+                                       " npages %lx\n", iommu, paddr, npages);
+                        goto iommu_map_failed;
+                }
+
+                base = iommu->page_table + entry;
+
+                /* Convert entry to a dma_addr_t */
+                dma_addr = iommu->page_table_map_base +
+                        (entry << IO_PAGE_SHIFT);
+                dma_addr |= (s->offset & ~IO_PAGE_MASK);
+
+                /* Insert into HW table */
                 paddr &= IO_PAGE_MASK;
+                while (npages--) {
+                        iopte_val(*base) = prot | paddr;
                         base++;
                         paddr += IO_PAGE_SIZE;
                 }
+
+                /* If we are in an open segment, try merging */
+                if (segstart != s) {
+                        /* We cannot merge if:
+                         * - allocated dma_addr isn't contiguous to previous allocation
+                         */
+                        if ((dma_addr != dma_next) ||
+                            (outs->dma_length + s->length > max_seg_size)) {
+                                /* Can't merge: create a new segment */
+                                segstart = s;
+                                outcount++;
+                                outs = sg_next(outs);
+                        } else {
+                                outs->dma_length += s->length;
+                        }
+                }
+
+                if (segstart == s) {
+                        /* This is a new segment, fill entries */
+                        outs->dma_address = dma_addr;
+                        outs->dma_length = slen;
+                }
+
+                /* Calculate next page pointer for contiguous check */
+                dma_next = dma_addr + slen;
         }

+        spin_unlock_irqrestore(&iommu->lock, flags);

+        if (outcount < incount) {
+                outs = sg_next(outs);
+                outs->dma_address = DMA_ERROR_CODE;
+                outs->dma_length = 0;
+        }
+
+        return outcount;
+
+iommu_map_failed:
+        for_each_sg(sglist, s, nelems, i) {
+                if (s->dma_length != 0) {
+                        unsigned long vaddr, npages, entry, i;
+                        iopte_t *base;
+
+                        vaddr = s->dma_address & IO_PAGE_MASK;
+                        npages = iommu_num_pages(s->dma_address, s->dma_length);
+                        iommu_range_free(iommu, vaddr, npages);
+
+                        entry = (vaddr - iommu->page_table_map_base)
+                                >> IO_PAGE_SHIFT;
+                        base = iommu->page_table + entry;
+
+                        for (i = 0; i < npages; i++)
+                                iopte_make_dummy(iommu, base + i);
+
+                        s->dma_address = DMA_ERROR_CODE;
+                        s->dma_length = 0;
+                }
+                if (s == outs)
+                        break;
+        }
+        spin_unlock_irqrestore(&iommu->lock, flags);
+
         return 0;
+}
+
+/* If contexts are being used, they are the same in all of the mappings
+ * we make for a particular SG.
+ */
+static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg)
+{
+        unsigned long ctx = 0;
+
+        if (iommu->iommu_ctxflush) {
+                iopte_t *base;
+                u32 bus_addr;
+
+                bus_addr = sg->dma_address & IO_PAGE_MASK;
+                base = iommu->page_table +
+                        ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+
+                ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
+        }
+        return ctx;
 }

 static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
                             int nelems, enum dma_data_direction direction)
 {
+        unsigned long flags, ctx;
+        struct scatterlist *sg;
         struct strbuf *strbuf;
         struct iommu *iommu;

+        BUG_ON(direction == DMA_NONE);

         iommu = dev->archdata.iommu;
         strbuf = dev->archdata.stc;

+        ctx = fetch_sg_ctx(iommu, sglist);

         spin_lock_irqsave(&iommu->lock, flags);

+        sg = sglist;
+        while (nelems--) {
+                dma_addr_t dma_handle = sg->dma_address;
+                unsigned int len = sg->dma_length;
+                unsigned long npages, entry;
+                iopte_t *base;
+                int i;

+                if (!len)
+                        break;
+                npages = iommu_num_pages(dma_handle, len);
+                iommu_range_free(iommu, dma_handle, npages);

+                entry = ((dma_handle - iommu->page_table_map_base)
+                         >> IO_PAGE_SHIFT);
+                base = iommu->page_table + entry;

+                dma_handle &= IO_PAGE_MASK;
+                if (strbuf->strbuf_enabled)
+                        strbuf_flush(strbuf, iommu, dma_handle, ctx,
+                                     npages, direction);
+
+                for (i = 0; i < npages; i++)
+                        iopte_make_dummy(iommu, base + i);
+
+                sg = sg_next(sg);
+        }

         iommu_free_ctx(iommu, ctx);
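A note on the merge rule introduced in dma_4u_map_sg() above: a scatterlist entry is folded into the previous output segment only when its newly allocated DMA address begins exactly where the previous segment ends (dma_addr == dma_next) and the combined length stays within dma_get_max_seg_size(dev). The standalone sketch below demonstrates just that decision logic; the ex_* names, the fixed maximum segment size, and the sample addresses are invented for illustration and are not part of the kernel code.

/* Standalone sketch of the segment-merging rule used in dma_4u_map_sg().
 * All names and values here are illustrative, not the kernel's.
 */
#include <stdio.h>

#define EX_MAX_SEG_SIZE 65536UL   /* stand-in for dma_get_max_seg_size(dev) */

struct ex_seg {
        unsigned long dma_address;
        unsigned long dma_length;
};

/* Merge 'in' segments into 'out'; returns the number of output segments.
 * A segment merges into the previous one only if it is DMA-contiguous
 * with it and the combined length stays within the max segment size.
 */
static int ex_merge(const struct ex_seg *in, int n, struct ex_seg *out)
{
        unsigned long dma_next = 0;
        int outcount = 0;

        for (int i = 0; i < n; i++) {
                if (outcount &&
                    in[i].dma_address == dma_next &&
                    out[outcount - 1].dma_length + in[i].dma_length <= EX_MAX_SEG_SIZE) {
                        out[outcount - 1].dma_length += in[i].dma_length;   /* extend open segment */
                } else {
                        out[outcount++] = in[i];                            /* start a new segment */
                }
                dma_next = in[i].dma_address + in[i].dma_length;
        }
        return outcount;
}

int main(void)
{
        /* Three 8K segments: the first two are contiguous, the third is not. */
        struct ex_seg in[] = {
                { 0x10000, 0x2000 },
                { 0x12000, 0x2000 },
                { 0x20000, 0x2000 },
        };
        struct ex_seg out[3];
        int n = ex_merge(in, 3, out);

        for (int i = 0; i < n; i++)
                printf("seg %d: addr=%#lx len=%#lx\n",
                       i, out[i].dma_address, out[i].dma_length);
        return 0;
}

Compiled on its own, the sketch collapses the first two (contiguous) 8K segments into one 16K segment and leaves the third separate, which is the same coalescing the IOMMU path above now performs on real scatterlists.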
arch/sparc64/kernel/pci_sun4v.c (+136 -78)
Before:

···
         return 0;
 }

 /* Interrupts must be disabled.  */
 static inline long iommu_batch_add(u64 phys_page)
 {
···
 static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
                          int nelems, enum dma_data_direction direction)
 {
-        unsigned long flags, npages, i, prot;
-        u32 dma_base, orig_dma_base;
-        struct scatterlist *sg;
         struct iommu *iommu;
-        long entry, err;

-        /* Fast path single entry scatterlists. */
-        if (nelems == 1) {
-                sglist->dma_address =
-                        dma_4v_map_single(dev, sg_virt(sglist),
-                                          sglist->length, direction);
-                if (unlikely(sglist->dma_address == DMA_ERROR_CODE))
-                        return 0;
-                sglist->dma_length = sglist->length;
-                return 1;
-        }

         iommu = dev->archdata.iommu;

-        if (unlikely(direction == DMA_NONE))
-                goto bad;
-
-        npages = calc_npages(sglist, nelems);
-
-        spin_lock_irqsave(&iommu->lock, flags);
-        entry = iommu_range_alloc(dev, iommu, npages, NULL);
-        spin_unlock_irqrestore(&iommu->lock, flags);
-
-        if (unlikely(entry == DMA_ERROR_CODE))
-                goto bad;
-
-        orig_dma_base = dma_base = iommu->page_table_map_base +
-                (entry << IO_PAGE_SHIFT);
-
         prot = HV_PCI_MAP_ATTR_READ;
         if (direction != DMA_TO_DEVICE)
                 prot |= HV_PCI_MAP_ATTR_WRITE;

-        local_irq_save(flags);

-        iommu_batch_start(dev, prot, entry);

-        for_each_sg(sglist, sg, nelems, i) {
-                unsigned long paddr = SG_ENT_PHYS_ADDRESS(sg);
-                unsigned long slen = sg->length;
-                unsigned long this_npages;

-                this_npages = iommu_num_pages(paddr, slen);

-                sg->dma_address = dma_base | (paddr & ~IO_PAGE_MASK);
-                sg->dma_length = slen;

-                paddr &= IO_PAGE_MASK;
-                while (this_npages--) {
-                        err = iommu_batch_add(paddr);
-                        if (unlikely(err < 0L)) {
-                                local_irq_restore(flags);
-                                goto iommu_map_failed;
-                        }
-
-                        paddr += IO_PAGE_SIZE;
-                        dma_base += IO_PAGE_SIZE;
                 }
         }

         err = iommu_batch_end();

-        local_irq_restore(flags);
-
         if (unlikely(err < 0L))
                 goto iommu_map_failed;

-        return nelems;

-bad:
-        if (printk_ratelimit())
-                WARN_ON(1);
-        return 0;

 iommu_map_failed:
-        spin_lock_irqsave(&iommu->lock, flags);
-        iommu_range_free(iommu, orig_dma_base, npages);
         spin_unlock_irqrestore(&iommu->lock, flags);

         return 0;
···
 static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
                             int nelems, enum dma_data_direction direction)
 {
-        unsigned long flags, npages;
         struct pci_pbm_info *pbm;
-        u32 devhandle, bus_addr;
         struct iommu *iommu;
-        long entry;

-        if (unlikely(direction == DMA_NONE)) {
-                if (printk_ratelimit())
-                        WARN_ON(1);
-        }

         iommu = dev->archdata.iommu;
         pbm = dev->archdata.host_controller;
         devhandle = pbm->devhandle;

-        bus_addr = sglist->dma_address & IO_PAGE_MASK;
-
-        npages = calc_npages(sglist, nelems);
-
-        entry = ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
-
         spin_lock_irqsave(&iommu->lock, flags);

-        iommu_range_free(iommu, bus_addr, npages);

-        do {
-                unsigned long num;

-                num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
-                                            npages);
-                entry += num;
-                npages -= num;
-        } while (npages != 0);

         spin_unlock_irqrestore(&iommu->lock, flags);
 }
After:

···
         return 0;
 }

+static inline void iommu_batch_new_entry(unsigned long entry)
+{
+        struct iommu_batch *p = &__get_cpu_var(iommu_batch);
+
+        if (p->entry + p->npages == entry)
+                return;
+        if (p->entry != ~0UL)
+                iommu_batch_flush(p);
+        p->entry = entry;
+}
+
 /* Interrupts must be disabled.  */
 static inline long iommu_batch_add(u64 phys_page)
 {
···
 static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
                          int nelems, enum dma_data_direction direction)
 {
+        struct scatterlist *s, *outs, *segstart;
+        unsigned long flags, handle, prot;
+        dma_addr_t dma_next = 0, dma_addr;
+        unsigned int max_seg_size;
+        int outcount, incount, i;
         struct iommu *iommu;
+        long err;

+        BUG_ON(direction == DMA_NONE);

         iommu = dev->archdata.iommu;
+        if (nelems == 0 || !iommu)
+                return 0;

         prot = HV_PCI_MAP_ATTR_READ;
         if (direction != DMA_TO_DEVICE)
                 prot |= HV_PCI_MAP_ATTR_WRITE;

+        outs = s = segstart = &sglist[0];
+        outcount = 1;
+        incount = nelems;
+        handle = 0;

+        /* Init first segment length for backout at failure */
+        outs->dma_length = 0;

+        spin_lock_irqsave(&iommu->lock, flags);

+        iommu_batch_start(dev, prot, ~0UL);

+        max_seg_size = dma_get_max_seg_size(dev);
+        for_each_sg(sglist, s, nelems, i) {
+                unsigned long paddr, npages, entry, slen;

+                slen = s->length;
+                /* Sanity check */
+                if (slen == 0) {
+                        dma_next = 0;
+                        continue;
                 }
+                /* Allocate iommu entries for that segment */
+                paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
+                npages = iommu_num_pages(paddr, slen);
+                entry = iommu_range_alloc(dev, iommu, npages, &handle);
+
+                /* Handle failure */
+                if (unlikely(entry == DMA_ERROR_CODE)) {
+                        if (printk_ratelimit())
+                                printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
+                                       " npages %lx\n", iommu, paddr, npages);
+                        goto iommu_map_failed;
+                }
+
+                iommu_batch_new_entry(entry);
+
+                /* Convert entry to a dma_addr_t */
+                dma_addr = iommu->page_table_map_base +
+                        (entry << IO_PAGE_SHIFT);
+                dma_addr |= (s->offset & ~IO_PAGE_MASK);
+
+                /* Insert into HW table */
+                paddr &= IO_PAGE_MASK;
+                while (npages--) {
+                        err = iommu_batch_add(paddr);
+                        if (unlikely(err < 0L))
+                                goto iommu_map_failed;
+                        paddr += IO_PAGE_SIZE;
+                }
+
+                /* If we are in an open segment, try merging */
+                if (segstart != s) {
+                        /* We cannot merge if:
+                         * - allocated dma_addr isn't contiguous to previous allocation
+                         */
+                        if ((dma_addr != dma_next) ||
+                            (outs->dma_length + s->length > max_seg_size)) {
+                                /* Can't merge: create a new segment */
+                                segstart = s;
+                                outcount++;
+                                outs = sg_next(outs);
+                        } else {
+                                outs->dma_length += s->length;
+                        }
+                }
+
+                if (segstart == s) {
+                        /* This is a new segment, fill entries */
+                        outs->dma_address = dma_addr;
+                        outs->dma_length = slen;
+                }
+
+                /* Calculate next page pointer for contiguous check */
+                dma_next = dma_addr + slen;
         }

         err = iommu_batch_end();

         if (unlikely(err < 0L))
                 goto iommu_map_failed;

+        spin_unlock_irqrestore(&iommu->lock, flags);

+        if (outcount < incount) {
+                outs = sg_next(outs);
+                outs->dma_address = DMA_ERROR_CODE;
+                outs->dma_length = 0;
+        }
+
+        return outcount;

 iommu_map_failed:
+        for_each_sg(sglist, s, nelems, i) {
+                if (s->dma_length != 0) {
+                        unsigned long vaddr, npages;
+
+                        vaddr = s->dma_address & IO_PAGE_MASK;
+                        npages = iommu_num_pages(s->dma_address, s->dma_length);
+                        iommu_range_free(iommu, vaddr, npages);
+                        /* XXX demap? XXX */
+                        s->dma_address = DMA_ERROR_CODE;
+                        s->dma_length = 0;
+                }
+                if (s == outs)
+                        break;
+        }
         spin_unlock_irqrestore(&iommu->lock, flags);

         return 0;
···
 static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
                             int nelems, enum dma_data_direction direction)
 {
         struct pci_pbm_info *pbm;
+        struct scatterlist *sg;
         struct iommu *iommu;
+        unsigned long flags;
+        u32 devhandle;

+        BUG_ON(direction == DMA_NONE);

         iommu = dev->archdata.iommu;
         pbm = dev->archdata.host_controller;
         devhandle = pbm->devhandle;

         spin_lock_irqsave(&iommu->lock, flags);

+        sg = sglist;
+        while (nelems--) {
+                dma_addr_t dma_handle = sg->dma_address;
+                unsigned int len = sg->dma_length;
+                unsigned long npages, entry;

+                if (!len)
+                        break;
+                npages = iommu_num_pages(dma_handle, len);
+                iommu_range_free(iommu, dma_handle, npages);

+                entry = ((dma_handle - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+                while (npages) {
+                        unsigned long num;
+
+                        num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
+                                                    npages);
+                        entry += num;
+                        npages -= num;
+                }
+
+                sg = sg_next(sg);
+        }

         spin_unlock_irqrestore(&iommu->lock, flags);
 }
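Because dma_4u_map_sg() and dma_4v_map_sg() can now return fewer DMA segments than the nelems passed in, drivers must program the device from the returned count and ignore the remaining entries (the code above marks the first unused entry with DMA_ERROR_CODE and a zero length). The sketch below is a hedged driver-side illustration of that contract using only the generic DMA API; the function name, error handling, and the device-programming step are made up for the example and are not part of this patch.

/* Illustrative consumer of a merged scatterlist.  'dev', 'sgl' and
 * 'nents' are assumed to have been set up elsewhere by the driver.
 */
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <linux/errno.h>

static int example_map_and_program(struct device *dev,
                                   struct scatterlist *sgl, int nents)
{
        struct scatterlist *sg;
        int count, i;

        count = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);
        if (count == 0)
                return -ENOMEM;

        /* After SG merging, 'count' may be smaller than 'nents';
         * only the first 'count' entries carry valid DMA info.
         */
        for_each_sg(sgl, sg, count, i) {
                dma_addr_t addr = sg_dma_address(sg);
                unsigned int len = sg_dma_len(sg);

                /* program 'addr'/'len' into the (hypothetical) device here */
                (void)addr;
                (void)len;
        }

        dma_unmap_sg(dev, sgl, nents, DMA_TO_DEVICE);
        return 0;
}

Note that dma_unmap_sg() is still called with the original nents, not the merged count; that is the documented DMA API contract.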
include/asm-sparc64/io.h (+1 -1)
Before:

···
 /* BIO layer definitions. */
 extern unsigned long kern_base, kern_size;
 #define page_to_phys(page)      (page_to_pfn(page) << PAGE_SHIFT)
-#define BIO_VMERGE_BOUNDARY     0

 static inline u8 _inb(unsigned long addr)
 {
After:

···
 /* BIO layer definitions. */
 extern unsigned long kern_base, kern_size;
 #define page_to_phys(page)      (page_to_pfn(page) << PAGE_SHIFT)
+#define BIO_VMERGE_BOUNDARY     8192

 static inline u8 _inb(unsigned long addr)
 {
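Raising BIO_VMERGE_BOUNDARY from 0 back to 8192 tells the block layer that adjacent bio vectors meeting at an 8K boundary (the SPARC64 IOMMU page size) may again be counted as virtually mergeable, since the IOMMU SG-merging code above will actually coalesce them; a value of 0 had effectively disabled that accounting while merging support was absent. The toy, self-contained check below illustrates such a boundary test; the ex_* helper and the sample addresses are invented for this example and are not the block layer's actual macro.

/* Toy illustration of a virtual-merge boundary check: two buffers are
 * assumed IOMMU-mergeable only if the first ends, and the second starts,
 * on the merge boundary.  Names and values are illustrative only.
 */
#include <stdio.h>

#define EX_VMERGE_BOUNDARY 8192UL

static int ex_virt_mergeable(unsigned long phys1, unsigned long len1,
                             unsigned long phys2)
{
        return (((phys1 + len1) | phys2) & (EX_VMERGE_BOUNDARY - 1)) == 0;
}

int main(void)
{
        printf("%d\n", ex_virt_mergeable(0x10000, 0x2000, 0x40000)); /* 1: both on 8K boundary */
        printf("%d\n", ex_virt_mergeable(0x10000, 0x1000, 0x40000)); /* 0: first ends mid-page  */
        return 0;
}

The first pair ends and starts on an 8K boundary, so it reports mergeable; the second does not.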