[SPARC64]: Fix SBUS IOMMU allocation code.

There are several IOMMU allocator bugs. Instead of trying to fix this
overly complicated code, just mirror the PCI IOMMU arena allocator
which is very stable and well stress-tested.
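
The arena allocator being copied here (sbus_arena_alloc() in the patch below) is essentially a bitmap of IOMMU pages plus a search hint: an allocation scans forward from the hint for a run of free pages, and when the scan hits the end of the map it does a full IOMMU flush and wraps around once before giving up. A minimal, self-contained sketch of that idea follows; it is only an illustration in plain C (a byte per page instead of the kernel's bitmap helpers), and the demo_* names are not part of the patch.

struct demo_arena {
    unsigned char *map;    /* 0 = IOMMU page free, 1 = in use */
    unsigned long hint;    /* where the next search starts */
    unsigned long limit;   /* total number of IOMMU pages */
};

/* Find and claim a run of 'npages' free pages; returns -1 if none exists. */
static long demo_arena_alloc(struct demo_arena *arena, unsigned long npages)
{
    unsigned long start = arena->hint;
    int pass = 0;

    for (;;) {
        unsigned long i, run = 0;

        for (i = start; i < arena->limit && run < npages; i++)
            run = arena->map[i] ? 0 : run + 1;

        if (run == npages) {
            unsigned long base = i - npages;

            for (i = base; i < base + npages; i++)
                arena->map[i] = 1;
            arena->hint = base + npages;
            return (long) base;
        }

        /* The real allocator flushes the IOMMU here, then wraps once. */
        if (pass++)
            return -1;
        start = 0;
    }
}

static void demo_arena_free(struct demo_arena *arena,
                            unsigned long base, unsigned long npages)
{
    while (npages--)
        arena->map[base + npages] = 0;
}

Because the hint only moves forward, entries are normally not reused until the search wraps, and the wrap is exactly where the full IOMMU flush happens, which is what keeps stale translations from being handed out again.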

I tried to make the code as identical as possible so we can switch
sun4u PCI and SBUS over to a common piece of IOMMU code. All that
will be needed are two callbacks, one to do a full IOMMU flush and one
to do a streaming buffer flush.
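
For illustration only, the bus-specific half of such a shared allocator could shrink to something like the small ops structure below; no such structure exists in the tree yet, so the name and signatures are purely hypothetical.

/* Hypothetical callback interface for a shared sun4u IOMMU allocator. */
struct iommu_flush_ops {
    /* Throw away every translation the IOMMU currently holds. */
    void (*flush_all)(void *iommu);

    /* Flush 'npages' streaming buffer entries starting at DVMA 'base'. */
    void (*strbuf_flush)(void *iommu, u32 base,
                         unsigned long npages, int direction);
};

PCI and SBUS would each supply their own pair of implementations and share everything else.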

This patch gets rid of a lot of hangs and mysterious crashes on SBUS
sparc64 systems, at least for me.

Signed-off-by: David S. Miller <davem@davemloft.net>

+248 -344
arch/sparc64/kernel/sbus.c
···

  #include "iommu_common.h"

- /* These should be allocated on an SMP_CACHE_BYTES
-  * aligned boundary for optimal performance.
-  *
-  * On SYSIO, using an 8K page size we have 1GB of SBUS
-  * DMA space mapped. We divide this space into equally
-  * sized clusters. We allocate a DMA mapping from the
-  * cluster that matches the order of the allocation, or
-  * if the order is greater than the number of clusters,
-  * we try to allocate from the last cluster.
-  */
-
- #define NCLUSTERS      8UL
- #define ONE_GIG        (1UL * 1024UL * 1024UL * 1024UL)
- #define CLUSTER_SIZE   (ONE_GIG / NCLUSTERS)
- #define CLUSTER_MASK   (CLUSTER_SIZE - 1)
- #define CLUSTER_NPAGES (CLUSTER_SIZE >> IO_PAGE_SHIFT)
  #define MAP_BASE       ((u32)0xc0000000)

  struct sbus_iommu {
- /*0x00*/spinlock_t lock;

- /*0x08*/iopte_t *page_table;
- /*0x10*/unsigned long strbuf_regs;
- /*0x18*/unsigned long iommu_regs;
- /*0x20*/unsigned long sbus_control_reg;

- /*0x28*/volatile unsigned long strbuf_flushflag;

-     /* If NCLUSTERS is ever decresed to 4 or lower,
-      * you must increase the size of the type of
-      * these counters. You have been duly warned. -DaveM
-      */
- /*0x30*/struct {
-         u16 next;
-         u16 flush;
-     } alloc_info[NCLUSTERS];
-
-     /* The lowest used consistent mapping entry. Since
-      * we allocate consistent maps out of cluster 0 this
-      * is relative to the beginning of closter 0.
-      */
- /*0x50*/u32 lowest_consistent_map;
  };

  /* Offsets from iommu_regs */
···
          upa_writeq(0, tag);
          tag += 8UL;
      }
-     upa_readq(iommu->sbus_control_reg);
-
-     for (entry = 0; entry < NCLUSTERS; entry++) {
-         iommu->alloc_info[entry].flush =
-             iommu->alloc_info[entry].next;
-     }
- }
-
- static void iommu_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages)
- {
-     while (npages--)
-         upa_writeq(base + (npages << IO_PAGE_SHIFT),
-                    iommu->iommu_regs + IOMMU_FLUSH);
      upa_readq(iommu->sbus_control_reg);
  }
···
                        base, npages);
  }

- static iopte_t *alloc_streaming_cluster(struct sbus_iommu *iommu, unsigned long npages)
  {
-     iopte_t *iopte, *limit, *first, *cluster;
-     unsigned long cnum, ent, nent, flush_point, found;

-     cnum = 0;
-     nent = 1;
-     while ((1UL << cnum) < npages)
-         cnum++;
-     if(cnum >= NCLUSTERS) {
-         nent = 1UL << (cnum - NCLUSTERS);
-         cnum = NCLUSTERS - 1;
-     }
-     iopte = iommu->page_table + (cnum * CLUSTER_NPAGES);

-     if (cnum == 0)
-         limit = (iommu->page_table +
-                  iommu->lowest_consistent_map);
-     else
-         limit = (iopte + CLUSTER_NPAGES);
-
-     iopte += ((ent = iommu->alloc_info[cnum].next) << cnum);
-     flush_point = iommu->alloc_info[cnum].flush;
-
-     first = iopte;
-     cluster = NULL;
-     found = 0;
-     for (;;) {
-         if (iopte_val(*iopte) == 0UL) {
-             found++;
-             if (!cluster)
-                 cluster = iopte;
-         } else {
-             /* Used cluster in the way */
-             cluster = NULL;
-             found = 0;
-         }
-
-         if (found == nent)
-             break;
-
-         iopte += (1 << cnum);
-         ent++;
-         if (iopte >= limit) {
-             iopte = (iommu->page_table + (cnum * CLUSTER_NPAGES));
-             ent = 0;
-
-             /* Multiple cluster allocations must not wrap */
-             cluster = NULL;
-             found = 0;
-         }
-         if (ent == flush_point)
              __iommu_flushall(iommu);
-         if (iopte == first)
-             goto bad;
-     }
-
-     /* ent/iopte points to the last cluster entry we're going to use,
-      * so save our place for the next allocation.
-      */
-     if ((iopte + (1 << cnum)) >= limit)
-         ent = 0;
-     else
-         ent = ent + 1;
-     iommu->alloc_info[cnum].next = ent;
-     if (ent == flush_point)
-         __iommu_flushall(iommu);
-
-     /* I've got your streaming cluster right here buddy boy... */
-     return cluster;
-
- bad:
-     printk(KERN_EMERG "sbus: alloc_streaming_cluster of npages(%ld) failed!\n",
-            npages);
-     return NULL;
- }
-
- static void free_streaming_cluster(struct sbus_iommu *iommu, u32 base, unsigned long npages)
- {
-     unsigned long cnum, ent, nent;
-     iopte_t *iopte;
-
-     cnum = 0;
-     nent = 1;
-     while ((1UL << cnum) < npages)
-         cnum++;
-     if(cnum >= NCLUSTERS) {
-         nent = 1UL << (cnum - NCLUSTERS);
-         cnum = NCLUSTERS - 1;
-     }
-     ent = (base & CLUSTER_MASK) >> (IO_PAGE_SHIFT + cnum);
-     iopte = iommu->page_table + ((base - MAP_BASE) >> IO_PAGE_SHIFT);
-     do {
-         iopte_val(*iopte) = 0UL;
-         iopte += 1 << cnum;
-     } while(--nent);
-
-     /* If the global flush might not have caught this entry,
-      * adjust the flush point such that we will flush before
-      * ever trying to reuse it.
-      */
- #define between(X,Y,Z) (((Z) - (Y)) >= ((X) - (Y)))
-     if (between(ent, iommu->alloc_info[cnum].next, iommu->alloc_info[cnum].flush))
-         iommu->alloc_info[cnum].flush = ent;
- #undef between
- }
-
- /* We allocate consistent mappings from the end of cluster zero. */
- static iopte_t *alloc_consistent_cluster(struct sbus_iommu *iommu, unsigned long npages)
- {
-     iopte_t *iopte;
-
-     iopte = iommu->page_table + (1 * CLUSTER_NPAGES);
-     while (iopte > iommu->page_table) {
-         iopte--;
-         if (!(iopte_val(*iopte) & IOPTE_VALID)) {
-             unsigned long tmp = npages;
-
-             while (--tmp) {
-                 iopte--;
-                 if (iopte_val(*iopte) & IOPTE_VALID)
-                     break;
-             }
-             if (tmp == 0) {
-                 u32 entry = (iopte - iommu->page_table);
-
-                 if (entry < iommu->lowest_consistent_map)
-                     iommu->lowest_consistent_map = entry;
-                 return iopte;
-             }
          }
      }
-     return NULL;
- }

- static void free_consistent_cluster(struct sbus_iommu *iommu, u32 base, unsigned long npages)
- {
-     iopte_t *iopte = iommu->page_table + ((base - MAP_BASE) >> IO_PAGE_SHIFT);
-
-     if ((iopte - iommu->page_table) == iommu->lowest_consistent_map) {
-         iopte_t *walk = iopte + npages;
-         iopte_t *limit;
-
-         limit = iommu->page_table + CLUSTER_NPAGES;
-         while (walk < limit) {
-             if (iopte_val(*walk) != 0UL)
-                 break;
-             walk++;
          }
-         iommu->lowest_consistent_map =
-             (walk - iommu->page_table);
      }

-     while (npages--)
-         *iopte++ = __iopte(0UL);
  }

  void *sbus_alloc_consistent(struct sbus_dev *sdev, size_t size, dma_addr_t *dvma_addr)
  {
-     unsigned long order, first_page, flags;
      struct sbus_iommu *iommu;
      iopte_t *iopte;
      void *ret;
      int npages;
-
-     if (size <= 0 || sdev == NULL || dvma_addr == NULL)
-         return NULL;

      size = IO_PAGE_ALIGN(size);
      order = get_order(size);
      if (order >= 10)
          return NULL;
      first_page = __get_free_pages(GFP_KERNEL|__GFP_COMP, order);
      if (first_page == 0UL)
          return NULL;
···
      iommu = sdev->bus->iommu;

      spin_lock_irqsave(&iommu->lock, flags);
-     iopte = alloc_consistent_cluster(iommu, size >> IO_PAGE_SHIFT);
-     if (iopte == NULL) {
-         spin_unlock_irqrestore(&iommu->lock, flags);
          free_pages(first_page, order);
          return NULL;
      }

-     /* Ok, we're committed at this point. */
-     *dvma_addr = MAP_BASE + ((iopte - iommu->page_table) << IO_PAGE_SHIFT);
      ret = (void *) first_page;
      npages = size >> IO_PAGE_SHIFT;
      while (npages--) {
-         *iopte++ = __iopte(IOPTE_VALID | IOPTE_CACHE | IOPTE_WRITE |
-                            (__pa(first_page) & IOPTE_PAGE));
          first_page += IO_PAGE_SIZE;
      }
-     iommu_flush(iommu, *dvma_addr, size >> IO_PAGE_SHIFT);
-     spin_unlock_irqrestore(&iommu->lock, flags);

      return ret;
  }

  void sbus_free_consistent(struct sbus_dev *sdev, size_t size, void *cpu, dma_addr_t dvma)
  {
-     unsigned long order, npages;
      struct sbus_iommu *iommu;
-
-     if (size <= 0 || sdev == NULL || cpu == NULL)
-         return;

      npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
      iommu = sdev->bus->iommu;

-     spin_lock_irq(&iommu->lock);
-     free_consistent_cluster(iommu, dvma, npages);
-     iommu_flush(iommu, dvma, npages);
-     spin_unlock_irq(&iommu->lock);

      order = get_order(size);
      if (order < 10)
          free_pages((unsigned long)cpu, order);
  }

- dma_addr_t sbus_map_single(struct sbus_dev *sdev, void *ptr, size_t size, int dir)
  {
-     struct sbus_iommu *iommu = sdev->bus->iommu;
-     unsigned long npages, pbase, flags;
-     iopte_t *iopte;
-     u32 dma_base, offset;
-     unsigned long iopte_bits;

-     if (dir == SBUS_DMA_NONE)
          BUG();

-     pbase = (unsigned long) ptr;
-     offset = (u32) (pbase & ~IO_PAGE_MASK);
-     size = (IO_PAGE_ALIGN(pbase + size) - (pbase & IO_PAGE_MASK));
-     pbase = (unsigned long) __pa(pbase & IO_PAGE_MASK);

      spin_lock_irqsave(&iommu->lock, flags);
-     npages = size >> IO_PAGE_SHIFT;
-     iopte = alloc_streaming_cluster(iommu, npages);
-     if (iopte == NULL)
-         goto bad;
-     dma_base = MAP_BASE + ((iopte - iommu->page_table) << IO_PAGE_SHIFT);
-     npages = size >> IO_PAGE_SHIFT;
-     iopte_bits = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE;
-     if (dir != SBUS_DMA_TODEVICE)
-         iopte_bits |= IOPTE_WRITE;
-     while (npages--) {
-         *iopte++ = __iopte(iopte_bits | (pbase & IOPTE_PAGE));
-         pbase += IO_PAGE_SIZE;
-     }
-     npages = size >> IO_PAGE_SHIFT;
      spin_unlock_irqrestore(&iommu->lock, flags);

-     return (dma_base | offset);

- bad:
-     spin_unlock_irqrestore(&iommu->lock, flags);
-     BUG();
-     return 0;
  }

- void sbus_unmap_single(struct sbus_dev *sdev, dma_addr_t dma_addr, size_t size, int direction)
  {
      struct sbus_iommu *iommu = sdev->bus->iommu;
-     u32 dma_base = dma_addr & IO_PAGE_MASK;
-     unsigned long flags;

-     size = (IO_PAGE_ALIGN(dma_addr + size) - dma_base);

      spin_lock_irqsave(&iommu->lock, flags);
-     free_streaming_cluster(iommu, dma_base, size >> IO_PAGE_SHIFT);
-     sbus_strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT, direction);
      spin_unlock_irqrestore(&iommu->lock, flags);
  }

  #define SG_ENT_PHYS_ADDRESS(SG) \
      (__pa(page_address((SG)->page)) + (SG)->offset)

- static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, int nelems, unsigned long iopte_bits)
  {
      struct scatterlist *dma_sg = sg;
      struct scatterlist *sg_end = sg + nelems;
···
      for (;;) {
          unsigned long tmp;

-         tmp = (unsigned long) SG_ENT_PHYS_ADDRESS(sg);
          len = sg->length;
          if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) {
              pteval = tmp & IO_PAGE_MASK;
···
          sg++;
      }

-     pteval = ((pteval & IOPTE_PAGE) | iopte_bits);
      while (len > 0) {
          *iopte++ = __iopte(pteval);
          pteval += IO_PAGE_SIZE;
···
      }
  }

- int sbus_map_sg(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int dir)
  {
-     struct sbus_iommu *iommu = sdev->bus->iommu;
-     unsigned long flags, npages;
-     iopte_t *iopte;
      u32 dma_base;
      struct scatterlist *sgtmp;
      int used;
-     unsigned long iopte_bits;
-
-     if (dir == SBUS_DMA_NONE)
-         BUG();

      /* Fast path single entry scatterlists. */
-     if (nents == 1) {
-         sg->dma_address =
              sbus_map_single(sdev,
-                             (page_address(sg->page) + sg->offset),
-                             sg->length, dir);
-         sg->dma_length = sg->length;
          return 1;
      }

-     npages = prepare_sg(sg, nents);

      spin_lock_irqsave(&iommu->lock, flags);
-     iopte = alloc_streaming_cluster(iommu, npages);
-     if (iopte == NULL)
-         goto bad;
-     dma_base = MAP_BASE + ((iopte - iommu->page_table) << IO_PAGE_SHIFT);

      /* Normalize DVMA addresses. */
-     sgtmp = sg;
-     used = nents;

      while (used && sgtmp->dma_length) {
          sgtmp->dma_address += dma_base;
          sgtmp++;
          used--;
      }
-     used = nents - used;

-     iopte_bits = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE;
-     if (dir != SBUS_DMA_TODEVICE)
-         iopte_bits |= IOPTE_WRITE;

-     fill_sg(iopte, sg, used, nents, iopte_bits);
  #ifdef VERIFY_SG
-     verify_sglist(sg, nents, iopte, npages);
  #endif
-     spin_unlock_irqrestore(&iommu->lock, flags);

      return used;
-
- bad:
-     spin_unlock_irqrestore(&iommu->lock, flags);
-     BUG();
-     return 0;
  }

- void sbus_unmap_sg(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int direction)
  {
-     unsigned long size, flags;
      struct sbus_iommu *iommu;
-     u32 dvma_base;
-     int i;

-     /* Fast path single entry scatterlists. */
-     if (nents == 1) {
-         sbus_unmap_single(sdev, sg->dma_address, sg->dma_length, direction);
-         return;
-     }
-
-     dvma_base = sg[0].dma_address & IO_PAGE_MASK;
-     for (i = 0; i < nents; i++) {
-         if (sg[i].dma_length == 0)
-             break;
-     }
-     i--;
-     size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - dvma_base;

      iommu = sdev->bus->iommu;
      spin_lock_irqsave(&iommu->lock, flags);
-     free_streaming_cluster(iommu, dvma_base, size >> IO_PAGE_SHIFT);
-     sbus_strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT, direction);
      spin_unlock_irqrestore(&iommu->lock, flags);
  }

- void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t base, size_t size, int direction)
  {
-     struct sbus_iommu *iommu = sdev->bus->iommu;
-     unsigned long flags;

-     size = (IO_PAGE_ALIGN(base + size) - (base & IO_PAGE_MASK));

      spin_lock_irqsave(&iommu->lock, flags);
-     sbus_strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT, direction);
      spin_unlock_irqrestore(&iommu->lock, flags);
  }
···
  {
  }

- void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int direction)
  {
-     struct sbus_iommu *iommu = sdev->bus->iommu;
-     unsigned long flags, size;
-     u32 base;
-     int i;

-     base = sg[0].dma_address & IO_PAGE_MASK;
-     for (i = 0; i < nents; i++) {
-         if (sg[i].dma_length == 0)
              break;
      }
      i--;
-     size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - base;

      spin_lock_irqsave(&iommu->lock, flags);
-     sbus_strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT, direction);
      spin_unlock_irqrestore(&iommu->lock, flags);
  }
···
      struct linux_prom64_registers *pr;
      struct device_node *dp;
      struct sbus_iommu *iommu;
-     unsigned long regs, tsb_base;
      u64 control;
      int i;
···

      memset(iommu, 0, sizeof(*iommu));

-     /* We start with no consistent mappings. */
-     iommu->lowest_consistent_map = CLUSTER_NPAGES;
-
-     for (i = 0; i < NCLUSTERS; i++) {
-         iommu->alloc_info[i].flush = 0;
-         iommu->alloc_info[i].next = 0;
-     }
-
      /* Setup spinlock. */
      spin_lock_init(&iommu->lock);
···
             sbus->portid, regs);

      /* Setup for TSB_SIZE=7, TBW_SIZE=0, MMU_DE=1, MMU_EN=1 */
      control = upa_readq(iommu->iommu_regs + IOMMU_CONTROL);
      control = ((7UL << 16UL) |
                 (0UL << 2UL) |
                 (1UL << 1UL) |
                 (1UL << 0UL));
-
-     /* Using the above configuration we need 1MB iommu page
-      * table (128K ioptes * 8 bytes per iopte). This is
-      * page order 7 on UltraSparc.
-      */
-     tsb_base = __get_free_pages(GFP_ATOMIC, get_order(IO_TSB_SIZE));
-     if (tsb_base == 0UL) {
-         prom_printf("sbus_iommu_init: Fatal error, cannot alloc TSB table.\n");
-         prom_halt();
-     }
-
-     iommu->page_table = (iopte_t *) tsb_base;
-     memset(iommu->page_table, 0, IO_TSB_SIZE);
-
      upa_writeq(control, iommu->iommu_regs + IOMMU_CONTROL);

      /* Clean out any cruft in the IOMMU using
···
      upa_readq(iommu->sbus_control_reg);

      /* Give the TSB to SYSIO. */
-     upa_writeq(__pa(tsb_base), iommu->iommu_regs + IOMMU_TSBBASE);

      /* Setup streaming buffer, DE=1 SB_EN=1 */
      control = (1UL << 1UL) | (1UL << 0UL);
···

  #include "iommu_common.h"

  #define MAP_BASE       ((u32)0xc0000000)

+ struct sbus_iommu_arena {
+     unsigned long *map;
+     unsigned int hint;
+     unsigned int limit;
+ };
+
  struct sbus_iommu {
+     spinlock_t lock;

+     struct sbus_iommu_arena arena;

+     iopte_t *page_table;
+     unsigned long strbuf_regs;
+     unsigned long iommu_regs;
+     unsigned long sbus_control_reg;

+     volatile unsigned long strbuf_flushflag;
  };

  /* Offsets from iommu_regs */
···
          upa_writeq(0, tag);
          tag += 8UL;
      }
      upa_readq(iommu->sbus_control_reg);
  }
···
                        base, npages);
  }

+ /* Based largely upon the ppc64 iommu allocator. */
+ static long sbus_arena_alloc(struct sbus_iommu *iommu, unsigned long npages)
  {
+     struct sbus_iommu_arena *arena = &iommu->arena;
+     unsigned long n, i, start, end, limit;
+     int pass;

+     limit = arena->limit;
+     start = arena->hint;
+     pass = 0;

+ again:
+     n = find_next_zero_bit(arena->map, limit, start);
+     end = n + npages;
+     if (unlikely(end >= limit)) {
+         if (likely(pass < 1)) {
+             limit = start;
+             start = 0;
              __iommu_flushall(iommu);
+             pass++;
+             goto again;
+         } else {
+             /* Scanned the whole thing, give up. */
+             return -1;
          }
      }

+     for (i = n; i < end; i++) {
+         if (test_bit(i, arena->map)) {
+             start = i + 1;
+             goto again;
          }
      }

+     for (i = n; i < end; i++)
+         __set_bit(i, arena->map);
+
+     arena->hint = end;
+
+     return n;
+ }
+
+ static void sbus_arena_free(struct sbus_iommu_arena *arena, unsigned long base, unsigned long npages)
+ {
+     unsigned long i;
+
+     for (i = base; i < (base + npages); i++)
+         __clear_bit(i, arena->map);
+ }
+
+ static void sbus_iommu_table_init(struct sbus_iommu *iommu, unsigned int tsbsize)
+ {
+     unsigned long tsbbase, order, sz, num_tsb_entries;
+
+     num_tsb_entries = tsbsize / sizeof(iopte_t);
+
+     /* Setup initial software IOMMU state. */
+     spin_lock_init(&iommu->lock);
+
+     /* Allocate and initialize the free area map. */
+     sz = num_tsb_entries / 8;
+     sz = (sz + 7UL) & ~7UL;
+     iommu->arena.map = kzalloc(sz, GFP_KERNEL);
+     if (!iommu->arena.map) {
+         prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n");
+         prom_halt();
+     }
+     iommu->arena.limit = num_tsb_entries;
+
+     /* Now allocate and setup the IOMMU page table itself. */
+     order = get_order(tsbsize);
+     tsbbase = __get_free_pages(GFP_KERNEL, order);
+     if (!tsbbase) {
+         prom_printf("IOMMU: Error, gfp(tsb) failed.\n");
+         prom_halt();
+     }
+     iommu->page_table = (iopte_t *)tsbbase;
+     memset(iommu->page_table, 0, tsbsize);
+ }
+
+ static inline iopte_t *alloc_npages(struct sbus_iommu *iommu, unsigned long npages)
+ {
+     long entry;
+
+     entry = sbus_arena_alloc(iommu, npages);
+     if (unlikely(entry < 0))
+         return NULL;
+
+     return iommu->page_table + entry;
+ }
+
+ static inline void free_npages(struct sbus_iommu *iommu, dma_addr_t base, unsigned long npages)
+ {
+     sbus_arena_free(&iommu->arena, base >> IO_PAGE_SHIFT, npages);
  }

  void *sbus_alloc_consistent(struct sbus_dev *sdev, size_t size, dma_addr_t *dvma_addr)
  {
      struct sbus_iommu *iommu;
      iopte_t *iopte;
+     unsigned long flags, order, first_page;
      void *ret;
      int npages;

      size = IO_PAGE_ALIGN(size);
      order = get_order(size);
      if (order >= 10)
          return NULL;
+
      first_page = __get_free_pages(GFP_KERNEL|__GFP_COMP, order);
      if (first_page == 0UL)
          return NULL;
···
      iommu = sdev->bus->iommu;

      spin_lock_irqsave(&iommu->lock, flags);
+     iopte = alloc_npages(iommu, size >> IO_PAGE_SHIFT);
+     spin_unlock_irqrestore(&iommu->lock, flags);
+
+     if (unlikely(iopte == NULL)) {
          free_pages(first_page, order);
          return NULL;
      }

+     *dvma_addr = (MAP_BASE +
+                   ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
      ret = (void *) first_page;
      npages = size >> IO_PAGE_SHIFT;
+     first_page = __pa(first_page);
      while (npages--) {
+         iopte_val(*iopte) = (IOPTE_VALID | IOPTE_CACHE |
+                              IOPTE_WRITE |
+                              (first_page & IOPTE_PAGE));
+         iopte++;
          first_page += IO_PAGE_SIZE;
      }

      return ret;
  }

  void sbus_free_consistent(struct sbus_dev *sdev, size_t size, void *cpu, dma_addr_t dvma)
  {
      struct sbus_iommu *iommu;
+     iopte_t *iopte;
+     unsigned long flags, order, npages;

      npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
      iommu = sdev->bus->iommu;
+     iopte = iommu->page_table +
+         ((dvma - MAP_BASE) >> IO_PAGE_SHIFT);

+     spin_lock_irqsave(&iommu->lock, flags);
+
+     free_npages(iommu, dvma - MAP_BASE, npages);
+
+     spin_unlock_irqrestore(&iommu->lock, flags);

      order = get_order(size);
      if (order < 10)
          free_pages((unsigned long)cpu, order);
  }

+ dma_addr_t sbus_map_single(struct sbus_dev *sdev, void *ptr, size_t sz, int direction)
  {
+     struct sbus_iommu *iommu;
+     iopte_t *base;
+     unsigned long flags, npages, oaddr;
+     unsigned long i, base_paddr;
+     u32 bus_addr, ret;
+     unsigned long iopte_protection;

+     iommu = sdev->bus->iommu;
+
+     if (unlikely(direction == SBUS_DMA_NONE))
          BUG();

+     oaddr = (unsigned long)ptr;
+     npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
+     npages >>= IO_PAGE_SHIFT;

      spin_lock_irqsave(&iommu->lock, flags);
+     base = alloc_npages(iommu, npages);
      spin_unlock_irqrestore(&iommu->lock, flags);

+     if (unlikely(!base))
+         BUG();

+     bus_addr = (MAP_BASE +
+                 ((base - iommu->page_table) << IO_PAGE_SHIFT));
+     ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
+     base_paddr = __pa(oaddr & IO_PAGE_MASK);
+
+     iopte_protection = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE;
+     if (direction != SBUS_DMA_TODEVICE)
+         iopte_protection |= IOPTE_WRITE;
+
+     for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE)
+         iopte_val(*base) = iopte_protection | base_paddr;
+
+     return ret;
  }

+ void sbus_unmap_single(struct sbus_dev *sdev, dma_addr_t bus_addr, size_t sz, int direction)
  {
      struct sbus_iommu *iommu = sdev->bus->iommu;
+     iopte_t *base;
+     unsigned long flags, npages, i;

+     if (unlikely(direction == SBUS_DMA_NONE))
+         BUG();
+
+     npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
+     npages >>= IO_PAGE_SHIFT;
+     base = iommu->page_table +
+         ((bus_addr - MAP_BASE) >> IO_PAGE_SHIFT);
+
+     bus_addr &= IO_PAGE_MASK;

      spin_lock_irqsave(&iommu->lock, flags);
+     sbus_strbuf_flush(iommu, bus_addr, npages, direction);
+     for (i = 0; i < npages; i++)
+         iopte_val(base[i]) = 0UL;
+     free_npages(iommu, bus_addr - MAP_BASE, npages);
      spin_unlock_irqrestore(&iommu->lock, flags);
  }

  #define SG_ENT_PHYS_ADDRESS(SG) \
      (__pa(page_address((SG)->page)) + (SG)->offset)

+ static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg,
+                            int nused, int nelems, unsigned long iopte_protection)
  {
      struct scatterlist *dma_sg = sg;
      struct scatterlist *sg_end = sg + nelems;
···
      for (;;) {
          unsigned long tmp;

+         tmp = SG_ENT_PHYS_ADDRESS(sg);
          len = sg->length;
          if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) {
              pteval = tmp & IO_PAGE_MASK;
···
          sg++;
      }

+     pteval = iopte_protection | (pteval & IOPTE_PAGE);
      while (len > 0) {
          *iopte++ = __iopte(pteval);
          pteval += IO_PAGE_SIZE;
···
      }
  }

+ int sbus_map_sg(struct sbus_dev *sdev, struct scatterlist *sglist, int nelems, int direction)
  {
+     struct sbus_iommu *iommu;
+     unsigned long flags, npages, iopte_protection;
+     iopte_t *base;
      u32 dma_base;
      struct scatterlist *sgtmp;
      int used;

      /* Fast path single entry scatterlists. */
+     if (nelems == 1) {
+         sglist->dma_address =
              sbus_map_single(sdev,
+                             (page_address(sglist->page) + sglist->offset),
+                             sglist->length, direction);
+         sglist->dma_length = sglist->length;
          return 1;
      }

+     iommu = sdev->bus->iommu;
+
+     if (unlikely(direction == SBUS_DMA_NONE))
+         BUG();
+
+     npages = prepare_sg(sglist, nelems);

      spin_lock_irqsave(&iommu->lock, flags);
+     base = alloc_npages(iommu, npages);
+     spin_unlock_irqrestore(&iommu->lock, flags);
+
+     if (unlikely(base == NULL))
+         BUG();
+
+     dma_base = MAP_BASE +
+         ((base - iommu->page_table) << IO_PAGE_SHIFT);

      /* Normalize DVMA addresses. */
+     used = nelems;

+     sgtmp = sglist;
      while (used && sgtmp->dma_length) {
          sgtmp->dma_address += dma_base;
          sgtmp++;
          used--;
      }
+     used = nelems - used;

+     iopte_protection = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE;
+     if (direction != SBUS_DMA_TODEVICE)
+         iopte_protection |= IOPTE_WRITE;

+     fill_sg(base, sglist, used, nelems, iopte_protection);
+
  #ifdef VERIFY_SG
+     verify_sglist(sglist, nelems, base, npages);
  #endif

      return used;
  }

+ void sbus_unmap_sg(struct sbus_dev *sdev, struct scatterlist *sglist, int nelems, int direction)
  {
      struct sbus_iommu *iommu;
+     iopte_t *base;
+     unsigned long flags, i, npages;
+     u32 bus_addr;

+     if (unlikely(direction == SBUS_DMA_NONE))
+         BUG();

      iommu = sdev->bus->iommu;
+
+     bus_addr = sglist->dma_address & IO_PAGE_MASK;
+
+     for (i = 1; i < nelems; i++)
+         if (sglist[i].dma_length == 0)
+             break;
+     i--;
+     npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) -
+               bus_addr) >> IO_PAGE_SHIFT;
+
+     base = iommu->page_table +
+         ((bus_addr - MAP_BASE) >> IO_PAGE_SHIFT);
+
      spin_lock_irqsave(&iommu->lock, flags);
+     sbus_strbuf_flush(iommu, bus_addr, npages, direction);
+     for (i = 0; i < npages; i++)
+         iopte_val(base[i]) = 0UL;
+     free_npages(iommu, bus_addr - MAP_BASE, npages);
      spin_unlock_irqrestore(&iommu->lock, flags);
  }

+ void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t bus_addr, size_t sz, int direction)
  {
+     struct sbus_iommu *iommu;
+     unsigned long flags, npages;

+     iommu = sdev->bus->iommu;
+
+     npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
+     npages >>= IO_PAGE_SHIFT;
+     bus_addr &= IO_PAGE_MASK;

      spin_lock_irqsave(&iommu->lock, flags);
+     sbus_strbuf_flush(iommu, bus_addr, npages, direction);
      spin_unlock_irqrestore(&iommu->lock, flags);
  }
···
  {
  }

+ void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sglist, int nelems, int direction)
  {
+     struct sbus_iommu *iommu;
+     unsigned long flags, npages, i;
+     u32 bus_addr;

+     iommu = sdev->bus->iommu;
+
+     bus_addr = sglist[0].dma_address & IO_PAGE_MASK;
+     for (i = 0; i < nelems; i++) {
+         if (!sglist[i].dma_length)
              break;
      }
      i--;
+     npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length)
+               - bus_addr) >> IO_PAGE_SHIFT;

      spin_lock_irqsave(&iommu->lock, flags);
+     sbus_strbuf_flush(iommu, bus_addr, npages, direction);
      spin_unlock_irqrestore(&iommu->lock, flags);
  }
···
      struct linux_prom64_registers *pr;
      struct device_node *dp;
      struct sbus_iommu *iommu;
+     unsigned long regs;
      u64 control;
      int i;
···

      memset(iommu, 0, sizeof(*iommu));

      /* Setup spinlock. */
      spin_lock_init(&iommu->lock);
···
             sbus->portid, regs);

      /* Setup for TSB_SIZE=7, TBW_SIZE=0, MMU_DE=1, MMU_EN=1 */
+     sbus_iommu_table_init(iommu, IO_TSB_SIZE);
+
      control = upa_readq(iommu->iommu_regs + IOMMU_CONTROL);
      control = ((7UL << 16UL) |
                 (0UL << 2UL) |
                 (1UL << 1UL) |
                 (1UL << 0UL));
      upa_writeq(control, iommu->iommu_regs + IOMMU_CONTROL);

      /* Clean out any cruft in the IOMMU using
···
      upa_readq(iommu->sbus_control_reg);

      /* Give the TSB to SYSIO. */
+     upa_writeq(__pa(iommu->page_table), iommu->iommu_regs + IOMMU_TSBBASE);

      /* Setup streaming buffer, DE=1 SB_EN=1 */
      control = (1UL << 1UL) | (1UL << 0UL);