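/*
 * dmaengine driver for the CPPI 4.1 DMA controller in the AM335x USB
 * subsystem (USBSS).  Descriptors live in a coherent memory area handed to
 * the on-chip queue manager; each USB endpoint channel is tied to a fixed
 * pair of submit/completion queues (see the tables below).
 */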
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/of_dma.h>
#include <linux/of_irq.h>
#include <linux/dmapool.h>
#include <linux/interrupt.h>
#include <linux/of_address.h>
#include <linux/pm_runtime.h>
#include "dmaengine.h"

#define DESC_TYPE	27
#define DESC_TYPE_HOST	0x10
#define DESC_TYPE_TEARD	0x13

#define TD_DESC_IS_RX	(1 << 16)
#define TD_DESC_DMA_NUM	10

#define DESC_LENGTH_BITS_NUM	21

#define DESC_TYPE_USB		(5 << 26)
#define DESC_PD_COMPLETE	(1 << 31)

/* DMA engine */
#define DMA_TDFDQ	4
#define DMA_TXGCR(x)	(0x800 + (x) * 0x20)
#define DMA_RXGCR(x)	(0x808 + (x) * 0x20)
#define RXHPCRA0	4

#define GCR_CHAN_ENABLE		(1 << 31)
#define GCR_TEARDOWN		(1 << 30)
#define GCR_STARV_RETRY		(1 << 24)
#define GCR_DESC_TYPE_HOST	(1 << 14)

/* DMA scheduler */
#define DMA_SCHED_CTRL		0
#define DMA_SCHED_CTRL_EN	(1 << 31)
#define DMA_SCHED_WORD(x)	((x) * 4 + 0x800)

#define SCHED_ENTRY0_CHAN(x)	((x) << 0)
#define SCHED_ENTRY0_IS_RX	(1 << 7)

#define SCHED_ENTRY1_CHAN(x)	((x) << 8)
#define SCHED_ENTRY1_IS_RX	(1 << 15)

#define SCHED_ENTRY2_CHAN(x)	((x) << 16)
#define SCHED_ENTRY2_IS_RX	(1 << 23)

#define SCHED_ENTRY3_CHAN(x)	((x) << 24)
#define SCHED_ENTRY3_IS_RX	(1 << 31)

/* Queue manager */
/* 4 KiB of memory for descriptors, 2 for each endpoint */
#define ALLOC_DECS_NUM		128
#define DESCS_AREAS		1
#define TOTAL_DESCS_NUM		(ALLOC_DECS_NUM * DESCS_AREAS)
#define QMGR_SCRATCH_SIZE	(TOTAL_DESCS_NUM * 4)

#define QMGR_LRAM0_BASE		0x80
#define QMGR_LRAM_SIZE		0x84
#define QMGR_LRAM1_BASE		0x88
#define QMGR_MEMBASE(x)		(0x1000 + (x) * 0x10)
#define QMGR_MEMCTRL(x)		(0x1004 + (x) * 0x10)
#define QMGR_MEMCTRL_IDX_SH	16
#define QMGR_MEMCTRL_DESC_SH	8

#define QMGR_NUM_PEND	5
#define QMGR_PEND(x)	(0x90 + (x) * 4)

#define QMGR_PENDING_SLOT_Q(x)	(x / 32)
#define QMGR_PENDING_BIT_Q(x)	(x % 32)

#define QMGR_QUEUE_A(n)	(0x2000 + (n) * 0x10)
#define QMGR_QUEUE_B(n)	(0x2004 + (n) * 0x10)
#define QMGR_QUEUE_C(n)	(0x2008 + (n) * 0x10)
#define QMGR_QUEUE_D(n)	(0x200c + (n) * 0x10)

/* Glue layer specific */
/* USBSS / USB AM335x */
#define USBSS_IRQ_STATUS	0x28
#define USBSS_IRQ_ENABLER	0x2c
#define USBSS_IRQ_CLEARR	0x30

#define USBSS_IRQ_PD_COMP	(1 << 2)

struct cppi41_channel {
	struct dma_chan chan;
	struct dma_async_tx_descriptor txd;
	struct cppi41_dd *cdd;
	struct cppi41_desc *desc;
	dma_addr_t desc_phys;
	void __iomem *gcr_reg;
	int is_tx;
	u32 residue;

	unsigned int q_num;
	unsigned int q_comp_num;
	unsigned int port_num;

	unsigned td_retry;
	unsigned td_queued:1;
	unsigned td_seen:1;
	unsigned td_desc_seen:1;
};

struct cppi41_desc {
	u32 pd0;
	u32 pd1;
	u32 pd2;
	u32 pd3;
	u32 pd4;
	u32 pd5;
	u32 pd6;
	u32 pd7;
} __aligned(32);

struct chan_queues {
	u16 submit;
	u16 complete;
};

struct cppi41_dd {
	struct dma_device ddev;

	void *qmgr_scratch;
	dma_addr_t scratch_phys;

	struct cppi41_desc *cd;
	dma_addr_t descs_phys;
	u32 first_td_desc;
	struct cppi41_channel *chan_busy[ALLOC_DECS_NUM];

	void __iomem *usbss_mem;
	void __iomem *ctrl_mem;
	void __iomem *sched_mem;
	void __iomem *qmgr_mem;
	unsigned int irq;
	const struct chan_queues *queues_rx;
	const struct chan_queues *queues_tx;
	struct chan_queues td_queue;
};
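/*
 * Fixed mapping of channels to queue-manager queues: every USB endpoint
 * channel owns a submit queue (descriptors pushed by this driver) and a
 * completion queue (descriptors returned by the hardware).  All completion
 * queues are numbered 93 or higher, which is what the pending-register scan
 * in the IRQ handler keys off (hence FIST_COMPLETION_QUEUE below).  The
 * concrete queue numbers come from the AM335x USBSS integration.
 */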
#define FIST_COMPLETION_QUEUE	93
static struct chan_queues usb_queues_tx[] = {
	/* USB0 ENDP 1 */
	[ 0] = { .submit = 32, .complete =  93},
	[ 1] = { .submit = 34, .complete =  94},
	[ 2] = { .submit = 36, .complete =  95},
	[ 3] = { .submit = 38, .complete =  96},
	[ 4] = { .submit = 40, .complete =  97},
	[ 5] = { .submit = 42, .complete =  98},
	[ 6] = { .submit = 44, .complete =  99},
	[ 7] = { .submit = 46, .complete = 100},
	[ 8] = { .submit = 48, .complete = 101},
	[ 9] = { .submit = 50, .complete = 102},
	[10] = { .submit = 52, .complete = 103},
	[11] = { .submit = 54, .complete = 104},
	[12] = { .submit = 56, .complete = 105},
	[13] = { .submit = 58, .complete = 106},
	[14] = { .submit = 60, .complete = 107},

	/* USB1 ENDP 1 */
	[15] = { .submit = 62, .complete = 125},
	[16] = { .submit = 64, .complete = 126},
	[17] = { .submit = 66, .complete = 127},
	[18] = { .submit = 68, .complete = 128},
	[19] = { .submit = 70, .complete = 129},
	[20] = { .submit = 72, .complete = 130},
	[21] = { .submit = 74, .complete = 131},
	[22] = { .submit = 76, .complete = 132},
	[23] = { .submit = 78, .complete = 133},
	[24] = { .submit = 80, .complete = 134},
	[25] = { .submit = 82, .complete = 135},
	[26] = { .submit = 84, .complete = 136},
	[27] = { .submit = 86, .complete = 137},
	[28] = { .submit = 88, .complete = 138},
	[29] = { .submit = 90, .complete = 139},
};

static const struct chan_queues usb_queues_rx[] = {
	/* USB0 ENDP 1 */
	[ 0] = { .submit =  1, .complete = 109},
	[ 1] = { .submit =  2, .complete = 110},
	[ 2] = { .submit =  3, .complete = 111},
	[ 3] = { .submit =  4, .complete = 112},
	[ 4] = { .submit =  5, .complete = 113},
	[ 5] = { .submit =  6, .complete = 114},
	[ 6] = { .submit =  7, .complete = 115},
	[ 7] = { .submit =  8, .complete = 116},
	[ 8] = { .submit =  9, .complete = 117},
	[ 9] = { .submit = 10, .complete = 118},
	[10] = { .submit = 11, .complete = 119},
	[11] = { .submit = 12, .complete = 120},
	[12] = { .submit = 13, .complete = 121},
	[13] = { .submit = 14, .complete = 122},
	[14] = { .submit = 15, .complete = 123},

	/* USB1 ENDP 1 */
	[15] = { .submit = 16, .complete = 141},
	[16] = { .submit = 17, .complete = 142},
	[17] = { .submit = 18, .complete = 143},
	[18] = { .submit = 19, .complete = 144},
	[19] = { .submit = 20, .complete = 145},
	[20] = { .submit = 21, .complete = 146},
	[21] = { .submit = 22, .complete = 147},
	[22] = { .submit = 23, .complete = 148},
	[23] = { .submit = 24, .complete = 149},
	[24] = { .submit = 25, .complete = 150},
	[25] = { .submit = 26, .complete = 151},
	[26] = { .submit = 27, .complete = 152},
	[27] = { .submit = 28, .complete = 153},
	[28] = { .submit = 29, .complete = 154},
	[29] = { .submit = 30, .complete = 155},
};

struct cppi_glue_infos {
	irqreturn_t (*isr)(int irq, void *data);
	const struct chan_queues *queues_rx;
	const struct chan_queues *queues_tx;
	struct chan_queues td_queue;
};

static struct cppi41_channel *to_cpp41_chan(struct dma_chan *c)
{
	return container_of(c, struct cppi41_channel, chan);
}
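/*
 * Map a descriptor's physical address, as popped from a completion queue,
 * back to the channel that submitted it.  chan_busy[] is indexed by the
 * descriptor's slot in the coherent descriptor area; the entry is cleared
 * here, so a descriptor is only ever completed once.
 */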
static struct cppi41_channel *desc_to_chan(struct cppi41_dd *cdd, u32 desc)
{
	struct cppi41_channel *c;
	u32 descs_size;
	u32 desc_num;

	descs_size = sizeof(struct cppi41_desc) * ALLOC_DECS_NUM;

	if (!((desc >= cdd->descs_phys) &&
			(desc < (cdd->descs_phys + descs_size)))) {
		return NULL;
	}

	desc_num = (desc - cdd->descs_phys) / sizeof(struct cppi41_desc);
	BUG_ON(desc_num >= ALLOC_DECS_NUM);
	c = cdd->chan_busy[desc_num];
	cdd->chan_busy[desc_num] = NULL;
	return c;
}

static void cppi_writel(u32 val, void *__iomem *mem)
{
	__raw_writel(val, mem);
}

static u32 cppi_readl(void *__iomem *mem)
{
	return __raw_readl(mem);
}

static u32 pd_trans_len(u32 val)
{
	return val & ((1 << (DESC_LENGTH_BITS_NUM + 1)) - 1);
}

static irqreturn_t cppi41_irq(int irq, void *data)
{
	struct cppi41_dd *cdd = data;
	struct cppi41_channel *c;
	u32 status;
	int i;

	status = cppi_readl(cdd->usbss_mem + USBSS_IRQ_STATUS);
	if (!(status & USBSS_IRQ_PD_COMP))
		return IRQ_NONE;
	cppi_writel(status, cdd->usbss_mem + USBSS_IRQ_STATUS);

	for (i = QMGR_PENDING_SLOT_Q(FIST_COMPLETION_QUEUE); i < QMGR_NUM_PEND;
			i++) {
		u32 val;
		u32 q_num;

		val = cppi_readl(cdd->qmgr_mem + QMGR_PEND(i));
		if (i == QMGR_PENDING_SLOT_Q(FIST_COMPLETION_QUEUE) && val) {
			u32 mask;
			/* set the bit corresponding to completion Q 93 */
			mask = 1 << QMGR_PENDING_BIT_Q(FIST_COMPLETION_QUEUE);
			/* now set all bits for queues below Q 93 */
			mask--;
			/* invert and keep only Q 93 and above set */
			val &= ~mask;
		}

		if (val)
			__iormb();

		while (val) {
			u32 desc;

			q_num = __fls(val);
			val &= ~(1 << q_num);
			q_num += 32 * i;
			desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(q_num));
			desc &= ~0x1f;
			c = desc_to_chan(cdd, desc);
			if (WARN_ON(!c)) {
				pr_err("%s() q %d desc %08x\n", __func__,
						q_num, desc);
				continue;
			}
			c->residue = pd_trans_len(c->desc->pd6) -
					pd_trans_len(c->desc->pd0);

			dma_cookie_complete(&c->txd);
			c->txd.callback(c->txd.callback_param);
		}
	}
	return IRQ_HANDLED;
}

static dma_cookie_t cppi41_tx_submit(struct dma_async_tx_descriptor *tx)
{
	dma_cookie_t cookie;

	cookie = dma_cookie_assign(tx);

	return cookie;
}

static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);

	dma_cookie_init(chan);
	dma_async_tx_descriptor_init(&c->txd, chan);
	c->txd.tx_submit = cppi41_tx_submit;

	if (!c->is_tx)
		cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);

	return 0;
}

static void cppi41_dma_free_chan_resources(struct dma_chan *chan)
{
}

static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan,
	dma_cookie_t cookie, struct dma_tx_state *txstate)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	enum dma_status ret;

	/* lock */
	ret = dma_cookie_status(chan, cookie, txstate);
	if (txstate && ret == DMA_SUCCESS)
		txstate->residue = c->residue;
	/* unlock */

	return ret;
}
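/*
 * A write to QMGR_QUEUE_D both stores the descriptor pointer and pushes it
 * onto the queue.  Descriptors are 32-byte aligned, so the low five bits of
 * the address are free; the driver packs a descriptor size code in there,
 * (sizeof(struct cppi41_desc) - 24) / 4, i.e. 2 for the 32-byte descriptors
 * used here.  The pop paths mask those bits off again (desc &= ~0x1f), and
 * cppi41_tear_down_chan() uses the same encoding for the teardown
 * descriptor.
 */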
static void push_desc_queue(struct cppi41_channel *c)
{
	struct cppi41_dd *cdd = c->cdd;
	u32 desc_num;
	u32 desc_phys;
	u32 reg;

	desc_phys = lower_32_bits(c->desc_phys);
	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
	WARN_ON(cdd->chan_busy[desc_num]);
	cdd->chan_busy[desc_num] = c;

	reg = (sizeof(struct cppi41_desc) - 24) / 4;
	reg |= desc_phys;
	cppi_writel(reg, cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num));
}

static void cppi41_dma_issue_pending(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	u32 reg;

	c->residue = 0;

	reg = GCR_CHAN_ENABLE;
	if (!c->is_tx) {
		reg |= GCR_STARV_RETRY;
		reg |= GCR_DESC_TYPE_HOST;
		reg |= c->q_comp_num;
	}

	cppi_writel(reg, c->gcr_reg);

	/*
	 * We use __raw_writel() instead of writel(), so we have to make sure
	 * that the DMA descriptor in coherent memory has made it to main
	 * memory before starting the DMA engine.
	 */
	__iowmb();
	push_desc_queue(c);
}

static u32 get_host_pd0(u32 length)
{
	u32 reg;

	reg = DESC_TYPE_HOST << DESC_TYPE;
	reg |= length;

	return reg;
}

static u32 get_host_pd1(struct cppi41_channel *c)
{
	u32 reg;

	reg = 0;

	return reg;
}

static u32 get_host_pd2(struct cppi41_channel *c)
{
	u32 reg;

	reg = DESC_TYPE_USB;
	reg |= c->q_comp_num;

	return reg;
}

static u32 get_host_pd3(u32 length)
{
	u32 reg;

	/* PD3 = packet size */
	reg = length;

	return reg;
}

static u32 get_host_pd6(u32 length)
{
	u32 reg;

	/* PD6 buffer size */
	reg = DESC_PD_COMPLETE;
	reg |= length;

	return reg;
}

static u32 get_host_pd4_or_7(u32 addr)
{
	u32 reg;

	reg = addr;

	return reg;
}

static u32 get_host_pd5(void)
{
	u32 reg;

	reg = 0;

	return reg;
}

static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg(
	struct dma_chan *chan, struct scatterlist *sgl, unsigned sg_len,
	enum dma_transfer_direction dir, unsigned long tx_flags, void *context)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_desc *d;
	struct scatterlist *sg;
	unsigned int i;
	unsigned int num;

	num = 0;
	d = c->desc;
	for_each_sg(sgl, sg, sg_len, i) {
		u32 addr;
		u32 len;

		/* We need to use more than one desc once musb supports sg */
		BUG_ON(num > 0);
		addr = lower_32_bits(sg_dma_address(sg));
		len = sg_dma_len(sg);

		d->pd0 = get_host_pd0(len);
		d->pd1 = get_host_pd1(c);
		d->pd2 = get_host_pd2(c);
		d->pd3 = get_host_pd3(len);
		d->pd4 = get_host_pd4_or_7(addr);
		d->pd5 = get_host_pd5();
		d->pd6 = get_host_pd6(len);
		d->pd7 = get_host_pd4_or_7(addr);

		d++;
	}

	return &c->txd;
}

static int cpp41_cfg_chan(struct cppi41_channel *c,
		struct dma_slave_config *cfg)
{
	return 0;
}

static void cppi41_compute_td_desc(struct cppi41_desc *d)
{
	d->pd0 = DESC_TYPE_TEARD << DESC_TYPE;
}

static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
{
	u32 desc;

	desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num));
	desc &= ~0x1f;
	return desc;
}
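/*
 * Channel teardown.  On the first call a teardown descriptor is queued on
 * the TD submit queue and GCR_TEARDOWN is set in the channel's GCR register
 * (td_queued).  Subsequent calls poll for the teardown descriptor on the TD
 * completion queue (TX) or the channel's completion queue (RX) (td_seen)
 * and for the in-flight transfer descriptor (td_desc_seen).  While the
 * teardown descriptor has not shown up and retries remain, -EAGAIN is
 * returned so the caller can poke us again.  If the transfer descriptor
 * never appears on the completion queue, it is reclaimed from the submit
 * queue at the end.
 */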
static int cppi41_tear_down_chan(struct cppi41_channel *c)
{
	struct cppi41_dd *cdd = c->cdd;
	struct cppi41_desc *td;
	u32 reg;
	u32 desc_phys;
	u32 td_desc_phys;

	td = cdd->cd;
	td += cdd->first_td_desc;

	td_desc_phys = cdd->descs_phys;
	td_desc_phys += cdd->first_td_desc * sizeof(struct cppi41_desc);

	if (!c->td_queued) {
		cppi41_compute_td_desc(td);
		__iowmb();

		reg = (sizeof(struct cppi41_desc) - 24) / 4;
		reg |= td_desc_phys;
		cppi_writel(reg, cdd->qmgr_mem +
				QMGR_QUEUE_D(cdd->td_queue.submit));

		reg = GCR_CHAN_ENABLE;
		if (!c->is_tx) {
			reg |= GCR_STARV_RETRY;
			reg |= GCR_DESC_TYPE_HOST;
			reg |= c->q_comp_num;
		}
		reg |= GCR_TEARDOWN;
		cppi_writel(reg, c->gcr_reg);
		c->td_queued = 1;
		c->td_retry = 100;
	}

	if (!c->td_seen) {
		unsigned td_comp_queue;

		if (c->is_tx)
			td_comp_queue = cdd->td_queue.complete;
		else
			td_comp_queue = c->q_comp_num;

		desc_phys = cppi41_pop_desc(cdd, td_comp_queue);
		if (desc_phys) {
			__iormb();

			if (desc_phys == td_desc_phys) {
				u32 pd0;
				pd0 = td->pd0;
				WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
				WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
				WARN_ON((pd0 & 0x1f) != c->port_num);
			} else {
				WARN_ON_ONCE(1);
			}
			c->td_seen = 1;
		}
	}
	if (!c->td_desc_seen) {
		desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
		if (desc_phys) {
			__iormb();
			WARN_ON(c->desc_phys != desc_phys);
			c->td_desc_seen = 1;
		}
	}
	c->td_retry--;
	/*
	 * If the TX descriptor / channel is in use, the caller needs to poke
	 * its TD bit multiple times. After that the hardware releases the
	 * transfer descriptor followed by the TD descriptor. Waiting does not
	 * seem to make any difference.
	 * RX seems to be thrown out right away. However, once the teardown
	 * descriptor gets through we are done. If we have seen the transfer
	 * descriptor before the TD, we fetch it from the submit queue; it has
	 * to be there waiting for us.
	 */
	if (!c->td_seen && c->td_retry)
		return -EAGAIN;

	WARN_ON(!c->td_retry);
	if (!c->td_desc_seen) {
		desc_phys = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num));
		WARN_ON(!desc_phys);
	}

	c->td_queued = 0;
	c->td_seen = 0;
	c->td_desc_seen = 0;
	cppi_writel(0, c->gcr_reg);
	return 0;
}

static int cppi41_stop_chan(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	u32 desc_num;
	u32 desc_phys;
	int ret;

	ret = cppi41_tear_down_chan(c);
	if (ret)
		return ret;

	desc_phys = lower_32_bits(c->desc_phys);
	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
	WARN_ON(!cdd->chan_busy[desc_num]);
	cdd->chan_busy[desc_num] = NULL;

	return 0;
}

static int cppi41_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
	unsigned long arg)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	int ret;

	switch (cmd) {
	case DMA_SLAVE_CONFIG:
		ret = cpp41_cfg_chan(c, (struct dma_slave_config *) arg);
		break;

	case DMA_TERMINATE_ALL:
		ret = cppi41_stop_chan(chan);
		break;

	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void cleanup_chans(struct cppi41_dd *cdd)
{
	while (!list_empty(&cdd->ddev.channels)) {
		struct cppi41_channel *cchan;

		cchan = list_first_entry(&cdd->ddev.channels,
				struct cppi41_channel, chan.device_node);
		list_del(&cchan->chan.device_node);
		kfree(cchan);
	}
}
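/*
 * Channels are created in RX/TX pairs: list index i maps to hardware
 * channel i >> 1, with even i used as the RX half (DMA_RXGCR) and odd i as
 * the TX half (DMA_TXGCR).  Each channel gets one descriptor from the
 * coherent descriptor area; the slot right after the last channel is
 * reserved for the teardown descriptor (first_td_desc).
 */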
static int cppi41_add_chans(struct platform_device *pdev, struct cppi41_dd *cdd)
{
	struct cppi41_channel *cchan;
	int i;
	int ret;
	u32 n_chans;

	ret = of_property_read_u32(pdev->dev.of_node, "#dma-channels",
			&n_chans);
	if (ret)
		return ret;
	/*
	 * Each hardware channel can only be used either for TX or for RX,
	 * so we register twice as many DMA channels: one RX and one TX half
	 * per USB endpoint.
	 */
	n_chans *= 2;

	for (i = 0; i < n_chans; i++) {
		cchan = kzalloc(sizeof(*cchan), GFP_KERNEL);
		if (!cchan)
			goto err;

		cchan->cdd = cdd;
		if (i & 1) {
			cchan->gcr_reg = cdd->ctrl_mem + DMA_TXGCR(i >> 1);
			cchan->is_tx = 1;
		} else {
			cchan->gcr_reg = cdd->ctrl_mem + DMA_RXGCR(i >> 1);
			cchan->is_tx = 0;
		}
		cchan->port_num = i >> 1;
		cchan->desc = &cdd->cd[i];
		cchan->desc_phys = cdd->descs_phys;
		cchan->desc_phys += i * sizeof(struct cppi41_desc);
		cchan->chan.device = &cdd->ddev;
		list_add_tail(&cchan->chan.device_node, &cdd->ddev.channels);
	}
	cdd->first_td_desc = n_chans;

	return 0;
err:
	cleanup_chans(cdd);
	return -ENOMEM;
}

static void purge_descs(struct platform_device *pdev, struct cppi41_dd *cdd)
{
	unsigned int mem_decs;
	int i;

	mem_decs = ALLOC_DECS_NUM * sizeof(struct cppi41_desc);

	for (i = 0; i < DESCS_AREAS; i++) {

		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMBASE(i));
		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMCTRL(i));

		dma_free_coherent(&pdev->dev, mem_decs, cdd->cd,
				cdd->descs_phys);
	}
}

static void disable_sched(struct cppi41_dd *cdd)
{
	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
}

static void deinit_cpii41(struct platform_device *pdev, struct cppi41_dd *cdd)
{
	disable_sched(cdd);

	purge_descs(pdev, cdd);

	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	dma_free_coherent(&pdev->dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch,
			cdd->scratch_phys);
}

static int init_descs(struct platform_device *pdev, struct cppi41_dd *cdd)
{
	unsigned int desc_size;
	unsigned int mem_decs;
	int i;
	u32 reg;
	u32 idx;

	BUILD_BUG_ON(sizeof(struct cppi41_desc) &
			(sizeof(struct cppi41_desc) - 1));
	BUILD_BUG_ON(sizeof(struct cppi41_desc) < 32);
	BUILD_BUG_ON(ALLOC_DECS_NUM < 32);

	desc_size = sizeof(struct cppi41_desc);
	mem_decs = ALLOC_DECS_NUM * desc_size;

	idx = 0;
	for (i = 0; i < DESCS_AREAS; i++) {

		reg = idx << QMGR_MEMCTRL_IDX_SH;
		reg |= (ilog2(desc_size) - 5) << QMGR_MEMCTRL_DESC_SH;
		reg |= ilog2(ALLOC_DECS_NUM) - 5;

		BUILD_BUG_ON(DESCS_AREAS != 1);
		cdd->cd = dma_alloc_coherent(&pdev->dev, mem_decs,
				&cdd->descs_phys, GFP_KERNEL);
		if (!cdd->cd)
			return -ENOMEM;

		cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));
		cppi_writel(reg, cdd->qmgr_mem + QMGR_MEMCTRL(i));

		idx += ALLOC_DECS_NUM;
	}
	return 0;
}
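/*
 * Program the DMA scheduler with a static round-robin table.  Each 32-bit
 * scheduler word holds four entries; every word below schedules the TX and
 * RX halves of two consecutive channels.  30 channels (15 per USB instance)
 * give 60 entries in total, and DMA_SCHED_CTRL is loaded with the index of
 * the last entry (15 * 2 * 2 - 1) plus the enable bit.
 */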
static void init_sched(struct cppi41_dd *cdd)
{
	unsigned ch;
	unsigned word;
	u32 reg;

	word = 0;
	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
	for (ch = 0; ch < 15 * 2; ch += 2) {

		reg = SCHED_ENTRY0_CHAN(ch);
		reg |= SCHED_ENTRY1_CHAN(ch) | SCHED_ENTRY1_IS_RX;

		reg |= SCHED_ENTRY2_CHAN(ch + 1);
		reg |= SCHED_ENTRY3_CHAN(ch + 1) | SCHED_ENTRY3_IS_RX;
		cppi_writel(reg, cdd->sched_mem + DMA_SCHED_WORD(word));
		word++;
	}
	reg = 15 * 2 * 2 - 1;
	reg |= DMA_SCHED_CTRL_EN;
	cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL);
}

static int init_cppi41(struct platform_device *pdev, struct cppi41_dd *cdd)
{
	int ret;

	BUILD_BUG_ON(QMGR_SCRATCH_SIZE > ((1 << 14) - 1));
	cdd->qmgr_scratch = dma_alloc_coherent(&pdev->dev, QMGR_SCRATCH_SIZE,
			&cdd->scratch_phys, GFP_KERNEL);
	if (!cdd->qmgr_scratch)
		return -ENOMEM;

	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);

	ret = init_descs(pdev, cdd);
	if (ret)
		goto err_td;

	cppi_writel(cdd->td_queue.submit, cdd->ctrl_mem + DMA_TDFDQ);
	init_sched(cdd);
	return 0;
err_td:
	deinit_cpii41(pdev, cdd);
	return ret;
}

static struct platform_driver cpp41_dma_driver;
/*
 * The param format is:
 * X Y
 * X: Port
 * Y: 0 = RX else TX
 */
#define INFO_PORT	0
#define INFO_IS_TX	1

static bool cpp41_dma_filter_fn(struct dma_chan *chan, void *param)
{
	struct cppi41_channel *cchan;
	struct cppi41_dd *cdd;
	const struct chan_queues *queues;
	u32 *num = param;

	if (chan->device->dev->driver != &cpp41_dma_driver.driver)
		return false;

	cchan = to_cpp41_chan(chan);

	if (cchan->port_num != num[INFO_PORT])
		return false;

	if (cchan->is_tx && !num[INFO_IS_TX])
		return false;
	cdd = cchan->cdd;
	if (cchan->is_tx)
		queues = cdd->queues_tx;
	else
		queues = cdd->queues_rx;

	BUILD_BUG_ON(ARRAY_SIZE(usb_queues_rx) != ARRAY_SIZE(usb_queues_tx));
	if (WARN_ON(cchan->port_num > ARRAY_SIZE(usb_queues_rx)))
		return false;

	cchan->q_num = queues[cchan->port_num].submit;
	cchan->q_comp_num = queues[cchan->port_num].complete;
	return true;
}

static struct of_dma_filter_info cpp41_dma_info = {
	.filter_fn = cpp41_dma_filter_fn,
};

static struct dma_chan *cppi41_dma_xlate(struct of_phandle_args *dma_spec,
		struct of_dma *ofdma)
{
	int count = dma_spec->args_count;
	struct of_dma_filter_info *info = ofdma->of_dma_data;

	if (!info || !info->filter_fn)
		return NULL;

	if (count != 2)
		return NULL;

	return dma_request_channel(info->dma_cap, info->filter_fn,
			&dma_spec->args[0]);
}
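/*
 * A DT consumer references a channel with two cells matching the format
 * documented above: <port is_tx>.  An illustrative binding snippet (the
 * &cppi41dma label and dma-names are placeholders, not taken from a real
 * DTS):
 *
 *	dmas = <&cppi41dma 0 0>, <&cppi41dma 0 1>;
 *	dma-names = "rx1", "tx1";
 *
 * cppi41_dma_xlate() passes both cells to cpp41_dma_filter_fn(), which
 * matches on port and direction and fills in the channel's submit and
 * completion queue numbers from the glue-layer tables.
 */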
static const struct cppi_glue_infos usb_infos = {
	.isr = cppi41_irq,
	.queues_rx = usb_queues_rx,
	.queues_tx = usb_queues_tx,
	.td_queue = { .submit = 31, .complete = 0 },
};

static const struct of_device_id cppi41_dma_ids[] = {
	{ .compatible = "ti,am3359-cppi41", .data = &usb_infos},
	{},
};
MODULE_DEVICE_TABLE(of, cppi41_dma_ids);

static const struct cppi_glue_infos *get_glue_info(struct platform_device *pdev)
{
	const struct of_device_id *of_id;

	of_id = of_match_node(cppi41_dma_ids, pdev->dev.of_node);
	if (!of_id)
		return NULL;
	return of_id->data;
}

static int cppi41_dma_probe(struct platform_device *pdev)
{
	struct cppi41_dd *cdd;
	const struct cppi_glue_infos *glue_info;
	int irq;
	int ret;

	glue_info = get_glue_info(pdev);
	if (!glue_info)
		return -EINVAL;

	cdd = kzalloc(sizeof(*cdd), GFP_KERNEL);
	if (!cdd)
		return -ENOMEM;

	dma_cap_set(DMA_SLAVE, cdd->ddev.cap_mask);
	cdd->ddev.device_alloc_chan_resources = cppi41_dma_alloc_chan_resources;
	cdd->ddev.device_free_chan_resources = cppi41_dma_free_chan_resources;
	cdd->ddev.device_tx_status = cppi41_dma_tx_status;
	cdd->ddev.device_issue_pending = cppi41_dma_issue_pending;
	cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg;
	cdd->ddev.device_control = cppi41_dma_control;
	cdd->ddev.dev = &pdev->dev;
	INIT_LIST_HEAD(&cdd->ddev.channels);
	cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;

	cdd->usbss_mem = of_iomap(pdev->dev.of_node, 0);
	cdd->ctrl_mem = of_iomap(pdev->dev.of_node, 1);
	cdd->sched_mem = of_iomap(pdev->dev.of_node, 2);
	cdd->qmgr_mem = of_iomap(pdev->dev.of_node, 3);

	if (!cdd->usbss_mem || !cdd->ctrl_mem || !cdd->sched_mem ||
			!cdd->qmgr_mem) {
		ret = -ENXIO;
		goto err_remap;
	}

	pm_runtime_enable(&pdev->dev);
	ret = pm_runtime_get_sync(&pdev->dev);
	if (ret)
		goto err_get_sync;

	cdd->queues_rx = glue_info->queues_rx;
	cdd->queues_tx = glue_info->queues_tx;
	cdd->td_queue = glue_info->td_queue;

	ret = init_cppi41(pdev, cdd);
	if (ret)
		goto err_init_cppi;

	ret = cppi41_add_chans(pdev, cdd);
	if (ret)
		goto err_chans;

	irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
	if (!irq)
		goto err_irq;

	cppi_writel(USBSS_IRQ_PD_COMP, cdd->usbss_mem + USBSS_IRQ_ENABLER);

	ret = request_irq(irq, glue_info->isr, IRQF_SHARED,
			dev_name(&pdev->dev), cdd);
	if (ret)
		goto err_irq;
	cdd->irq = irq;

	ret = dma_async_device_register(&cdd->ddev);
	if (ret)
		goto err_dma_reg;

	ret = of_dma_controller_register(pdev->dev.of_node,
			cppi41_dma_xlate, &cpp41_dma_info);
	if (ret)
		goto err_of;

	platform_set_drvdata(pdev, cdd);
	return 0;
err_of:
	dma_async_device_unregister(&cdd->ddev);
err_dma_reg:
	free_irq(irq, cdd);
err_irq:
	cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR);
	cleanup_chans(cdd);
err_chans:
	deinit_cpii41(pdev, cdd);
err_init_cppi:
	pm_runtime_put(&pdev->dev);
err_get_sync:
	pm_runtime_disable(&pdev->dev);
	iounmap(cdd->usbss_mem);
	iounmap(cdd->ctrl_mem);
	iounmap(cdd->sched_mem);
	iounmap(cdd->qmgr_mem);
err_remap:
	kfree(cdd);
	return ret;
}

static int cppi41_dma_remove(struct platform_device *pdev)
{
	struct cppi41_dd *cdd = platform_get_drvdata(pdev);

	of_dma_controller_free(pdev->dev.of_node);
	dma_async_device_unregister(&cdd->ddev);

	cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR);
	free_irq(cdd->irq, cdd);
	cleanup_chans(cdd);
	deinit_cpii41(pdev, cdd);
	iounmap(cdd->usbss_mem);
	iounmap(cdd->ctrl_mem);
	iounmap(cdd->sched_mem);
	iounmap(cdd->qmgr_mem);
	pm_runtime_put(&pdev->dev);
	pm_runtime_disable(&pdev->dev);
	kfree(cdd);
	return 0;
}

static struct platform_driver cpp41_dma_driver = {
	.probe  = cppi41_dma_probe,
	.remove = cppi41_dma_remove,
	.driver = {
		.name = "cppi41-dma-engine",
		.owner = THIS_MODULE,
		.of_match_table = of_match_ptr(cppi41_dma_ids),
	},
};

module_platform_driver(cpp41_dma_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Sebastian Andrzej Siewior <bigeasy@linutronix.de>");