#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/of_dma.h>
#include <linux/of_irq.h>
#include <linux/dmapool.h>
#include <linux/interrupt.h>
#include <linux/of_address.h>
#include <linux/pm_runtime.h>
#include "dmaengine.h"

#define DESC_TYPE	27
#define DESC_TYPE_HOST	0x10
#define DESC_TYPE_TEARD	0x13

#define TD_DESC_IS_RX	(1 << 16)
#define TD_DESC_DMA_NUM	10

#define DESC_LENGTH_BITS_NUM	21

#define DESC_TYPE_USB		(5 << 26)
#define DESC_PD_COMPLETE	(1 << 31)

/* DMA engine */
#define DMA_TDFDQ	4
#define DMA_TXGCR(x)	(0x800 + (x) * 0x20)
#define DMA_RXGCR(x)	(0x808 + (x) * 0x20)
#define RXHPCRA0	4

#define GCR_CHAN_ENABLE		(1 << 31)
#define GCR_TEARDOWN		(1 << 30)
#define GCR_STARV_RETRY		(1 << 24)
#define GCR_DESC_TYPE_HOST	(1 << 14)

/* DMA scheduler */
#define DMA_SCHED_CTRL		0
#define DMA_SCHED_CTRL_EN	(1 << 31)
#define DMA_SCHED_WORD(x)	((x) * 4 + 0x800)

#define SCHED_ENTRY0_CHAN(x)	((x) << 0)
#define SCHED_ENTRY0_IS_RX	(1 << 7)

#define SCHED_ENTRY1_CHAN(x)	((x) << 8)
#define SCHED_ENTRY1_IS_RX	(1 << 15)

#define SCHED_ENTRY2_CHAN(x)	((x) << 16)
#define SCHED_ENTRY2_IS_RX	(1 << 23)

#define SCHED_ENTRY3_CHAN(x)	((x) << 24)
#define SCHED_ENTRY3_IS_RX	(1 << 31)

/* Queue manager */
/* 4 KiB of memory for descriptors, 2 for each endpoint */
#define ALLOC_DECS_NUM		128
#define DESCS_AREAS		1
#define TOTAL_DESCS_NUM		(ALLOC_DECS_NUM * DESCS_AREAS)
#define QMGR_SCRATCH_SIZE	(TOTAL_DESCS_NUM * 4)

#define QMGR_LRAM0_BASE		0x80
#define QMGR_LRAM_SIZE		0x84
#define QMGR_LRAM1_BASE		0x88
#define QMGR_MEMBASE(x)		(0x1000 + (x) * 0x10)
#define QMGR_MEMCTRL(x)		(0x1004 + (x) * 0x10)
#define QMGR_MEMCTRL_IDX_SH	16
#define QMGR_MEMCTRL_DESC_SH	8

#define QMGR_NUM_PEND	5
#define QMGR_PEND(x)	(0x90 + (x) * 4)

#define QMGR_PENDING_SLOT_Q(x)	(x / 32)
#define QMGR_PENDING_BIT_Q(x)	(x % 32)

#define QMGR_QUEUE_A(n)	(0x2000 + (n) * 0x10)
#define QMGR_QUEUE_B(n)	(0x2004 + (n) * 0x10)
#define QMGR_QUEUE_C(n)	(0x2008 + (n) * 0x10)
#define QMGR_QUEUE_D(n)	(0x200c + (n) * 0x10)

/* Glue layer specific */
/* USBSS / USB AM335x */
#define USBSS_IRQ_STATUS	0x28
#define USBSS_IRQ_ENABLER	0x2c
#define USBSS_IRQ_CLEARR	0x30

#define USBSS_IRQ_PD_COMP	(1 << 2)

struct cppi41_channel {
	struct dma_chan chan;
	struct dma_async_tx_descriptor txd;
	struct cppi41_dd *cdd;
	struct cppi41_desc *desc;
	dma_addr_t desc_phys;
	void __iomem *gcr_reg;
	int is_tx;
	u32 residue;

	unsigned int q_num;
	unsigned int q_comp_num;
	unsigned int port_num;

	unsigned td_retry;
	unsigned td_queued:1;
	unsigned td_seen:1;
	unsigned td_desc_seen:1;
};

struct cppi41_desc {
	u32 pd0;
	u32 pd1;
	u32 pd2;
	u32 pd3;
	u32 pd4;
	u32 pd5;
	u32 pd6;
	u32 pd7;
} __aligned(32);

struct chan_queues {
	u16 submit;
	u16 complete;
};

struct cppi41_dd {
	struct dma_device ddev;

	void *qmgr_scratch;
	dma_addr_t scratch_phys;

	struct cppi41_desc *cd;
	dma_addr_t descs_phys;
	u32 first_td_desc;
	struct cppi41_channel *chan_busy[ALLOC_DECS_NUM];

	void __iomem *usbss_mem;
	void __iomem *ctrl_mem;
	void __iomem *sched_mem;
	void __iomem *qmgr_mem;
	unsigned int irq;
	const struct chan_queues *queues_rx;
	const struct chan_queues *queues_tx;
	struct chan_queues td_queue;

	/* context for suspend/resume */
	unsigned int dma_tdfdq;
};

#define FIST_COMPLETION_QUEUE	93
static struct chan_queues usb_queues_tx[] = {
	/* USB0 ENDP 1 */
	[ 0] = { .submit = 32, .complete =  93},
	[ 1] = { .submit = 34, .complete =  94},
	[ 2] = { .submit = 36, .complete =  95},
	[ 3] = { .submit = 38, .complete =  96},
	[ 4] = { .submit = 40, .complete =  97},
	[ 5] = { .submit = 42, .complete =  98},
	[ 6] = { .submit = 44, .complete =  99},
	[ 7] = { .submit = 46, .complete = 100},
	[ 8] = { .submit = 48, .complete = 101},
	[ 9] = { .submit = 50, .complete = 102},
	[10] = { .submit = 52, .complete = 103},
	[11] = { .submit = 54, .complete = 104},
	[12] = { .submit = 56, .complete = 105},
	[13] = { .submit = 58, .complete = 106},
	[14] = { .submit = 60, .complete = 107},

	/* USB1 ENDP1 */
	[15] = { .submit = 62, .complete = 125},
	[16] = { .submit = 64, .complete = 126},
	[17] = { .submit = 66, .complete = 127},
	[18] = { .submit = 68, .complete = 128},
	[19] = { .submit = 70, .complete = 129},
	[20] = { .submit = 72, .complete = 130},
	[21] = { .submit = 74, .complete = 131},
	[22] = { .submit = 76, .complete = 132},
	[23] = { .submit = 78, .complete = 133},
	[24] = { .submit = 80, .complete = 134},
	[25] = { .submit = 82, .complete = 135},
	[26] = { .submit = 84, .complete = 136},
	[27] = { .submit = 86, .complete = 137},
	[28] = { .submit = 88, .complete = 138},
	[29] = { .submit = 90, .complete = 139},
};

static const struct chan_queues usb_queues_rx[] = {
	/* USB0 ENDP 1 */
	[ 0] = { .submit =  1, .complete = 109},
	[ 1] = { .submit =  2, .complete = 110},
	[ 2] = { .submit =  3, .complete = 111},
	[ 3] = { .submit =  4, .complete = 112},
	[ 4] = { .submit =  5, .complete = 113},
	[ 5] = { .submit =  6, .complete = 114},
	[ 6] = { .submit =  7, .complete = 115},
	[ 7] = { .submit =  8, .complete = 116},
	[ 8] = { .submit =  9, .complete = 117},
	[ 9] = { .submit = 10, .complete = 118},
	[10] = { .submit = 11, .complete = 119},
	[11] = { .submit = 12, .complete = 120},
	[12] = { .submit = 13, .complete = 121},
	[13] = { .submit = 14, .complete = 122},
	[14] = { .submit = 15, .complete = 123},

	/* USB1 ENDP 1 */
	[15] = { .submit = 16, .complete = 141},
	[16] = { .submit = 17, .complete = 142},
	[17] = { .submit = 18, .complete = 143},
	[18] = { .submit = 19, .complete = 144},
	[19] = { .submit = 20, .complete = 145},
	[20] = { .submit = 21, .complete = 146},
	[21] = { .submit = 22, .complete = 147},
	[22] = { .submit = 23, .complete = 148},
	[23] = { .submit = 24, .complete = 149},
	[24] = { .submit = 25, .complete = 150},
	[25] = { .submit = 26, .complete = 151},
	[26] = { .submit = 27, .complete = 152},
	[27] = { .submit = 28, .complete = 153},
	[28] = { .submit = 29, .complete = 154},
	[29] = { .submit = 30, .complete = 155},
};

struct cppi_glue_infos {
	irqreturn_t (*isr)(int irq, void *data);
	const struct chan_queues *queues_rx;
	const struct chan_queues *queues_tx;
	struct chan_queues td_queue;
};

static struct cppi41_channel *to_cpp41_chan(struct dma_chan *c)
{
	return container_of(c, struct cppi41_channel, chan);
}

static struct cppi41_channel *desc_to_chan(struct cppi41_dd *cdd, u32 desc)
{
	struct cppi41_channel *c;
	u32 descs_size;
	u32 desc_num;

	descs_size = sizeof(struct cppi41_desc) * ALLOC_DECS_NUM;

	if (!((desc >= cdd->descs_phys) &&
			(desc < (cdd->descs_phys + descs_size)))) {
		return NULL;
	}

	desc_num = (desc - cdd->descs_phys) / sizeof(struct cppi41_desc);
	BUG_ON(desc_num >= ALLOC_DECS_NUM);
	c = cdd->chan_busy[desc_num];
	cdd->chan_busy[desc_num] = NULL;
	return c;
}

static void cppi_writel(u32 val, void *__iomem *mem)
{
	__raw_writel(val, mem);
}

static u32 cppi_readl(void *__iomem *mem)
{
	return __raw_readl(mem);
}

static u32 pd_trans_len(u32 val)
{
	return val & ((1 << (DESC_LENGTH_BITS_NUM + 1)) - 1);
}

static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
{
	u32 desc;

	desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num));
	desc &= ~0x1f;
	return desc;
}

static irqreturn_t cppi41_irq(int irq, void *data)
{
	struct cppi41_dd *cdd = data;
	struct cppi41_channel *c;
	u32 status;
	int i;

	status = cppi_readl(cdd->usbss_mem + USBSS_IRQ_STATUS);
	if (!(status & USBSS_IRQ_PD_COMP))
		return IRQ_NONE;
	cppi_writel(status, cdd->usbss_mem + USBSS_IRQ_STATUS);

	for (i = QMGR_PENDING_SLOT_Q(FIST_COMPLETION_QUEUE); i < QMGR_NUM_PEND;
			i++) {
		u32 val;
		u32 q_num;

		val = cppi_readl(cdd->qmgr_mem + QMGR_PEND(i));
		if (i == QMGR_PENDING_SLOT_Q(FIST_COMPLETION_QUEUE) && val) {
			u32 mask;
			/* set corresponding bit for completion Q 93 */
			mask = 1 << QMGR_PENDING_BIT_Q(FIST_COMPLETION_QUEUE);
			/* now set all bits for queues less than Q 93 */
			mask--;
			/* now invert and keep only Q 93+ set */
			val &= ~mask;
		}

		if (val)
			__iormb();

		while (val) {
			u32 desc;

			q_num = __fls(val);
			val &= ~(1 << q_num);
			q_num += 32 * i;
			desc = cppi41_pop_desc(cdd, q_num);
			c = desc_to_chan(cdd, desc);
			if (WARN_ON(!c)) {
				pr_err("%s() q %d desc %08x\n", __func__,
						q_num, desc);
				continue;
			}
			c->residue = pd_trans_len(c->desc->pd6) -
					pd_trans_len(c->desc->pd0);

			dma_cookie_complete(&c->txd);
			c->txd.callback(c->txd.callback_param);
		}
	}
	return IRQ_HANDLED;
}

static dma_cookie_t cppi41_tx_submit(struct dma_async_tx_descriptor *tx)
{
	dma_cookie_t cookie;

	cookie = dma_cookie_assign(tx);

	return cookie;
}

static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);

	dma_cookie_init(chan);
	dma_async_tx_descriptor_init(&c->txd, chan);
	c->txd.tx_submit = cppi41_tx_submit;

	if (!c->is_tx)
		cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);

	return 0;
}

static void cppi41_dma_free_chan_resources(struct dma_chan *chan)
{
}

static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan,
	dma_cookie_t cookie, struct dma_tx_state *txstate)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	enum dma_status ret;

	/* lock */
	ret = dma_cookie_status(chan, cookie, txstate);
	if (txstate && ret == DMA_COMPLETE)
		txstate->residue = c->residue;
	/* unlock */

	return ret;
}

static void push_desc_queue(struct cppi41_channel *c)
{
	struct cppi41_dd *cdd = c->cdd;
	u32 desc_num;
	u32 desc_phys;
	u32 reg;

	desc_phys = lower_32_bits(c->desc_phys);
	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
	WARN_ON(cdd->chan_busy[desc_num]);
	cdd->chan_busy[desc_num] = c;

	reg = (sizeof(struct cppi41_desc) - 24) / 4;
	reg |= desc_phys;
	cppi_writel(reg, cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num));
}

static void cppi41_dma_issue_pending(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	u32 reg;

	c->residue = 0;

	reg = GCR_CHAN_ENABLE;
	if (!c->is_tx) {
		reg |= GCR_STARV_RETRY;
		reg |= GCR_DESC_TYPE_HOST;
		reg |= c->q_comp_num;
	}

	cppi_writel(reg, c->gcr_reg);

	/*
	 * We don't use writel() but __raw_writel() so we have to make sure
	 * that the DMA descriptor in coherent memory made it to main memory
	 * before starting the DMA engine.
	 */
	__iowmb();
	push_desc_queue(c);
}

static u32 get_host_pd0(u32 length)
{
	u32 reg;

	reg = DESC_TYPE_HOST << DESC_TYPE;
	reg |= length;

	return reg;
}

static u32 get_host_pd1(struct cppi41_channel *c)
{
	u32 reg;

	reg = 0;

	return reg;
}

static u32 get_host_pd2(struct cppi41_channel *c)
{
	u32 reg;

	reg = DESC_TYPE_USB;
	reg |= c->q_comp_num;

	return reg;
}

static u32 get_host_pd3(u32 length)
{
	u32 reg;

	/* PD3 = packet size */
	reg = length;

	return reg;
}

static u32 get_host_pd6(u32 length)
{
	u32 reg;

	/* PD6 buffer size */
	reg = DESC_PD_COMPLETE;
	reg |= length;

	return reg;
}

static u32 get_host_pd4_or_7(u32 addr)
{
	u32 reg;

	reg = addr;

	return reg;
}

static u32 get_host_pd5(void)
{
	u32 reg;

	reg = 0;

	return reg;
}

static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg(
	struct dma_chan *chan, struct scatterlist *sgl, unsigned sg_len,
	enum dma_transfer_direction dir, unsigned long tx_flags, void *context)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_desc *d;
	struct scatterlist *sg;
	unsigned int i;
	unsigned int num;

	num = 0;
	d = c->desc;
	for_each_sg(sgl, sg, sg_len, i) {
		u32 addr;
		u32 len;

		/* We need to use more than one desc once musb supports sg */
		BUG_ON(num > 0);
		addr = lower_32_bits(sg_dma_address(sg));
		len = sg_dma_len(sg);

		d->pd0 = get_host_pd0(len);
		d->pd1 = get_host_pd1(c);
		d->pd2 = get_host_pd2(c);
		d->pd3 = get_host_pd3(len);
		d->pd4 = get_host_pd4_or_7(addr);
		d->pd5 = get_host_pd5();
		d->pd6 = get_host_pd6(len);
		d->pd7 = get_host_pd4_or_7(addr);

		d++;
	}

	return &c->txd;
}

static int cpp41_cfg_chan(struct cppi41_channel *c,
		struct dma_slave_config *cfg)
{
	return 0;
}

static void cppi41_compute_td_desc(struct cppi41_desc *d)
{
	d->pd0 = DESC_TYPE_TEARD << DESC_TYPE;
}

static int cppi41_tear_down_chan(struct cppi41_channel *c)
{
	struct cppi41_dd *cdd = c->cdd;
	struct cppi41_desc *td;
	u32 reg;
	u32 desc_phys;
	u32 td_desc_phys;

	td = cdd->cd;
	td += cdd->first_td_desc;

	td_desc_phys = cdd->descs_phys;
	td_desc_phys += cdd->first_td_desc * sizeof(struct cppi41_desc);

	if (!c->td_queued) {
		cppi41_compute_td_desc(td);
		__iowmb();

		reg = (sizeof(struct cppi41_desc) - 24) / 4;
		reg |= td_desc_phys;
		cppi_writel(reg, cdd->qmgr_mem +
				QMGR_QUEUE_D(cdd->td_queue.submit));

		reg = GCR_CHAN_ENABLE;
		if (!c->is_tx) {
			reg |= GCR_STARV_RETRY;
			reg |= GCR_DESC_TYPE_HOST;
			reg |= c->q_comp_num;
		}
		reg |= GCR_TEARDOWN;
		cppi_writel(reg, c->gcr_reg);
		c->td_queued = 1;
		c->td_retry = 100;
	}

	if (!c->td_seen || !c->td_desc_seen) {

		desc_phys = cppi41_pop_desc(cdd, cdd->td_queue.complete);
		if (!desc_phys)
			desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);

		if (desc_phys == c->desc_phys) {
			c->td_desc_seen = 1;

		} else if (desc_phys == td_desc_phys) {
			u32 pd0;

			__iormb();
			pd0 = td->pd0;
			WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
			WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
			WARN_ON((pd0 & 0x1f) != c->port_num);
			c->td_seen = 1;
		} else if (desc_phys) {
			WARN_ON_ONCE(1);
		}
	}
	c->td_retry--;
	/*
	 * If the TX descriptor / channel is in use, the caller needs to poke
	 * its TD bit multiple times. After that the hardware releases the
	 * transfer descriptor followed by the TD descriptor. Waiting does not
	 * seem to make any difference.
	 * RX seems to be thrown out right away. However, once the TearDown
	 * descriptor gets through we are done. If we have seen the transfer
	 * descriptor before the TD, we fetch it from the enqueue queue; it has
	 * to be there waiting for us.
	 */
	if (!c->td_seen && c->td_retry)
		return -EAGAIN;

	WARN_ON(!c->td_retry);
	if (!c->td_desc_seen) {
		desc_phys = cppi41_pop_desc(cdd, c->q_num);
		WARN_ON(!desc_phys);
	}

	c->td_queued = 0;
	c->td_seen = 0;
	c->td_desc_seen = 0;
	cppi_writel(0, c->gcr_reg);
	return 0;
}

static int cppi41_stop_chan(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	u32 desc_num;
	u32 desc_phys;
	int ret;

	ret = cppi41_tear_down_chan(c);
	if (ret)
		return ret;

	desc_phys = lower_32_bits(c->desc_phys);
	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
	WARN_ON(!cdd->chan_busy[desc_num]);
	cdd->chan_busy[desc_num] = NULL;

	return 0;
}

static int cppi41_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
	unsigned long arg)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	int ret;

	switch (cmd) {
	case DMA_SLAVE_CONFIG:
		ret = cpp41_cfg_chan(c, (struct dma_slave_config *) arg);
		break;

	case DMA_TERMINATE_ALL:
		ret = cppi41_stop_chan(chan);
		break;

	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void cleanup_chans(struct cppi41_dd *cdd)
{
	while (!list_empty(&cdd->ddev.channels)) {
		struct cppi41_channel *cchan;

		cchan = list_first_entry(&cdd->ddev.channels,
				struct cppi41_channel, chan.device_node);
		list_del(&cchan->chan.device_node);
		kfree(cchan);
	}
}

static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
{
	struct cppi41_channel *cchan;
	int i;
	int ret;
	u32 n_chans;

	ret = of_property_read_u32(dev->of_node, "#dma-channels",
			&n_chans);
	if (ret)
		return ret;
	/*
	 * The channels can only be used as TX or as RX. So we add twice
	 * as many DMA channels because USB can only do RX or TX.
	 */
	n_chans *= 2;

	for (i = 0; i < n_chans; i++) {
		cchan = kzalloc(sizeof(*cchan), GFP_KERNEL);
		if (!cchan)
			goto err;

		cchan->cdd = cdd;
		if (i & 1) {
			cchan->gcr_reg = cdd->ctrl_mem + DMA_TXGCR(i >> 1);
			cchan->is_tx = 1;
		} else {
			cchan->gcr_reg = cdd->ctrl_mem + DMA_RXGCR(i >> 1);
			cchan->is_tx = 0;
		}
		cchan->port_num = i >> 1;
		cchan->desc = &cdd->cd[i];
		cchan->desc_phys = cdd->descs_phys;
		cchan->desc_phys += i * sizeof(struct cppi41_desc);
		cchan->chan.device = &cdd->ddev;
		list_add_tail(&cchan->chan.device_node, &cdd->ddev.channels);
	}
	cdd->first_td_desc = n_chans;

	return 0;
err:
	cleanup_chans(cdd);
	return -ENOMEM;
}

static void purge_descs(struct device *dev, struct cppi41_dd *cdd)
{
	unsigned int mem_decs;
	int i;

	mem_decs = ALLOC_DECS_NUM * sizeof(struct cppi41_desc);

	for (i = 0; i < DESCS_AREAS; i++) {

		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMBASE(i));
		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMCTRL(i));

		dma_free_coherent(dev, mem_decs, cdd->cd,
				cdd->descs_phys);
	}
}

static void disable_sched(struct cppi41_dd *cdd)
{
	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
}

static void deinit_cppi41(struct device *dev, struct cppi41_dd *cdd)
{
	disable_sched(cdd);

	purge_descs(dev, cdd);

	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	dma_free_coherent(dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch,
			cdd->scratch_phys);
}

static int init_descs(struct device *dev, struct cppi41_dd *cdd)
{
	unsigned int desc_size;
	unsigned int mem_decs;
	int i;
	u32 reg;
	u32 idx;

	BUILD_BUG_ON(sizeof(struct cppi41_desc) &
			(sizeof(struct cppi41_desc) - 1));
	BUILD_BUG_ON(sizeof(struct cppi41_desc) < 32);
	BUILD_BUG_ON(ALLOC_DECS_NUM < 32);

	desc_size = sizeof(struct cppi41_desc);
	mem_decs = ALLOC_DECS_NUM * desc_size;

	idx = 0;
	for (i = 0; i < DESCS_AREAS; i++) {

		reg = idx << QMGR_MEMCTRL_IDX_SH;
		reg |= (ilog2(desc_size) - 5) << QMGR_MEMCTRL_DESC_SH;
		reg |= ilog2(ALLOC_DECS_NUM) - 5;

		BUILD_BUG_ON(DESCS_AREAS != 1);
		cdd->cd = dma_alloc_coherent(dev, mem_decs,
				&cdd->descs_phys, GFP_KERNEL);
		if (!cdd->cd)
			return -ENOMEM;

		cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));
		cppi_writel(reg, cdd->qmgr_mem + QMGR_MEMCTRL(i));

		idx += ALLOC_DECS_NUM;
	}
	return 0;
}

static void init_sched(struct cppi41_dd *cdd)
{
	unsigned ch;
	unsigned word;
	u32 reg;

	word = 0;
	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
	for (ch = 0; ch < 15 * 2; ch += 2) {

		reg = SCHED_ENTRY0_CHAN(ch);
		reg |= SCHED_ENTRY1_CHAN(ch) | SCHED_ENTRY1_IS_RX;

		reg |= SCHED_ENTRY2_CHAN(ch + 1);
		reg |= SCHED_ENTRY3_CHAN(ch + 1) | SCHED_ENTRY3_IS_RX;
		cppi_writel(reg, cdd->sched_mem + DMA_SCHED_WORD(word));
		word++;
	}
	reg = 15 * 2 * 2 - 1;
	reg |= DMA_SCHED_CTRL_EN;
	cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL);
}

static int init_cppi41(struct device *dev, struct cppi41_dd *cdd)
{
	int ret;

	BUILD_BUG_ON(QMGR_SCRATCH_SIZE > ((1 << 14) - 1));
	cdd->qmgr_scratch = dma_alloc_coherent(dev, QMGR_SCRATCH_SIZE,
			&cdd->scratch_phys, GFP_KERNEL);
	if (!cdd->qmgr_scratch)
		return -ENOMEM;

	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);

	ret = init_descs(dev, cdd);
	if (ret)
		goto err_td;

	cppi_writel(cdd->td_queue.submit, cdd->ctrl_mem + DMA_TDFDQ);
	init_sched(cdd);
	return 0;
err_td:
	deinit_cppi41(dev, cdd);
	return ret;
}

static struct platform_driver cpp41_dma_driver;
/*
 * The param format is:
 * X Y
 * X: Port
 * Y: 0 = RX else TX
 */
#define INFO_PORT	0
#define INFO_IS_TX	1

static bool cpp41_dma_filter_fn(struct dma_chan *chan, void *param)
{
	struct cppi41_channel *cchan;
	struct cppi41_dd *cdd;
	const struct chan_queues *queues;
	u32 *num = param;

	if (chan->device->dev->driver != &cpp41_dma_driver.driver)
		return false;

	cchan = to_cpp41_chan(chan);

	if (cchan->port_num != num[INFO_PORT])
		return false;

	if (cchan->is_tx && !num[INFO_IS_TX])
		return false;
	cdd = cchan->cdd;
	if (cchan->is_tx)
		queues = cdd->queues_tx;
	else
		queues = cdd->queues_rx;

	BUILD_BUG_ON(ARRAY_SIZE(usb_queues_rx) != ARRAY_SIZE(usb_queues_tx));
	if (WARN_ON(cchan->port_num > ARRAY_SIZE(usb_queues_rx)))
		return false;

	cchan->q_num = queues[cchan->port_num].submit;
	cchan->q_comp_num = queues[cchan->port_num].complete;
	return true;
}

static struct of_dma_filter_info cpp41_dma_info = {
	.filter_fn = cpp41_dma_filter_fn,
};

static struct dma_chan *cppi41_dma_xlate(struct of_phandle_args *dma_spec,
		struct of_dma *ofdma)
{
	int count = dma_spec->args_count;
	struct of_dma_filter_info *info = ofdma->of_dma_data;

	if (!info || !info->filter_fn)
		return NULL;

	if (count != 2)
		return NULL;

	return dma_request_channel(info->dma_cap, info->filter_fn,
			&dma_spec->args[0]);
}

static const struct cppi_glue_infos usb_infos = {
	.isr = cppi41_irq,
	.queues_rx = usb_queues_rx,
	.queues_tx = usb_queues_tx,
	.td_queue = { .submit = 31, .complete = 0 },
};

static const struct of_device_id cppi41_dma_ids[] = {
	{ .compatible = "ti,am3359-cppi41", .data = &usb_infos},
	{},
};
MODULE_DEVICE_TABLE(of, cppi41_dma_ids);

static const struct cppi_glue_infos *get_glue_info(struct device *dev)
{
	const struct of_device_id *of_id;

	of_id = of_match_node(cppi41_dma_ids, dev->of_node);
	if (!of_id)
		return NULL;
	return of_id->data;
}

static int cppi41_dma_probe(struct platform_device *pdev)
{
	struct cppi41_dd *cdd;
	struct device *dev = &pdev->dev;
	const struct cppi_glue_infos *glue_info;
	int irq;
	int ret;

	glue_info = get_glue_info(dev);
	if (!glue_info)
		return -EINVAL;

	cdd = kzalloc(sizeof(*cdd), GFP_KERNEL);
	if (!cdd)
		return -ENOMEM;

	dma_cap_set(DMA_SLAVE, cdd->ddev.cap_mask);
	cdd->ddev.device_alloc_chan_resources = cppi41_dma_alloc_chan_resources;
	cdd->ddev.device_free_chan_resources = cppi41_dma_free_chan_resources;
	cdd->ddev.device_tx_status = cppi41_dma_tx_status;
	cdd->ddev.device_issue_pending = cppi41_dma_issue_pending;
	cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg;
	cdd->ddev.device_control = cppi41_dma_control;
	cdd->ddev.dev = dev;
	INIT_LIST_HEAD(&cdd->ddev.channels);
	cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;

	cdd->usbss_mem = of_iomap(dev->of_node, 0);
	cdd->ctrl_mem = of_iomap(dev->of_node, 1);
	cdd->sched_mem = of_iomap(dev->of_node, 2);
	cdd->qmgr_mem = of_iomap(dev->of_node, 3);

	if (!cdd->usbss_mem || !cdd->ctrl_mem || !cdd->sched_mem ||
			!cdd->qmgr_mem) {
		ret = -ENXIO;
		goto err_remap;
	}

	pm_runtime_enable(dev);
	ret = pm_runtime_get_sync(dev);
	if (ret < 0)
		goto err_get_sync;

	cdd->queues_rx = glue_info->queues_rx;
	cdd->queues_tx = glue_info->queues_tx;
	cdd->td_queue = glue_info->td_queue;

	ret = init_cppi41(dev, cdd);
	if (ret)
		goto err_init_cppi;

	ret = cppi41_add_chans(dev, cdd);
	if (ret)
		goto err_chans;

	irq = irq_of_parse_and_map(dev->of_node, 0);
	if (!irq) {
		ret = -EINVAL;
		goto err_irq;
	}

	cppi_writel(USBSS_IRQ_PD_COMP, cdd->usbss_mem + USBSS_IRQ_ENABLER);

	ret = request_irq(irq, glue_info->isr, IRQF_SHARED,
			dev_name(dev), cdd);
	if (ret)
		goto err_irq;
	cdd->irq = irq;

	ret = dma_async_device_register(&cdd->ddev);
	if (ret)
		goto err_dma_reg;

	ret = of_dma_controller_register(dev->of_node,
			cppi41_dma_xlate, &cpp41_dma_info);
	if (ret)
		goto err_of;

	platform_set_drvdata(pdev, cdd);
	return 0;
err_of:
	dma_async_device_unregister(&cdd->ddev);
err_dma_reg:
	free_irq(irq, cdd);
err_irq:
	cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR);
	cleanup_chans(cdd);
err_chans:
	deinit_cppi41(dev, cdd);
err_init_cppi:
	pm_runtime_put(dev);
err_get_sync:
	pm_runtime_disable(dev);
	iounmap(cdd->usbss_mem);
	iounmap(cdd->ctrl_mem);
	iounmap(cdd->sched_mem);
	iounmap(cdd->qmgr_mem);
err_remap:
	kfree(cdd);
	return ret;
}

static int cppi41_dma_remove(struct platform_device *pdev)
{
	struct cppi41_dd *cdd = platform_get_drvdata(pdev);

	of_dma_controller_free(pdev->dev.of_node);
	dma_async_device_unregister(&cdd->ddev);

	cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR);
	free_irq(cdd->irq, cdd);
	cleanup_chans(cdd);
	deinit_cppi41(&pdev->dev, cdd);
	iounmap(cdd->usbss_mem);
	iounmap(cdd->ctrl_mem);
	iounmap(cdd->sched_mem);
	iounmap(cdd->qmgr_mem);
	pm_runtime_put(&pdev->dev);
	pm_runtime_disable(&pdev->dev);
	kfree(cdd);
	return 0;
}

#ifdef CONFIG_PM_SLEEP
static int cppi41_suspend(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);

	cdd->dma_tdfdq = cppi_readl(cdd->ctrl_mem + DMA_TDFDQ);
	cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR);
	disable_sched(cdd);

	return 0;
}

static int cppi41_resume(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);
	struct cppi41_channel *c;
	int i;

	for (i = 0; i < DESCS_AREAS; i++)
		cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));

	list_for_each_entry(c, &cdd->ddev.channels, chan.device_node)
		if (!c->is_tx)
			cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);

	init_sched(cdd);

	cppi_writel(cdd->dma_tdfdq, cdd->ctrl_mem + DMA_TDFDQ);
	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);

	cppi_writel(USBSS_IRQ_PD_COMP, cdd->usbss_mem + USBSS_IRQ_ENABLER);

	return 0;
}
#endif

static SIMPLE_DEV_PM_OPS(cppi41_pm_ops, cppi41_suspend, cppi41_resume);

static struct platform_driver cpp41_dma_driver = {
	.probe = cppi41_dma_probe,
	.remove = cppi41_dma_remove,
	.driver = {
		.name = "cppi41-dma-engine",
		.owner = THIS_MODULE,
		.pm = &cppi41_pm_ops,
		.of_match_table = of_match_ptr(cppi41_dma_ids),
	},
};

module_platform_driver(cpp41_dma_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Sebastian Andrzej Siewior <bigeasy@linutronix.de>");