#include <linux/delay.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/of_dma.h>
#include <linux/of_irq.h>
#include <linux/dmapool.h>
#include <linux/interrupt.h>
#include <linux/of_address.h>
#include <linux/pm_runtime.h>
#include "dmaengine.h"

#define DESC_TYPE	27
#define DESC_TYPE_HOST	0x10
#define DESC_TYPE_TEARD	0x13

#define TD_DESC_IS_RX	(1 << 16)
#define TD_DESC_DMA_NUM	10

#define DESC_LENGTH_BITS_NUM	21

#define DESC_TYPE_USB	(5 << 26)
#define DESC_PD_COMPLETE	(1 << 31)

/* DMA engine */
#define DMA_TDFDQ	4
#define DMA_TXGCR(x)	(0x800 + (x) * 0x20)
#define DMA_RXGCR(x)	(0x808 + (x) * 0x20)
#define RXHPCRA0	4

#define GCR_CHAN_ENABLE		(1 << 31)
#define GCR_TEARDOWN		(1 << 30)
#define GCR_STARV_RETRY		(1 << 24)
#define GCR_DESC_TYPE_HOST	(1 << 14)

/* DMA scheduler */
#define DMA_SCHED_CTRL		0
#define DMA_SCHED_CTRL_EN	(1 << 31)
#define DMA_SCHED_WORD(x)	((x) * 4 + 0x800)

#define SCHED_ENTRY0_CHAN(x)	((x) << 0)
#define SCHED_ENTRY0_IS_RX	(1 << 7)

#define SCHED_ENTRY1_CHAN(x)	((x) << 8)
#define SCHED_ENTRY1_IS_RX	(1 << 15)

#define SCHED_ENTRY2_CHAN(x)	((x) << 16)
#define SCHED_ENTRY2_IS_RX	(1 << 23)

#define SCHED_ENTRY3_CHAN(x)	((x) << 24)
#define SCHED_ENTRY3_IS_RX	(1 << 31)

/* Queue manager */
/* 4 KiB of memory for descriptors, 2 for each endpoint */
#define ALLOC_DECS_NUM		128
#define DESCS_AREAS		1
#define TOTAL_DESCS_NUM		(ALLOC_DECS_NUM * DESCS_AREAS)
#define QMGR_SCRATCH_SIZE	(TOTAL_DESCS_NUM * 4)

#define QMGR_LRAM0_BASE		0x80
#define QMGR_LRAM_SIZE		0x84
#define QMGR_LRAM1_BASE		0x88
#define QMGR_MEMBASE(x)		(0x1000 + (x) * 0x10)
#define QMGR_MEMCTRL(x)		(0x1004 + (x) * 0x10)
#define QMGR_MEMCTRL_IDX_SH	16
#define QMGR_MEMCTRL_DESC_SH	8

#define QMGR_NUM_PEND	5
#define QMGR_PEND(x)	(0x90 + (x) * 4)

#define QMGR_PENDING_SLOT_Q(x)	(x / 32)
#define QMGR_PENDING_BIT_Q(x)	(x % 32)

#define QMGR_QUEUE_A(n)	(0x2000 + (n) * 0x10)
#define QMGR_QUEUE_B(n)	(0x2004 + (n) * 0x10)
#define QMGR_QUEUE_C(n)	(0x2008 + (n) * 0x10)
#define QMGR_QUEUE_D(n)	(0x200c + (n) * 0x10)

/* Packet Descriptor */
#define PD2_ZERO_LENGTH		(1 << 19)

struct cppi41_channel {
	struct dma_chan chan;
	struct dma_async_tx_descriptor txd;
	struct cppi41_dd *cdd;
	struct cppi41_desc *desc;
	dma_addr_t desc_phys;
	void __iomem *gcr_reg;
	int is_tx;
	u32 residue;

	unsigned int q_num;
	unsigned int q_comp_num;
	unsigned int port_num;

	unsigned td_retry;
	unsigned td_queued:1;
	unsigned td_seen:1;
	unsigned td_desc_seen:1;

	struct list_head node;		/* Node for pending list */
};

struct cppi41_desc {
	u32 pd0;
	u32 pd1;
	u32 pd2;
	u32 pd3;
	u32 pd4;
	u32 pd5;
	u32 pd6;
	u32 pd7;
} __aligned(32);

struct chan_queues {
	u16 submit;
	u16 complete;
};

struct cppi41_dd {
	struct dma_device ddev;

	void *qmgr_scratch;
	dma_addr_t scratch_phys;

	struct cppi41_desc *cd;
	dma_addr_t descs_phys;
	u32 first_td_desc;
	struct cppi41_channel *chan_busy[ALLOC_DECS_NUM];

	void __iomem *usbss_mem;
	void __iomem *ctrl_mem;
	void __iomem *sched_mem;
	void __iomem *qmgr_mem;
	unsigned int irq;
	const struct chan_queues *queues_rx;
	const struct chan_queues *queues_tx;
	struct chan_queues td_queue;

	struct list_head pending;	/* Pending queued transfers */
	spinlock_t lock;		/* Lock for pending list */

	/* context for suspend/resume */
	unsigned int dma_tdfdq;

	bool is_suspended;
};

#define FIST_COMPLETION_QUEUE	93
static struct chan_queues usb_queues_tx[] = {
	/* USB0 ENDP 1 */
	[ 0] = { .submit = 32, .complete =  93},
	[ 1] = { .submit = 34, .complete =  94},
	[ 2] = { .submit = 36, .complete =  95},
	[ 3] = { .submit = 38, .complete =  96},
	[ 4] = { .submit = 40, .complete =  97},
	[ 5] = { .submit = 42, .complete =  98},
	[ 6] = { .submit = 44, .complete =  99},
	[ 7] = { .submit = 46, .complete = 100},
	[ 8] = { .submit = 48, .complete = 101},
	[ 9] = { .submit = 50, .complete = 102},
	[10] = { .submit = 52, .complete = 103},
	[11] = { .submit = 54, .complete = 104},
	[12] = { .submit = 56, .complete = 105},
	[13] = { .submit = 58, .complete = 106},
	[14] = { .submit = 60, .complete = 107},

	/* USB1 ENDP1 */
	[15] = { .submit = 62, .complete = 125},
	[16] = { .submit = 64, .complete = 126},
	[17] = { .submit = 66, .complete = 127},
	[18] = { .submit = 68, .complete = 128},
	[19] = { .submit = 70, .complete = 129},
	[20] = { .submit = 72, .complete = 130},
	[21] = { .submit = 74, .complete = 131},
	[22] = { .submit = 76, .complete = 132},
	[23] = { .submit = 78, .complete = 133},
	[24] = { .submit = 80, .complete = 134},
	[25] = { .submit = 82, .complete = 135},
	[26] = { .submit = 84, .complete = 136},
	[27] = { .submit = 86, .complete = 137},
	[28] = { .submit = 88, .complete = 138},
	[29] = { .submit = 90, .complete = 139},
};

static const struct chan_queues usb_queues_rx[] = {
	/* USB0 ENDP 1 */
	[ 0] = { .submit =  1, .complete = 109},
	[ 1] = { .submit =  2, .complete = 110},
	[ 2] = { .submit =  3, .complete = 111},
	[ 3] = { .submit =  4, .complete = 112},
	[ 4] = { .submit =  5, .complete = 113},
	[ 5] = { .submit =  6, .complete = 114},
	[ 6] = { .submit =  7, .complete = 115},
	[ 7] = { .submit =  8, .complete = 116},
	[ 8] = { .submit =  9, .complete = 117},
	[ 9] = { .submit = 10, .complete = 118},
	[10] = { .submit = 11, .complete = 119},
	[11] = { .submit = 12, .complete = 120},
	[12] = { .submit = 13, .complete = 121},
	[13] = { .submit = 14, .complete = 122},
	[14] = { .submit = 15, .complete = 123},

	/* USB1 ENDP 1 */
	[15] = { .submit = 16, .complete = 141},
	[16] = { .submit = 17, .complete = 142},
	[17] = { .submit = 18, .complete = 143},
	[18] = { .submit = 19, .complete = 144},
	[19] = { .submit = 20, .complete = 145},
	[20] = { .submit = 21, .complete = 146},
	[21] = { .submit = 22, .complete = 147},
	[22] = { .submit = 23, .complete = 148},
	[23] = { .submit = 24, .complete = 149},
	[24] = { .submit = 25, .complete = 150},
	[25] = { .submit = 26, .complete = 151},
	[26] = { .submit = 27, .complete = 152},
	[27] = { .submit = 28, .complete = 153},
	[28] = { .submit = 29, .complete = 154},
	[29] = { .submit = 30, .complete = 155},
};

struct cppi_glue_infos {
	irqreturn_t (*isr)(int irq, void *data);
	const struct chan_queues *queues_rx;
	const struct chan_queues *queues_tx;
	struct chan_queues td_queue;
};

static struct cppi41_channel *to_cpp41_chan(struct dma_chan *c)
{
	return container_of(c, struct cppi41_channel, chan);
}

static struct cppi41_channel *desc_to_chan(struct cppi41_dd *cdd, u32 desc)
{
	struct cppi41_channel *c;
	u32 descs_size;
	u32 desc_num;

	descs_size = sizeof(struct cppi41_desc) * ALLOC_DECS_NUM;

	if (!((desc >= cdd->descs_phys) &&
			(desc < (cdd->descs_phys + descs_size)))) {
		return NULL;
	}

	desc_num = (desc - cdd->descs_phys) / sizeof(struct cppi41_desc);
	BUG_ON(desc_num >= ALLOC_DECS_NUM);
	c = cdd->chan_busy[desc_num];
	cdd->chan_busy[desc_num] = NULL;

	/* Usecount for chan_busy[], paired with push_desc_queue() */
	pm_runtime_put(cdd->ddev.dev);

	return c;
}

static void cppi_writel(u32 val, void *__iomem *mem)
{
	__raw_writel(val, mem);
}

static u32 cppi_readl(void *__iomem *mem)
{
	return __raw_readl(mem);
}

static u32 pd_trans_len(u32 val)
{
	return val & ((1 << (DESC_LENGTH_BITS_NUM + 1)) - 1);
}

static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
{
	u32 desc;

	desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num));
	desc &= ~0x1f;
	return desc;
}

static irqreturn_t cppi41_irq(int irq, void *data)
{
	struct cppi41_dd *cdd = data;
	struct cppi41_channel *c;
	int i;

	for (i = QMGR_PENDING_SLOT_Q(FIST_COMPLETION_QUEUE); i < QMGR_NUM_PEND;
			i++) {
		u32 val;
		u32 q_num;

		val = cppi_readl(cdd->qmgr_mem + QMGR_PEND(i));
		if (i == QMGR_PENDING_SLOT_Q(FIST_COMPLETION_QUEUE) && val) {
			u32 mask;
			/* set corresponding bit for completion Q 93 */
			mask = 1 << QMGR_PENDING_BIT_Q(FIST_COMPLETION_QUEUE);
			/* do not set all bits for queues less than Q 93 */
			mask--;
			/* now invert and keep only Q 93+ set */
			val &= ~mask;
		}

		if (val)
			__iormb();

		while (val) {
			u32 desc, len;

			/*
			 * This should never trigger, see the comments in
			 * push_desc_queue()
			 */
			WARN_ON(cdd->is_suspended);

			q_num = __fls(val);
			val &= ~(1 << q_num);
			q_num += 32 * i;
			desc = cppi41_pop_desc(cdd, q_num);
			c = desc_to_chan(cdd, desc);
			if (WARN_ON(!c)) {
				pr_err("%s() q %d desc %08x\n", __func__,
						q_num, desc);
				continue;
			}

			if (c->desc->pd2 & PD2_ZERO_LENGTH)
				len = 0;
			else
				len = pd_trans_len(c->desc->pd0);

			c->residue = pd_trans_len(c->desc->pd6) - len;
			dma_cookie_complete(&c->txd);
			dmaengine_desc_get_callback_invoke(&c->txd, NULL);
		}
	}
	return IRQ_HANDLED;
}

static dma_cookie_t cppi41_tx_submit(struct dma_async_tx_descriptor *tx)
{
	dma_cookie_t cookie;

	cookie = dma_cookie_assign(tx);

	return cookie;
}

static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	int error;

	error = pm_runtime_get_sync(cdd->ddev.dev);
	if (error < 0) {
		dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n",
			__func__, error);
		pm_runtime_put_noidle(cdd->ddev.dev);

		return error;
	}

	dma_cookie_init(chan);
	dma_async_tx_descriptor_init(&c->txd, chan);
	c->txd.tx_submit = cppi41_tx_submit;

	if (!c->is_tx)
		cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);

	pm_runtime_mark_last_busy(cdd->ddev.dev);
	pm_runtime_put_autosuspend(cdd->ddev.dev);

	return 0;
}

static void cppi41_dma_free_chan_resources(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	int error;

	error = pm_runtime_get_sync(cdd->ddev.dev);
	if (error < 0) {
		pm_runtime_put_noidle(cdd->ddev.dev);

		return;
	}

	WARN_ON(!list_empty(&cdd->pending));

	pm_runtime_mark_last_busy(cdd->ddev.dev);
	pm_runtime_put_autosuspend(cdd->ddev.dev);
}

static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan,
	dma_cookie_t cookie, struct dma_tx_state *txstate)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	enum dma_status ret;

	/* lock */
	ret = dma_cookie_status(chan, cookie, txstate);
	if (txstate && ret == DMA_COMPLETE)
		txstate->residue = c->residue;
	/* unlock */

	return ret;
}

static void push_desc_queue(struct cppi41_channel *c)
{
	struct cppi41_dd *cdd = c->cdd;
	u32 desc_num;
	u32 desc_phys;
	u32 reg;

	c->residue = 0;

	reg = GCR_CHAN_ENABLE;
	if (!c->is_tx) {
		reg |= GCR_STARV_RETRY;
		reg |= GCR_DESC_TYPE_HOST;
		reg |= c->q_comp_num;
	}

	cppi_writel(reg, c->gcr_reg);

	/*
	 * We don't use writel() but __raw_writel(), so we have to make sure
	 * that the DMA descriptor in coherent memory has made it to main
	 * memory before starting the DMA engine.
	 */
	__iowmb();

	/*
	 * DMA transfers can take at least 200ms to complete with USB mass
	 * storage connected. To prevent autosuspend timeouts, we must use
	 * pm_runtime_get/put() when chan_busy[] is modified. This will get
	 * cleared in desc_to_chan() or cppi41_stop_chan() depending on the
	 * outcome of the transfer.
	 */
	pm_runtime_get(cdd->ddev.dev);

	desc_phys = lower_32_bits(c->desc_phys);
	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
	WARN_ON(cdd->chan_busy[desc_num]);
	cdd->chan_busy[desc_num] = c;

	reg = (sizeof(struct cppi41_desc) - 24) / 4;
	reg |= desc_phys;
	cppi_writel(reg, cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num));
}

/*
 * Caller must hold cdd->lock to prevent push_desc_queue() from getting
 * called out of order. Both cppi41_dma_issue_pending() and
 * cppi41_runtime_resume() call this function.
 */
static void cppi41_run_queue(struct cppi41_dd *cdd)
{
	struct cppi41_channel *c, *_c;

	list_for_each_entry_safe(c, _c, &cdd->pending, node) {
		push_desc_queue(c);
		list_del(&c->node);
	}
}

static void cppi41_dma_issue_pending(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	unsigned long flags;
	int error;

	error = pm_runtime_get(cdd->ddev.dev);
	if ((error != -EINPROGRESS) && error < 0) {
		pm_runtime_put_noidle(cdd->ddev.dev);
		dev_err(cdd->ddev.dev, "Failed to pm_runtime_get: %i\n",
			error);

		return;
	}

	spin_lock_irqsave(&cdd->lock, flags);
	list_add_tail(&c->node, &cdd->pending);
	if (!cdd->is_suspended)
		cppi41_run_queue(cdd);
	spin_unlock_irqrestore(&cdd->lock, flags);

	pm_runtime_mark_last_busy(cdd->ddev.dev);
	pm_runtime_put_autosuspend(cdd->ddev.dev);
}

static u32 get_host_pd0(u32 length)
{
	u32 reg;

	reg = DESC_TYPE_HOST << DESC_TYPE;
	reg |= length;

	return reg;
}

static u32 get_host_pd1(struct cppi41_channel *c)
{
	u32 reg;

	reg = 0;

	return reg;
}

static u32 get_host_pd2(struct cppi41_channel *c)
{
	u32 reg;

	reg = DESC_TYPE_USB;
	reg |= c->q_comp_num;

	return reg;
}

static u32 get_host_pd3(u32 length)
{
	u32 reg;

	/* PD3 = packet size */
	reg = length;

	return reg;
}

static u32 get_host_pd6(u32 length)
{
	u32 reg;

	/* PD6 buffer size */
	reg = DESC_PD_COMPLETE;
	reg |= length;

	return reg;
}

static u32 get_host_pd4_or_7(u32 addr)
{
	u32 reg;

	reg = addr;

	return reg;
}

static u32 get_host_pd5(void)
{
	u32 reg;

	reg = 0;

	return reg;
}

static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg(
	struct dma_chan *chan, struct scatterlist *sgl, unsigned sg_len,
	enum dma_transfer_direction dir, unsigned long tx_flags, void *context)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_desc *d;
	struct scatterlist *sg;
	unsigned int i;

	d = c->desc;
	for_each_sg(sgl, sg, sg_len, i) {
		u32 addr;
		u32 len;

		/* We need to use more than one desc once musb supports sg */
		addr = lower_32_bits(sg_dma_address(sg));
		len = sg_dma_len(sg);

		d->pd0 = get_host_pd0(len);
		d->pd1 = get_host_pd1(c);
		d->pd2 = get_host_pd2(c);
		d->pd3 = get_host_pd3(len);
		d->pd4 = get_host_pd4_or_7(addr);
		d->pd5 = get_host_pd5();
		d->pd6 = get_host_pd6(len);
		d->pd7 = get_host_pd4_or_7(addr);

		d++;
	}

	return &c->txd;
}

static void cppi41_compute_td_desc(struct cppi41_desc *d)
{
	d->pd0 = DESC_TYPE_TEARD << DESC_TYPE;
}

static int cppi41_tear_down_chan(struct cppi41_channel *c)
{
	struct dmaengine_result abort_result;
	struct cppi41_dd *cdd = c->cdd;
	struct cppi41_desc *td;
	u32 reg;
	u32 desc_phys;
	u32 td_desc_phys;

	td = cdd->cd;
	td += cdd->first_td_desc;

	td_desc_phys = cdd->descs_phys;
	td_desc_phys += cdd->first_td_desc * sizeof(struct cppi41_desc);

	if (!c->td_queued) {
		cppi41_compute_td_desc(td);
		__iowmb();

		reg = (sizeof(struct cppi41_desc) - 24) / 4;
		reg |= td_desc_phys;
		cppi_writel(reg, cdd->qmgr_mem +
				QMGR_QUEUE_D(cdd->td_queue.submit));

		reg = GCR_CHAN_ENABLE;
		if (!c->is_tx) {
			reg |= GCR_STARV_RETRY;
			reg |= GCR_DESC_TYPE_HOST;
			reg |= c->q_comp_num;
		}
		reg |= GCR_TEARDOWN;
		cppi_writel(reg, c->gcr_reg);
		c->td_queued = 1;
		c->td_retry = 500;
	}

	if (!c->td_seen || !c->td_desc_seen) {

		desc_phys = cppi41_pop_desc(cdd, cdd->td_queue.complete);
		if (!desc_phys)
			desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);

		if (desc_phys == c->desc_phys) {
			c->td_desc_seen = 1;

		} else if (desc_phys == td_desc_phys) {
			u32 pd0;

			__iormb();
			pd0 = td->pd0;
			WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
			WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
			WARN_ON((pd0 & 0x1f) != c->port_num);
			c->td_seen = 1;
		} else if (desc_phys) {
			WARN_ON_ONCE(1);
		}
	}
	c->td_retry--;
	/*
	 * If the TX descriptor / channel is in use, the caller needs to poke
	 * its TD bit multiple times. After that the hardware releases the
	 * transfer descriptor followed by the TD descriptor. Waiting seems
	 * not to make any difference.
	 * RX seems to be thrown out right away. However, once the TearDown
	 * descriptor gets through we are done. If we have seen the transfer
	 * descriptor before the TD we fetch it from the submit queue; it has
	 * to be there waiting for us.
	 */
	if (!c->td_seen && c->td_retry) {
		udelay(1);
		return -EAGAIN;
	}
	WARN_ON(!c->td_retry);

	if (!c->td_desc_seen) {
		desc_phys = cppi41_pop_desc(cdd, c->q_num);
		if (!desc_phys)
			desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
		WARN_ON(!desc_phys);
	}

	c->td_queued = 0;
	c->td_seen = 0;
	c->td_desc_seen = 0;
	cppi_writel(0, c->gcr_reg);

	/* Invoke the callback to do the necessary clean-up */
	abort_result.result = DMA_TRANS_ABORTED;
	dma_cookie_complete(&c->txd);
	dmaengine_desc_get_callback_invoke(&c->txd, &abort_result);

	return 0;
}

static int cppi41_stop_chan(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	u32 desc_num;
	u32 desc_phys;
	int ret;

	desc_phys = lower_32_bits(c->desc_phys);
	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
	if (!cdd->chan_busy[desc_num])
		return 0;

	ret = cppi41_tear_down_chan(c);
	if (ret)
		return ret;

	WARN_ON(!cdd->chan_busy[desc_num]);
	cdd->chan_busy[desc_num] = NULL;

	/* Usecount for chan_busy[], paired with push_desc_queue() */
	pm_runtime_put(cdd->ddev.dev);

	return 0;
}

static void cleanup_chans(struct cppi41_dd *cdd)
{
	while (!list_empty(&cdd->ddev.channels)) {
		struct cppi41_channel *cchan;

		cchan = list_first_entry(&cdd->ddev.channels,
				struct cppi41_channel, chan.device_node);
		list_del(&cchan->chan.device_node);
		kfree(cchan);
	}
}

static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
{
	struct cppi41_channel *cchan;
	int i;
	int ret;
	u32 n_chans;

	ret = of_property_read_u32(dev->of_node, "#dma-channels",
			&n_chans);
	if (ret)
		return ret;
	/*
	 * Each channel can only be used as TX or as RX, so we add twice
	 * as many DMA channels because USB can only do RX or TX.
	 */
	n_chans *= 2;

	for (i = 0; i < n_chans; i++) {
		cchan = kzalloc(sizeof(*cchan), GFP_KERNEL);
		if (!cchan)
			goto err;

		cchan->cdd = cdd;
		if (i & 1) {
			cchan->gcr_reg = cdd->ctrl_mem + DMA_TXGCR(i >> 1);
			cchan->is_tx = 1;
		} else {
			cchan->gcr_reg = cdd->ctrl_mem + DMA_RXGCR(i >> 1);
			cchan->is_tx = 0;
		}
		cchan->port_num = i >> 1;
		cchan->desc = &cdd->cd[i];
		cchan->desc_phys = cdd->descs_phys;
		cchan->desc_phys += i * sizeof(struct cppi41_desc);
		cchan->chan.device = &cdd->ddev;
		list_add_tail(&cchan->chan.device_node, &cdd->ddev.channels);
	}
	cdd->first_td_desc = n_chans;

	return 0;
err:
	cleanup_chans(cdd);
	return -ENOMEM;
}

static void purge_descs(struct device *dev, struct cppi41_dd *cdd)
{
	unsigned int mem_decs;
	int i;

	mem_decs = ALLOC_DECS_NUM * sizeof(struct cppi41_desc);

	for (i = 0; i < DESCS_AREAS; i++) {

		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMBASE(i));
		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMCTRL(i));

		dma_free_coherent(dev, mem_decs, cdd->cd,
				cdd->descs_phys);
	}
}

static void disable_sched(struct cppi41_dd *cdd)
{
	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
}

static void deinit_cppi41(struct device *dev, struct cppi41_dd *cdd)
{
	disable_sched(cdd);

	purge_descs(dev, cdd);

	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	dma_free_coherent(dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch,
			cdd->scratch_phys);
}

static int init_descs(struct device *dev, struct cppi41_dd *cdd)
{
	unsigned int desc_size;
	unsigned int mem_decs;
	int i;
	u32 reg;
	u32 idx;

	BUILD_BUG_ON(sizeof(struct cppi41_desc) &
			(sizeof(struct cppi41_desc) - 1));
	BUILD_BUG_ON(sizeof(struct cppi41_desc) < 32);
	BUILD_BUG_ON(ALLOC_DECS_NUM < 32);

	desc_size = sizeof(struct cppi41_desc);
	mem_decs = ALLOC_DECS_NUM * desc_size;

	idx = 0;
	for (i = 0; i < DESCS_AREAS; i++) {

		reg = idx << QMGR_MEMCTRL_IDX_SH;
		reg |= (ilog2(desc_size) - 5) << QMGR_MEMCTRL_DESC_SH;
		reg |= ilog2(ALLOC_DECS_NUM) - 5;

		BUILD_BUG_ON(DESCS_AREAS != 1);
		cdd->cd = dma_alloc_coherent(dev, mem_decs,
				&cdd->descs_phys, GFP_KERNEL);
		if (!cdd->cd)
			return -ENOMEM;

		cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));
		cppi_writel(reg, cdd->qmgr_mem + QMGR_MEMCTRL(i));

		idx += ALLOC_DECS_NUM;
	}
	return 0;
}

static void init_sched(struct cppi41_dd *cdd)
{
	unsigned ch;
	unsigned word;
	u32 reg;

	word = 0;
	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
	for (ch = 0; ch < 15 * 2; ch += 2) {

		reg = SCHED_ENTRY0_CHAN(ch);
		reg |= SCHED_ENTRY1_CHAN(ch) | SCHED_ENTRY1_IS_RX;

		reg |= SCHED_ENTRY2_CHAN(ch + 1);
		reg |= SCHED_ENTRY3_CHAN(ch + 1) | SCHED_ENTRY3_IS_RX;
		cppi_writel(reg, cdd->sched_mem + DMA_SCHED_WORD(word));
		word++;
	}
	reg = 15 * 2 * 2 - 1;
	reg |= DMA_SCHED_CTRL_EN;
	cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL);
}

static int init_cppi41(struct device *dev, struct cppi41_dd *cdd)
{
	int ret;

	BUILD_BUG_ON(QMGR_SCRATCH_SIZE > ((1 << 14) - 1));
	cdd->qmgr_scratch = dma_alloc_coherent(dev, QMGR_SCRATCH_SIZE,
			&cdd->scratch_phys, GFP_KERNEL);
	if (!cdd->qmgr_scratch)
		return -ENOMEM;

	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);

	ret = init_descs(dev, cdd);
	if (ret)
		goto err_td;

	cppi_writel(cdd->td_queue.submit, cdd->ctrl_mem + DMA_TDFDQ);
	init_sched(cdd);
	return 0;
err_td:
	deinit_cppi41(dev, cdd);
	return ret;
}

static struct platform_driver cpp41_dma_driver;
/*
 * The param format is:
 * X Y
 * X: Port
 * Y: 0 = RX else TX
 */
#define INFO_PORT	0
#define INFO_IS_TX	1

static bool cpp41_dma_filter_fn(struct dma_chan *chan, void *param)
{
	struct cppi41_channel *cchan;
	struct cppi41_dd *cdd;
	const struct chan_queues *queues;
	u32 *num = param;

	if (chan->device->dev->driver != &cpp41_dma_driver.driver)
		return false;

	cchan = to_cpp41_chan(chan);

	if (cchan->port_num != num[INFO_PORT])
		return false;

	if (cchan->is_tx && !num[INFO_IS_TX])
		return false;
	cdd = cchan->cdd;
	if (cchan->is_tx)
		queues = cdd->queues_tx;
	else
		queues = cdd->queues_rx;

	BUILD_BUG_ON(ARRAY_SIZE(usb_queues_rx) != ARRAY_SIZE(usb_queues_tx));
	if (WARN_ON(cchan->port_num > ARRAY_SIZE(usb_queues_rx)))
		return false;

	cchan->q_num = queues[cchan->port_num].submit;
	cchan->q_comp_num = queues[cchan->port_num].complete;
	return true;
}

static struct of_dma_filter_info cpp41_dma_info = {
	.filter_fn = cpp41_dma_filter_fn,
};

static struct dma_chan *cppi41_dma_xlate(struct of_phandle_args *dma_spec,
		struct of_dma *ofdma)
{
	int count = dma_spec->args_count;
	struct of_dma_filter_info *info = ofdma->of_dma_data;

	if (!info || !info->filter_fn)
		return NULL;

	if (count != 2)
		return NULL;

	return dma_request_channel(info->dma_cap, info->filter_fn,
			&dma_spec->args[0]);
}

static const struct cppi_glue_infos usb_infos = {
	.isr = cppi41_irq,
	.queues_rx = usb_queues_rx,
	.queues_tx = usb_queues_tx,
	.td_queue = { .submit = 31, .complete = 0 },
};

static const struct of_device_id cppi41_dma_ids[] = {
	{ .compatible = "ti,am3359-cppi41", .data = &usb_infos},
	{},
};
MODULE_DEVICE_TABLE(of, cppi41_dma_ids);

static const struct cppi_glue_infos *get_glue_info(struct device *dev)
{
	const struct of_device_id *of_id;

	of_id = of_match_node(cppi41_dma_ids, dev->of_node);
	if (!of_id)
		return NULL;
	return of_id->data;
}

#define CPPI41_DMA_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
				BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
				BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
				BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))

static int cppi41_dma_probe(struct platform_device *pdev)
{
	struct cppi41_dd *cdd;
	struct device *dev = &pdev->dev;
	const struct cppi_glue_infos *glue_info;
	int irq;
	int ret;

	glue_info = get_glue_info(dev);
	if (!glue_info)
		return -EINVAL;

	cdd = devm_kzalloc(&pdev->dev, sizeof(*cdd), GFP_KERNEL);
	if (!cdd)
		return -ENOMEM;

	dma_cap_set(DMA_SLAVE, cdd->ddev.cap_mask);
	cdd->ddev.device_alloc_chan_resources = cppi41_dma_alloc_chan_resources;
	cdd->ddev.device_free_chan_resources = cppi41_dma_free_chan_resources;
	cdd->ddev.device_tx_status = cppi41_dma_tx_status;
	cdd->ddev.device_issue_pending = cppi41_dma_issue_pending;
	cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg;
	cdd->ddev.device_terminate_all = cppi41_stop_chan;
	cdd->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
	cdd->ddev.src_addr_widths = CPPI41_DMA_BUSWIDTHS;
	cdd->ddev.dst_addr_widths = CPPI41_DMA_BUSWIDTHS;
	cdd->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
	cdd->ddev.dev = dev;
	INIT_LIST_HEAD(&cdd->ddev.channels);
	cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;

	cdd->usbss_mem = of_iomap(dev->of_node, 0);
	cdd->ctrl_mem = of_iomap(dev->of_node, 1);
	cdd->sched_mem = of_iomap(dev->of_node, 2);
	cdd->qmgr_mem = of_iomap(dev->of_node, 3);
	spin_lock_init(&cdd->lock);
	INIT_LIST_HEAD(&cdd->pending);

	platform_set_drvdata(pdev, cdd);

	if (!cdd->usbss_mem || !cdd->ctrl_mem || !cdd->sched_mem ||
			!cdd->qmgr_mem)
		return -ENXIO;

	pm_runtime_enable(dev);
	pm_runtime_set_autosuspend_delay(dev, 100);
	pm_runtime_use_autosuspend(dev);
	ret = pm_runtime_get_sync(dev);
	if (ret < 0)
		goto err_get_sync;

	cdd->queues_rx = glue_info->queues_rx;
	cdd->queues_tx = glue_info->queues_tx;
	cdd->td_queue = glue_info->td_queue;

	ret = init_cppi41(dev, cdd);
	if (ret)
		goto err_init_cppi;

	ret = cppi41_add_chans(dev, cdd);
	if (ret)
		goto err_chans;

	irq = irq_of_parse_and_map(dev->of_node, 0);
	if (!irq) {
		ret = -EINVAL;
		goto err_irq;
	}

	ret = devm_request_irq(&pdev->dev, irq, glue_info->isr, IRQF_SHARED,
			dev_name(dev), cdd);
	if (ret)
		goto err_irq;
	cdd->irq = irq;

	ret = dma_async_device_register(&cdd->ddev);
	if (ret)
		goto err_dma_reg;

	ret = of_dma_controller_register(dev->of_node,
			cppi41_dma_xlate, &cpp41_dma_info);
	if (ret)
		goto err_of;

	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);

	return 0;
err_of:
	dma_async_device_unregister(&cdd->ddev);
err_dma_reg:
err_irq:
	cleanup_chans(cdd);
err_chans:
	deinit_cppi41(dev, cdd);
err_init_cppi:
	pm_runtime_dont_use_autosuspend(dev);
err_get_sync:
	pm_runtime_put_sync(dev);
	pm_runtime_disable(dev);
	iounmap(cdd->usbss_mem);
	iounmap(cdd->ctrl_mem);
	iounmap(cdd->sched_mem);
	iounmap(cdd->qmgr_mem);
	return ret;
}

static int cppi41_dma_remove(struct platform_device *pdev)
{
	struct cppi41_dd *cdd = platform_get_drvdata(pdev);
	int error;

	error = pm_runtime_get_sync(&pdev->dev);
	if (error < 0)
		dev_err(&pdev->dev, "%s could not pm_runtime_get: %i\n",
			__func__, error);
	of_dma_controller_free(pdev->dev.of_node);
	dma_async_device_unregister(&cdd->ddev);

	devm_free_irq(&pdev->dev, cdd->irq, cdd);
	cleanup_chans(cdd);
	deinit_cppi41(&pdev->dev, cdd);
	iounmap(cdd->usbss_mem);
	iounmap(cdd->ctrl_mem);
	iounmap(cdd->sched_mem);
	iounmap(cdd->qmgr_mem);
	pm_runtime_dont_use_autosuspend(&pdev->dev);
	pm_runtime_put_sync(&pdev->dev);
	pm_runtime_disable(&pdev->dev);
	return 0;
}

static int __maybe_unused cppi41_suspend(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);

	cdd->dma_tdfdq = cppi_readl(cdd->ctrl_mem + DMA_TDFDQ);
	disable_sched(cdd);

	return 0;
}

static int __maybe_unused cppi41_resume(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);
	struct cppi41_channel *c;
	int i;

	for (i = 0; i < DESCS_AREAS; i++)
		cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));

	list_for_each_entry(c, &cdd->ddev.channels, chan.device_node)
		if (!c->is_tx)
			cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);

	init_sched(cdd);

	cppi_writel(cdd->dma_tdfdq, cdd->ctrl_mem + DMA_TDFDQ);
	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);

	return 0;
}

static int __maybe_unused cppi41_runtime_suspend(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);
	unsigned long flags;

	spin_lock_irqsave(&cdd->lock, flags);
	cdd->is_suspended = true;
	WARN_ON(!list_empty(&cdd->pending));
	spin_unlock_irqrestore(&cdd->lock, flags);

	return 0;
}

static int __maybe_unused cppi41_runtime_resume(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);
	unsigned long flags;

	spin_lock_irqsave(&cdd->lock, flags);
	cdd->is_suspended = false;
	cppi41_run_queue(cdd);
	spin_unlock_irqrestore(&cdd->lock, flags);

	return 0;
}

static const struct dev_pm_ops cppi41_pm_ops = {
	SET_LATE_SYSTEM_SLEEP_PM_OPS(cppi41_suspend, cppi41_resume)
	SET_RUNTIME_PM_OPS(cppi41_runtime_suspend,
			   cppi41_runtime_resume,
			   NULL)
};

static struct platform_driver cpp41_dma_driver = {
	.probe = cppi41_dma_probe,
	.remove = cppi41_dma_remove,
	.driver = {
		.name = "cppi41-dma-engine",
		.pm = &cppi41_pm_ops,
		.of_match_table = of_match_ptr(cppi41_dma_ids),
	},
};

module_platform_driver(cpp41_dma_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Sebastian Andrzej Siewior <bigeasy@linutronix.de>");