Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

usb: chipidea: udc: add software sg list support

The chipidea controller doesn't support short transfer for sg list,
so we still keep setting IOC per TD, otherwise, there will be no interrupt
for short transfer. Each TD has five entries for data buffers; each data
buffer could be a non-contiguous 4KB buffer, so it could handle
up to 5 sg buffers one time. The benefit of this patch is avoiding
OOM for low-memory systems (e.g., 256MB) during large USB transfers, see
below for details. The non-sg handling has not changed.

ufb: page allocation failure: order:4, mode:0x40cc0(GFP_KERNEL|__GFP_COMP),
nodemask=(null),cpuset=/,mems_allowed=0
CPU: 2 PID: 370 Comm: ufb Not tainted 5.4.3-1.1.0+g54b3750d61fd #1
Hardware name: NXP i.MX8MNano DDR4 EVK board (DT)
Call trace:
dump_backtrace+0x0/0x140
show_stack+0x14/0x20
dump_stack+0xb4/0xf8
warn_alloc+0xec/0x158
__alloc_pages_slowpath+0x9cc/0x9f8
__alloc_pages_nodemask+0x21c/0x280
alloc_pages_current+0x7c/0xe8
kmalloc_order+0x1c/0x88
__kmalloc+0x25c/0x298
ffs_epfile_io.isra.0+0x20c/0x7d0
ffs_epfile_read_iter+0xa8/0x188
new_sync_read+0xe4/0x170
__vfs_read+0x2c/0x40
vfs_read+0xc8/0x1a0
ksys_read+0x68/0xf0
__arm64_sys_read+0x18/0x20
el0_svc_common.constprop.0+0x68/0x160
el0_svc_handler+0x20/0x80
el0_svc+0x8/0xc
Mem-Info:
active_anon:2856 inactive_anon:5269 isolated_anon:12
active_file:5238 inactive_file:18803 isolated_file:0
unevictable:0 dirty:22 writeback:416 unstable:0
slab_reclaimable:4073 slab_unreclaimable:3408
mapped:727 shmem:7393 pagetables:37 bounce:0
free:4104 free_pcp:118 free_cma:0
Node 0 active_anon:11436kB inactive_anon:21076kB active_file:20988kB inactive_file:75216kB unevictable:0kB isolated(ano
Node 0 DMA32 free:16820kB min:1808kB low:2260kB high:2712kB active_anon:11436kB inactive_anon:21076kB active_file:2098B
lowmem_reserve[]: 0 0 0
Node 0 DMA32: 508*4kB (UME) 242*8kB (UME) 730*16kB (UM) 21*32kB (UME) 5*64kB (UME) 2*128kB (M) 0*256kB 0*512kB 0*1024kB
Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=1048576kB
Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=32768kB
Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB
Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=64kB
31455 total pagecache pages
0 pages in swap cache
Swap cache stats: add 0, delete 0, find 0/0
Free swap = 0kB
Total swap = 0kB
65536 pages RAM
0 pages HighMem/MovableOnly
10766 pages reserved
0 pages cma reserved
0 pages hwpoisoned

Reviewed-by: Jun Li <jun.li@nxp.com>
Signed-off-by: Peter Chen <peter.chen@nxp.com>

+133 -32
+1
drivers/usb/chipidea/ci.h
··· 25 25 #define TD_PAGE_COUNT 5 26 26 #define CI_HDRC_PAGE_SIZE 4096ul /* page size for TD's */ 27 27 #define ENDPT_MAX 32 28 + #define CI_MAX_BUF_SIZE (TD_PAGE_COUNT * CI_HDRC_PAGE_SIZE) 28 29 29 30 /****************************************************************************** 30 31 * REGISTERS
+131 -32
drivers/usb/chipidea/udc.c
··· 338 338 *****************************************************************************/ 339 339 340 340 static int add_td_to_list(struct ci_hw_ep *hwep, struct ci_hw_req *hwreq, 341 - unsigned length) 341 + unsigned int length, struct scatterlist *s) 342 342 { 343 343 int i; 344 344 u32 temp; ··· 366 366 node->ptr->token |= cpu_to_le32(mul << __ffs(TD_MULTO)); 367 367 } 368 368 369 - temp = (u32) (hwreq->req.dma + hwreq->req.actual); 369 + if (s) { 370 + temp = (u32) (sg_dma_address(s) + hwreq->req.actual); 371 + node->td_remaining_size = CI_MAX_BUF_SIZE - length; 372 + } else { 373 + temp = (u32) (hwreq->req.dma + hwreq->req.actual); 374 + } 375 + 370 376 if (length) { 371 377 node->ptr->page[0] = cpu_to_le32(temp); 372 378 for (i = 1; i < TD_PAGE_COUNT; i++) { ··· 406 400 return ((ep->dir == TX) ? USB_ENDPOINT_DIR_MASK : 0) | ep->num; 407 401 } 408 402 403 + static int prepare_td_for_non_sg(struct ci_hw_ep *hwep, 404 + struct ci_hw_req *hwreq) 405 + { 406 + unsigned int rest = hwreq->req.length; 407 + int pages = TD_PAGE_COUNT; 408 + int ret = 0; 409 + 410 + if (rest == 0) { 411 + ret = add_td_to_list(hwep, hwreq, 0, NULL); 412 + if (ret < 0) 413 + return ret; 414 + } 415 + 416 + /* 417 + * The first buffer could be not page aligned. 418 + * In that case we have to span into one extra td. 
419 + */ 420 + if (hwreq->req.dma % PAGE_SIZE) 421 + pages--; 422 + 423 + while (rest > 0) { 424 + unsigned int count = min(hwreq->req.length - hwreq->req.actual, 425 + (unsigned int)(pages * CI_HDRC_PAGE_SIZE)); 426 + 427 + ret = add_td_to_list(hwep, hwreq, count, NULL); 428 + if (ret < 0) 429 + return ret; 430 + 431 + rest -= count; 432 + } 433 + 434 + if (hwreq->req.zero && hwreq->req.length && hwep->dir == TX 435 + && (hwreq->req.length % hwep->ep.maxpacket == 0)) { 436 + ret = add_td_to_list(hwep, hwreq, 0, NULL); 437 + if (ret < 0) 438 + return ret; 439 + } 440 + 441 + return ret; 442 + } 443 + 444 + static int prepare_td_per_sg(struct ci_hw_ep *hwep, struct ci_hw_req *hwreq, 445 + struct scatterlist *s) 446 + { 447 + unsigned int rest = sg_dma_len(s); 448 + int ret = 0; 449 + 450 + hwreq->req.actual = 0; 451 + while (rest > 0) { 452 + unsigned int count = min_t(unsigned int, rest, 453 + CI_MAX_BUF_SIZE); 454 + 455 + ret = add_td_to_list(hwep, hwreq, count, s); 456 + if (ret < 0) 457 + return ret; 458 + 459 + rest -= count; 460 + } 461 + 462 + return ret; 463 + } 464 + 465 + static void ci_add_buffer_entry(struct td_node *node, struct scatterlist *s) 466 + { 467 + int empty_td_slot_index = (CI_MAX_BUF_SIZE - node->td_remaining_size) 468 + / CI_HDRC_PAGE_SIZE; 469 + int i; 470 + 471 + node->ptr->token += 472 + cpu_to_le32(sg_dma_len(s) << __ffs(TD_TOTAL_BYTES)); 473 + 474 + for (i = empty_td_slot_index; i < TD_PAGE_COUNT; i++) { 475 + u32 page = (u32) sg_dma_address(s) + 476 + (i - empty_td_slot_index) * CI_HDRC_PAGE_SIZE; 477 + 478 + page &= ~TD_RESERVED_MASK; 479 + node->ptr->page[i] = cpu_to_le32(page); 480 + } 481 + } 482 + 483 + static int prepare_td_for_sg(struct ci_hw_ep *hwep, struct ci_hw_req *hwreq) 484 + { 485 + struct usb_request *req = &hwreq->req; 486 + struct scatterlist *s = req->sg; 487 + int ret = 0, i = 0; 488 + struct td_node *node = NULL; 489 + 490 + if (!s || req->zero || req->length == 0) { 491 + dev_err(hwep->ci->dev, "not supported 
operation for sg\n"); 492 + return -EINVAL; 493 + } 494 + 495 + while (i++ < req->num_mapped_sgs) { 496 + if (sg_dma_address(s) % PAGE_SIZE) { 497 + dev_err(hwep->ci->dev, "not page aligned sg buffer\n"); 498 + return -EINVAL; 499 + } 500 + 501 + if (node && (node->td_remaining_size >= sg_dma_len(s))) { 502 + ci_add_buffer_entry(node, s); 503 + node->td_remaining_size -= sg_dma_len(s); 504 + } else { 505 + ret = prepare_td_per_sg(hwep, hwreq, s); 506 + if (ret) 507 + return ret; 508 + 509 + node = list_entry(hwreq->tds.prev, 510 + struct td_node, td); 511 + } 512 + 513 + s = sg_next(s); 514 + } 515 + 516 + return ret; 517 + } 518 + 409 519 /** 410 520 * _hardware_enqueue: configures a request at hardware level 411 521 * @hwep: endpoint ··· 533 411 { 534 412 struct ci_hdrc *ci = hwep->ci; 535 413 int ret = 0; 536 - unsigned rest = hwreq->req.length; 537 - int pages = TD_PAGE_COUNT; 538 414 struct td_node *firstnode, *lastnode; 539 415 540 416 /* don't queue twice */ ··· 546 426 if (ret) 547 427 return ret; 548 428 549 - /* 550 - * The first buffer could be not page aligned. 551 - * In that case we have to span into one extra td. 
552 - */ 553 - if (hwreq->req.dma % PAGE_SIZE) 554 - pages--; 429 + if (hwreq->req.num_mapped_sgs) 430 + ret = prepare_td_for_sg(hwep, hwreq); 431 + else 432 + ret = prepare_td_for_non_sg(hwep, hwreq); 555 433 556 - if (rest == 0) { 557 - ret = add_td_to_list(hwep, hwreq, 0); 558 - if (ret < 0) 559 - goto done; 560 - } 561 - 562 - while (rest > 0) { 563 - unsigned count = min(hwreq->req.length - hwreq->req.actual, 564 - (unsigned)(pages * CI_HDRC_PAGE_SIZE)); 565 - ret = add_td_to_list(hwep, hwreq, count); 566 - if (ret < 0) 567 - goto done; 568 - 569 - rest -= count; 570 - } 571 - 572 - if (hwreq->req.zero && hwreq->req.length && hwep->dir == TX 573 - && (hwreq->req.length % hwep->ep.maxpacket == 0)) { 574 - ret = add_td_to_list(hwep, hwreq, 0); 575 - if (ret < 0) 576 - goto done; 577 - } 434 + if (ret) 435 + return ret; 578 436 579 437 firstnode = list_first_entry(&hwreq->tds, struct td_node, td); 580 438 ··· 2039 1941 ci->gadget.max_speed = USB_SPEED_HIGH; 2040 1942 ci->gadget.name = ci->platdata->name; 2041 1943 ci->gadget.otg_caps = otg_caps; 1944 + ci->gadget.sg_supported = 1; 2042 1945 2043 1946 if (ci->platdata->flags & CI_HDRC_REQUIRES_ALIGNED_DMA) 2044 1947 ci->gadget.quirk_avoids_skb_reserve = 1;
+1
drivers/usb/chipidea/udc.h
··· 61 61 struct list_head td; 62 62 dma_addr_t dma; 63 63 struct ci_hw_td *ptr; 64 + int td_remaining_size; 64 65 }; 65 66 66 67 /**