Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xhci: TD-fragment, align the unsplittable case with a bounce buffer

If the last trb before a link is not packet size aligned, and is not
splittable, then use a bounce buffer for that chunk of data that cannot
be aligned to the max packet size.

Allocate a max packet size bounce buffer for every segment of a bulk
endpoint ring at the same time as allocating the ring.
If we need to align the data before the link trb in that segment, then
copy the data to the segment bounce buffer, dma map it, and enqueue it.
Once the td finishes, or is cancelled, unmap it.

For IN transfers we need to first map the bounce buffer, then queue it;
after it finishes, copy the bounce buffer to the original sg list, and
finally unmap it.

Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Mathias Nyman and committed by
Greg Kroah-Hartman
f9c589e1 474ed23a

+155 -40
+46 -28
drivers/usb/host/xhci-mem.c
··· 37 37 * "All components of all Command and Transfer TRBs shall be initialized to '0'" 38 38 */ 39 39 static struct xhci_segment *xhci_segment_alloc(struct xhci_hcd *xhci, 40 - unsigned int cycle_state, gfp_t flags) 40 + unsigned int cycle_state, 41 + unsigned int max_packet, 42 + gfp_t flags) 41 43 { 42 44 struct xhci_segment *seg; 43 45 dma_addr_t dma; ··· 55 53 return NULL; 56 54 } 57 55 56 + if (max_packet) { 57 + seg->bounce_buf = kzalloc(max_packet, flags | GFP_DMA); 58 + if (!seg->bounce_buf) { 59 + dma_pool_free(xhci->segment_pool, seg->trbs, dma); 60 + kfree(seg); 61 + return NULL; 62 + } 63 + } 58 64 /* If the cycle state is 0, set the cycle bit to 1 for all the TRBs */ 59 65 if (cycle_state == 0) { 60 66 for (i = 0; i < TRBS_PER_SEGMENT; i++) ··· 80 70 dma_pool_free(xhci->segment_pool, seg->trbs, seg->dma); 81 71 seg->trbs = NULL; 82 72 } 73 + kfree(seg->bounce_buf); 83 74 kfree(seg); 84 75 } 85 76 ··· 328 317 static int xhci_alloc_segments_for_ring(struct xhci_hcd *xhci, 329 318 struct xhci_segment **first, struct xhci_segment **last, 330 319 unsigned int num_segs, unsigned int cycle_state, 331 - enum xhci_ring_type type, gfp_t flags) 320 + enum xhci_ring_type type, unsigned int max_packet, gfp_t flags) 332 321 { 333 322 struct xhci_segment *prev; 334 323 335 - prev = xhci_segment_alloc(xhci, cycle_state, flags); 324 + prev = xhci_segment_alloc(xhci, cycle_state, max_packet, flags); 336 325 if (!prev) 337 326 return -ENOMEM; 338 327 num_segs--; ··· 341 330 while (num_segs > 0) { 342 331 struct xhci_segment *next; 343 332 344 - next = xhci_segment_alloc(xhci, cycle_state, flags); 333 + next = xhci_segment_alloc(xhci, cycle_state, max_packet, flags); 345 334 if (!next) { 346 335 prev = *first; 347 336 while (prev) { ··· 371 360 */ 372 361 static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci, 373 362 unsigned int num_segs, unsigned int cycle_state, 374 - enum xhci_ring_type type, gfp_t flags) 363 + enum xhci_ring_type type, unsigned int 
max_packet, gfp_t flags) 375 364 { 376 365 struct xhci_ring *ring; 377 366 int ret; ··· 381 370 return NULL; 382 371 383 372 ring->num_segs = num_segs; 373 + ring->bounce_buf_len = max_packet; 384 374 INIT_LIST_HEAD(&ring->td_list); 385 375 ring->type = type; 386 376 if (num_segs == 0) 387 377 return ring; 388 378 389 379 ret = xhci_alloc_segments_for_ring(xhci, &ring->first_seg, 390 - &ring->last_seg, num_segs, cycle_state, type, flags); 380 + &ring->last_seg, num_segs, cycle_state, type, 381 + max_packet, flags); 391 382 if (ret) 392 383 goto fail; 393 384 ··· 483 470 ring->num_segs : num_segs_needed; 484 471 485 472 ret = xhci_alloc_segments_for_ring(xhci, &first, &last, 486 - num_segs, ring->cycle_state, ring->type, flags); 473 + num_segs, ring->cycle_state, ring->type, 474 + ring->bounce_buf_len, flags); 487 475 if (ret) 488 476 return -ENOMEM; 489 477 ··· 666 652 */ 667 653 struct xhci_stream_info *xhci_alloc_stream_info(struct xhci_hcd *xhci, 668 654 unsigned int num_stream_ctxs, 669 - unsigned int num_streams, gfp_t mem_flags) 655 + unsigned int num_streams, 656 + unsigned int max_packet, gfp_t mem_flags) 670 657 { 671 658 struct xhci_stream_info *stream_info; 672 659 u32 cur_stream; ··· 719 704 * and add their segment DMA addresses to the radix tree. 720 705 * Stream 0 is reserved. 
721 706 */ 707 + 722 708 for (cur_stream = 1; cur_stream < num_streams; cur_stream++) { 723 709 stream_info->stream_rings[cur_stream] = 724 - xhci_ring_alloc(xhci, 2, 1, TYPE_STREAM, mem_flags); 710 + xhci_ring_alloc(xhci, 2, 1, TYPE_STREAM, max_packet, 711 + mem_flags); 725 712 cur_ring = stream_info->stream_rings[cur_stream]; 726 713 if (!cur_ring) 727 714 goto cleanup_rings; ··· 1020 1003 } 1021 1004 1022 1005 /* Allocate endpoint 0 ring */ 1023 - dev->eps[0].ring = xhci_ring_alloc(xhci, 2, 1, TYPE_CTRL, flags); 1006 + dev->eps[0].ring = xhci_ring_alloc(xhci, 2, 1, TYPE_CTRL, 0, flags); 1024 1007 if (!dev->eps[0].ring) 1025 1008 goto fail; 1026 1009 ··· 1451 1434 return -EINVAL; 1452 1435 1453 1436 ring_type = usb_endpoint_type(&ep->desc); 1454 - /* Set up the endpoint ring */ 1455 - virt_dev->eps[ep_index].new_ring = 1456 - xhci_ring_alloc(xhci, 2, 1, ring_type, mem_flags); 1457 - if (!virt_dev->eps[ep_index].new_ring) { 1458 - /* Attempt to use the ring cache */ 1459 - if (virt_dev->num_rings_cached == 0) 1460 - return -ENOMEM; 1461 - virt_dev->num_rings_cached--; 1462 - virt_dev->eps[ep_index].new_ring = 1463 - virt_dev->ring_cache[virt_dev->num_rings_cached]; 1464 - virt_dev->ring_cache[virt_dev->num_rings_cached] = NULL; 1465 - xhci_reinit_cached_ring(xhci, virt_dev->eps[ep_index].new_ring, 1466 - 1, ring_type); 1467 - } 1468 - virt_dev->eps[ep_index].skip = false; 1469 - ep_ring = virt_dev->eps[ep_index].new_ring; 1470 1437 1471 1438 /* 1472 1439 * Get values to fill the endpoint context, mostly from ep descriptor. 
··· 1479 1478 /* xhci 1.1 with LEC support doesn't use mult field, use RsvdZ */ 1480 1479 if ((xhci->hci_version > 0x100) && HCC2_LEC(xhci->hcc_params2)) 1481 1480 mult = 0; 1481 + 1482 + /* Set up the endpoint ring */ 1483 + virt_dev->eps[ep_index].new_ring = 1484 + xhci_ring_alloc(xhci, 2, 1, ring_type, max_packet, mem_flags); 1485 + if (!virt_dev->eps[ep_index].new_ring) { 1486 + /* Attempt to use the ring cache */ 1487 + if (virt_dev->num_rings_cached == 0) 1488 + return -ENOMEM; 1489 + virt_dev->num_rings_cached--; 1490 + virt_dev->eps[ep_index].new_ring = 1491 + virt_dev->ring_cache[virt_dev->num_rings_cached]; 1492 + virt_dev->ring_cache[virt_dev->num_rings_cached] = NULL; 1493 + xhci_reinit_cached_ring(xhci, virt_dev->eps[ep_index].new_ring, 1494 + 1, ring_type); 1495 + } 1496 + virt_dev->eps[ep_index].skip = false; 1497 + ep_ring = virt_dev->eps[ep_index].new_ring; 1482 1498 1483 1499 /* Fill the endpoint context */ 1484 1500 ep_ctx->ep_info = cpu_to_le32(EP_MAX_ESIT_PAYLOAD_HI(max_esit_payload) | ··· 2427 2409 goto fail; 2428 2410 2429 2411 /* Set up the command ring to have one segments for now. */ 2430 - xhci->cmd_ring = xhci_ring_alloc(xhci, 1, 1, TYPE_COMMAND, flags); 2412 + xhci->cmd_ring = xhci_ring_alloc(xhci, 1, 1, TYPE_COMMAND, 0, flags); 2431 2413 if (!xhci->cmd_ring) 2432 2414 goto fail; 2433 2415 xhci_dbg_trace(xhci, trace_xhci_dbg_init, ··· 2472 2454 */ 2473 2455 xhci_dbg_trace(xhci, trace_xhci_dbg_init, "// Allocating event ring"); 2474 2456 xhci->event_ring = xhci_ring_alloc(xhci, ERST_NUM_SEGS, 1, TYPE_EVENT, 2475 - flags); 2457 + 0, flags); 2476 2458 if (!xhci->event_ring) 2477 2459 goto fail; 2478 2460 if (xhci_check_trb_in_td_math(xhci) < 0)
+96 -10
drivers/usb/host/xhci-ring.c
··· 66 66 67 67 #include <linux/scatterlist.h> 68 68 #include <linux/slab.h> 69 + #include <linux/dma-mapping.h> 69 70 #include "xhci.h" 70 71 #include "xhci-trace.h" 71 72 #include "xhci-mtk.h" ··· 627 626 } 628 627 } 629 628 629 + void xhci_unmap_td_bounce_buffer(struct xhci_hcd *xhci, struct xhci_ring *ring, 630 + struct xhci_td *td) 631 + { 632 + struct device *dev = xhci_to_hcd(xhci)->self.controller; 633 + struct xhci_segment *seg = td->bounce_seg; 634 + struct urb *urb = td->urb; 635 + 636 + if (!seg || !urb) 637 + return; 638 + 639 + if (usb_urb_dir_out(urb)) { 640 + dma_unmap_single(dev, seg->bounce_dma, ring->bounce_buf_len, 641 + DMA_TO_DEVICE); 642 + return; 643 + } 644 + 645 + /* for in tranfers we need to copy the data from bounce to sg */ 646 + sg_pcopy_from_buffer(urb->sg, urb->num_mapped_sgs, seg->bounce_buf, 647 + seg->bounce_len, seg->bounce_offs); 648 + dma_unmap_single(dev, seg->bounce_dma, ring->bounce_buf_len, 649 + DMA_FROM_DEVICE); 650 + seg->bounce_len = 0; 651 + seg->bounce_offs = 0; 652 + } 653 + 630 654 /* 631 655 * When we get a command completion for a Stop Endpoint Command, we need to 632 656 * unlink any cancelled TDs from the ring. There are two ways to do that: ··· 771 745 /* Doesn't matter what we pass for status, since the core will 772 746 * just overwrite it (because the URB has been unlinked). 
773 747 */ 748 + if (ep_ring && cur_td->bounce_seg) 749 + xhci_unmap_td_bounce_buffer(xhci, ep_ring, cur_td); 774 750 xhci_giveback_urb_in_irq(xhci, cur_td, 0); 775 751 776 752 /* Stop processing the cancelled list if the watchdog timer is ··· 795 767 list_del_init(&cur_td->td_list); 796 768 if (!list_empty(&cur_td->cancelled_td_list)) 797 769 list_del_init(&cur_td->cancelled_td_list); 770 + 771 + if (cur_td->bounce_seg) 772 + xhci_unmap_td_bounce_buffer(xhci, ring, cur_td); 798 773 xhci_giveback_urb_in_irq(xhci, cur_td, -ESHUTDOWN); 799 774 } 800 775 } ··· 1895 1864 /* Clean up the endpoint's TD list */ 1896 1865 urb = td->urb; 1897 1866 urb_priv = urb->hcpriv; 1867 + 1868 + /* if a bounce buffer was used to align this td then unmap it */ 1869 + if (td->bounce_seg) 1870 + xhci_unmap_td_bounce_buffer(xhci, ep_ring, td); 1898 1871 1899 1872 /* Do one last check of the actual transfer length. 1900 1873 * If the host controller said we transferred more data than the buffer ··· 3151 3116 return (total_packet_count - ((transferred + trb_buff_len) / maxp)); 3152 3117 } 3153 3118 3119 + 3154 3120 static int xhci_align_td(struct xhci_hcd *xhci, struct urb *urb, u32 enqd_len, 3155 - u32 *trb_buff_len) 3121 + u32 *trb_buff_len, struct xhci_segment *seg) 3156 3122 { 3123 + struct device *dev = xhci_to_hcd(xhci)->self.controller; 3157 3124 unsigned int unalign; 3158 3125 unsigned int max_pkt; 3126 + u32 new_buff_len; 3159 3127 3160 3128 max_pkt = GET_MAX_PACKET(usb_endpoint_maxp(&urb->ep->desc)); 3161 3129 unalign = (enqd_len + *trb_buff_len) % max_pkt; ··· 3167 3129 if (unalign == 0) 3168 3130 return 0; 3169 3131 3132 + xhci_dbg(xhci, "Unaligned %d bytes, buff len %d\n", 3133 + unalign, *trb_buff_len); 3134 + 3170 3135 /* is the last nornal TRB alignable by splitting it */ 3171 3136 if (*trb_buff_len > unalign) { 3172 3137 *trb_buff_len -= unalign; 3138 + xhci_dbg(xhci, "split align, new buff len %d\n", *trb_buff_len); 3173 3139 return 0; 3174 3140 } 3141 + 3142 + /* 3143 + * 
We want enqd_len + trb_buff_len to sum up to a number aligned to 3144 + * number which is divisible by the endpoint's wMaxPacketSize. IOW: 3145 + * (size of currently enqueued TRBs + remainder) % wMaxPacketSize == 0. 3146 + */ 3147 + new_buff_len = max_pkt - (enqd_len % max_pkt); 3148 + 3149 + if (new_buff_len > (urb->transfer_buffer_length - enqd_len)) 3150 + new_buff_len = (urb->transfer_buffer_length - enqd_len); 3151 + 3152 + /* create a max max_pkt sized bounce buffer pointed to by last trb */ 3153 + if (usb_urb_dir_out(urb)) { 3154 + sg_pcopy_to_buffer(urb->sg, urb->num_mapped_sgs, 3155 + seg->bounce_buf, new_buff_len, enqd_len); 3156 + seg->bounce_dma = dma_map_single(dev, seg->bounce_buf, 3157 + max_pkt, DMA_TO_DEVICE); 3158 + } else { 3159 + seg->bounce_dma = dma_map_single(dev, seg->bounce_buf, 3160 + max_pkt, DMA_FROM_DEVICE); 3161 + } 3162 + 3163 + if (dma_mapping_error(dev, seg->bounce_dma)) { 3164 + /* try without aligning. Some host controllers survive */ 3165 + xhci_warn(xhci, "Failed mapping bounce buffer, not aligning\n"); 3166 + return 0; 3167 + } 3168 + *trb_buff_len = new_buff_len; 3169 + seg->bounce_len = new_buff_len; 3170 + seg->bounce_offs = enqd_len; 3171 + 3172 + xhci_dbg(xhci, "Bounce align, new buff len %d\n", *trb_buff_len); 3173 + 3175 3174 return 1; 3176 3175 } 3177 3176 ··· 3227 3152 unsigned int num_trbs; 3228 3153 unsigned int start_cycle, num_sgs = 0; 3229 3154 unsigned int enqd_len, block_len, trb_buff_len, full_len; 3230 - int ret; 3155 + int sent_len, ret; 3231 3156 u32 field, length_field, remainder; 3232 - u64 addr; 3157 + u64 addr, send_addr; 3233 3158 3234 3159 ring = xhci_urb_to_transfer_ring(xhci, urb); 3235 3160 if (!ring) ··· 3269 3194 */ 3270 3195 start_trb = &ring->enqueue->generic; 3271 3196 start_cycle = ring->cycle_state; 3197 + send_addr = addr; 3272 3198 3273 3199 /* Queue the TRBs, even if they are zero-length */ 3274 3200 for (enqd_len = 0; enqd_len < full_len; enqd_len += trb_buff_len) { ··· 3298 3222 if 
(last_trb(xhci, ring, ring->enq_seg, 3299 3223 ring->enqueue + 1)) { 3300 3224 if (xhci_align_td(xhci, urb, enqd_len, 3301 - &trb_buff_len)) 3302 - xhci_dbg(xhci, "TRB align fail\n"); 3225 + &trb_buff_len, 3226 + ring->enq_seg)) { 3227 + send_addr = ring->enq_seg->bounce_dma; 3228 + /* assuming TD won't span 2 segs */ 3229 + td->bounce_seg = ring->enq_seg; 3230 + } 3303 3231 } 3304 - } else { 3232 + } 3233 + if (enqd_len + trb_buff_len >= full_len) { 3234 + field &= ~TRB_CHAIN; 3305 3235 field |= TRB_IOC; 3306 3236 more_trbs_coming = false; 3307 3237 td->last_trb = ring->enqueue; ··· 3326 3244 TRB_INTR_TARGET(0); 3327 3245 3328 3246 queue_trb(xhci, ring, more_trbs_coming | need_zero_pkt, 3329 - lower_32_bits(addr), 3330 - upper_32_bits(addr), 3247 + lower_32_bits(send_addr), 3248 + upper_32_bits(send_addr), 3331 3249 length_field, 3332 3250 field); 3333 3251 3334 3252 addr += trb_buff_len; 3335 - block_len -= trb_buff_len; 3253 + sent_len = trb_buff_len; 3336 3254 3337 - if (sg && block_len == 0) { 3255 + while (sg && sent_len >= block_len) { 3338 3256 /* New sg entry */ 3339 3257 --num_sgs; 3258 + sent_len -= block_len; 3340 3259 if (num_sgs != 0) { 3341 3260 sg = sg_next(sg); 3342 3261 block_len = sg_dma_len(sg); 3343 3262 addr = (u64) sg_dma_address(sg); 3263 + addr += sent_len; 3344 3264 } 3345 3265 } 3266 + block_len -= sent_len; 3267 + send_addr = addr; 3346 3268 } 3347 3269 3348 3270 if (need_zero_pkt) {
+4 -1
drivers/usb/host/xhci.c
··· 3139 3139 struct xhci_input_control_ctx *ctrl_ctx; 3140 3140 unsigned int ep_index; 3141 3141 unsigned int num_stream_ctxs; 3142 + unsigned int max_packet; 3142 3143 unsigned long flags; 3143 3144 u32 changed_ep_bitmask = 0; 3144 3145 ··· 3213 3212 3214 3213 for (i = 0; i < num_eps; i++) { 3215 3214 ep_index = xhci_get_endpoint_index(&eps[i]->desc); 3215 + max_packet = GET_MAX_PACKET(usb_endpoint_maxp(&eps[i]->desc)); 3216 3216 vdev->eps[ep_index].stream_info = xhci_alloc_stream_info(xhci, 3217 3217 num_stream_ctxs, 3218 - num_streams, mem_flags); 3218 + num_streams, 3219 + max_packet, mem_flags); 3219 3220 if (!vdev->eps[ep_index].stream_info) 3220 3221 goto cleanup; 3221 3222 /* Set maxPstreams in endpoint context and update deq ptr to
+9 -1
drivers/usb/host/xhci.h
··· 1347 1347 /* private to HCD */ 1348 1348 struct xhci_segment *next; 1349 1349 dma_addr_t dma; 1350 + /* Max packet sized bounce buffer for td-fragmant alignment */ 1351 + dma_addr_t bounce_dma; 1352 + void *bounce_buf; 1353 + unsigned int bounce_offs; 1354 + unsigned int bounce_len; 1350 1355 }; 1351 1356 1352 1357 struct xhci_td { ··· 1361 1356 struct xhci_segment *start_seg; 1362 1357 union xhci_trb *first_trb; 1363 1358 union xhci_trb *last_trb; 1359 + struct xhci_segment *bounce_seg; 1364 1360 /* actual_length of the URB has already been set */ 1365 1361 bool urb_length_set; 1366 1362 }; ··· 1411 1405 unsigned int num_segs; 1412 1406 unsigned int num_trbs_free; 1413 1407 unsigned int num_trbs_free_temp; 1408 + unsigned int bounce_buf_len; 1414 1409 enum xhci_ring_type type; 1415 1410 bool last_td_was_short; 1416 1411 struct radix_tree_root *trb_address_map; ··· 1814 1807 unsigned int ep_index); 1815 1808 struct xhci_stream_info *xhci_alloc_stream_info(struct xhci_hcd *xhci, 1816 1809 unsigned int num_stream_ctxs, 1817 - unsigned int num_streams, gfp_t flags); 1810 + unsigned int num_streams, 1811 + unsigned int max_packet, gfp_t flags); 1818 1812 void xhci_free_stream_info(struct xhci_hcd *xhci, 1819 1813 struct xhci_stream_info *stream_info); 1820 1814 void xhci_setup_streams_ep_input_ctx(struct xhci_hcd *xhci,