Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tsnep: Inline small fragments within TX descriptor

The tsnep network controller is able to extend the descriptor directly
with data to be transmitted. In this case no TX data DMA address is
necessary. Instead of the TX data DMA address the TX data buffer is
placed at the end of the descriptor.

The descriptor is read with a 64 bytes DMA read by the tsnep network
controller. If the sum of descriptor data and TX data is less than or
equal to 64 bytes, then no additional DMA read is necessary to read the
TX data. Therefore, it makes sense to inline small fragments up to this
limit within the descriptor ring.

Inlined fragments need to be copied to the descriptor ring. On the other
hand DMA mapping is not necessary. At most 40 bytes are copied, so
copying should be faster than DMA mapping.

For A53 1.2 GHz copying takes <100ns and DMA mapping takes >200ns. So
inlining small fragments should result in lower CPU load. Performance
improvement is small. Thus, comparison of CPU load with and without
inlining of small fragments did not show any significant difference.
With this optimization fewer DMA reads will be done, which decreases the
load on the interconnect.

Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Gerhard Engleder and committed by
David S. Miller
dccce1d7 d8118b94

+79 -26
+2
drivers/net/ethernet/engleder/tsnep_hw.h
··· 181 181 #define TSNEP_DESC_SIZE 256 182 182 #define TSNEP_DESC_SIZE_DATA_AFTER 2048 183 183 #define TSNEP_DESC_OFFSET 128 184 + #define TSNEP_DESC_SIZE_DATA_AFTER_INLINE (64 - sizeof(struct tsnep_tx_desc) + \ 185 + sizeof_field(struct tsnep_tx_desc, tx)) 184 186 #define TSNEP_DESC_OWNER_COUNTER_MASK 0xC0000000 185 187 #define TSNEP_DESC_OWNER_COUNTER_SHIFT 30 186 188 #define TSNEP_DESC_LENGTH_MASK 0x00003FFF
+77 -26
drivers/net/ethernet/engleder/tsnep_main.c
··· 51 51 #define TSNEP_COALESCE_USECS_MAX ((ECM_INT_DELAY_MASK >> ECM_INT_DELAY_SHIFT) * \ 52 52 ECM_INT_DELAY_BASE_US + ECM_INT_DELAY_BASE_US - 1) 53 53 54 - #define TSNEP_TX_TYPE_SKB BIT(0) 55 - #define TSNEP_TX_TYPE_SKB_FRAG BIT(1) 56 - #define TSNEP_TX_TYPE_XDP_TX BIT(2) 57 - #define TSNEP_TX_TYPE_XDP_NDO BIT(3) 58 - #define TSNEP_TX_TYPE_XDP (TSNEP_TX_TYPE_XDP_TX | TSNEP_TX_TYPE_XDP_NDO) 59 - #define TSNEP_TX_TYPE_XSK BIT(4) 54 + /* mapping type */ 55 + #define TSNEP_TX_TYPE_MAP BIT(0) 56 + #define TSNEP_TX_TYPE_MAP_PAGE BIT(1) 57 + #define TSNEP_TX_TYPE_INLINE BIT(2) 58 + /* buffer type */ 59 + #define TSNEP_TX_TYPE_SKB BIT(8) 60 + #define TSNEP_TX_TYPE_SKB_MAP (TSNEP_TX_TYPE_SKB | TSNEP_TX_TYPE_MAP) 61 + #define TSNEP_TX_TYPE_SKB_INLINE (TSNEP_TX_TYPE_SKB | TSNEP_TX_TYPE_INLINE) 62 + #define TSNEP_TX_TYPE_SKB_FRAG BIT(9) 63 + #define TSNEP_TX_TYPE_SKB_FRAG_MAP_PAGE (TSNEP_TX_TYPE_SKB_FRAG | TSNEP_TX_TYPE_MAP_PAGE) 64 + #define TSNEP_TX_TYPE_SKB_FRAG_INLINE (TSNEP_TX_TYPE_SKB_FRAG | TSNEP_TX_TYPE_INLINE) 65 + #define TSNEP_TX_TYPE_XDP_TX BIT(10) 66 + #define TSNEP_TX_TYPE_XDP_NDO BIT(11) 67 + #define TSNEP_TX_TYPE_XDP_NDO_MAP_PAGE (TSNEP_TX_TYPE_XDP_NDO | TSNEP_TX_TYPE_MAP_PAGE) 68 + #define TSNEP_TX_TYPE_XDP (TSNEP_TX_TYPE_XDP_TX | TSNEP_TX_TYPE_XDP_NDO) 69 + #define TSNEP_TX_TYPE_XSK BIT(12) 60 70 61 71 #define TSNEP_XDP_TX BIT(0) 62 72 #define TSNEP_XDP_REDIRECT BIT(1) ··· 426 416 entry->properties |= TSNEP_TX_DESC_OWNER_USER_FLAG; 427 417 entry->desc->more_properties = 428 418 __cpu_to_le32(entry->len & TSNEP_DESC_LENGTH_MASK); 419 + if (entry->type & TSNEP_TX_TYPE_INLINE) 420 + entry->properties |= TSNEP_TX_DESC_DATA_AFTER_DESC_FLAG; 429 421 430 422 /* descriptor properties shall be written last, because valid data is 431 423 * signaled there ··· 445 433 return tx->read - tx->write - 1; 446 434 } 447 435 436 + static int tsnep_tx_map_frag(skb_frag_t *frag, struct tsnep_tx_entry *entry, 437 + struct device *dmadev, dma_addr_t *dma) 438 + { 439 + unsigned 
int len; 440 + int mapped; 441 + 442 + len = skb_frag_size(frag); 443 + if (likely(len > TSNEP_DESC_SIZE_DATA_AFTER_INLINE)) { 444 + *dma = skb_frag_dma_map(dmadev, frag, 0, len, DMA_TO_DEVICE); 445 + if (dma_mapping_error(dmadev, *dma)) 446 + return -ENOMEM; 447 + entry->type = TSNEP_TX_TYPE_SKB_FRAG_MAP_PAGE; 448 + mapped = 1; 449 + } else { 450 + void *fragdata = skb_frag_address_safe(frag); 451 + 452 + if (likely(fragdata)) { 453 + memcpy(&entry->desc->tx, fragdata, len); 454 + } else { 455 + struct page *page = skb_frag_page(frag); 456 + 457 + fragdata = kmap_local_page(page); 458 + memcpy(&entry->desc->tx, fragdata + skb_frag_off(frag), 459 + len); 460 + kunmap_local(fragdata); 461 + } 462 + entry->type = TSNEP_TX_TYPE_SKB_FRAG_INLINE; 463 + mapped = 0; 464 + } 465 + 466 + return mapped; 467 + } 468 + 448 469 static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count) 449 470 { 450 471 struct device *dmadev = tx->adapter->dmadev; 451 472 struct tsnep_tx_entry *entry; 452 473 unsigned int len; 453 - dma_addr_t dma; 454 474 int map_len = 0; 455 - int i; 475 + dma_addr_t dma; 476 + int i, mapped; 456 477 457 478 for (i = 0; i < count; i++) { 458 479 entry = &tx->entry[(tx->write + i) & TSNEP_RING_MASK]; 459 480 460 481 if (!i) { 461 482 len = skb_headlen(skb); 462 - dma = dma_map_single(dmadev, skb->data, len, 463 - DMA_TO_DEVICE); 464 - 465 - entry->type = TSNEP_TX_TYPE_SKB; 483 + if (likely(len > TSNEP_DESC_SIZE_DATA_AFTER_INLINE)) { 484 + dma = dma_map_single(dmadev, skb->data, len, 485 + DMA_TO_DEVICE); 486 + if (dma_mapping_error(dmadev, dma)) 487 + return -ENOMEM; 488 + entry->type = TSNEP_TX_TYPE_SKB_MAP; 489 + mapped = 1; 490 + } else { 491 + memcpy(&entry->desc->tx, skb->data, len); 492 + entry->type = TSNEP_TX_TYPE_SKB_INLINE; 493 + mapped = 0; 494 + } 466 495 } else { 467 - len = skb_frag_size(&skb_shinfo(skb)->frags[i - 1]); 468 - dma = skb_frag_dma_map(dmadev, 469 - &skb_shinfo(skb)->frags[i - 1], 470 - 0, len, DMA_TO_DEVICE); 496 + 
skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; 471 497 472 - entry->type = TSNEP_TX_TYPE_SKB_FRAG; 498 + len = skb_frag_size(frag); 499 + mapped = tsnep_tx_map_frag(frag, entry, dmadev, &dma); 500 + if (mapped < 0) 501 + return mapped; 473 502 } 474 - if (dma_mapping_error(dmadev, dma)) 475 - return -ENOMEM; 476 503 477 504 entry->len = len; 478 - dma_unmap_addr_set(entry, dma, dma); 479 - 480 - entry->desc->tx = __cpu_to_le64(dma); 505 + if (likely(mapped)) { 506 + dma_unmap_addr_set(entry, dma, dma); 507 + entry->desc->tx = __cpu_to_le64(dma); 508 + } 481 509 482 510 map_len += len; 483 511 } ··· 536 484 entry = &tx->entry[(index + i) & TSNEP_RING_MASK]; 537 485 538 486 if (entry->len) { 539 - if (entry->type & TSNEP_TX_TYPE_SKB) 487 + if (entry->type & TSNEP_TX_TYPE_MAP) 540 488 dma_unmap_single(dmadev, 541 489 dma_unmap_addr(entry, dma), 542 490 dma_unmap_len(entry, len), 543 491 DMA_TO_DEVICE); 544 - else if (entry->type & 545 - (TSNEP_TX_TYPE_SKB_FRAG | TSNEP_TX_TYPE_XDP_NDO)) 492 + else if (entry->type & TSNEP_TX_TYPE_MAP_PAGE) 546 493 dma_unmap_page(dmadev, 547 494 dma_unmap_addr(entry, dma), 548 495 dma_unmap_len(entry, len), ··· 637 586 if (dma_mapping_error(dmadev, dma)) 638 587 return -ENOMEM; 639 588 640 - entry->type = TSNEP_TX_TYPE_XDP_NDO; 589 + entry->type = TSNEP_TX_TYPE_XDP_NDO_MAP_PAGE; 641 590 } else { 642 591 page = unlikely(frag) ? skb_frag_page(frag) : 643 592 virt_to_page(xdpf->data);