/****************************************************************************
 * Driver for Solarflare Solarstorm network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2008 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/pci.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/highmem.h>
#include "net_driver.h"
#include "tx.h"
#include "efx.h"
#include "falcon.h"
#include "workarounds.h"

/*
 * TX descriptor ring full threshold
 *
 * The tx_queue descriptor ring fill-level must fall below this value
 * before we restart the netif queue
 */
#define EFX_NETDEV_TX_THRESHOLD(_tx_queue)	\
	(_tx_queue->efx->type->txd_ring_mask / 2u)

/* We want to be able to nest calls to netif_stop_queue(), since each
 * channel can have an individual stop on the queue.
 */
void efx_stop_queue(struct efx_nic *efx)
{
	spin_lock_bh(&efx->netif_stop_lock);
	EFX_TRACE(efx, "stop TX queue\n");

	atomic_inc(&efx->netif_stop_count);
	netif_stop_queue(efx->net_dev);

	spin_unlock_bh(&efx->netif_stop_lock);
}

/* Wake netif's TX queue
 * We want to be able to nest calls to netif_stop_queue(), since each
 * channel can have an individual stop on the queue.
 */
void efx_wake_queue(struct efx_nic *efx)
{
	local_bh_disable();
	if (atomic_dec_and_lock(&efx->netif_stop_count,
				&efx->netif_stop_lock)) {
		EFX_TRACE(efx, "waking TX queue\n");
		netif_wake_queue(efx->net_dev);
		spin_unlock(&efx->netif_stop_lock);
	}
	local_bh_enable();
}

static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
			       struct efx_tx_buffer *buffer)
{
	if (buffer->unmap_len) {
		struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
		dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len -
					 buffer->unmap_len);
		if (buffer->unmap_single)
			pci_unmap_single(pci_dev, unmap_addr, buffer->unmap_len,
					 PCI_DMA_TODEVICE);
		else
			pci_unmap_page(pci_dev, unmap_addr, buffer->unmap_len,
				       PCI_DMA_TODEVICE);
		buffer->unmap_len = 0;
		buffer->unmap_single = false;
	}

	if (buffer->skb) {
		dev_kfree_skb_any((struct sk_buff *) buffer->skb);
		buffer->skb = NULL;
		EFX_TRACE(tx_queue->efx, "TX queue %d transmission id %x "
			  "complete\n", tx_queue->queue, tx_queue->read_count);
	}
}

/**
 * struct efx_tso_header - a DMA mapped buffer for packet headers
 * @next: Linked list of free ones.
 *	The list is protected by the TX queue lock.
 * @unmap_len: Length to unmap for an oversize buffer, or 0.
 * @dma_addr: The DMA address of the header below.
 *
 * This controls the memory used for a TSO header.  Use TSOH_DATA()
 * to find the packet header data.  Use TSOH_SIZE() to calculate the
 * total size required for a given packet header length.  TSO headers
 * in the free list are exactly %TSOH_STD_SIZE bytes in size.
 */
struct efx_tso_header {
	union {
		struct efx_tso_header *next;
		size_t unmap_len;
	};
	dma_addr_t dma_addr;
};
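
/* Worked example of the union above: headers small enough for the free
 * list are carved out of a DMA-coherent page by efx_tsoh_block_alloc(),
 * so only @next is meaningful and @unmap_len stays 0.  An oversize header
 * is kmalloc()ed and pci_map_single()ed by efx_tsoh_heap_alloc(), which
 * stores the mapped length in @unmap_len.  efx_tsoh_free() then uses a
 * zero/non-zero @unmap_len to decide between returning the header to
 * tso_headers_free and unmapping plus kfree()ing it.
 */
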
static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
			       struct sk_buff *skb);
static void efx_fini_tso(struct efx_tx_queue *tx_queue);
static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue,
			       struct efx_tso_header *tsoh);

static void efx_tsoh_free(struct efx_tx_queue *tx_queue,
			  struct efx_tx_buffer *buffer)
{
	if (buffer->tsoh) {
		if (likely(!buffer->tsoh->unmap_len)) {
			buffer->tsoh->next = tx_queue->tso_headers_free;
			tx_queue->tso_headers_free = buffer->tsoh;
		} else {
			efx_tsoh_heap_free(tx_queue, buffer->tsoh);
		}
		buffer->tsoh = NULL;
	}
}


/*
 * Add a socket buffer to a TX queue
 *
 * This maps all fragments of a socket buffer for DMA and adds them to
 * the TX queue.  The queue's insert pointer will be incremented by
 * the number of fragments in the socket buffer.
 *
 * If any DMA mapping fails, any mapped fragments will be unmapped,
 * the queue's insert pointer will be restored to its original value.
 *
 * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
 * You must hold netif_tx_lock() to call this function.
 */
static int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
			   struct sk_buff *skb)
{
	struct efx_nic *efx = tx_queue->efx;
	struct pci_dev *pci_dev = efx->pci_dev;
	struct efx_tx_buffer *buffer;
	skb_frag_t *fragment;
	struct page *page;
	int page_offset;
	unsigned int len, unmap_len = 0, fill_level, insert_ptr, misalign;
	dma_addr_t dma_addr, unmap_addr = 0;
	unsigned int dma_len;
	bool unmap_single;
	int q_space, i = 0;
	int rc = NETDEV_TX_OK;

	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);

	if (skb_shinfo((struct sk_buff *)skb)->gso_size)
		return efx_enqueue_skb_tso(tx_queue, skb);

	/* Get size of the initial fragment */
	len = skb_headlen(skb);

	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
	q_space = efx->type->txd_ring_mask - 1 - fill_level;

	/* Map for DMA.  Use pci_map_single rather than pci_map_page
	 * since this is more efficient on machines with sparse
	 * memory.
	 */
	unmap_single = true;
	dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE);

	/* Process all fragments */
	while (1) {
		if (unlikely(pci_dma_mapping_error(pci_dev, dma_addr)))
			goto pci_err;

		/* Store fields for marking in the per-fragment final
		 * descriptor */
		unmap_len = len;
		unmap_addr = dma_addr;

		/* Add to TX queue, splitting across DMA boundaries */
		do {
			if (unlikely(q_space-- <= 0)) {
				/* It might be that completions have
				 * happened since the xmit path last
				 * checked.  Update the xmit path's
				 * copy of read_count.
				 */
				++tx_queue->stopped;
				/* This memory barrier protects the
				 * change of stopped from the access
				 * of read_count. */
				smp_mb();
				tx_queue->old_read_count =
					*(volatile unsigned *)
					&tx_queue->read_count;
				fill_level = (tx_queue->insert_count
					      - tx_queue->old_read_count);
				q_space = (efx->type->txd_ring_mask - 1 -
					   fill_level);
				if (unlikely(q_space-- <= 0))
					goto stop;
				smp_mb();
				--tx_queue->stopped;
			}

			insert_ptr = (tx_queue->insert_count &
				      efx->type->txd_ring_mask);
			buffer = &tx_queue->buffer[insert_ptr];
			efx_tsoh_free(tx_queue, buffer);
			EFX_BUG_ON_PARANOID(buffer->tsoh);
			EFX_BUG_ON_PARANOID(buffer->skb);
			EFX_BUG_ON_PARANOID(buffer->len);
			EFX_BUG_ON_PARANOID(!buffer->continuation);
			EFX_BUG_ON_PARANOID(buffer->unmap_len);

			dma_len = (((~dma_addr) & efx->type->tx_dma_mask) + 1);
			if (likely(dma_len > len))
				dma_len = len;

			misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
			if (misalign && dma_len + misalign > 512)
				dma_len = 512 - misalign;

			/* Fill out per descriptor fields */
			buffer->len = dma_len;
			buffer->dma_addr = dma_addr;
			len -= dma_len;
			dma_addr += dma_len;
			++tx_queue->insert_count;
		} while (len);

		/* Transfer ownership of the unmapping to the final buffer */
		buffer->unmap_single = unmap_single;
		buffer->unmap_len = unmap_len;
		unmap_len = 0;

		/* Get address and size of next fragment */
		if (i >= skb_shinfo(skb)->nr_frags)
			break;
		fragment = &skb_shinfo(skb)->frags[i];
		len = fragment->size;
		page = fragment->page;
		page_offset = fragment->page_offset;
		i++;
		/* Map for DMA */
		unmap_single = false;
		dma_addr = pci_map_page(pci_dev, page, page_offset, len,
					PCI_DMA_TODEVICE);
	}

	/* Transfer ownership of the skb to the final buffer */
	buffer->skb = skb;
	buffer->continuation = false;

	/* Pass off to hardware */
	falcon_push_buffers(tx_queue);

	return NETDEV_TX_OK;

 pci_err:
	EFX_ERR_RL(efx, " TX queue %d could not map skb with %d bytes %d "
		   "fragments for DMA\n", tx_queue->queue, skb->len,
		   skb_shinfo(skb)->nr_frags + 1);

	/* Mark the packet as transmitted, and free the SKB ourselves */
	dev_kfree_skb_any((struct sk_buff *)skb);
	goto unwind;

 stop:
	rc = NETDEV_TX_BUSY;

	if (tx_queue->stopped == 1)
		efx_stop_queue(efx);

 unwind:
	/* Work backwards until we hit the original insert pointer value */
	while (tx_queue->insert_count != tx_queue->write_count) {
		--tx_queue->insert_count;
		insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask;
		buffer = &tx_queue->buffer[insert_ptr];
		efx_dequeue_buffer(tx_queue, buffer);
		buffer->len = 0;
	}

	/* Free the fragment we were mid-way through pushing */
	if (unmap_len) {
		if (unmap_single)
			pci_unmap_single(pci_dev, unmap_addr, unmap_len,
					 PCI_DMA_TODEVICE);
		else
			pci_unmap_page(pci_dev, unmap_addr, unmap_len,
				       PCI_DMA_TODEVICE);
	}

	return rc;
}
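
/* Worked example of the boundary splitting above: dma_len is computed as
 * (~dma_addr & tx_dma_mask) + 1, i.e. the number of bytes left before the
 * next DMA boundary the hardware supports.  Assuming, say, a 4 KB boundary
 * (tx_dma_mask == 0xfff) and dma_addr ending in 0xf00, dma_len comes out
 * as 0x100 == 256, so a 1000-byte fragment is split into descriptors of
 * 256 and 744 bytes.  The bug5391 workaround additionally caps dma_len at
 * (512 - misalign) bytes when the start address is misaligned and the
 * descriptor would otherwise run past 512 bytes.
 */
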
/* Remove packets from the TX queue
 *
 * This removes packets from the TX queue, up to and including the
 * specified index.
 */
static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
				unsigned int index)
{
	struct efx_nic *efx = tx_queue->efx;
	unsigned int stop_index, read_ptr;
	unsigned int mask = tx_queue->efx->type->txd_ring_mask;

	stop_index = (index + 1) & mask;
	read_ptr = tx_queue->read_count & mask;

	while (read_ptr != stop_index) {
		struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
		if (unlikely(buffer->len == 0)) {
			EFX_ERR(tx_queue->efx, "TX queue %d spurious TX "
				"completion id %x\n", tx_queue->queue,
				read_ptr);
			efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
			return;
		}

		efx_dequeue_buffer(tx_queue, buffer);
		buffer->continuation = true;
		buffer->len = 0;

		++tx_queue->read_count;
		read_ptr = tx_queue->read_count & mask;
	}
}

/* Initiate a packet transmission on the specified TX queue.
 * Note that returning anything other than NETDEV_TX_OK will cause the
 * OS to free the skb.
 *
 * This function is split out from efx_hard_start_xmit to allow the
 * loopback test to direct packets via specific TX queues.  It is
 * therefore a non-static inline, so as not to penalise performance
 * for non-loopback transmissions.
 *
 * Context: netif_tx_lock held
 */
inline int efx_xmit(struct efx_nic *efx,
		    struct efx_tx_queue *tx_queue, struct sk_buff *skb)
{
	int rc;

	/* Map fragments for DMA and add to TX queue */
	rc = efx_enqueue_skb(tx_queue, skb);
	if (unlikely(rc != NETDEV_TX_OK))
		goto out;

	/* Update last TX timer */
	efx->net_dev->trans_start = jiffies;

 out:
	return rc;
}

/* Initiate a packet transmission.  We use one channel per CPU
 * (sharing when we have more CPUs than channels).  On Falcon, the TX
 * completion events will be directed back to the CPU that transmitted
 * the packet, which should be cache-efficient.
 *
 * Context: non-blocking.
 * Note that returning anything other than NETDEV_TX_OK will cause the
 * OS to free the skb.
 */
int efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct efx_tx_queue *tx_queue;

	if (likely(skb->ip_summed == CHECKSUM_PARTIAL))
		tx_queue = &efx->tx_queue[EFX_TX_QUEUE_OFFLOAD_CSUM];
	else
		tx_queue = &efx->tx_queue[EFX_TX_QUEUE_NO_CSUM];

	return efx_xmit(efx, tx_queue, skb);
}

void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
	unsigned fill_level;
	struct efx_nic *efx = tx_queue->efx;

	EFX_BUG_ON_PARANOID(index > efx->type->txd_ring_mask);

	efx_dequeue_buffers(tx_queue, index);

	/* See if we need to restart the netif queue.  This barrier
	 * separates the update of read_count from the test of
	 * stopped. */
	smp_mb();
	if (unlikely(tx_queue->stopped)) {
		fill_level = tx_queue->insert_count - tx_queue->read_count;
		if (fill_level < EFX_NETDEV_TX_THRESHOLD(tx_queue)) {
			EFX_BUG_ON_PARANOID(!efx_dev_registered(efx));

			/* Do this under netif_tx_lock(), to avoid racing
			 * with efx_xmit(). */
			netif_tx_lock(efx->net_dev);
			if (tx_queue->stopped) {
				tx_queue->stopped = 0;
				efx_wake_queue(efx);
			}
			netif_tx_unlock(efx->net_dev);
		}
	}
}
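
/* Worked example of the restart threshold used above: assuming a
 * 4096-entry descriptor ring (txd_ring_mask == 0xfff),
 * EFX_NETDEV_TX_THRESHOLD() evaluates to 0xfff / 2 == 2047.  A queue
 * stopped by the enqueue path therefore stays stopped until completions
 * have drained the fill level (insert_count - read_count) below 2047,
 * i.e. until roughly half the ring is free again, which avoids bouncing
 * the netif queue on every completion event.
 */
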
int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
{
	struct efx_nic *efx = tx_queue->efx;
	unsigned int txq_size;
	int i, rc;

	EFX_LOG(efx, "creating TX queue %d\n", tx_queue->queue);

	/* Allocate software ring */
	txq_size = (efx->type->txd_ring_mask + 1) * sizeof(*tx_queue->buffer);
	tx_queue->buffer = kzalloc(txq_size, GFP_KERNEL);
	if (!tx_queue->buffer)
		return -ENOMEM;
	for (i = 0; i <= efx->type->txd_ring_mask; ++i)
		tx_queue->buffer[i].continuation = true;

	/* Allocate hardware ring */
	rc = falcon_probe_tx(tx_queue);
	if (rc)
		goto fail;

	return 0;

 fail:
	kfree(tx_queue->buffer);
	tx_queue->buffer = NULL;
	return rc;
}

void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
{
	EFX_LOG(tx_queue->efx, "initialising TX queue %d\n", tx_queue->queue);

	tx_queue->insert_count = 0;
	tx_queue->write_count = 0;
	tx_queue->read_count = 0;
	tx_queue->old_read_count = 0;
	BUG_ON(tx_queue->stopped);

	/* Set up TX descriptor ring */
	falcon_init_tx(tx_queue);
}

void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
{
	struct efx_tx_buffer *buffer;

	if (!tx_queue->buffer)
		return;

	/* Free any buffers left in the ring */
	while (tx_queue->read_count != tx_queue->write_count) {
		buffer = &tx_queue->buffer[tx_queue->read_count &
					   tx_queue->efx->type->txd_ring_mask];
		efx_dequeue_buffer(tx_queue, buffer);
		buffer->continuation = true;
		buffer->len = 0;

		++tx_queue->read_count;
	}
}

void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
{
	EFX_LOG(tx_queue->efx, "shutting down TX queue %d\n", tx_queue->queue);

	/* Flush TX queue, remove descriptor ring */
	falcon_fini_tx(tx_queue);

	efx_release_tx_buffers(tx_queue);

	/* Free up TSO header cache */
	efx_fini_tso(tx_queue);

	/* Release queue's stop on port, if any */
	if (tx_queue->stopped) {
		tx_queue->stopped = 0;
		efx_wake_queue(tx_queue->efx);
	}
}

void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
{
	EFX_LOG(tx_queue->efx, "destroying TX queue %d\n", tx_queue->queue);
	falcon_remove_tx(tx_queue);

	kfree(tx_queue->buffer);
	tx_queue->buffer = NULL;
}


/* Efx TCP segmentation acceleration.
 *
 * Why?  Because by doing it here in the driver we can go significantly
 * faster than the GSO.
 *
 * Requires TX checksum offload support.
 */

/* Number of bytes inserted at the start of a TSO header buffer,
 * similar to NET_IP_ALIGN.
 */
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
#define TSOH_OFFSET 0
#else
#define TSOH_OFFSET NET_IP_ALIGN
#endif

#define TSOH_BUFFER(tsoh) ((u8 *)(tsoh + 1) + TSOH_OFFSET)

/* Total size of struct efx_tso_header, buffer and padding */
#define TSOH_SIZE(hdr_len) \
	(sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len)

/* Size of blocks on free list.  Larger blocks must be allocated from
 * the heap.
 */
#define TSOH_STD_SIZE 128

#define PTR_DIFF(p1, p2)  ((u8 *)(p1) - (u8 *)(p2))
#define ETH_HDR_LEN(skb)  (skb_network_header(skb) - (skb)->data)
#define SKB_TCP_OFF(skb)  PTR_DIFF(tcp_hdr(skb), (skb)->data)
#define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data)
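
/* Worked example of the sizing macros above: on a 64-bit build
 * sizeof(struct efx_tso_header) is typically 16 bytes and TSOH_OFFSET is
 * usually NET_IP_ALIGN (2).  A full Ethernet + IPv4 + TCP-with-timestamps
 * header of 14 + 20 + 32 == 66 bytes then needs
 * TSOH_SIZE(66) == 16 + 2 + 66 == 84 bytes, which fits in a TSOH_STD_SIZE
 * (128-byte) block from the free list; only unusually long headers fall
 * back to the heap path in efx_tsoh_heap_alloc().
 */
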
/**
 * struct tso_state - TSO state for an SKB
 * @out_len: Remaining length in current segment
 * @seqnum: Current sequence number
 * @ipv4_id: Current IPv4 ID, host endian
 * @packet_space: Remaining space in current packet
 * @dma_addr: DMA address of current position
 * @in_len: Remaining length in current SKB fragment
 * @unmap_len: Length of SKB fragment
 * @unmap_addr: DMA address of SKB fragment
 * @unmap_single: DMA single vs page mapping flag
 * @header_len: Number of bytes of header
 * @full_packet_size: Number of bytes to put in each outgoing segment
 *
 * The state used during segmentation.  It is put into this data structure
 * just to make it easy to pass into inline functions.
 */
struct tso_state {
	/* Output position */
	unsigned out_len;
	unsigned seqnum;
	unsigned ipv4_id;
	unsigned packet_space;

	/* Input position */
	dma_addr_t dma_addr;
	unsigned in_len;
	unsigned unmap_len;
	dma_addr_t unmap_addr;
	bool unmap_single;

	unsigned header_len;
	int full_packet_size;
};


/*
 * Verify that our various assumptions about sk_buffs and the conditions
 * under which TSO will be attempted hold true.
 */
static void efx_tso_check_safe(struct sk_buff *skb)
{
	__be16 protocol = skb->protocol;

	EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto !=
			    protocol);
	if (protocol == htons(ETH_P_8021Q)) {
		/* Find the encapsulated protocol; reset network header
		 * and transport header based on that. */
		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
		protocol = veh->h_vlan_encapsulated_proto;
		skb_set_network_header(skb, sizeof(*veh));
		if (protocol == htons(ETH_P_IP))
			skb_set_transport_header(skb, sizeof(*veh) +
						 4 * ip_hdr(skb)->ihl);
	}

	EFX_BUG_ON_PARANOID(protocol != htons(ETH_P_IP));
	EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP);
	EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data)
			     + (tcp_hdr(skb)->doff << 2u)) >
			    skb_headlen(skb));
}


/*
 * Allocate a page worth of efx_tso_header structures, and string them
 * into the tx_queue->tso_headers_free linked list.  Return 0 or -ENOMEM.
 */
static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue)
{

	struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
	struct efx_tso_header *tsoh;
	dma_addr_t dma_addr;
	u8 *base_kva, *kva;

	base_kva = pci_alloc_consistent(pci_dev, PAGE_SIZE, &dma_addr);
	if (base_kva == NULL) {
		EFX_ERR(tx_queue->efx, "Unable to allocate page for TSO"
			" headers\n");
		return -ENOMEM;
	}

	/* pci_alloc_consistent() allocates pages. */
	EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u));

	for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) {
		tsoh = (struct efx_tso_header *)kva;
		tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva);
		tsoh->next = tx_queue->tso_headers_free;
		tx_queue->tso_headers_free = tsoh;
	}

	return 0;
}
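
/* Worked example for the allocator above: the DMA-coherent page is carved
 * into TSOH_STD_SIZE (128-byte) slots, so with 4 KB pages each call to
 * efx_tsoh_block_alloc() strings 4096 / 128 == 32 standard headers onto
 * tx_queue->tso_headers_free.  Each slot holds the struct efx_tso_header
 * itself followed by the header bytes at TSOH_BUFFER(tsoh).
 */
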
/* Free up a TSO header, and all others in the same page. */
static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue,
				struct efx_tso_header *tsoh,
				struct pci_dev *pci_dev)
{
	struct efx_tso_header **p;
	unsigned long base_kva;
	dma_addr_t base_dma;

	base_kva = (unsigned long)tsoh & PAGE_MASK;
	base_dma = tsoh->dma_addr & PAGE_MASK;

	p = &tx_queue->tso_headers_free;
	while (*p != NULL) {
		if (((unsigned long)*p & PAGE_MASK) == base_kva)
			*p = (*p)->next;
		else
			p = &(*p)->next;
	}

	pci_free_consistent(pci_dev, PAGE_SIZE, (void *)base_kva, base_dma);
}

static struct efx_tso_header *
efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len)
{
	struct efx_tso_header *tsoh;

	tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA);
	if (unlikely(!tsoh))
		return NULL;

	tsoh->dma_addr = pci_map_single(tx_queue->efx->pci_dev,
					TSOH_BUFFER(tsoh), header_len,
					PCI_DMA_TODEVICE);
	if (unlikely(pci_dma_mapping_error(tx_queue->efx->pci_dev,
					   tsoh->dma_addr))) {
		kfree(tsoh);
		return NULL;
	}

	tsoh->unmap_len = header_len;
	return tsoh;
}

static void
efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
{
	pci_unmap_single(tx_queue->efx->pci_dev,
			 tsoh->dma_addr, tsoh->unmap_len,
			 PCI_DMA_TODEVICE);
	kfree(tsoh);
}

/**
 * efx_tx_queue_insert - push descriptors onto the TX queue
 * @tx_queue: Efx TX queue
 * @dma_addr: DMA address of fragment
 * @len: Length of fragment
 * @final_buffer: The final buffer inserted into the queue
 *
 * Push descriptors onto the TX queue.  Return 0 on success or 1 if
 * @tx_queue full.
 */
static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
			       dma_addr_t dma_addr, unsigned len,
			       struct efx_tx_buffer **final_buffer)
{
	struct efx_tx_buffer *buffer;
	struct efx_nic *efx = tx_queue->efx;
	unsigned dma_len, fill_level, insert_ptr, misalign;
	int q_space;

	EFX_BUG_ON_PARANOID(len <= 0);

	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
	/* -1 as there is no way to represent all descriptors used */
	q_space = efx->type->txd_ring_mask - 1 - fill_level;

	while (1) {
		if (unlikely(q_space-- <= 0)) {
			/* It might be that completions have happened
			 * since the xmit path last checked.  Update
			 * the xmit path's copy of read_count.
			 */
			++tx_queue->stopped;
			/* This memory barrier protects the change of
			 * stopped from the access of read_count. */
			smp_mb();
			tx_queue->old_read_count =
				*(volatile unsigned *)&tx_queue->read_count;
			fill_level = (tx_queue->insert_count
				      - tx_queue->old_read_count);
			q_space = efx->type->txd_ring_mask - 1 - fill_level;
			if (unlikely(q_space-- <= 0)) {
				*final_buffer = NULL;
				return 1;
			}
			smp_mb();
			--tx_queue->stopped;
		}

		insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask;
		buffer = &tx_queue->buffer[insert_ptr];
		++tx_queue->insert_count;

		EFX_BUG_ON_PARANOID(tx_queue->insert_count -
				    tx_queue->read_count >
				    efx->type->txd_ring_mask);

		efx_tsoh_free(tx_queue, buffer);
		EFX_BUG_ON_PARANOID(buffer->len);
		EFX_BUG_ON_PARANOID(buffer->unmap_len);
		EFX_BUG_ON_PARANOID(buffer->skb);
		EFX_BUG_ON_PARANOID(!buffer->continuation);
		EFX_BUG_ON_PARANOID(buffer->tsoh);

		buffer->dma_addr = dma_addr;

		/* Ensure we do not cross a boundary unsupported by H/W */
		dma_len = (~dma_addr & efx->type->tx_dma_mask) + 1;

		misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
		if (misalign && dma_len + misalign > 512)
			dma_len = 512 - misalign;

		/* If there is enough space to send then do so */
		if (dma_len >= len)
			break;

		buffer->len = dma_len; /* Don't set the other members */
		dma_addr += dma_len;
		len -= dma_len;
	}

	EFX_BUG_ON_PARANOID(!len);
	buffer->len = len;
	*final_buffer = buffer;
	return 0;
}
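
/* Worked example of the ring accounting in efx_tx_queue_insert() (the same
 * scheme is used in efx_enqueue_skb()): with a 4096-entry ring
 * (txd_ring_mask == 0xfff) at most 4095 descriptors are allowed in flight,
 * per the "-1 as there is no way to represent all descriptors used" rule
 * above.  So if insert_count == 100 and old_read_count == 60, fill_level
 * is 40 and q_space starts at 0xfff - 1 - 40 == 4054; only when q_space is
 * exhausted does the code re-read read_count and possibly stop the queue.
 */
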
/*
 * Put a TSO header into the TX queue.
 *
 * This is special-cased because we know that it is small enough to fit in
 * a single fragment, and we know it doesn't cross a page boundary.  It
 * also allows us to not worry about end-of-packet etc.
 */
static void efx_tso_put_header(struct efx_tx_queue *tx_queue,
			       struct efx_tso_header *tsoh, unsigned len)
{
	struct efx_tx_buffer *buffer;

	buffer = &tx_queue->buffer[tx_queue->insert_count &
				   tx_queue->efx->type->txd_ring_mask];
	efx_tsoh_free(tx_queue, buffer);
	EFX_BUG_ON_PARANOID(buffer->len);
	EFX_BUG_ON_PARANOID(buffer->unmap_len);
	EFX_BUG_ON_PARANOID(buffer->skb);
	EFX_BUG_ON_PARANOID(!buffer->continuation);
	EFX_BUG_ON_PARANOID(buffer->tsoh);
	buffer->len = len;
	buffer->dma_addr = tsoh->dma_addr;
	buffer->tsoh = tsoh;

	++tx_queue->insert_count;
}


/* Remove descriptors put into a tx_queue. */
static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
{
	struct efx_tx_buffer *buffer;
	dma_addr_t unmap_addr;

	/* Work backwards until we hit the original insert pointer value */
	while (tx_queue->insert_count != tx_queue->write_count) {
		--tx_queue->insert_count;
		buffer = &tx_queue->buffer[tx_queue->insert_count &
					   tx_queue->efx->type->txd_ring_mask];
		efx_tsoh_free(tx_queue, buffer);
		EFX_BUG_ON_PARANOID(buffer->skb);
		buffer->len = 0;
		buffer->continuation = true;
		if (buffer->unmap_len) {
			unmap_addr = (buffer->dma_addr + buffer->len -
				      buffer->unmap_len);
			if (buffer->unmap_single)
				pci_unmap_single(tx_queue->efx->pci_dev,
						 unmap_addr, buffer->unmap_len,
						 PCI_DMA_TODEVICE);
			else
				pci_unmap_page(tx_queue->efx->pci_dev,
					       unmap_addr, buffer->unmap_len,
					       PCI_DMA_TODEVICE);
			buffer->unmap_len = 0;
		}
	}
}
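
/* Note on the unwind above: write_count is advanced only when descriptors
 * are handed to the hardware (falcon_push_buffers()), and both enqueue
 * paths assert insert_count == write_count on entry.  Walking insert_count
 * back down to write_count therefore releases exactly the descriptors
 * filled in by the failed call, without touching anything the hardware may
 * already own.
 */
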
/* Parse the SKB header and initialise state. */
static void tso_start(struct tso_state *st, const struct sk_buff *skb)
{
	/* All ethernet/IP/TCP headers combined size is TCP header size
	 * plus offset of TCP header relative to start of packet.
	 */
	st->header_len = ((tcp_hdr(skb)->doff << 2u)
			  + PTR_DIFF(tcp_hdr(skb), skb->data));
	st->full_packet_size = st->header_len + skb_shinfo(skb)->gso_size;

	st->ipv4_id = ntohs(ip_hdr(skb)->id);
	st->seqnum = ntohl(tcp_hdr(skb)->seq);

	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn);
	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst);

	st->packet_space = st->full_packet_size;
	st->out_len = skb->len - st->header_len;
	st->unmap_len = 0;
	st->unmap_single = false;
}

static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
			    skb_frag_t *frag)
{
	st->unmap_addr = pci_map_page(efx->pci_dev, frag->page,
				      frag->page_offset, frag->size,
				      PCI_DMA_TODEVICE);
	if (likely(!pci_dma_mapping_error(efx->pci_dev, st->unmap_addr))) {
		st->unmap_single = false;
		st->unmap_len = frag->size;
		st->in_len = frag->size;
		st->dma_addr = st->unmap_addr;
		return 0;
	}
	return -ENOMEM;
}

static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx,
				 const struct sk_buff *skb)
{
	int hl = st->header_len;
	int len = skb_headlen(skb) - hl;

	st->unmap_addr = pci_map_single(efx->pci_dev, skb->data + hl,
					len, PCI_DMA_TODEVICE);
	if (likely(!pci_dma_mapping_error(efx->pci_dev, st->unmap_addr))) {
		st->unmap_single = true;
		st->unmap_len = len;
		st->in_len = len;
		st->dma_addr = st->unmap_addr;
		return 0;
	}
	return -ENOMEM;
}


/**
 * tso_fill_packet_with_fragment - form descriptors for the current fragment
 * @tx_queue: Efx TX queue
 * @skb: Socket buffer
 * @st: TSO state
 *
 * Form descriptors for the current fragment, until we reach the end
 * of fragment or end-of-packet.  Return 0 on success, 1 if not enough
 * space in @tx_queue.
 */
static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
					 const struct sk_buff *skb,
					 struct tso_state *st)
{
	struct efx_tx_buffer *buffer;
	int n, end_of_packet, rc;

	if (st->in_len == 0)
		return 0;
	if (st->packet_space == 0)
		return 0;

	EFX_BUG_ON_PARANOID(st->in_len <= 0);
	EFX_BUG_ON_PARANOID(st->packet_space <= 0);

	n = min(st->in_len, st->packet_space);

	st->packet_space -= n;
	st->out_len -= n;
	st->in_len -= n;

	rc = efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
	if (likely(rc == 0)) {
		if (st->out_len == 0)
			/* Transfer ownership of the skb */
			buffer->skb = skb;

		end_of_packet = st->out_len == 0 || st->packet_space == 0;
		buffer->continuation = !end_of_packet;

		if (st->in_len == 0) {
			/* Transfer ownership of the pci mapping */
			buffer->unmap_len = st->unmap_len;
			buffer->unmap_single = st->unmap_single;
			st->unmap_len = 0;
		}
	}

	st->dma_addr += n;
	return rc;
}
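
/* Worked example of the fill logic above: suppose header_len == 66,
 * gso_size == 1400 and the skb carries 4000 bytes of payload.  Each call
 * to tso_start_new_packet() (below) resets packet_space to 1400, and
 * tso_fill_packet_with_fragment() then emits descriptors of
 * n == min(in_len, packet_space) bytes, so the payload is cut into
 * segments of 1400, 1400 and 1200 bytes while out_len counts down
 * 4000 -> 2600 -> 1200 -> 0.  The skb itself is attached to the very last
 * descriptor (out_len == 0), and each fragment's PCI unmapping is handed
 * to the last descriptor that consumes it (in_len == 0).
 */
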
/**
 * tso_start_new_packet - generate a new header and prepare for the new packet
 * @tx_queue: Efx TX queue
 * @skb: Socket buffer
 * @st: TSO state
 *
 * Generate a new header and prepare for the new packet.  Return 0 on
 * success, or -1 if failed to alloc header.
 */
static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
				const struct sk_buff *skb,
				struct tso_state *st)
{
	struct efx_tso_header *tsoh;
	struct iphdr *tsoh_iph;
	struct tcphdr *tsoh_th;
	unsigned ip_length;
	u8 *header;

	/* Allocate a DMA-mapped header buffer. */
	if (likely(TSOH_SIZE(st->header_len) <= TSOH_STD_SIZE)) {
		if (tx_queue->tso_headers_free == NULL) {
			if (efx_tsoh_block_alloc(tx_queue))
				return -1;
		}
		EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free);
		tsoh = tx_queue->tso_headers_free;
		tx_queue->tso_headers_free = tsoh->next;
		tsoh->unmap_len = 0;
	} else {
		tx_queue->tso_long_headers++;
		tsoh = efx_tsoh_heap_alloc(tx_queue, st->header_len);
		if (unlikely(!tsoh))
			return -1;
	}

	header = TSOH_BUFFER(tsoh);
	tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb));
	tsoh_iph = (struct iphdr *)(header + SKB_IPV4_OFF(skb));

	/* Copy and update the headers. */
	memcpy(header, skb->data, st->header_len);

	tsoh_th->seq = htonl(st->seqnum);
	st->seqnum += skb_shinfo(skb)->gso_size;
	if (st->out_len > skb_shinfo(skb)->gso_size) {
		/* This packet will not finish the TSO burst. */
		ip_length = st->full_packet_size - ETH_HDR_LEN(skb);
		tsoh_th->fin = 0;
		tsoh_th->psh = 0;
	} else {
		/* This packet will be the last in the TSO burst. */
		ip_length = st->header_len - ETH_HDR_LEN(skb) + st->out_len;
		tsoh_th->fin = tcp_hdr(skb)->fin;
		tsoh_th->psh = tcp_hdr(skb)->psh;
	}
	tsoh_iph->tot_len = htons(ip_length);

	/* Linux leaves suitable gaps in the IP ID space for us to fill. */
	tsoh_iph->id = htons(st->ipv4_id);
	st->ipv4_id++;

	st->packet_space = skb_shinfo(skb)->gso_size;
	++tx_queue->tso_packets;

	/* Form a descriptor for this header. */
	efx_tso_put_header(tx_queue, tsoh, st->header_len);

	return 0;
}
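
/* Worked example for the header rewrite above: continuing the 66-byte
 * header / gso_size 1400 case, a non-final segment gets
 * tot_len == full_packet_size - ETH_HDR_LEN == 1466 - 14 == 1452
 * (IP header + TCP header + 1400 bytes of payload), while the final
 * 1200-byte segment gets tot_len == 66 - 14 + 1200 == 1252.  The TCP
 * sequence number advances by gso_size per segment and the IPv4 ID by one,
 * with FIN/PSH copied from the original header only onto the last segment.
 */
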
/**
 * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer
 * @tx_queue: Efx TX queue
 * @skb: Socket buffer
 *
 * Context: You must hold netif_tx_lock() to call this function.
 *
 * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if
 * @skb was not enqueued.  In all cases @skb is consumed.  Return
 * %NETDEV_TX_OK or %NETDEV_TX_BUSY.
 */
static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
			       struct sk_buff *skb)
{
	struct efx_nic *efx = tx_queue->efx;
	int frag_i, rc, rc2 = NETDEV_TX_OK;
	struct tso_state state;

	/* Verify TSO is safe - these checks should never fail. */
	efx_tso_check_safe(skb);

	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);

	tso_start(&state, skb);

	/* Assume that skb header area contains exactly the headers, and
	 * all payload is in the frag list.
	 */
	if (skb_headlen(skb) == state.header_len) {
		/* Grab the first payload fragment. */
		EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1);
		frag_i = 0;
		rc = tso_get_fragment(&state, efx,
				      skb_shinfo(skb)->frags + frag_i);
		if (rc)
			goto mem_err;
	} else {
		rc = tso_get_head_fragment(&state, efx, skb);
		if (rc)
			goto mem_err;
		frag_i = -1;
	}

	if (tso_start_new_packet(tx_queue, skb, &state) < 0)
		goto mem_err;

	while (1) {
		rc = tso_fill_packet_with_fragment(tx_queue, skb, &state);
		if (unlikely(rc))
			goto stop;

		/* Move onto the next fragment? */
		if (state.in_len == 0) {
			if (++frag_i >= skb_shinfo(skb)->nr_frags)
				/* End of payload reached. */
				break;
			rc = tso_get_fragment(&state, efx,
					      skb_shinfo(skb)->frags + frag_i);
			if (rc)
				goto mem_err;
		}

		/* Start at new packet? */
		if (state.packet_space == 0 &&
		    tso_start_new_packet(tx_queue, skb, &state) < 0)
			goto mem_err;
	}

	/* Pass off to hardware */
	falcon_push_buffers(tx_queue);

	tx_queue->tso_bursts++;
	return NETDEV_TX_OK;

 mem_err:
	EFX_ERR(efx, "Out of memory for TSO headers, or PCI mapping error\n");
	dev_kfree_skb_any((struct sk_buff *)skb);
	goto unwind;

 stop:
	rc2 = NETDEV_TX_BUSY;

	/* Stop the queue if it wasn't stopped before. */
	if (tx_queue->stopped == 1)
		efx_stop_queue(efx);

 unwind:
	/* Free the DMA mapping we were in the process of writing out */
	if (state.unmap_len) {
		if (state.unmap_single)
			pci_unmap_single(efx->pci_dev, state.unmap_addr,
					 state.unmap_len, PCI_DMA_TODEVICE);
		else
			pci_unmap_page(efx->pci_dev, state.unmap_addr,
				       state.unmap_len, PCI_DMA_TODEVICE);
	}

	efx_enqueue_unwind(tx_queue);
	return rc2;
}


/*
 * Free up all TSO datastructures associated with tx_queue.  This
 * routine should be called only once the tx_queue is both empty and
 * will no longer be used.
 */
static void efx_fini_tso(struct efx_tx_queue *tx_queue)
{
	unsigned i;

	if (tx_queue->buffer) {
		for (i = 0; i <= tx_queue->efx->type->txd_ring_mask; ++i)
			efx_tsoh_free(tx_queue, &tx_queue->buffer[i]);
	}

	while (tx_queue->tso_headers_free != NULL)
		efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free,
				    tx_queue->efx->pci_dev);
}
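
/* Worked example of the whole TSO path: for the 66-byte header,
 * gso_size 1400, 4000-byte payload case used above, efx_enqueue_skb_tso()
 * emits three segments.  Each segment costs one header descriptor from
 * efx_tso_put_header() plus at least one payload descriptor from
 * efx_tx_queue_insert() (more if a segment's payload spans several skb
 * fragments or crosses a DMA boundary), so this skb consumes a minimum of
 * six TX descriptors before falcon_push_buffers() hands them to the
 * hardware.
 */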