/* Snapshot of this file as of v2.6.29 (880 lines, 25 kB). */
1/**************************************************************************** 2 * Driver for Solarflare Solarstorm network controllers and boards 3 * Copyright 2005-2006 Fen Systems Ltd. 4 * Copyright 2005-2008 Solarflare Communications Inc. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 as published 8 * by the Free Software Foundation, incorporated herein by reference. 9 */ 10 11#include <linux/socket.h> 12#include <linux/in.h> 13#include <linux/ip.h> 14#include <linux/tcp.h> 15#include <linux/udp.h> 16#include <net/ip.h> 17#include <net/checksum.h> 18#include "net_driver.h" 19#include "rx.h" 20#include "efx.h" 21#include "falcon.h" 22#include "selftest.h" 23#include "workarounds.h" 24 25/* Number of RX descriptors pushed at once. */ 26#define EFX_RX_BATCH 8 27 28/* Size of buffer allocated for skb header area. */ 29#define EFX_SKB_HEADERS 64u 30 31/* 32 * rx_alloc_method - RX buffer allocation method 33 * 34 * This driver supports two methods for allocating and using RX buffers: 35 * each RX buffer may be backed by an skb or by an order-n page. 36 * 37 * When LRO is in use then the second method has a lower overhead, 38 * since we don't have to allocate then free skbs on reassembled frames. 39 * 40 * Values: 41 * - RX_ALLOC_METHOD_AUTO = 0 42 * - RX_ALLOC_METHOD_SKB = 1 43 * - RX_ALLOC_METHOD_PAGE = 2 44 * 45 * The heuristic for %RX_ALLOC_METHOD_AUTO is a simple hysteresis count 46 * controlled by the parameters below. 47 * 48 * - Since pushing and popping descriptors are separated by the rx_queue 49 * size, so the watermarks should be ~rxd_size. 50 * - The performance win by using page-based allocation for LRO is less 51 * than the performance hit of using page-based allocation of non-LRO, 52 * so the watermarks should reflect this. 53 * 54 * Per channel we maintain a single variable, updated by each channel: 55 * 56 * rx_alloc_level += (lro_performed ? 
RX_ALLOC_FACTOR_LRO : 57 * RX_ALLOC_FACTOR_SKB) 58 * Per NAPI poll interval, we constrain rx_alloc_level to 0..MAX (which 59 * limits the hysteresis), and update the allocation strategy: 60 * 61 * rx_alloc_method = (rx_alloc_level > RX_ALLOC_LEVEL_LRO ? 62 * RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB) 63 */ 64static int rx_alloc_method = RX_ALLOC_METHOD_PAGE; 65 66#define RX_ALLOC_LEVEL_LRO 0x2000 67#define RX_ALLOC_LEVEL_MAX 0x3000 68#define RX_ALLOC_FACTOR_LRO 1 69#define RX_ALLOC_FACTOR_SKB (-2) 70 71/* This is the percentage fill level below which new RX descriptors 72 * will be added to the RX descriptor ring. 73 */ 74static unsigned int rx_refill_threshold = 90; 75 76/* This is the percentage fill level to which an RX queue will be refilled 77 * when the "RX refill threshold" is reached. 78 */ 79static unsigned int rx_refill_limit = 95; 80 81/* 82 * RX maximum head room required. 83 * 84 * This must be at least 1 to prevent overflow and at least 2 to allow 85 * pipelined receives. 86 */ 87#define EFX_RXD_HEAD_ROOM 2 88 89static inline unsigned int efx_rx_buf_offset(struct efx_rx_buffer *buf) 90{ 91 /* Offset is always within one page, so we don't need to consider 92 * the page order. 
93 */ 94 return (__force unsigned long) buf->data & (PAGE_SIZE - 1); 95} 96static inline unsigned int efx_rx_buf_size(struct efx_nic *efx) 97{ 98 return PAGE_SIZE << efx->rx_buffer_order; 99} 100 101 102/************************************************************************** 103 * 104 * Linux generic LRO handling 105 * 106 ************************************************************************** 107 */ 108 109static int efx_lro_get_skb_hdr(struct sk_buff *skb, void **ip_hdr, 110 void **tcpudp_hdr, u64 *hdr_flags, void *priv) 111{ 112 struct efx_channel *channel = priv; 113 struct iphdr *iph; 114 struct tcphdr *th; 115 116 iph = (struct iphdr *)skb->data; 117 if (skb->protocol != htons(ETH_P_IP) || iph->protocol != IPPROTO_TCP) 118 goto fail; 119 120 th = (struct tcphdr *)(skb->data + iph->ihl * 4); 121 122 *tcpudp_hdr = th; 123 *ip_hdr = iph; 124 *hdr_flags = LRO_IPV4 | LRO_TCP; 125 126 channel->rx_alloc_level += RX_ALLOC_FACTOR_LRO; 127 return 0; 128fail: 129 channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB; 130 return -1; 131} 132 133static int efx_get_frag_hdr(struct skb_frag_struct *frag, void **mac_hdr, 134 void **ip_hdr, void **tcpudp_hdr, u64 *hdr_flags, 135 void *priv) 136{ 137 struct efx_channel *channel = priv; 138 struct ethhdr *eh; 139 struct iphdr *iph; 140 141 /* We support EtherII and VLAN encapsulated IPv4 */ 142 eh = page_address(frag->page) + frag->page_offset; 143 *mac_hdr = eh; 144 145 if (eh->h_proto == htons(ETH_P_IP)) { 146 iph = (struct iphdr *)(eh + 1); 147 } else { 148 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)eh; 149 if (veh->h_vlan_encapsulated_proto != htons(ETH_P_IP)) 150 goto fail; 151 152 iph = (struct iphdr *)(veh + 1); 153 } 154 *ip_hdr = iph; 155 156 /* We can only do LRO over TCP */ 157 if (iph->protocol != IPPROTO_TCP) 158 goto fail; 159 160 *hdr_flags = LRO_IPV4 | LRO_TCP; 161 *tcpudp_hdr = (struct tcphdr *)((u8 *) iph + iph->ihl * 4); 162 163 channel->rx_alloc_level += RX_ALLOC_FACTOR_LRO; 164 return 0; 165 fail: 166 
channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB; 167 return -1; 168} 169 170int efx_lro_init(struct net_lro_mgr *lro_mgr, struct efx_nic *efx) 171{ 172 size_t s = sizeof(struct net_lro_desc) * EFX_MAX_LRO_DESCRIPTORS; 173 struct net_lro_desc *lro_arr; 174 175 /* Allocate the LRO descriptors structure */ 176 lro_arr = kzalloc(s, GFP_KERNEL); 177 if (lro_arr == NULL) 178 return -ENOMEM; 179 180 lro_mgr->lro_arr = lro_arr; 181 lro_mgr->max_desc = EFX_MAX_LRO_DESCRIPTORS; 182 lro_mgr->max_aggr = EFX_MAX_LRO_AGGR; 183 lro_mgr->frag_align_pad = EFX_PAGE_SKB_ALIGN; 184 185 lro_mgr->get_skb_header = efx_lro_get_skb_hdr; 186 lro_mgr->get_frag_header = efx_get_frag_hdr; 187 lro_mgr->dev = efx->net_dev; 188 189 lro_mgr->features = LRO_F_NAPI; 190 191 /* We can pass packets up with the checksum intact */ 192 lro_mgr->ip_summed = CHECKSUM_UNNECESSARY; 193 194 lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY; 195 196 return 0; 197} 198 199void efx_lro_fini(struct net_lro_mgr *lro_mgr) 200{ 201 kfree(lro_mgr->lro_arr); 202 lro_mgr->lro_arr = NULL; 203} 204 205/** 206 * efx_init_rx_buffer_skb - create new RX buffer using skb-based allocation 207 * 208 * @rx_queue: Efx RX queue 209 * @rx_buf: RX buffer structure to populate 210 * 211 * This allocates memory for a new receive buffer, maps it for DMA, 212 * and populates a struct efx_rx_buffer with the relevant 213 * information. Return a negative error code or 0 on success. 
214 */ 215static int efx_init_rx_buffer_skb(struct efx_rx_queue *rx_queue, 216 struct efx_rx_buffer *rx_buf) 217{ 218 struct efx_nic *efx = rx_queue->efx; 219 struct net_device *net_dev = efx->net_dev; 220 int skb_len = efx->rx_buffer_len; 221 222 rx_buf->skb = netdev_alloc_skb(net_dev, skb_len); 223 if (unlikely(!rx_buf->skb)) 224 return -ENOMEM; 225 226 /* Adjust the SKB for padding and checksum */ 227 skb_reserve(rx_buf->skb, NET_IP_ALIGN); 228 rx_buf->len = skb_len - NET_IP_ALIGN; 229 rx_buf->data = (char *)rx_buf->skb->data; 230 rx_buf->skb->ip_summed = CHECKSUM_UNNECESSARY; 231 232 rx_buf->dma_addr = pci_map_single(efx->pci_dev, 233 rx_buf->data, rx_buf->len, 234 PCI_DMA_FROMDEVICE); 235 236 if (unlikely(pci_dma_mapping_error(efx->pci_dev, rx_buf->dma_addr))) { 237 dev_kfree_skb_any(rx_buf->skb); 238 rx_buf->skb = NULL; 239 return -EIO; 240 } 241 242 return 0; 243} 244 245/** 246 * efx_init_rx_buffer_page - create new RX buffer using page-based allocation 247 * 248 * @rx_queue: Efx RX queue 249 * @rx_buf: RX buffer structure to populate 250 * 251 * This allocates memory for a new receive buffer, maps it for DMA, 252 * and populates a struct efx_rx_buffer with the relevant 253 * information. Return a negative error code or 0 on success. 254 */ 255static int efx_init_rx_buffer_page(struct efx_rx_queue *rx_queue, 256 struct efx_rx_buffer *rx_buf) 257{ 258 struct efx_nic *efx = rx_queue->efx; 259 int bytes, space, offset; 260 261 bytes = efx->rx_buffer_len - EFX_PAGE_IP_ALIGN; 262 263 /* If there is space left in the previously allocated page, 264 * then use it. 
Otherwise allocate a new one */ 265 rx_buf->page = rx_queue->buf_page; 266 if (rx_buf->page == NULL) { 267 dma_addr_t dma_addr; 268 269 rx_buf->page = alloc_pages(__GFP_COLD | __GFP_COMP | GFP_ATOMIC, 270 efx->rx_buffer_order); 271 if (unlikely(rx_buf->page == NULL)) 272 return -ENOMEM; 273 274 dma_addr = pci_map_page(efx->pci_dev, rx_buf->page, 275 0, efx_rx_buf_size(efx), 276 PCI_DMA_FROMDEVICE); 277 278 if (unlikely(pci_dma_mapping_error(efx->pci_dev, dma_addr))) { 279 __free_pages(rx_buf->page, efx->rx_buffer_order); 280 rx_buf->page = NULL; 281 return -EIO; 282 } 283 284 rx_queue->buf_page = rx_buf->page; 285 rx_queue->buf_dma_addr = dma_addr; 286 rx_queue->buf_data = (page_address(rx_buf->page) + 287 EFX_PAGE_IP_ALIGN); 288 } 289 290 rx_buf->len = bytes; 291 rx_buf->data = rx_queue->buf_data; 292 offset = efx_rx_buf_offset(rx_buf); 293 rx_buf->dma_addr = rx_queue->buf_dma_addr + offset; 294 295 /* Try to pack multiple buffers per page */ 296 if (efx->rx_buffer_order == 0) { 297 /* The next buffer starts on the next 512 byte boundary */ 298 rx_queue->buf_data += ((bytes + 0x1ff) & ~0x1ff); 299 offset += ((bytes + 0x1ff) & ~0x1ff); 300 301 space = efx_rx_buf_size(efx) - offset; 302 if (space >= bytes) { 303 /* Refs dropped on kernel releasing each skb */ 304 get_page(rx_queue->buf_page); 305 goto out; 306 } 307 } 308 309 /* This is the final RX buffer for this page, so mark it for 310 * unmapping */ 311 rx_queue->buf_page = NULL; 312 rx_buf->unmap_addr = rx_queue->buf_dma_addr; 313 314 out: 315 return 0; 316} 317 318/* This allocates memory for a new receive buffer, maps it for DMA, 319 * and populates a struct efx_rx_buffer with the relevant 320 * information. 
321 */ 322static int efx_init_rx_buffer(struct efx_rx_queue *rx_queue, 323 struct efx_rx_buffer *new_rx_buf) 324{ 325 int rc = 0; 326 327 if (rx_queue->channel->rx_alloc_push_pages) { 328 new_rx_buf->skb = NULL; 329 rc = efx_init_rx_buffer_page(rx_queue, new_rx_buf); 330 rx_queue->alloc_page_count++; 331 } else { 332 new_rx_buf->page = NULL; 333 rc = efx_init_rx_buffer_skb(rx_queue, new_rx_buf); 334 rx_queue->alloc_skb_count++; 335 } 336 337 if (unlikely(rc < 0)) 338 EFX_LOG_RL(rx_queue->efx, "%s RXQ[%d] =%d\n", __func__, 339 rx_queue->queue, rc); 340 return rc; 341} 342 343static void efx_unmap_rx_buffer(struct efx_nic *efx, 344 struct efx_rx_buffer *rx_buf) 345{ 346 if (rx_buf->page) { 347 EFX_BUG_ON_PARANOID(rx_buf->skb); 348 if (rx_buf->unmap_addr) { 349 pci_unmap_page(efx->pci_dev, rx_buf->unmap_addr, 350 efx_rx_buf_size(efx), 351 PCI_DMA_FROMDEVICE); 352 rx_buf->unmap_addr = 0; 353 } 354 } else if (likely(rx_buf->skb)) { 355 pci_unmap_single(efx->pci_dev, rx_buf->dma_addr, 356 rx_buf->len, PCI_DMA_FROMDEVICE); 357 } 358} 359 360static void efx_free_rx_buffer(struct efx_nic *efx, 361 struct efx_rx_buffer *rx_buf) 362{ 363 if (rx_buf->page) { 364 __free_pages(rx_buf->page, efx->rx_buffer_order); 365 rx_buf->page = NULL; 366 } else if (likely(rx_buf->skb)) { 367 dev_kfree_skb_any(rx_buf->skb); 368 rx_buf->skb = NULL; 369 } 370} 371 372static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, 373 struct efx_rx_buffer *rx_buf) 374{ 375 efx_unmap_rx_buffer(rx_queue->efx, rx_buf); 376 efx_free_rx_buffer(rx_queue->efx, rx_buf); 377} 378 379/** 380 * efx_fast_push_rx_descriptors - push new RX descriptors quickly 381 * @rx_queue: RX descriptor queue 382 * @retry: Recheck the fill level 383 * This will aim to fill the RX descriptor queue up to 384 * @rx_queue->@fast_fill_limit. If there is insufficient atomic 385 * memory to do so, the caller should retry. 
386 */ 387static int __efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, 388 int retry) 389{ 390 struct efx_rx_buffer *rx_buf; 391 unsigned fill_level, index; 392 int i, space, rc = 0; 393 394 /* Calculate current fill level. Do this outside the lock, 395 * because most of the time we'll end up not wanting to do the 396 * fill anyway. 397 */ 398 fill_level = (rx_queue->added_count - rx_queue->removed_count); 399 EFX_BUG_ON_PARANOID(fill_level > 400 rx_queue->efx->type->rxd_ring_mask + 1); 401 402 /* Don't fill if we don't need to */ 403 if (fill_level >= rx_queue->fast_fill_trigger) 404 return 0; 405 406 /* Record minimum fill level */ 407 if (unlikely(fill_level < rx_queue->min_fill)) { 408 if (fill_level) 409 rx_queue->min_fill = fill_level; 410 } 411 412 /* Acquire RX add lock. If this lock is contended, then a fast 413 * fill must already be in progress (e.g. in the refill 414 * tasklet), so we don't need to do anything 415 */ 416 if (!spin_trylock_bh(&rx_queue->add_lock)) 417 return -1; 418 419 retry: 420 /* Recalculate current fill level now that we have the lock */ 421 fill_level = (rx_queue->added_count - rx_queue->removed_count); 422 EFX_BUG_ON_PARANOID(fill_level > 423 rx_queue->efx->type->rxd_ring_mask + 1); 424 space = rx_queue->fast_fill_limit - fill_level; 425 if (space < EFX_RX_BATCH) 426 goto out_unlock; 427 428 EFX_TRACE(rx_queue->efx, "RX queue %d fast-filling descriptor ring from" 429 " level %d to level %d using %s allocation\n", 430 rx_queue->queue, fill_level, rx_queue->fast_fill_limit, 431 rx_queue->channel->rx_alloc_push_pages ? 
"page" : "skb"); 432 433 do { 434 for (i = 0; i < EFX_RX_BATCH; ++i) { 435 index = (rx_queue->added_count & 436 rx_queue->efx->type->rxd_ring_mask); 437 rx_buf = efx_rx_buffer(rx_queue, index); 438 rc = efx_init_rx_buffer(rx_queue, rx_buf); 439 if (unlikely(rc)) 440 goto out; 441 ++rx_queue->added_count; 442 } 443 } while ((space -= EFX_RX_BATCH) >= EFX_RX_BATCH); 444 445 EFX_TRACE(rx_queue->efx, "RX queue %d fast-filled descriptor ring " 446 "to level %d\n", rx_queue->queue, 447 rx_queue->added_count - rx_queue->removed_count); 448 449 out: 450 /* Send write pointer to card. */ 451 falcon_notify_rx_desc(rx_queue); 452 453 /* If the fast fill is running inside from the refill tasklet, then 454 * for SMP systems it may be running on a different CPU to 455 * RX event processing, which means that the fill level may now be 456 * out of date. */ 457 if (unlikely(retry && (rc == 0))) 458 goto retry; 459 460 out_unlock: 461 spin_unlock_bh(&rx_queue->add_lock); 462 463 return rc; 464} 465 466/** 467 * efx_fast_push_rx_descriptors - push new RX descriptors quickly 468 * @rx_queue: RX descriptor queue 469 * 470 * This will aim to fill the RX descriptor queue up to 471 * @rx_queue->@fast_fill_limit. If there is insufficient memory to do so, 472 * it will schedule a work item to immediately continue the fast fill 473 */ 474void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue) 475{ 476 int rc; 477 478 rc = __efx_fast_push_rx_descriptors(rx_queue, 0); 479 if (unlikely(rc)) { 480 /* Schedule the work item to run immediately. The hope is 481 * that work is immediately pending to free some memory 482 * (e.g. 
an RX event or TX completion) 483 */ 484 efx_schedule_slow_fill(rx_queue, 0); 485 } 486} 487 488void efx_rx_work(struct work_struct *data) 489{ 490 struct efx_rx_queue *rx_queue; 491 int rc; 492 493 rx_queue = container_of(data, struct efx_rx_queue, work.work); 494 495 if (unlikely(!rx_queue->channel->enabled)) 496 return; 497 498 EFX_TRACE(rx_queue->efx, "RX queue %d worker thread executing on CPU " 499 "%d\n", rx_queue->queue, raw_smp_processor_id()); 500 501 ++rx_queue->slow_fill_count; 502 /* Push new RX descriptors, allowing at least 1 jiffy for 503 * the kernel to free some more memory. */ 504 rc = __efx_fast_push_rx_descriptors(rx_queue, 1); 505 if (rc) 506 efx_schedule_slow_fill(rx_queue, 1); 507} 508 509static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, 510 struct efx_rx_buffer *rx_buf, 511 int len, bool *discard, 512 bool *leak_packet) 513{ 514 struct efx_nic *efx = rx_queue->efx; 515 unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding; 516 517 if (likely(len <= max_len)) 518 return; 519 520 /* The packet must be discarded, but this is only a fatal error 521 * if the caller indicated it was 522 */ 523 *discard = true; 524 525 if ((len > rx_buf->len) && EFX_WORKAROUND_8071(efx)) { 526 EFX_ERR_RL(efx, " RX queue %d seriously overlength " 527 "RX event (0x%x > 0x%x+0x%x). Leaking\n", 528 rx_queue->queue, len, max_len, 529 efx->type->rx_buffer_padding); 530 /* If this buffer was skb-allocated, then the meta 531 * data at the end of the skb will be trashed. So 532 * we have no choice but to leak the fragment. 
533 */ 534 *leak_packet = (rx_buf->skb != NULL); 535 efx_schedule_reset(efx, RESET_TYPE_RX_RECOVERY); 536 } else { 537 EFX_ERR_RL(efx, " RX queue %d overlength RX event " 538 "(0x%x > 0x%x)\n", rx_queue->queue, len, max_len); 539 } 540 541 rx_queue->channel->n_rx_overlength++; 542} 543 544/* Pass a received packet up through the generic LRO stack 545 * 546 * Handles driverlink veto, and passes the fragment up via 547 * the appropriate LRO method 548 */ 549static void efx_rx_packet_lro(struct efx_channel *channel, 550 struct efx_rx_buffer *rx_buf) 551{ 552 struct net_lro_mgr *lro_mgr = &channel->lro_mgr; 553 void *priv = channel; 554 555 /* Pass the skb/page into the LRO engine */ 556 if (rx_buf->page) { 557 struct skb_frag_struct frags; 558 559 frags.page = rx_buf->page; 560 frags.page_offset = efx_rx_buf_offset(rx_buf); 561 frags.size = rx_buf->len; 562 563 lro_receive_frags(lro_mgr, &frags, rx_buf->len, 564 rx_buf->len, priv, 0); 565 566 EFX_BUG_ON_PARANOID(rx_buf->skb); 567 rx_buf->page = NULL; 568 } else { 569 EFX_BUG_ON_PARANOID(!rx_buf->skb); 570 571 lro_receive_skb(lro_mgr, rx_buf->skb, priv); 572 rx_buf->skb = NULL; 573 } 574} 575 576/* Allocate and construct an SKB around a struct page.*/ 577static struct sk_buff *efx_rx_mk_skb(struct efx_rx_buffer *rx_buf, 578 struct efx_nic *efx, 579 int hdr_len) 580{ 581 struct sk_buff *skb; 582 583 /* Allocate an SKB to store the headers */ 584 skb = netdev_alloc_skb(efx->net_dev, hdr_len + EFX_PAGE_SKB_ALIGN); 585 if (unlikely(skb == NULL)) { 586 EFX_ERR_RL(efx, "RX out of memory for skb\n"); 587 return NULL; 588 } 589 590 EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags); 591 EFX_BUG_ON_PARANOID(rx_buf->len < hdr_len); 592 593 skb->ip_summed = CHECKSUM_UNNECESSARY; 594 skb_reserve(skb, EFX_PAGE_SKB_ALIGN); 595 596 skb->len = rx_buf->len; 597 skb->truesize = rx_buf->len + sizeof(struct sk_buff); 598 memcpy(skb->data, rx_buf->data, hdr_len); 599 skb->tail += hdr_len; 600 601 /* Append the remaining page onto the frag list 
*/ 602 if (unlikely(rx_buf->len > hdr_len)) { 603 struct skb_frag_struct *frag = skb_shinfo(skb)->frags; 604 frag->page = rx_buf->page; 605 frag->page_offset = efx_rx_buf_offset(rx_buf) + hdr_len; 606 frag->size = skb->len - hdr_len; 607 skb_shinfo(skb)->nr_frags = 1; 608 skb->data_len = frag->size; 609 } else { 610 __free_pages(rx_buf->page, efx->rx_buffer_order); 611 skb->data_len = 0; 612 } 613 614 /* Ownership has transferred from the rx_buf to skb */ 615 rx_buf->page = NULL; 616 617 /* Move past the ethernet header */ 618 skb->protocol = eth_type_trans(skb, efx->net_dev); 619 620 return skb; 621} 622 623void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, 624 unsigned int len, bool checksummed, bool discard) 625{ 626 struct efx_nic *efx = rx_queue->efx; 627 struct efx_rx_buffer *rx_buf; 628 bool leak_packet = false; 629 630 rx_buf = efx_rx_buffer(rx_queue, index); 631 EFX_BUG_ON_PARANOID(!rx_buf->data); 632 EFX_BUG_ON_PARANOID(rx_buf->skb && rx_buf->page); 633 EFX_BUG_ON_PARANOID(!(rx_buf->skb || rx_buf->page)); 634 635 /* This allows the refill path to post another buffer. 636 * EFX_RXD_HEAD_ROOM ensures that the slot we are using 637 * isn't overwritten yet. 638 */ 639 rx_queue->removed_count++; 640 641 /* Validate the length encoded in the event vs the descriptor pushed */ 642 efx_rx_packet__check_len(rx_queue, rx_buf, len, 643 &discard, &leak_packet); 644 645 EFX_TRACE(efx, "RX queue %d received id %x at %llx+%x %s%s\n", 646 rx_queue->queue, index, 647 (unsigned long long)rx_buf->dma_addr, len, 648 (checksummed ? " [SUMMED]" : ""), 649 (discard ? 
" [DISCARD]" : "")); 650 651 /* Discard packet, if instructed to do so */ 652 if (unlikely(discard)) { 653 if (unlikely(leak_packet)) 654 rx_queue->channel->n_skbuff_leaks++; 655 else 656 /* We haven't called efx_unmap_rx_buffer yet, 657 * so fini the entire rx_buffer here */ 658 efx_fini_rx_buffer(rx_queue, rx_buf); 659 return; 660 } 661 662 /* Release card resources - assumes all RX buffers consumed in-order 663 * per RX queue 664 */ 665 efx_unmap_rx_buffer(efx, rx_buf); 666 667 /* Prefetch nice and early so data will (hopefully) be in cache by 668 * the time we look at it. 669 */ 670 prefetch(rx_buf->data); 671 672 /* Pipeline receives so that we give time for packet headers to be 673 * prefetched into cache. 674 */ 675 rx_buf->len = len; 676 if (rx_queue->channel->rx_pkt) 677 __efx_rx_packet(rx_queue->channel, 678 rx_queue->channel->rx_pkt, 679 rx_queue->channel->rx_pkt_csummed); 680 rx_queue->channel->rx_pkt = rx_buf; 681 rx_queue->channel->rx_pkt_csummed = checksummed; 682} 683 684/* Handle a received packet. Second half: Touches packet payload. */ 685void __efx_rx_packet(struct efx_channel *channel, 686 struct efx_rx_buffer *rx_buf, bool checksummed) 687{ 688 struct efx_nic *efx = channel->efx; 689 struct sk_buff *skb; 690 bool lro = !!(efx->net_dev->features & NETIF_F_LRO); 691 692 /* If we're in loopback test, then pass the packet directly to the 693 * loopback layer, and free the rx_buf here 694 */ 695 if (unlikely(efx->loopback_selftest)) { 696 efx_loopback_rx_packet(efx, rx_buf->data, rx_buf->len); 697 efx_free_rx_buffer(efx, rx_buf); 698 goto done; 699 } 700 701 if (rx_buf->skb) { 702 prefetch(skb_shinfo(rx_buf->skb)); 703 704 skb_put(rx_buf->skb, rx_buf->len); 705 706 /* Move past the ethernet header. 
rx_buf->data still points 707 * at the ethernet header */ 708 rx_buf->skb->protocol = eth_type_trans(rx_buf->skb, 709 efx->net_dev); 710 } 711 712 /* Both our generic-LRO and SFC-SSR support skb and page based 713 * allocation, but neither support switching from one to the 714 * other on the fly. If we spot that the allocation mode has 715 * changed, then flush the LRO state. 716 */ 717 if (unlikely(channel->rx_alloc_pop_pages != (rx_buf->page != NULL))) { 718 efx_flush_lro(channel); 719 channel->rx_alloc_pop_pages = (rx_buf->page != NULL); 720 } 721 if (likely(checksummed && lro)) { 722 efx_rx_packet_lro(channel, rx_buf); 723 goto done; 724 } 725 726 /* Form an skb if required */ 727 if (rx_buf->page) { 728 int hdr_len = min(rx_buf->len, EFX_SKB_HEADERS); 729 skb = efx_rx_mk_skb(rx_buf, efx, hdr_len); 730 if (unlikely(skb == NULL)) { 731 efx_free_rx_buffer(efx, rx_buf); 732 goto done; 733 } 734 } else { 735 /* We now own the SKB */ 736 skb = rx_buf->skb; 737 rx_buf->skb = NULL; 738 } 739 740 EFX_BUG_ON_PARANOID(rx_buf->page); 741 EFX_BUG_ON_PARANOID(rx_buf->skb); 742 EFX_BUG_ON_PARANOID(!skb); 743 744 /* Set the SKB flags */ 745 if (unlikely(!checksummed || !efx->rx_checksum_enabled)) 746 skb->ip_summed = CHECKSUM_NONE; 747 748 /* Pass the packet up */ 749 netif_receive_skb(skb); 750 751 /* Update allocation strategy method */ 752 channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB; 753 754done: 755 ; 756} 757 758void efx_rx_strategy(struct efx_channel *channel) 759{ 760 enum efx_rx_alloc_method method = rx_alloc_method; 761 762 /* Only makes sense to use page based allocation if LRO is enabled */ 763 if (!(channel->efx->net_dev->features & NETIF_F_LRO)) { 764 method = RX_ALLOC_METHOD_SKB; 765 } else if (method == RX_ALLOC_METHOD_AUTO) { 766 /* Constrain the rx_alloc_level */ 767 if (channel->rx_alloc_level < 0) 768 channel->rx_alloc_level = 0; 769 else if (channel->rx_alloc_level > RX_ALLOC_LEVEL_MAX) 770 channel->rx_alloc_level = RX_ALLOC_LEVEL_MAX; 771 772 /* Decide 
on the allocation method */ 773 method = ((channel->rx_alloc_level > RX_ALLOC_LEVEL_LRO) ? 774 RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB); 775 } 776 777 /* Push the option */ 778 channel->rx_alloc_push_pages = (method == RX_ALLOC_METHOD_PAGE); 779} 780 781int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) 782{ 783 struct efx_nic *efx = rx_queue->efx; 784 unsigned int rxq_size; 785 int rc; 786 787 EFX_LOG(efx, "creating RX queue %d\n", rx_queue->queue); 788 789 /* Allocate RX buffers */ 790 rxq_size = (efx->type->rxd_ring_mask + 1) * sizeof(*rx_queue->buffer); 791 rx_queue->buffer = kzalloc(rxq_size, GFP_KERNEL); 792 if (!rx_queue->buffer) 793 return -ENOMEM; 794 795 rc = falcon_probe_rx(rx_queue); 796 if (rc) { 797 kfree(rx_queue->buffer); 798 rx_queue->buffer = NULL; 799 } 800 return rc; 801} 802 803void efx_init_rx_queue(struct efx_rx_queue *rx_queue) 804{ 805 struct efx_nic *efx = rx_queue->efx; 806 unsigned int max_fill, trigger, limit; 807 808 EFX_LOG(rx_queue->efx, "initialising RX queue %d\n", rx_queue->queue); 809 810 /* Initialise ptr fields */ 811 rx_queue->added_count = 0; 812 rx_queue->notified_count = 0; 813 rx_queue->removed_count = 0; 814 rx_queue->min_fill = -1U; 815 rx_queue->min_overfill = -1U; 816 817 /* Initialise limit fields */ 818 max_fill = efx->type->rxd_ring_mask + 1 - EFX_RXD_HEAD_ROOM; 819 trigger = max_fill * min(rx_refill_threshold, 100U) / 100U; 820 limit = max_fill * min(rx_refill_limit, 100U) / 100U; 821 822 rx_queue->max_fill = max_fill; 823 rx_queue->fast_fill_trigger = trigger; 824 rx_queue->fast_fill_limit = limit; 825 826 /* Set up RX descriptor ring */ 827 falcon_init_rx(rx_queue); 828} 829 830void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) 831{ 832 int i; 833 struct efx_rx_buffer *rx_buf; 834 835 EFX_LOG(rx_queue->efx, "shutting down RX queue %d\n", rx_queue->queue); 836 837 falcon_fini_rx(rx_queue); 838 839 /* Release RX buffers NB start at index 0 not current HW ptr */ 840 if (rx_queue->buffer) { 841 for (i = 0; i 
<= rx_queue->efx->type->rxd_ring_mask; i++) { 842 rx_buf = efx_rx_buffer(rx_queue, i); 843 efx_fini_rx_buffer(rx_queue, rx_buf); 844 } 845 } 846 847 /* For a page that is part-way through splitting into RX buffers */ 848 if (rx_queue->buf_page != NULL) { 849 pci_unmap_page(rx_queue->efx->pci_dev, rx_queue->buf_dma_addr, 850 efx_rx_buf_size(rx_queue->efx), 851 PCI_DMA_FROMDEVICE); 852 __free_pages(rx_queue->buf_page, 853 rx_queue->efx->rx_buffer_order); 854 rx_queue->buf_page = NULL; 855 } 856} 857 858void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) 859{ 860 EFX_LOG(rx_queue->efx, "destroying RX queue %d\n", rx_queue->queue); 861 862 falcon_remove_rx(rx_queue); 863 864 kfree(rx_queue->buffer); 865 rx_queue->buffer = NULL; 866} 867 868void efx_flush_lro(struct efx_channel *channel) 869{ 870 lro_flush_all(&channel->lro_mgr); 871} 872 873 874module_param(rx_alloc_method, int, 0644); 875MODULE_PARM_DESC(rx_alloc_method, "Allocation method used for RX buffers"); 876 877module_param(rx_refill_threshold, uint, 0444); 878MODULE_PARM_DESC(rx_refill_threshold, 879 "RX descriptor ring fast/slow fill threshold (%)"); 880