Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.30-rc7 1079 lines 30 kB view raw
1/* 2 * Copyright (c) 2007 Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 * 32 */ 33 34#include <linux/mlx4/cq.h> 35#include <linux/mlx4/qp.h> 36#include <linux/skbuff.h> 37#include <linux/if_ether.h> 38#include <linux/if_vlan.h> 39#include <linux/vmalloc.h> 40 41#include "mlx4_en.h" 42 43static void *get_wqe(struct mlx4_en_rx_ring *ring, int n) 44{ 45 int offset = n << ring->srq.wqe_shift; 46 return ring->buf + offset; 47} 48 49static void mlx4_en_srq_event(struct mlx4_srq *srq, enum mlx4_event type) 50{ 51 return; 52} 53 54static int mlx4_en_get_frag_header(struct skb_frag_struct *frags, void **mac_hdr, 55 void **ip_hdr, void **tcpudp_hdr, 56 u64 *hdr_flags, void *priv) 57{ 58 *mac_hdr = page_address(frags->page) + frags->page_offset; 59 *ip_hdr = *mac_hdr + ETH_HLEN; 60 *tcpudp_hdr = (struct tcphdr *)(*ip_hdr + sizeof(struct iphdr)); 61 *hdr_flags = LRO_IPV4 | LRO_TCP; 62 63 return 0; 64} 65 66static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv, 67 struct mlx4_en_rx_desc *rx_desc, 68 struct skb_frag_struct *skb_frags, 69 struct mlx4_en_rx_alloc *ring_alloc, 70 int i) 71{ 72 struct mlx4_en_dev *mdev = priv->mdev; 73 struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; 74 struct mlx4_en_rx_alloc *page_alloc = &ring_alloc[i]; 75 struct page *page; 76 dma_addr_t dma; 77 78 if (page_alloc->offset == frag_info->last_offset) { 79 /* Allocate new page */ 80 page = alloc_pages(GFP_ATOMIC | __GFP_COMP, MLX4_EN_ALLOC_ORDER); 81 if (!page) 82 return -ENOMEM; 83 84 skb_frags[i].page = page_alloc->page; 85 skb_frags[i].page_offset = page_alloc->offset; 86 page_alloc->page = page; 87 page_alloc->offset = frag_info->frag_align; 88 } else { 89 page = page_alloc->page; 90 get_page(page); 91 92 skb_frags[i].page = page; 93 skb_frags[i].page_offset = page_alloc->offset; 94 page_alloc->offset += frag_info->frag_stride; 95 } 96 dma = pci_map_single(mdev->pdev, page_address(skb_frags[i].page) + 97 skb_frags[i].page_offset, frag_info->frag_size, 98 PCI_DMA_FROMDEVICE); 99 rx_desc->data[i].addr = cpu_to_be64(dma); 100 return 0; 101} 102 103static int mlx4_en_init_allocator(struct mlx4_en_priv *priv, 104 struct mlx4_en_rx_ring *ring) 105{ 106 struct mlx4_en_rx_alloc *page_alloc; 107 int i; 108 109 for (i = 0; i < priv->num_frags; i++) { 110 page_alloc = &ring->page_alloc[i]; 111 page_alloc->page = alloc_pages(GFP_ATOMIC | __GFP_COMP, 112 MLX4_EN_ALLOC_ORDER); 113 if (!page_alloc->page) 114 goto out; 115 116 page_alloc->offset = priv->frag_info[i].frag_align; 117 mlx4_dbg(DRV, priv, "Initialized allocator:%d with page:%p\n", 118 i, page_alloc->page); 119 } 120 return 0; 121 122out: 123 while (i--) { 124 page_alloc = &ring->page_alloc[i]; 125 put_page(page_alloc->page); 126 page_alloc->page = NULL; 127 } 128 return -ENOMEM; 129} 130 131static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv, 132 struct mlx4_en_rx_ring *ring) 133{ 134 struct mlx4_en_rx_alloc *page_alloc; 135 int i; 136 137 for (i = 0; i < priv->num_frags; i++) { 138 page_alloc = &ring->page_alloc[i]; 139 mlx4_dbg(DRV, priv, "Freeing allocator:%d count:%d\n", 140 i, page_count(page_alloc->page)); 141 142 put_page(page_alloc->page); 143 page_alloc->page = NULL; 144 } 145} 146 147 148static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv, 149 struct mlx4_en_rx_ring *ring, int index) 150{ 151 struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index; 152 struct skb_frag_struct *skb_frags = ring->rx_info + 153 (index << priv->log_rx_info); 154 int possible_frags; 155 int i; 156 157 /* Pre-link descriptor */ 158 rx_desc->next.next_wqe_index = cpu_to_be16((index + 1) & ring->size_mask); 159 160 /* Set size and memtype fields */ 161 for (i = 0; i < priv->num_frags; i++) { 162 skb_frags[i].size = priv->frag_info[i].frag_size; 163 rx_desc->data[i].byte_count = 164 cpu_to_be32(priv->frag_info[i].frag_size); 165 rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key); 166 } 167 168 /* If the number of used fragments does not fill up the ring stride, 169 * remaining (unused) fragments must be padded with null address/size 170 * and a special memory key */ 171 possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE; 172 for (i = priv->num_frags; i < possible_frags; i++) { 173 rx_desc->data[i].byte_count = 0; 174 rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD); 175 rx_desc->data[i].addr = 0; 176 } 177} 178 179 180static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, 181 struct mlx4_en_rx_ring *ring, int index) 182{ 183 struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride); 184 struct skb_frag_struct *skb_frags = ring->rx_info + 185 (index << priv->log_rx_info); 186 int i; 187 188 for (i = 0; i < priv->num_frags; i++) 189 if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, ring->page_alloc, i)) 190 goto err; 191 192 return 0; 193 194err: 195 while (i--) 196 put_page(skb_frags[i].page); 197 return -ENOMEM; 198} 199 200static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring) 201{ 202 *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff); 203} 204 205static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) 206{ 207 struct mlx4_en_dev *mdev = priv->mdev; 208 struct mlx4_en_rx_ring *ring; 209 int ring_ind; 210 int buf_ind; 211 212 for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) { 213 for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { 214 ring = &priv->rx_ring[ring_ind]; 215 216 if (mlx4_en_prepare_rx_desc(priv, ring, 217 ring->actual_size)) { 218 if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) { 219 mlx4_err(mdev, "Failed to allocate " 220 "enough rx buffers\n"); 221 return -ENOMEM; 222 } else { 223 if (netif_msg_rx_err(priv)) 224 mlx4_warn(mdev, 225 "Only %d buffers allocated\n", 226 ring->actual_size); 227 goto out; 228 } 229 } 230 ring->actual_size++; 231 ring->prod++; 232 } 233 } 234out: 235 return 0; 236} 237 238static int mlx4_en_fill_rx_buf(struct net_device *dev, 239 struct mlx4_en_rx_ring *ring) 240{ 241 struct mlx4_en_priv *priv = netdev_priv(dev); 242 int num = 0; 243 int err; 244 245 while ((u32) (ring->prod - ring->cons) < ring->actual_size) { 246 err = mlx4_en_prepare_rx_desc(priv, ring, ring->prod & 247 ring->size_mask); 248 if (err) { 249 if (netif_msg_rx_err(priv)) 250 mlx4_warn(priv->mdev, 251 "Failed preparing rx descriptor\n"); 252 priv->port_stats.rx_alloc_failed++; 253 break; 254 } 255 ++num; 256 ++ring->prod; 257 } 258 if ((u32) (ring->prod - ring->cons) == ring->size) 259 ring->full = 1; 260 261 return num; 262} 263 264static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv, 265 struct mlx4_en_rx_ring *ring) 266{ 267 struct mlx4_en_dev *mdev = priv->mdev; 268 struct skb_frag_struct *skb_frags; 269 struct mlx4_en_rx_desc *rx_desc; 270 dma_addr_t dma; 271 int index; 272 int nr; 273 274 mlx4_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n", 275 ring->cons, ring->prod); 276 277 /* Unmap and free Rx buffers */ 278 BUG_ON((u32) (ring->prod - ring->cons) > ring->size); 279 while (ring->cons != ring->prod) { 280 index = ring->cons & ring->size_mask; 281 rx_desc = ring->buf + (index << ring->log_stride); 282 skb_frags = ring->rx_info + (index << priv->log_rx_info); 283 mlx4_dbg(DRV, priv, "Processing descriptor:%d\n", index); 284 285 for (nr = 0; nr < priv->num_frags; nr++) { 286 mlx4_dbg(DRV, priv, "Freeing fragment:%d\n", nr); 287 dma = be64_to_cpu(rx_desc->data[nr].addr); 288 289 mlx4_dbg(DRV, priv, "Unmaping buffer at dma:0x%llx\n", (u64) dma); 290 pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size, 291 PCI_DMA_FROMDEVICE); 292 put_page(skb_frags[nr].page); 293 } 294 ++ring->cons; 295 } 296} 297 298 299void mlx4_en_rx_refill(struct work_struct *work) 300{ 301 struct delayed_work *delay = to_delayed_work(work); 302 struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv, 303 refill_task); 304 struct mlx4_en_dev *mdev = priv->mdev; 305 struct net_device *dev = priv->dev; 306 struct mlx4_en_rx_ring *ring; 307 int need_refill = 0; 308 int i; 309 310 mutex_lock(&mdev->state_lock); 311 if (!mdev->device_up || !priv->port_up) 312 goto out; 313 314 /* We only get here if there are no receive buffers, so we can't race 315 * with Rx interrupts while filling buffers */ 316 for (i = 0; i < priv->rx_ring_num; i++) { 317 ring = &priv->rx_ring[i]; 318 if (ring->need_refill) { 319 if (mlx4_en_fill_rx_buf(dev, ring)) { 320 ring->need_refill = 0; 321 mlx4_en_update_rx_prod_db(ring); 322 } else 323 need_refill = 1; 324 } 325 } 326 if (need_refill) 327 queue_delayed_work(mdev->workqueue, &priv->refill_task, HZ); 328 329out: 330 mutex_unlock(&mdev->state_lock); 331} 332 333 334int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, 335 struct mlx4_en_rx_ring *ring, u32 size, u16 stride) 336{ 337 struct mlx4_en_dev *mdev = priv->mdev; 338 int err; 339 int tmp; 340 341 /* Sanity check SRQ size before proceeding */ 342 if (size >= mdev->dev->caps.max_srq_wqes) 343 return -EINVAL; 344 345 ring->prod = 0; 346 ring->cons = 0; 347 ring->size = size; 348 ring->size_mask = size - 1; 349 ring->stride = stride; 350 ring->log_stride = ffs(ring->stride) - 1; 351 ring->buf_size = ring->size * ring->stride; 352 353 tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS * 354 sizeof(struct skb_frag_struct)); 355 ring->rx_info = vmalloc(tmp); 356 if (!ring->rx_info) { 357 mlx4_err(mdev, "Failed allocating rx_info ring\n"); 358 return -ENOMEM; 359 } 360 mlx4_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n", 361 ring->rx_info, tmp); 362 363 err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, 364 ring->buf_size, 2 * PAGE_SIZE); 365 if (err) 366 goto err_ring; 367 368 err = mlx4_en_map_buffer(&ring->wqres.buf); 369 if (err) { 370 mlx4_err(mdev, "Failed to map RX buffer\n"); 371 goto err_hwq; 372 } 373 ring->buf = ring->wqres.buf.direct.buf; 374 375 /* Configure lro mngr */ 376 memset(&ring->lro, 0, sizeof(struct net_lro_mgr)); 377 ring->lro.dev = priv->dev; 378 ring->lro.features = LRO_F_NAPI; 379 ring->lro.frag_align_pad = NET_IP_ALIGN; 380 ring->lro.ip_summed = CHECKSUM_UNNECESSARY; 381 ring->lro.ip_summed_aggr = CHECKSUM_UNNECESSARY; 382 ring->lro.max_desc = mdev->profile.num_lro; 383 ring->lro.max_aggr = MAX_SKB_FRAGS; 384 ring->lro.lro_arr = kzalloc(mdev->profile.num_lro * 385 sizeof(struct net_lro_desc), 386 GFP_KERNEL); 387 if (!ring->lro.lro_arr) { 388 mlx4_err(mdev, "Failed to allocate lro array\n"); 389 goto err_map; 390 } 391 ring->lro.get_frag_header = mlx4_en_get_frag_header; 392 393 return 0; 394 395err_map: 396 mlx4_en_unmap_buffer(&ring->wqres.buf); 397err_hwq: 398 mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); 399err_ring: 400 vfree(ring->rx_info); 401 ring->rx_info = NULL; 402 return err; 403} 404 405int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) 406{ 407 struct mlx4_en_dev *mdev = priv->mdev; 408 struct mlx4_wqe_srq_next_seg *next; 409 struct mlx4_en_rx_ring *ring; 410 int i; 411 int ring_ind; 412 int err; 413 int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + 414 DS_SIZE * priv->num_frags); 415 int max_gs = (stride - sizeof(struct mlx4_wqe_srq_next_seg)) / DS_SIZE; 416 417 for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { 418 ring = &priv->rx_ring[ring_ind]; 419 420 ring->prod = 0; 421 ring->cons = 0; 422 ring->actual_size = 0; 423 ring->cqn = priv->rx_cq[ring_ind].mcq.cqn; 424 425 ring->stride = stride; 426 ring->log_stride = ffs(ring->stride) - 1; 427 ring->buf_size = ring->size * ring->stride; 428 429 memset(ring->buf, 0, ring->buf_size); 430 mlx4_en_update_rx_prod_db(ring); 431 432 /* Initailize all descriptors */ 433 for (i = 0; i < ring->size; i++) 434 mlx4_en_init_rx_desc(priv, ring, i); 435 436 /* Initialize page allocators */ 437 err = mlx4_en_init_allocator(priv, ring); 438 if (err) { 439 mlx4_err(mdev, "Failed initializing ring allocator\n"); 440 ring_ind--; 441 goto err_allocator; 442 } 443 444 /* Fill Rx buffers */ 445 ring->full = 0; 446 } 447 err = mlx4_en_fill_rx_buffers(priv); 448 if (err) 449 goto err_buffers; 450 451 for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { 452 ring = &priv->rx_ring[ring_ind]; 453 454 mlx4_en_update_rx_prod_db(ring); 455 456 /* Configure SRQ representing the ring */ 457 ring->srq.max = ring->size; 458 ring->srq.max_gs = max_gs; 459 ring->srq.wqe_shift = ilog2(ring->stride); 460 461 for (i = 0; i < ring->srq.max; ++i) { 462 next = get_wqe(ring, i); 463 next->next_wqe_index = 464 cpu_to_be16((i + 1) & (ring->srq.max - 1)); 465 } 466 467 err = mlx4_srq_alloc(mdev->dev, mdev->priv_pdn, &ring->wqres.mtt, 468 ring->wqres.db.dma, &ring->srq); 469 if (err){ 470 mlx4_err(mdev, "Failed to allocate srq\n"); 471 ring_ind--; 472 goto err_srq; 473 } 474 ring->srq.event = mlx4_en_srq_event; 475 } 476 477 return 0; 478 479err_srq: 480 while (ring_ind >= 0) { 481 ring = &priv->rx_ring[ring_ind]; 482 mlx4_srq_free(mdev->dev, &ring->srq); 483 ring_ind--; 484 } 485 486err_buffers: 487 for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) 488 mlx4_en_free_rx_buf(priv, &priv->rx_ring[ring_ind]); 489 490 ring_ind = priv->rx_ring_num - 1; 491err_allocator: 492 while (ring_ind >= 0) { 493 mlx4_en_destroy_allocator(priv, &priv->rx_ring[ring_ind]); 494 ring_ind--; 495 } 496 return err; 497} 498 499void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, 500 struct mlx4_en_rx_ring *ring) 501{ 502 struct mlx4_en_dev *mdev = priv->mdev; 503 504 kfree(ring->lro.lro_arr); 505 mlx4_en_unmap_buffer(&ring->wqres.buf); 506 mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); 507 vfree(ring->rx_info); 508 ring->rx_info = NULL; 509} 510 511void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, 512 struct mlx4_en_rx_ring *ring) 513{ 514 struct mlx4_en_dev *mdev = priv->mdev; 515 516 mlx4_srq_free(mdev->dev, &ring->srq); 517 mlx4_en_free_rx_buf(priv, ring); 518 mlx4_en_destroy_allocator(priv, ring); 519} 520 521 522/* Unmap a completed descriptor and free unused pages */ 523static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, 524 struct mlx4_en_rx_desc *rx_desc, 525 struct skb_frag_struct *skb_frags, 526 struct skb_frag_struct *skb_frags_rx, 527 struct mlx4_en_rx_alloc *page_alloc, 528 int length) 529{ 530 struct mlx4_en_dev *mdev = priv->mdev; 531 struct mlx4_en_frag_info *frag_info; 532 int nr; 533 dma_addr_t dma; 534 535 /* Collect used fragments while replacing them in the HW descirptors */ 536 for (nr = 0; nr < priv->num_frags; nr++) { 537 frag_info = &priv->frag_info[nr]; 538 if (length <= frag_info->frag_prefix_size) 539 break; 540 541 /* Save page reference in skb */ 542 skb_frags_rx[nr].page = skb_frags[nr].page; 543 skb_frags_rx[nr].size = skb_frags[nr].size; 544 skb_frags_rx[nr].page_offset = skb_frags[nr].page_offset; 545 dma = be64_to_cpu(rx_desc->data[nr].addr); 546 547 /* Allocate a replacement page */ 548 if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, page_alloc, nr)) 549 goto fail; 550 551 /* Unmap buffer */ 552 pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size, 553 PCI_DMA_FROMDEVICE); 554 } 555 /* Adjust size of last fragment to match actual length */ 556 skb_frags_rx[nr - 1].size = length - 557 priv->frag_info[nr - 1].frag_prefix_size; 558 return nr; 559 560fail: 561 /* Drop all accumulated fragments (which have already been replaced in 562 * the descriptor) of this packet; remaining fragments are reused... */ 563 while (nr > 0) { 564 nr--; 565 put_page(skb_frags_rx[nr].page); 566 } 567 return 0; 568} 569 570 571static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, 572 struct mlx4_en_rx_desc *rx_desc, 573 struct skb_frag_struct *skb_frags, 574 struct mlx4_en_rx_alloc *page_alloc, 575 unsigned int length) 576{ 577 struct mlx4_en_dev *mdev = priv->mdev; 578 struct sk_buff *skb; 579 void *va; 580 int used_frags; 581 dma_addr_t dma; 582 583 skb = dev_alloc_skb(SMALL_PACKET_SIZE + NET_IP_ALIGN); 584 if (!skb) { 585 mlx4_dbg(RX_ERR, priv, "Failed allocating skb\n"); 586 return NULL; 587 } 588 skb->dev = priv->dev; 589 skb_reserve(skb, NET_IP_ALIGN); 590 skb->len = length; 591 skb->truesize = length + sizeof(struct sk_buff); 592 593 /* Get pointer to first fragment so we could copy the headers into the 594 * (linear part of the) skb */ 595 va = page_address(skb_frags[0].page) + skb_frags[0].page_offset; 596 597 if (length <= SMALL_PACKET_SIZE) { 598 /* We are copying all relevant data to the skb - temporarily 599 * synch buffers for the copy */ 600 dma = be64_to_cpu(rx_desc->data[0].addr); 601 dma_sync_single_range_for_cpu(&mdev->pdev->dev, dma, 0, 602 length, DMA_FROM_DEVICE); 603 skb_copy_to_linear_data(skb, va, length); 604 dma_sync_single_range_for_device(&mdev->pdev->dev, dma, 0, 605 length, DMA_FROM_DEVICE); 606 skb->tail += length; 607 } else { 608 609 /* Move relevant fragments to skb */ 610 used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, skb_frags, 611 skb_shinfo(skb)->frags, 612 page_alloc, length); 613 if (unlikely(!used_frags)) { 614 kfree_skb(skb); 615 return NULL; 616 } 617 skb_shinfo(skb)->nr_frags = used_frags; 618 619 /* Copy headers into the skb linear buffer */ 620 memcpy(skb->data, va, HEADER_COPY_SIZE); 621 skb->tail += HEADER_COPY_SIZE; 622 623 /* Skip headers in first fragment */ 624 skb_shinfo(skb)->frags[0].page_offset += HEADER_COPY_SIZE; 625 626 /* Adjust size of first fragment */ 627 skb_shinfo(skb)->frags[0].size -= HEADER_COPY_SIZE; 628 skb->data_len = length - HEADER_COPY_SIZE; 629 } 630 return skb; 631} 632 633static void mlx4_en_copy_desc(struct mlx4_en_priv *priv, 634 struct mlx4_en_rx_ring *ring, 635 int from, int to, int num) 636{ 637 struct skb_frag_struct *skb_frags_from; 638 struct skb_frag_struct *skb_frags_to; 639 struct mlx4_en_rx_desc *rx_desc_from; 640 struct mlx4_en_rx_desc *rx_desc_to; 641 int from_index, to_index; 642 int nr, i; 643 644 for (i = 0; i < num; i++) { 645 from_index = (from + i) & ring->size_mask; 646 to_index = (to + i) & ring->size_mask; 647 skb_frags_from = ring->rx_info + (from_index << priv->log_rx_info); 648 skb_frags_to = ring->rx_info + (to_index << priv->log_rx_info); 649 rx_desc_from = ring->buf + (from_index << ring->log_stride); 650 rx_desc_to = ring->buf + (to_index << ring->log_stride); 651 652 for (nr = 0; nr < priv->num_frags; nr++) { 653 skb_frags_to[nr].page = skb_frags_from[nr].page; 654 skb_frags_to[nr].page_offset = skb_frags_from[nr].page_offset; 655 rx_desc_to->data[nr].addr = rx_desc_from->data[nr].addr; 656 } 657 } 658} 659 660 661int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) 662{ 663 struct mlx4_en_priv *priv = netdev_priv(dev); 664 struct mlx4_en_dev *mdev = priv->mdev; 665 struct mlx4_cqe *cqe; 666 struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring]; 667 struct skb_frag_struct *skb_frags; 668 struct skb_frag_struct lro_frags[MLX4_EN_MAX_RX_FRAGS]; 669 struct mlx4_en_rx_desc *rx_desc; 670 struct sk_buff *skb; 671 int index; 672 int nr; 673 unsigned int length; 674 int polled = 0; 675 int ip_summed; 676 677 if (!priv->port_up) 678 return 0; 679 680 /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx 681 * descriptor offset can be deduced from the CQE index instead of 682 * reading 'cqe->index' */ 683 index = cq->mcq.cons_index & ring->size_mask; 684 cqe = &cq->buf[index]; 685 686 /* Process all completed CQEs */ 687 while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, 688 cq->mcq.cons_index & cq->size)) { 689 690 skb_frags = ring->rx_info + (index << priv->log_rx_info); 691 rx_desc = ring->buf + (index << ring->log_stride); 692 693 /* 694 * make sure we read the CQE after we read the ownership bit 695 */ 696 rmb(); 697 698 /* Drop packet on bad receive or bad checksum */ 699 if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == 700 MLX4_CQE_OPCODE_ERROR)) { 701 mlx4_err(mdev, "CQE completed in error - vendor " 702 "syndrom:%d syndrom:%d\n", 703 ((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome, 704 ((struct mlx4_err_cqe *) cqe)->syndrome); 705 goto next; 706 } 707 if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) { 708 mlx4_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n"); 709 goto next; 710 } 711 712 /* 713 * Packet is OK - process it. 714 */ 715 length = be32_to_cpu(cqe->byte_cnt); 716 ring->bytes += length; 717 ring->packets++; 718 719 if (likely(priv->rx_csum)) { 720 if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && 721 (cqe->checksum == cpu_to_be16(0xffff))) { 722 priv->port_stats.rx_chksum_good++; 723 /* This packet is eligible for LRO if it is: 724 * - DIX Ethernet (type interpretation) 725 * - TCP/IP (v4) 726 * - without IP options 727 * - not an IP fragment */ 728 if (mlx4_en_can_lro(cqe->status) && 729 dev->features & NETIF_F_LRO) { 730 731 nr = mlx4_en_complete_rx_desc( 732 priv, rx_desc, 733 skb_frags, lro_frags, 734 ring->page_alloc, length); 735 if (!nr) 736 goto next; 737 738 if (priv->vlgrp && (cqe->vlan_my_qpn & 739 cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK))) { 740 lro_vlan_hwaccel_receive_frags( 741 &ring->lro, lro_frags, 742 length, length, 743 priv->vlgrp, 744 be16_to_cpu(cqe->sl_vid), 745 NULL, 0); 746 } else 747 lro_receive_frags(&ring->lro, 748 lro_frags, 749 length, 750 length, 751 NULL, 0); 752 753 goto next; 754 } 755 756 /* LRO not possible, complete processing here */ 757 ip_summed = CHECKSUM_UNNECESSARY; 758 INC_PERF_COUNTER(priv->pstats.lro_misses); 759 } else { 760 ip_summed = CHECKSUM_NONE; 761 priv->port_stats.rx_chksum_none++; 762 } 763 } else { 764 ip_summed = CHECKSUM_NONE; 765 priv->port_stats.rx_chksum_none++; 766 } 767 768 skb = mlx4_en_rx_skb(priv, rx_desc, skb_frags, 769 ring->page_alloc, length); 770 if (!skb) { 771 priv->stats.rx_dropped++; 772 goto next; 773 } 774 775 skb->ip_summed = ip_summed; 776 skb->protocol = eth_type_trans(skb, dev); 777 skb_record_rx_queue(skb, cq->ring); 778 779 /* Push it up the stack */ 780 if (priv->vlgrp && (be32_to_cpu(cqe->vlan_my_qpn) & 781 MLX4_CQE_VLAN_PRESENT_MASK)) { 782 vlan_hwaccel_receive_skb(skb, priv->vlgrp, 783 be16_to_cpu(cqe->sl_vid)); 784 } else 785 netif_receive_skb(skb); 786 787next: 788 ++cq->mcq.cons_index; 789 index = (cq->mcq.cons_index) & ring->size_mask; 790 cqe = &cq->buf[index]; 791 if (++polled == budget) { 792 /* We are here because we reached the NAPI budget - 793 * flush only pending LRO sessions */ 794 lro_flush_all(&ring->lro); 795 goto out; 796 } 797 } 798 799 /* If CQ is empty flush all LRO sessions unconditionally */ 800 lro_flush_all(&ring->lro); 801 802out: 803 AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); 804 mlx4_cq_set_ci(&cq->mcq); 805 wmb(); /* ensure HW sees CQ consumer before we post new buffers */ 806 ring->cons = cq->mcq.cons_index; 807 ring->prod += polled; /* Polled descriptors were realocated in place */ 808 if (unlikely(!ring->full)) { 809 mlx4_en_copy_desc(priv, ring, ring->cons - polled, 810 ring->prod - polled, polled); 811 mlx4_en_fill_rx_buf(dev, ring); 812 } 813 mlx4_en_update_rx_prod_db(ring); 814 return polled; 815} 816 817 818void mlx4_en_rx_irq(struct mlx4_cq *mcq) 819{ 820 struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); 821 struct mlx4_en_priv *priv = netdev_priv(cq->dev); 822 823 if (priv->port_up) 824 napi_schedule(&cq->napi); 825 else 826 mlx4_en_arm_cq(priv, cq); 827} 828 829/* Rx CQ polling - called by NAPI */ 830int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) 831{ 832 struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); 833 struct net_device *dev = cq->dev; 834 struct mlx4_en_priv *priv = netdev_priv(dev); 835 int done; 836 837 done = mlx4_en_process_rx_cq(dev, cq, budget); 838 839 /* If we used up all the quota - we're probably not done yet... */ 840 if (done == budget) 841 INC_PERF_COUNTER(priv->pstats.napi_quota); 842 else { 843 /* Done for now */ 844 napi_complete(napi); 845 mlx4_en_arm_cq(priv, cq); 846 } 847 return done; 848} 849 850 851/* Calculate the last offset position that accomodates a full fragment 852 * (assuming fagment size = stride-align) */ 853static int mlx4_en_last_alloc_offset(struct mlx4_en_priv *priv, u16 stride, u16 align) 854{ 855 u16 res = MLX4_EN_ALLOC_SIZE % stride; 856 u16 offset = MLX4_EN_ALLOC_SIZE - stride - res + align; 857 858 mlx4_dbg(DRV, priv, "Calculated last offset for stride:%d align:%d " 859 "res:%d offset:%d\n", stride, align, res, offset); 860 return offset; 861} 862 863 864static int frag_sizes[] = { 865 FRAG_SZ0, 866 FRAG_SZ1, 867 FRAG_SZ2, 868 FRAG_SZ3 869}; 870 871void mlx4_en_calc_rx_buf(struct net_device *dev) 872{ 873 struct mlx4_en_priv *priv = netdev_priv(dev); 874 int eff_mtu = dev->mtu + ETH_HLEN + VLAN_HLEN + ETH_LLC_SNAP_SIZE; 875 int buf_size = 0; 876 int i = 0; 877 878 while (buf_size < eff_mtu) { 879 priv->frag_info[i].frag_size = 880 (eff_mtu > buf_size + frag_sizes[i]) ? 881 frag_sizes[i] : eff_mtu - buf_size; 882 priv->frag_info[i].frag_prefix_size = buf_size; 883 if (!i) { 884 priv->frag_info[i].frag_align = NET_IP_ALIGN; 885 priv->frag_info[i].frag_stride = 886 ALIGN(frag_sizes[i] + NET_IP_ALIGN, SMP_CACHE_BYTES); 887 } else { 888 priv->frag_info[i].frag_align = 0; 889 priv->frag_info[i].frag_stride = 890 ALIGN(frag_sizes[i], SMP_CACHE_BYTES); 891 } 892 priv->frag_info[i].last_offset = mlx4_en_last_alloc_offset( 893 priv, priv->frag_info[i].frag_stride, 894 priv->frag_info[i].frag_align); 895 buf_size += priv->frag_info[i].frag_size; 896 i++; 897 } 898 899 priv->num_frags = i; 900 priv->rx_skb_size = eff_mtu; 901 priv->log_rx_info = ROUNDUP_LOG2(i * sizeof(struct skb_frag_struct)); 902 903 mlx4_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d " 904 "num_frags:%d):\n", eff_mtu, priv->num_frags); 905 for (i = 0; i < priv->num_frags; i++) { 906 mlx4_dbg(DRV, priv, " frag:%d - size:%d prefix:%d align:%d " 907 "stride:%d last_offset:%d\n", i, 908 priv->frag_info[i].frag_size, 909 priv->frag_info[i].frag_prefix_size, 910 priv->frag_info[i].frag_align, 911 priv->frag_info[i].frag_stride, 912 priv->frag_info[i].last_offset); 913 } 914} 915 916/* RSS related functions */ 917 918/* Calculate rss size and map each entry in rss table to rx ring */ 919void mlx4_en_set_default_rss_map(struct mlx4_en_priv *priv, 920 struct mlx4_en_rss_map *rss_map, 921 int num_entries, int num_rings) 922{ 923 int i; 924 925 rss_map->size = roundup_pow_of_two(num_entries); 926 mlx4_dbg(DRV, priv, "Setting default RSS map of %d entires\n", 927 rss_map->size); 928 929 for (i = 0; i < rss_map->size; i++) { 930 rss_map->map[i] = i % num_rings; 931 mlx4_dbg(DRV, priv, "Entry %d ---> ring %d\n", i, rss_map->map[i]); 932 } 933} 934 935static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, 936 int qpn, int srqn, int cqn, 937 enum mlx4_qp_state *state, 938 struct mlx4_qp *qp) 939{ 940 struct mlx4_en_dev *mdev = priv->mdev; 941 struct mlx4_qp_context *context; 942 int err = 0; 943 944 context = kmalloc(sizeof *context , GFP_KERNEL); 945 if (!context) { 946 mlx4_err(mdev, "Failed to allocate qp context\n"); 947 return -ENOMEM; 948 } 949 950 err = mlx4_qp_alloc(mdev->dev, qpn, qp); 951 if (err) { 952 mlx4_err(mdev, "Failed to allocate qp #%d\n", qpn); 953 goto out; 954 } 955 qp->event = mlx4_en_sqp_event; 956 957 memset(context, 0, sizeof *context); 958 mlx4_en_fill_qp_context(priv, 0, 0, 0, 0, qpn, cqn, srqn, context); 959 960 err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, context, qp, state); 961 if (err) { 962 mlx4_qp_remove(mdev->dev, qp); 963 mlx4_qp_free(mdev->dev, qp); 964 } 965out: 966 kfree(context); 967 return err; 968} 969 970/* Allocate rx qp's and configure them according to rss map */ 971int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) 972{ 973 struct mlx4_en_dev *mdev = priv->mdev; 974 struct mlx4_en_rss_map *rss_map = &priv->rss_map; 975 struct mlx4_qp_context context; 976 struct mlx4_en_rss_context *rss_context; 977 void *ptr; 978 int rss_xor = mdev->profile.rss_xor; 979 u8 rss_mask = mdev->profile.rss_mask; 980 int i, srqn, qpn, cqn; 981 int err = 0; 982 int good_qps = 0; 983 984 mlx4_dbg(DRV, priv, "Configuring rss steering for port %u\n", priv->port); 985 err = mlx4_qp_reserve_range(mdev->dev, rss_map->size, 986 rss_map->size, &rss_map->base_qpn); 987 if (err) { 988 mlx4_err(mdev, "Failed reserving %d qps for port %u\n", 989 rss_map->size, priv->port); 990 return err; 991 } 992 993 for (i = 0; i < rss_map->size; i++) { 994 cqn = priv->rx_ring[rss_map->map[i]].cqn; 995 srqn = priv->rx_ring[rss_map->map[i]].srq.srqn; 996 qpn = rss_map->base_qpn + i; 997 err = mlx4_en_config_rss_qp(priv, qpn, srqn, cqn, 998 &rss_map->state[i], 999 &rss_map->qps[i]); 1000 if (err) 1001 goto rss_err; 1002 1003 ++good_qps; 1004 } 1005 1006 /* Configure RSS indirection qp */ 1007 err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &priv->base_qpn); 1008 if (err) { 1009 mlx4_err(mdev, "Failed to reserve range for RSS " 1010 "indirection qp\n"); 1011 goto rss_err; 1012 } 1013 err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp); 1014 if (err) { 1015 mlx4_err(mdev, "Failed to allocate RSS indirection QP\n"); 1016 goto reserve_err; 1017 } 1018 rss_map->indir_qp.event = mlx4_en_sqp_event; 1019 mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn, 1020 priv->rx_ring[0].cqn, 0, &context); 1021 1022 ptr = ((void *) &context) + 0x3c; 1023 rss_context = (struct mlx4_en_rss_context *) ptr; 1024 rss_context->base_qpn = cpu_to_be32(ilog2(rss_map->size) << 24 | 1025 (rss_map->base_qpn)); 1026 rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn); 1027 rss_context->hash_fn = rss_xor & 0x3; 1028 rss_context->flags = rss_mask << 2; 1029 1030 err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context, 1031 &rss_map->indir_qp, &rss_map->indir_state); 1032 if (err) 1033 goto indir_err; 1034 1035 return 0; 1036 1037indir_err: 1038 mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, 1039 MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp); 1040 mlx4_qp_remove(mdev->dev, &rss_map->indir_qp); 1041 mlx4_qp_free(mdev->dev, &rss_map->indir_qp); 1042reserve_err: 1043 mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1); 1044rss_err: 1045 for (i = 0; i < good_qps; i++) { 1046 mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], 1047 MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]); 1048 mlx4_qp_remove(mdev->dev, &rss_map->qps[i]); 1049 mlx4_qp_free(mdev->dev, &rss_map->qps[i]); 1050 } 1051 mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, rss_map->size); 1052 return err; 1053} 1054 1055void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv) 1056{ 1057 struct mlx4_en_dev *mdev = priv->mdev; 1058 struct mlx4_en_rss_map *rss_map = &priv->rss_map; 1059 int i; 1060 1061 mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, 1062 MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp); 1063 mlx4_qp_remove(mdev->dev, &rss_map->indir_qp); 1064 mlx4_qp_free(mdev->dev, &rss_map->indir_qp); 1065 mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1); 1066 1067 for (i = 0; i < rss_map->size; i++) { 1068 mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], 1069 MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]); 1070 mlx4_qp_remove(mdev->dev, &rss_map->qps[i]); 1071 mlx4_qp_free(mdev->dev, &rss_map->qps[i]); 1072 } 1073 mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, rss_map->size); 1074} 1075 1076 1077 1078 1079