Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
drivers/net/mlx4/en_rx.c at v2.6.28
/*
 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/mlx4/cq.h>
#include <linux/mlx4/qp.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>

#include "mlx4_en.h"

/* Return a pointer to the n'th receive WQE in the ring buffer */
static void *get_wqe(struct mlx4_en_rx_ring *ring, int n)
{
	int offset = n << ring->srq.wqe_shift;
	return ring->buf + offset;
}

static void mlx4_en_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
{
	return;
}

static int mlx4_en_get_frag_header(struct skb_frag_struct *frags, void **mac_hdr,
				   void **ip_hdr, void **tcpudp_hdr,
				   u64 *hdr_flags, void *priv)
{
	*mac_hdr = page_address(frags->page) + frags->page_offset;
	*ip_hdr = *mac_hdr + ETH_HLEN;
	*tcpudp_hdr = (struct tcphdr *)(*ip_hdr + sizeof(struct iphdr));
	*hdr_flags = LRO_IPV4 | LRO_TCP;

	return 0;
}

static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
			      struct mlx4_en_rx_desc *rx_desc,
			      struct skb_frag_struct *skb_frags,
			      struct mlx4_en_rx_alloc *ring_alloc,
			      int i)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
	struct mlx4_en_rx_alloc *page_alloc = &ring_alloc[i];
	struct page *page;
	dma_addr_t dma;

	if (page_alloc->offset == frag_info->last_offset) {
		/* Allocate new page */
		page = alloc_pages(GFP_ATOMIC | __GFP_COMP, MLX4_EN_ALLOC_ORDER);
		if (!page)
			return -ENOMEM;

		skb_frags[i].page = page_alloc->page;
		skb_frags[i].page_offset = page_alloc->offset;
		page_alloc->page = page;
		page_alloc->offset = frag_info->frag_align;
	} else {
		page = page_alloc->page;
		get_page(page);

		skb_frags[i].page = page;
		skb_frags[i].page_offset = page_alloc->offset;
		page_alloc->offset += frag_info->frag_stride;
	}
	dma = pci_map_single(mdev->pdev, page_address(skb_frags[i].page) +
			     skb_frags[i].page_offset, frag_info->frag_size,
			     PCI_DMA_FROMDEVICE);
	rx_desc->data[i].addr = cpu_to_be64(dma);
	return 0;
}

static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
				  struct mlx4_en_rx_ring *ring)
{
	struct mlx4_en_rx_alloc *page_alloc;
	int i;

	for (i = 0; i < priv->num_frags; i++) {
		page_alloc = &ring->page_alloc[i];
		page_alloc->page = alloc_pages(GFP_ATOMIC | __GFP_COMP,
					       MLX4_EN_ALLOC_ORDER);
		if (!page_alloc->page)
			goto out;

		page_alloc->offset = priv->frag_info[i].frag_align;
		mlx4_dbg(DRV, priv, "Initialized allocator:%d with page:%p\n",
			 i, page_alloc->page);
	}
	return 0;

out:
	while (i--) {
		page_alloc = &ring->page_alloc[i];
		put_page(page_alloc->page);
		page_alloc->page = NULL;
	}
	return -ENOMEM;
}

static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv,
				      struct mlx4_en_rx_ring *ring)
{
	struct mlx4_en_rx_alloc *page_alloc;
	int i;

	for (i = 0; i < priv->num_frags; i++) {
		page_alloc = &ring->page_alloc[i];
		mlx4_dbg(DRV, priv, "Freeing allocator:%d count:%d\n",
			 i, page_count(page_alloc->page));

		put_page(page_alloc->page);
		page_alloc->page = NULL;
	}
}


static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv,
				 struct mlx4_en_rx_ring *ring, int index)
{
	struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index;
	struct skb_frag_struct *skb_frags = ring->rx_info +
					    (index << priv->log_rx_info);
	int possible_frags;
	int i;

	/* Pre-link descriptor */
	rx_desc->next.next_wqe_index = cpu_to_be16((index + 1) & ring->size_mask);

	/* Set size and memtype fields */
	for (i = 0; i < priv->num_frags; i++) {
		skb_frags[i].size = priv->frag_info[i].frag_size;
		rx_desc->data[i].byte_count =
			cpu_to_be32(priv->frag_info[i].frag_size);
		rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key);
	}

	/* If the number of used fragments does not fill up the ring stride,
	 * remaining (unused) fragments must be padded with null address/size
	 * and a special memory key */
	possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE;
	for (i = priv->num_frags; i < possible_frags; i++) {
		rx_desc->data[i].byte_count = 0;
		rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
		rx_desc->data[i].addr = 0;
	}
}


static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
				   struct mlx4_en_rx_ring *ring, int index)
{
	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride);
	struct skb_frag_struct *skb_frags = ring->rx_info +
					    (index << priv->log_rx_info);
	int i;

	for (i = 0; i < priv->num_frags; i++)
		if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, ring->page_alloc, i))
			goto err;

	return 0;

err:
	while (i--)
		put_page(skb_frags[i].page);
	return -ENOMEM;
}

/* Publish the current producer index to the doorbell record */
static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
{
	*ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
}

static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rx_ring *ring;
	int ring_ind;
	int buf_ind;

	for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) {
		for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
			ring = &priv->rx_ring[ring_ind];

			if (mlx4_en_prepare_rx_desc(priv, ring,
						    ring->actual_size)) {
				if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
					mlx4_err(mdev, "Failed to allocate "
						       "enough rx buffers\n");
					return -ENOMEM;
				} else {
					if (netif_msg_rx_err(priv))
						mlx4_warn(mdev,
							  "Only %d buffers allocated\n",
							  ring->actual_size);
					goto out;
				}
			}
			ring->actual_size++;
			ring->prod++;
		}
	}
out:
	return 0;
}

static int mlx4_en_fill_rx_buf(struct net_device *dev,
			       struct mlx4_en_rx_ring *ring)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int num = 0;
	int err;

	while ((u32) (ring->prod - ring->cons) < ring->actual_size) {
		err = mlx4_en_prepare_rx_desc(priv, ring, ring->prod &
					      ring->size_mask);
		if (err) {
			if (netif_msg_rx_err(priv))
				mlx4_warn(priv->mdev,
					  "Failed preparing rx descriptor\n");
			priv->port_stats.rx_alloc_failed++;
			break;
		}
		++num;
		++ring->prod;
	}
	if ((u32) (ring->prod - ring->cons) == ring->size)
		ring->full = 1;

	return num;
}

static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv,
				struct mlx4_en_rx_ring *ring)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct skb_frag_struct *skb_frags;
	struct mlx4_en_rx_desc *rx_desc;
	dma_addr_t dma;
	int index;
	int nr;

	mlx4_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n",
		 ring->cons, ring->prod);

	/* Unmap and free Rx buffers */
	BUG_ON((u32) (ring->prod - ring->cons) > ring->size);
	while (ring->cons != ring->prod) {
		index = ring->cons & ring->size_mask;
		rx_desc = ring->buf + (index << ring->log_stride);
		skb_frags = ring->rx_info + (index << priv->log_rx_info);
		mlx4_dbg(DRV, priv, "Processing descriptor:%d\n", index);

		for (nr = 0; nr < priv->num_frags; nr++) {
			mlx4_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
			dma = be64_to_cpu(rx_desc->data[nr].addr);

			mlx4_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma);
			pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size,
					 PCI_DMA_FROMDEVICE);
			put_page(skb_frags[nr].page);
		}
		++ring->cons;
	}
}


void mlx4_en_rx_refill(struct work_struct *work)
{
	struct delayed_work *delay = container_of(work, struct delayed_work, work);
	struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv,
						 refill_task);
	struct mlx4_en_dev *mdev = priv->mdev;
	struct net_device *dev = priv->dev;
	struct mlx4_en_rx_ring *ring;
	int need_refill = 0;
	int i;

	mutex_lock(&mdev->state_lock);
	if (!mdev->device_up || !priv->port_up)
		goto out;

	/* We only get here if there are no receive buffers, so we can't race
	 * with Rx interrupts while filling buffers */
	for (i = 0; i < priv->rx_ring_num; i++) {
		ring = &priv->rx_ring[i];
		if (ring->need_refill) {
			if (mlx4_en_fill_rx_buf(dev, ring)) {
				ring->need_refill = 0;
				mlx4_en_update_rx_prod_db(ring);
			} else
				need_refill = 1;
		}
	}
	if (need_refill)
		queue_delayed_work(mdev->workqueue, &priv->refill_task, HZ);

out:
	mutex_unlock(&mdev->state_lock);
}


int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
			   struct mlx4_en_rx_ring *ring, u32 size, u16 stride)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	int err;
	int tmp;

	/* Sanity check SRQ size before proceeding */
	if (size >= mdev->dev->caps.max_srq_wqes)
		return -EINVAL;

	ring->prod = 0;
	ring->cons = 0;
	ring->size = size;
	ring->size_mask = size - 1;
	ring->stride = stride;
	ring->log_stride = ffs(ring->stride) - 1;
	ring->buf_size = ring->size * ring->stride;

	tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
					sizeof(struct skb_frag_struct));
	ring->rx_info = vmalloc(tmp);
	if (!ring->rx_info) {
		mlx4_err(mdev, "Failed allocating rx_info ring\n");
		return -ENOMEM;
	}
	mlx4_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
		 ring->rx_info, tmp);

	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
				 ring->buf_size, 2 * PAGE_SIZE);
	if (err)
		goto err_ring;

	err = mlx4_en_map_buffer(&ring->wqres.buf);
	if (err) {
		mlx4_err(mdev, "Failed to map RX buffer\n");
		goto err_hwq;
	}
	ring->buf = ring->wqres.buf.direct.buf;

	/* Configure lro mngr */
	memset(&ring->lro, 0, sizeof(struct net_lro_mgr));
	ring->lro.dev = priv->dev;
	ring->lro.features = LRO_F_NAPI;
	ring->lro.frag_align_pad = NET_IP_ALIGN;
	ring->lro.ip_summed = CHECKSUM_UNNECESSARY;
	ring->lro.ip_summed_aggr = CHECKSUM_UNNECESSARY;
	ring->lro.max_desc = mdev->profile.num_lro;
	ring->lro.max_aggr = MAX_SKB_FRAGS;
	ring->lro.lro_arr = kzalloc(mdev->profile.num_lro *
				    sizeof(struct net_lro_desc),
				    GFP_KERNEL);
	if (!ring->lro.lro_arr) {
		mlx4_err(mdev, "Failed to allocate lro array\n");
		goto err_map;
	}
	ring->lro.get_frag_header = mlx4_en_get_frag_header;

	return 0;

err_map:
	mlx4_en_unmap_buffer(&ring->wqres.buf);
err_hwq:
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
err_ring:
	vfree(ring->rx_info);
	ring->rx_info = NULL;
	return err;
}

int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_wqe_srq_next_seg *next;
	struct mlx4_en_rx_ring *ring;
	int i;
	int ring_ind;
	int err;
	int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
					DS_SIZE * priv->num_frags);
	int max_gs = (stride - sizeof(struct mlx4_wqe_srq_next_seg)) / DS_SIZE;

	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = &priv->rx_ring[ring_ind];

		ring->prod = 0;
		ring->cons = 0;
		ring->actual_size = 0;
		ring->cqn = priv->rx_cq[ring_ind].mcq.cqn;

		ring->stride = stride;
		ring->log_stride = ffs(ring->stride) - 1;
		ring->buf_size = ring->size * ring->stride;

		memset(ring->buf, 0, ring->buf_size);
		mlx4_en_update_rx_prod_db(ring);

		/* Initialize all descriptors */
		for (i = 0; i < ring->size; i++)
			mlx4_en_init_rx_desc(priv, ring, i);

		/* Initialize page allocators */
		err = mlx4_en_init_allocator(priv, ring);
		if (err) {
			mlx4_err(mdev, "Failed initializing ring allocator\n");
			goto err_allocator;
		}

		/* Fill Rx buffers */
		ring->full = 0;
	}
	if (mlx4_en_fill_rx_buffers(priv))
		goto err_buffers;

	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = &priv->rx_ring[ring_ind];

		mlx4_en_update_rx_prod_db(ring);

		/* Configure SRQ representing the ring */
		ring->srq.max = ring->size;
		ring->srq.max_gs = max_gs;
		ring->srq.wqe_shift = ilog2(ring->stride);

		for (i = 0; i < ring->srq.max; ++i) {
			next = get_wqe(ring, i);
			next->next_wqe_index =
				cpu_to_be16((i + 1) & (ring->srq.max - 1));
		}

		err = mlx4_srq_alloc(mdev->dev, mdev->priv_pdn, &ring->wqres.mtt,
				     ring->wqres.db.dma, &ring->srq);
		if (err) {
			mlx4_err(mdev, "Failed to allocate srq\n");
			goto err_srq;
		}
		ring->srq.event = mlx4_en_srq_event;
	}

	return 0;

err_srq:
	while (ring_ind >= 0) {
		ring = &priv->rx_ring[ring_ind];
		mlx4_srq_free(mdev->dev, &ring->srq);
		ring_ind--;
	}

err_buffers:
	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++)
		mlx4_en_free_rx_buf(priv, &priv->rx_ring[ring_ind]);

	ring_ind = priv->rx_ring_num - 1;
err_allocator:
	while (ring_ind >= 0) {
		mlx4_en_destroy_allocator(priv, &priv->rx_ring[ring_ind]);
		ring_ind--;
	}
	return err;
}

void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
			     struct mlx4_en_rx_ring *ring)
{
	struct mlx4_en_dev *mdev = priv->mdev;

	kfree(ring->lro.lro_arr);
	mlx4_en_unmap_buffer(&ring->wqres.buf);
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
	vfree(ring->rx_info);
	ring->rx_info = NULL;
}

void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
				struct mlx4_en_rx_ring *ring)
{
	struct mlx4_en_dev *mdev = priv->mdev;

	mlx4_srq_free(mdev->dev, &ring->srq);
	mlx4_en_free_rx_buf(priv, ring);
	mlx4_en_destroy_allocator(priv, ring);
}


/* Unmap a completed descriptor and free unused pages */
static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
				    struct mlx4_en_rx_desc *rx_desc,
				    struct skb_frag_struct *skb_frags,
				    struct skb_frag_struct *skb_frags_rx,
				    struct mlx4_en_rx_alloc *page_alloc,
				    int length)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_frag_info *frag_info;
	int nr;
	dma_addr_t dma;

	/* Collect used fragments while replacing them in the HW descriptors */
	for (nr = 0; nr < priv->num_frags; nr++) {
		frag_info = &priv->frag_info[nr];
		if (length <= frag_info->frag_prefix_size)
			break;

		/* Save page reference in skb */
		skb_frags_rx[nr].page = skb_frags[nr].page;
		skb_frags_rx[nr].size = skb_frags[nr].size;
		skb_frags_rx[nr].page_offset = skb_frags[nr].page_offset;
		dma = be64_to_cpu(rx_desc->data[nr].addr);

		/* Allocate a replacement page */
		if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, page_alloc, nr))
			goto fail;

		/* Unmap buffer */
		pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size,
				 PCI_DMA_FROMDEVICE);
	}
	/* Adjust size of last fragment to match actual length */
	skb_frags_rx[nr - 1].size = length -
		priv->frag_info[nr - 1].frag_prefix_size;
	return nr;

fail:
	/* Drop all accumulated fragments (which have already been replaced in
	 * the descriptor) of this packet; remaining fragments are reused... */
	while (nr > 0) {
		nr--;
		put_page(skb_frags_rx[nr].page);
	}
	return 0;
}


static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
				      struct mlx4_en_rx_desc *rx_desc,
				      struct skb_frag_struct *skb_frags,
				      struct mlx4_en_rx_alloc *page_alloc,
				      unsigned int length)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct sk_buff *skb;
	void *va;
	int used_frags;
	dma_addr_t dma;

	skb = dev_alloc_skb(SMALL_PACKET_SIZE + NET_IP_ALIGN);
	if (!skb) {
		mlx4_dbg(RX_ERR, priv, "Failed allocating skb\n");
		return NULL;
	}
	skb->dev = priv->dev;
	skb_reserve(skb, NET_IP_ALIGN);
	skb->len = length;
	skb->truesize = length + sizeof(struct sk_buff);

	/* Get pointer to first fragment so we could copy the headers into the
	 * (linear part of the) skb */
	va = page_address(skb_frags[0].page) + skb_frags[0].page_offset;

	if (length <= SMALL_PACKET_SIZE) {
		/* We are copying all relevant data to the skb - temporarily
		 * sync buffers for the copy */
		dma = be64_to_cpu(rx_desc->data[0].addr);
		dma_sync_single_range_for_cpu(&mdev->pdev->dev, dma, 0,
					      length, DMA_FROM_DEVICE);
		skb_copy_to_linear_data(skb, va, length);
		dma_sync_single_range_for_device(&mdev->pdev->dev, dma, 0,
						 length, DMA_FROM_DEVICE);
		skb->tail += length;
	} else {

		/* Move relevant fragments to skb */
		used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, skb_frags,
						      skb_shinfo(skb)->frags,
						      page_alloc, length);
		skb_shinfo(skb)->nr_frags = used_frags;

		/* Copy headers into the skb linear buffer */
		memcpy(skb->data, va, HEADER_COPY_SIZE);
		skb->tail += HEADER_COPY_SIZE;

		/* Skip headers in first fragment */
		skb_shinfo(skb)->frags[0].page_offset += HEADER_COPY_SIZE;

		/* Adjust size of first fragment */
		skb_shinfo(skb)->frags[0].size -= HEADER_COPY_SIZE;
		skb->data_len = length - HEADER_COPY_SIZE;
	}
	return skb;
}

static void mlx4_en_copy_desc(struct mlx4_en_priv *priv,
			      struct mlx4_en_rx_ring *ring,
			      int from, int to, int num)
{
	struct skb_frag_struct *skb_frags_from;
	struct skb_frag_struct *skb_frags_to;
	struct mlx4_en_rx_desc *rx_desc_from;
	struct mlx4_en_rx_desc *rx_desc_to;
	int from_index, to_index;
	int nr, i;

	for (i = 0; i < num; i++) {
		from_index = (from + i) & ring->size_mask;
		to_index = (to + i) & ring->size_mask;
		skb_frags_from = ring->rx_info + (from_index << priv->log_rx_info);
		skb_frags_to = ring->rx_info + (to_index << priv->log_rx_info);
		rx_desc_from = ring->buf + (from_index << ring->log_stride);
		rx_desc_to = ring->buf + (to_index << ring->log_stride);

		for (nr = 0; nr < priv->num_frags; nr++) {
			skb_frags_to[nr].page = skb_frags_from[nr].page;
			skb_frags_to[nr].page_offset = skb_frags_from[nr].page_offset;
			rx_desc_to->data[nr].addr = rx_desc_from->data[nr].addr;
		}
	}
}


int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_cqe *cqe;
	struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring];
	struct skb_frag_struct *skb_frags;
	struct skb_frag_struct lro_frags[MLX4_EN_MAX_RX_FRAGS];
	struct mlx4_en_rx_desc *rx_desc;
	struct sk_buff *skb;
	int index;
	int nr;
	unsigned int length;
	int polled = 0;
	int ip_summed;

	if (!priv->port_up)
		return 0;

	/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
	 * descriptor offset can be deduced from the CQE index instead of
	 * reading 'cqe->index' */
	index = cq->mcq.cons_index & ring->size_mask;
	cqe = &cq->buf[index];

	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
		    cq->mcq.cons_index & cq->size)) {

		skb_frags = ring->rx_info + (index << priv->log_rx_info);
		rx_desc = ring->buf + (index << ring->log_stride);

		/*
		 * make sure we read the CQE after we read the ownership bit
		 */
		rmb();

		/* Drop packet on bad receive or bad checksum */
		if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
						MLX4_CQE_OPCODE_ERROR)) {
			mlx4_err(mdev, "CQE completed in error - vendor "
				       "syndrome:%d syndrome:%d\n",
				 ((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome,
				 ((struct mlx4_err_cqe *) cqe)->syndrome);
			goto next;
		}
		if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) {
			mlx4_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n");
			goto next;
		}

		/*
		 * Packet is OK - process it.
		 */
		length = be32_to_cpu(cqe->byte_cnt);
		ring->bytes += length;
		ring->packets++;

		if (likely(priv->rx_csum)) {
			if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
			    (cqe->checksum == cpu_to_be16(0xffff))) {
				priv->port_stats.rx_chksum_good++;
				/* This packet is eligible for LRO if it is:
				 * - DIX Ethernet (type interpretation)
				 * - TCP/IP (v4)
				 * - without IP options
				 * - not an IP fragment */
				if (mlx4_en_can_lro(cqe->status) &&
				    dev->features & NETIF_F_LRO) {

					nr = mlx4_en_complete_rx_desc(
						priv, rx_desc,
						skb_frags, lro_frags,
						ring->page_alloc, length);
					if (!nr)
						goto next;

					if (priv->vlgrp && (cqe->vlan_my_qpn &
					    cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK))) {
						lro_vlan_hwaccel_receive_frags(
							&ring->lro, lro_frags,
							length, length,
							priv->vlgrp,
							be16_to_cpu(cqe->sl_vid),
							NULL, 0);
					} else
						lro_receive_frags(&ring->lro,
								  lro_frags,
								  length,
								  length,
								  NULL, 0);

					goto next;
				}

				/* LRO not possible, complete processing here */
				ip_summed = CHECKSUM_UNNECESSARY;
				INC_PERF_COUNTER(priv->pstats.lro_misses);
			} else {
				ip_summed = CHECKSUM_NONE;
				priv->port_stats.rx_chksum_none++;
			}
		} else {
			ip_summed = CHECKSUM_NONE;
			priv->port_stats.rx_chksum_none++;
		}

		skb = mlx4_en_rx_skb(priv, rx_desc, skb_frags,
				     ring->page_alloc, length);
		if (!skb) {
			priv->stats.rx_dropped++;
			goto next;
		}

		skb->ip_summed = ip_summed;
		skb->protocol = eth_type_trans(skb, dev);

		/* Push it up the stack */
		if (priv->vlgrp && (be32_to_cpu(cqe->vlan_my_qpn) &
				    MLX4_CQE_VLAN_PRESENT_MASK)) {
			vlan_hwaccel_receive_skb(skb, priv->vlgrp,
						 be16_to_cpu(cqe->sl_vid));
		} else
			netif_receive_skb(skb);

		dev->last_rx = jiffies;

next:
		++cq->mcq.cons_index;
		index = (cq->mcq.cons_index) & ring->size_mask;
		cqe = &cq->buf[index];
		if (++polled == budget) {
			/* We are here because we reached the NAPI budget -
			 * flush only pending LRO sessions */
			lro_flush_all(&ring->lro);
			goto out;
		}
	}

	/* If CQ is empty flush all LRO sessions unconditionally */
	lro_flush_all(&ring->lro);

out:
	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
	mlx4_cq_set_ci(&cq->mcq);
	wmb(); /* ensure HW sees CQ consumer before we post new buffers */
	ring->cons = cq->mcq.cons_index;
	ring->prod += polled; /* Polled descriptors were reallocated in place */
	if (unlikely(!ring->full)) {
		mlx4_en_copy_desc(priv, ring, ring->cons - polled,
				  ring->prod - polled, polled);
		mlx4_en_fill_rx_buf(dev, ring);
	}
	mlx4_en_update_rx_prod_db(ring);
	return polled;
}


void mlx4_en_rx_irq(struct mlx4_cq *mcq)
{
	struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
	struct mlx4_en_priv *priv = netdev_priv(cq->dev);

	if (priv->port_up)
		netif_rx_schedule(cq->dev, &cq->napi);
	else
		mlx4_en_arm_cq(priv, cq);
}

/* Rx CQ polling - called by NAPI */
int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
{
	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
	struct net_device *dev = cq->dev;
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int done;

	done = mlx4_en_process_rx_cq(dev, cq, budget);

	/* If we used up all the quota - we're probably not done yet... */
	if (done == budget)
		INC_PERF_COUNTER(priv->pstats.napi_quota);
	else {
		/* Done for now */
		netif_rx_complete(dev, napi);
		mlx4_en_arm_cq(priv, cq);
	}
	return done;
}


/* Calculate the last offset position that accommodates a full fragment
 * (assuming fragment size = stride-align) */
static int mlx4_en_last_alloc_offset(struct mlx4_en_priv *priv, u16 stride, u16 align)
{
	u16 res = MLX4_EN_ALLOC_SIZE % stride;
	u16 offset = MLX4_EN_ALLOC_SIZE - stride - res + align;

	mlx4_dbg(DRV, priv, "Calculated last offset for stride:%d align:%d "
			    "res:%d offset:%d\n", stride, align, res, offset);
	return offset;
}


static int frag_sizes[] = {
	FRAG_SZ0,
	FRAG_SZ1,
	FRAG_SZ2,
	FRAG_SZ3
};

void mlx4_en_calc_rx_buf(struct net_device *dev)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int eff_mtu = dev->mtu + ETH_HLEN + VLAN_HLEN + ETH_LLC_SNAP_SIZE;
	int buf_size = 0;
	int i = 0;

	while (buf_size < eff_mtu) {
		priv->frag_info[i].frag_size =
			(eff_mtu > buf_size + frag_sizes[i]) ?
				frag_sizes[i] : eff_mtu - buf_size;
		priv->frag_info[i].frag_prefix_size = buf_size;
		if (!i) {
			priv->frag_info[i].frag_align = NET_IP_ALIGN;
			priv->frag_info[i].frag_stride =
				ALIGN(frag_sizes[i] + NET_IP_ALIGN, SMP_CACHE_BYTES);
		} else {
			priv->frag_info[i].frag_align = 0;
			priv->frag_info[i].frag_stride =
				ALIGN(frag_sizes[i], SMP_CACHE_BYTES);
		}
		priv->frag_info[i].last_offset = mlx4_en_last_alloc_offset(
						priv, priv->frag_info[i].frag_stride,
						priv->frag_info[i].frag_align);
		buf_size += priv->frag_info[i].frag_size;
		i++;
	}

	priv->num_frags = i;
	priv->rx_skb_size = eff_mtu;
	priv->log_rx_info = ROUNDUP_LOG2(i * sizeof(struct skb_frag_struct));

	mlx4_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d "
			    "num_frags:%d):\n", eff_mtu, priv->num_frags);
	for (i = 0; i < priv->num_frags; i++) {
		mlx4_dbg(DRV, priv, "  frag:%d - size:%d prefix:%d align:%d "
				    "stride:%d last_offset:%d\n", i,
			 priv->frag_info[i].frag_size,
			 priv->frag_info[i].frag_prefix_size,
			 priv->frag_info[i].frag_align,
			 priv->frag_info[i].frag_stride,
			 priv->frag_info[i].last_offset);
	}
}

/* RSS related functions */

/* Calculate rss size and map each entry in rss table to rx ring */
void mlx4_en_set_default_rss_map(struct mlx4_en_priv *priv,
				 struct mlx4_en_rss_map *rss_map,
				 int num_entries, int num_rings)
{
	int i;

	rss_map->size = roundup_pow_of_two(num_entries);
	mlx4_dbg(DRV, priv, "Setting default RSS map of %d entries\n",
		 rss_map->size);

	for (i = 0; i < rss_map->size; i++) {
		rss_map->map[i] = i % num_rings;
		mlx4_dbg(DRV, priv, "Entry %d ---> ring %d\n", i, rss_map->map[i]);
	}
}

static void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event)
{
	return;
}


static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv,
				 int qpn, int srqn, int cqn,
				 enum mlx4_qp_state *state,
				 struct mlx4_qp *qp)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_qp_context *context;
	int err = 0;

	context = kmalloc(sizeof *context, GFP_KERNEL);
	if (!context) {
		mlx4_err(mdev, "Failed to allocate qp context\n");
		return -ENOMEM;
	}

	err = mlx4_qp_alloc(mdev->dev, qpn, qp);
	if (err) {
		mlx4_err(mdev, "Failed to allocate qp #%d\n", qpn);
		goto out;
	}
	qp->event = mlx4_en_sqp_event;

	memset(context, 0, sizeof *context);
	mlx4_en_fill_qp_context(priv, 0, 0, 0, 0, qpn, cqn, srqn, context);

	err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, context, qp, state);
	if (err) {
		mlx4_qp_remove(mdev->dev, qp);
		mlx4_qp_free(mdev->dev, qp);
	}
out:
	kfree(context);
	return err;
}

/* Allocate rx qp's and configure them according to rss map */
int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
	struct mlx4_qp_context context;
	struct mlx4_en_rss_context *rss_context;
	void *ptr;
	int rss_xor = mdev->profile.rss_xor;
	u8 rss_mask = mdev->profile.rss_mask;
	int i, srqn, qpn, cqn;
	int err = 0;
	int good_qps = 0;

	mlx4_dbg(DRV, priv, "Configuring rss steering for port %u\n", priv->port);
	err = mlx4_qp_reserve_range(mdev->dev, rss_map->size,
				    rss_map->size, &rss_map->base_qpn);
	if (err) {
		mlx4_err(mdev, "Failed reserving %d qps for port %u\n",
			 rss_map->size, priv->port);
		return err;
	}

	for (i = 0; i < rss_map->size; i++) {
		cqn = priv->rx_ring[rss_map->map[i]].cqn;
		srqn = priv->rx_ring[rss_map->map[i]].srq.srqn;
		qpn = rss_map->base_qpn + i;
		err = mlx4_en_config_rss_qp(priv, qpn, srqn, cqn,
					    &rss_map->state[i],
					    &rss_map->qps[i]);
		if (err)
			goto rss_err;

		++good_qps;
	}

	/* Configure RSS indirection qp */
	err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &priv->base_qpn);
	if (err) {
		mlx4_err(mdev, "Failed to reserve range for RSS "
			       "indirection qp\n");
		goto rss_err;
	}
	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp);
	if (err) {
		mlx4_err(mdev, "Failed to allocate RSS indirection QP\n");
		goto reserve_err;
	}
	rss_map->indir_qp.event = mlx4_en_sqp_event;
	mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
				priv->rx_ring[0].cqn, 0, &context);

	ptr = ((void *) &context) + 0x3c;
	rss_context = (struct mlx4_en_rss_context *) ptr;
	rss_context->base_qpn = cpu_to_be32(ilog2(rss_map->size) << 24 |
					    (rss_map->base_qpn));
	rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn);
	rss_context->hash_fn = rss_xor & 0x3;
	rss_context->flags = rss_mask << 2;

	err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context,
			       &rss_map->indir_qp, &rss_map->indir_state);
	if (err)
		goto indir_err;

	return 0;

indir_err:
	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
reserve_err:
	mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1);
rss_err:
	for (i = 0; i < good_qps; i++) {
		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
			       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
	}
	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, rss_map->size);
	return err;
}

void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
	int i;

	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1);

	for (i = 0; i < rss_map->size; i++) {
		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
			       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
	}
	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, rss_map->size);
}