Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bwh/sfc-next

+3176 -610
+8
drivers/net/ethernet/sfc/Kconfig
··· 26 26 ----help--- 27 27 This exposes the on-board firmware-managed sensors as a 28 28 hardware monitor device. 29 + config SFC_SRIOV 30 + bool "Solarflare SFC9000-family SR-IOV support" 31 + depends on SFC && PCI_IOV 32 + default y 33 + ---help--- 34 + This enables support for the SFC9000 I/O Virtualization 35 + features, allowing accelerated network performance in 36 + virtualized environments.
+1
drivers/net/ethernet/sfc/Makefile
··· 4 4 tenxpress.o txc43128_phy.o falcon_boards.o \ 5 5 mcdi.o mcdi_phy.o mcdi_mon.o 6 6 sfc-$(CONFIG_SFC_MTD) += mtd.o 7 + sfc-$(CONFIG_SFC_SRIOV) += siena_sriov.o 7 8 8 9 obj-$(CONFIG_SFC) += sfc.o
+404 -295
drivers/net/ethernet/sfc/efx.c
··· 186 186 * 187 187 *************************************************************************/ 188 188 189 + static void efx_start_interrupts(struct efx_nic *efx, bool may_keep_eventq); 190 + static void efx_stop_interrupts(struct efx_nic *efx, bool may_keep_eventq); 191 + static void efx_remove_channel(struct efx_channel *channel); 189 192 static void efx_remove_channels(struct efx_nic *efx); 193 + static const struct efx_channel_type efx_default_channel_type; 190 194 static void efx_remove_port(struct efx_nic *efx); 191 - static void efx_init_napi(struct efx_nic *efx); 195 + static void efx_init_napi_channel(struct efx_channel *channel); 192 196 static void efx_fini_napi(struct efx_nic *efx); 193 197 static void efx_fini_napi_channel(struct efx_channel *channel); 194 198 static void efx_fini_struct(struct efx_nic *efx); ··· 221 217 */ 222 218 static int efx_process_channel(struct efx_channel *channel, int budget) 223 219 { 224 - struct efx_nic *efx = channel->efx; 225 220 int spent; 226 221 227 - if (unlikely(efx->reset_pending || !channel->enabled)) 222 + if (unlikely(!channel->enabled)) 228 223 return 0; 229 224 230 225 spent = efx_nic_process_eventq(channel, budget); 231 - if (spent == 0) 232 - return 0; 226 + if (spent && efx_channel_has_rx_queue(channel)) { 227 + struct efx_rx_queue *rx_queue = 228 + efx_channel_get_rx_queue(channel); 233 229 234 - /* Deliver last RX packet. */ 235 - if (channel->rx_pkt) { 236 - __efx_rx_packet(channel, channel->rx_pkt); 237 - channel->rx_pkt = NULL; 230 + /* Deliver last RX packet. 
*/ 231 + if (channel->rx_pkt) { 232 + __efx_rx_packet(channel, channel->rx_pkt); 233 + channel->rx_pkt = NULL; 234 + } 235 + if (rx_queue->enabled) { 236 + efx_rx_strategy(channel); 237 + efx_fast_push_rx_descriptors(rx_queue); 238 + } 238 239 } 239 - 240 - efx_rx_strategy(channel); 241 - 242 - efx_fast_push_rx_descriptors(efx_channel_get_rx_queue(channel)); 243 240 244 241 return spent; 245 242 } ··· 281 276 spent = efx_process_channel(channel, budget); 282 277 283 278 if (spent < budget) { 284 - if (channel->channel < efx->n_rx_channels && 279 + if (efx_channel_has_rx_queue(channel) && 285 280 efx->irq_rx_adaptive && 286 281 unlikely(++channel->irq_count == 1000)) { 287 282 if (unlikely(channel->irq_mod_score < ··· 391 386 efx_nic_init_eventq(channel); 392 387 } 393 388 389 + /* Enable event queue processing and NAPI */ 390 + static void efx_start_eventq(struct efx_channel *channel) 391 + { 392 + netif_dbg(channel->efx, ifup, channel->efx->net_dev, 393 + "chan %d start event queue\n", channel->channel); 394 + 395 + /* The interrupt handler for this channel may set work_pending 396 + * as soon as we enable it. Make sure it's cleared before 397 + * then. Similarly, make sure it sees the enabled flag set. 
398 + */ 399 + channel->work_pending = false; 400 + channel->enabled = true; 401 + smp_wmb(); 402 + 403 + napi_enable(&channel->napi_str); 404 + efx_nic_eventq_read_ack(channel); 405 + } 406 + 407 + /* Disable event queue processing and NAPI */ 408 + static void efx_stop_eventq(struct efx_channel *channel) 409 + { 410 + if (!channel->enabled) 411 + return; 412 + 413 + napi_disable(&channel->napi_str); 414 + channel->enabled = false; 415 + } 416 + 394 417 static void efx_fini_eventq(struct efx_channel *channel) 395 418 { 396 419 netif_dbg(channel->efx, drv, channel->efx->net_dev, ··· 441 408 * 442 409 *************************************************************************/ 443 410 444 - /* Allocate and initialise a channel structure, optionally copying 445 - * parameters (but not resources) from an old channel structure. */ 411 + /* Allocate and initialise a channel structure. */ 446 412 static struct efx_channel * 447 413 efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel) 448 414 { ··· 450 418 struct efx_tx_queue *tx_queue; 451 419 int j; 452 420 453 - if (old_channel) { 454 - channel = kmalloc(sizeof(*channel), GFP_KERNEL); 455 - if (!channel) 456 - return NULL; 421 + channel = kzalloc(sizeof(*channel), GFP_KERNEL); 422 + if (!channel) 423 + return NULL; 457 424 458 - *channel = *old_channel; 425 + channel->efx = efx; 426 + channel->channel = i; 427 + channel->type = &efx_default_channel_type; 459 428 460 - channel->napi_dev = NULL; 461 - memset(&channel->eventq, 0, sizeof(channel->eventq)); 462 - 463 - rx_queue = &channel->rx_queue; 464 - rx_queue->buffer = NULL; 465 - memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); 466 - 467 - for (j = 0; j < EFX_TXQ_TYPES; j++) { 468 - tx_queue = &channel->tx_queue[j]; 469 - if (tx_queue->channel) 470 - tx_queue->channel = channel; 471 - tx_queue->buffer = NULL; 472 - memset(&tx_queue->txd, 0, sizeof(tx_queue->txd)); 473 - } 474 - } else { 475 - channel = kzalloc(sizeof(*channel), GFP_KERNEL); 
476 - if (!channel) 477 - return NULL; 478 - 479 - channel->efx = efx; 480 - channel->channel = i; 481 - 482 - for (j = 0; j < EFX_TXQ_TYPES; j++) { 483 - tx_queue = &channel->tx_queue[j]; 484 - tx_queue->efx = efx; 485 - tx_queue->queue = i * EFX_TXQ_TYPES + j; 486 - tx_queue->channel = channel; 487 - } 429 + for (j = 0; j < EFX_TXQ_TYPES; j++) { 430 + tx_queue = &channel->tx_queue[j]; 431 + tx_queue->efx = efx; 432 + tx_queue->queue = i * EFX_TXQ_TYPES + j; 433 + tx_queue->channel = channel; 488 434 } 489 435 490 436 rx_queue = &channel->rx_queue; 491 437 rx_queue->efx = efx; 438 + setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill, 439 + (unsigned long)rx_queue); 440 + 441 + return channel; 442 + } 443 + 444 + /* Allocate and initialise a channel structure, copying parameters 445 + * (but not resources) from an old channel structure. 446 + */ 447 + static struct efx_channel * 448 + efx_copy_channel(const struct efx_channel *old_channel) 449 + { 450 + struct efx_channel *channel; 451 + struct efx_rx_queue *rx_queue; 452 + struct efx_tx_queue *tx_queue; 453 + int j; 454 + 455 + channel = kmalloc(sizeof(*channel), GFP_KERNEL); 456 + if (!channel) 457 + return NULL; 458 + 459 + *channel = *old_channel; 460 + 461 + channel->napi_dev = NULL; 462 + memset(&channel->eventq, 0, sizeof(channel->eventq)); 463 + 464 + for (j = 0; j < EFX_TXQ_TYPES; j++) { 465 + tx_queue = &channel->tx_queue[j]; 466 + if (tx_queue->channel) 467 + tx_queue->channel = channel; 468 + tx_queue->buffer = NULL; 469 + memset(&tx_queue->txd, 0, sizeof(tx_queue->txd)); 470 + } 471 + 472 + rx_queue = &channel->rx_queue; 473 + rx_queue->buffer = NULL; 474 + memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); 492 475 setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill, 493 476 (unsigned long)rx_queue); 494 477 ··· 519 472 netif_dbg(channel->efx, probe, channel->efx->net_dev, 520 473 "creating channel %d\n", channel->channel); 521 474 475 + rc = channel->type->pre_probe(channel); 476 + if (rc) 477 + goto 
fail; 478 + 522 479 rc = efx_probe_eventq(channel); 523 480 if (rc) 524 - goto fail1; 481 + goto fail; 525 482 526 483 efx_for_each_channel_tx_queue(tx_queue, channel) { 527 484 rc = efx_probe_tx_queue(tx_queue); 528 485 if (rc) 529 - goto fail2; 486 + goto fail; 530 487 } 531 488 532 489 efx_for_each_channel_rx_queue(rx_queue, channel) { 533 490 rc = efx_probe_rx_queue(rx_queue); 534 491 if (rc) 535 - goto fail3; 492 + goto fail; 536 493 } 537 494 538 495 channel->n_rx_frm_trunc = 0; 539 496 540 497 return 0; 541 498 542 - fail3: 543 - efx_for_each_channel_rx_queue(rx_queue, channel) 544 - efx_remove_rx_queue(rx_queue); 545 - fail2: 546 - efx_for_each_channel_tx_queue(tx_queue, channel) 547 - efx_remove_tx_queue(tx_queue); 548 - fail1: 499 + fail: 500 + efx_remove_channel(channel); 549 501 return rc; 550 502 } 551 503 504 + static void 505 + efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len) 506 + { 507 + struct efx_nic *efx = channel->efx; 508 + const char *type; 509 + int number; 510 + 511 + number = channel->channel; 512 + if (efx->tx_channel_offset == 0) { 513 + type = ""; 514 + } else if (channel->channel < efx->tx_channel_offset) { 515 + type = "-rx"; 516 + } else { 517 + type = "-tx"; 518 + number -= efx->tx_channel_offset; 519 + } 520 + snprintf(buf, len, "%s%s-%d", efx->name, type, number); 521 + } 552 522 553 523 static void efx_set_channel_names(struct efx_nic *efx) 554 524 { 555 525 struct efx_channel *channel; 556 - const char *type = ""; 557 - int number; 558 526 559 - efx_for_each_channel(channel, efx) { 560 - number = channel->channel; 561 - if (efx->n_channels > efx->n_rx_channels) { 562 - if (channel->channel < efx->n_rx_channels) { 563 - type = "-rx"; 564 - } else { 565 - type = "-tx"; 566 - number -= efx->n_rx_channels; 567 - } 568 - } 569 - snprintf(efx->channel_name[channel->channel], 570 - sizeof(efx->channel_name[0]), 571 - "%s%s-%d", efx->name, type, number); 572 - } 527 + efx_for_each_channel(channel, efx) 528 + 
channel->type->get_name(channel, 529 + efx->channel_name[channel->channel], 530 + sizeof(efx->channel_name[0])); 573 531 } 574 532 575 533 static int efx_probe_channels(struct efx_nic *efx) ··· 607 555 * to propagate configuration changes (mtu, checksum offload), or 608 556 * to clear hardware error conditions 609 557 */ 610 - static void efx_init_channels(struct efx_nic *efx) 558 + static void efx_start_datapath(struct efx_nic *efx) 611 559 { 612 560 struct efx_tx_queue *tx_queue; 613 561 struct efx_rx_queue *rx_queue; ··· 626 574 627 575 /* Initialise the channels */ 628 576 efx_for_each_channel(channel, efx) { 629 - netif_dbg(channel->efx, drv, channel->efx->net_dev, 630 - "init chan %d\n", channel->channel); 631 - 632 - efx_init_eventq(channel); 633 - 634 577 efx_for_each_channel_tx_queue(tx_queue, channel) 635 578 efx_init_tx_queue(tx_queue); 636 579 637 580 /* The rx buffer allocation strategy is MTU dependent */ 638 581 efx_rx_strategy(channel); 639 582 640 - efx_for_each_channel_rx_queue(rx_queue, channel) 583 + efx_for_each_channel_rx_queue(rx_queue, channel) { 641 584 efx_init_rx_queue(rx_queue); 585 + efx_nic_generate_fill_event(rx_queue); 586 + } 642 587 643 588 WARN_ON(channel->rx_pkt != NULL); 644 589 efx_rx_strategy(channel); 645 590 } 591 + 592 + if (netif_device_present(efx->net_dev)) 593 + netif_tx_wake_all_queues(efx->net_dev); 646 594 } 647 595 648 - /* This enables event queue processing and packet transmission. 649 - * 650 - * Note that this function is not allowed to fail, since that would 651 - * introduce too much complexity into the suspend/resume path. 652 - */ 653 - static void efx_start_channel(struct efx_channel *channel) 654 - { 655 - struct efx_rx_queue *rx_queue; 656 - 657 - netif_dbg(channel->efx, ifup, channel->efx->net_dev, 658 - "starting chan %d\n", channel->channel); 659 - 660 - /* The interrupt handler for this channel may set work_pending 661 - * as soon as we enable it. Make sure it's cleared before 662 - * then. 
Similarly, make sure it sees the enabled flag set. */ 663 - channel->work_pending = false; 664 - channel->enabled = true; 665 - smp_wmb(); 666 - 667 - /* Fill the queues before enabling NAPI */ 668 - efx_for_each_channel_rx_queue(rx_queue, channel) 669 - efx_fast_push_rx_descriptors(rx_queue); 670 - 671 - napi_enable(&channel->napi_str); 672 - } 673 - 674 - /* This disables event queue processing and packet transmission. 675 - * This function does not guarantee that all queue processing 676 - * (e.g. RX refill) is complete. 677 - */ 678 - static void efx_stop_channel(struct efx_channel *channel) 679 - { 680 - if (!channel->enabled) 681 - return; 682 - 683 - netif_dbg(channel->efx, ifdown, channel->efx->net_dev, 684 - "stop chan %d\n", channel->channel); 685 - 686 - channel->enabled = false; 687 - napi_disable(&channel->napi_str); 688 - } 689 - 690 - static void efx_fini_channels(struct efx_nic *efx) 596 + static void efx_stop_datapath(struct efx_nic *efx) 691 597 { 692 598 struct efx_channel *channel; 693 599 struct efx_tx_queue *tx_queue; ··· 672 662 } 673 663 674 664 efx_for_each_channel(channel, efx) { 675 - netif_dbg(channel->efx, drv, channel->efx->net_dev, 676 - "shut down chan %d\n", channel->channel); 665 + /* RX packet processing is pipelined, so wait for the 666 + * NAPI handler to complete. At least event queue 0 667 + * might be kept active by non-data events, so don't 668 + * use napi_synchronize() but actually disable NAPI 669 + * temporarily. 
670 + */ 671 + if (efx_channel_has_rx_queue(channel)) { 672 + efx_stop_eventq(channel); 673 + efx_start_eventq(channel); 674 + } 677 675 678 676 efx_for_each_channel_rx_queue(rx_queue, channel) 679 677 efx_fini_rx_queue(rx_queue); 680 678 efx_for_each_possible_channel_tx_queue(tx_queue, channel) 681 679 efx_fini_tx_queue(tx_queue); 682 - efx_fini_eventq(channel); 683 680 } 684 681 } 685 682 ··· 718 701 { 719 702 struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; 720 703 u32 old_rxq_entries, old_txq_entries; 721 - unsigned i; 722 - int rc; 704 + unsigned i, next_buffer_table = 0; 705 + int rc = 0; 706 + 707 + /* Not all channels should be reallocated. We must avoid 708 + * reallocating their buffer table entries. 709 + */ 710 + efx_for_each_channel(channel, efx) { 711 + struct efx_rx_queue *rx_queue; 712 + struct efx_tx_queue *tx_queue; 713 + 714 + if (channel->type->copy) 715 + continue; 716 + next_buffer_table = max(next_buffer_table, 717 + channel->eventq.index + 718 + channel->eventq.entries); 719 + efx_for_each_channel_rx_queue(rx_queue, channel) 720 + next_buffer_table = max(next_buffer_table, 721 + rx_queue->rxd.index + 722 + rx_queue->rxd.entries); 723 + efx_for_each_channel_tx_queue(tx_queue, channel) 724 + next_buffer_table = max(next_buffer_table, 725 + tx_queue->txd.index + 726 + tx_queue->txd.entries); 727 + } 723 728 724 729 efx_stop_all(efx); 725 - efx_fini_channels(efx); 730 + efx_stop_interrupts(efx, true); 726 731 727 - /* Clone channels */ 732 + /* Clone channels (where possible) */ 728 733 memset(other_channel, 0, sizeof(other_channel)); 729 734 for (i = 0; i < efx->n_channels; i++) { 730 - channel = efx_alloc_channel(efx, i, efx->channel[i]); 735 + channel = efx->channel[i]; 736 + if (channel->type->copy) 737 + channel = channel->type->copy(channel); 731 738 if (!channel) { 732 739 rc = -ENOMEM; 733 740 goto out; ··· 770 729 other_channel[i] = channel; 771 730 } 772 731 773 - rc = efx_probe_channels(efx); 774 - if (rc) 775 - goto 
rollback; 732 + /* Restart buffer table allocation */ 733 + efx->next_buffer_table = next_buffer_table; 776 734 777 - efx_init_napi(efx); 778 - 779 - /* Destroy old channels */ 780 735 for (i = 0; i < efx->n_channels; i++) { 781 - efx_fini_napi_channel(other_channel[i]); 782 - efx_remove_channel(other_channel[i]); 736 + channel = efx->channel[i]; 737 + if (!channel->type->copy) 738 + continue; 739 + rc = efx_probe_channel(channel); 740 + if (rc) 741 + goto rollback; 742 + efx_init_napi_channel(efx->channel[i]); 783 743 } 784 - out: 785 - /* Free unused channel structures */ 786 - for (i = 0; i < efx->n_channels; i++) 787 - kfree(other_channel[i]); 788 744 789 - efx_init_channels(efx); 745 + out: 746 + /* Destroy unused channel structures */ 747 + for (i = 0; i < efx->n_channels; i++) { 748 + channel = other_channel[i]; 749 + if (channel && channel->type->copy) { 750 + efx_fini_napi_channel(channel); 751 + efx_remove_channel(channel); 752 + kfree(channel); 753 + } 754 + } 755 + 756 + efx_start_interrupts(efx, true); 790 757 efx_start_all(efx); 791 758 return rc; 792 759 ··· 813 764 void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue) 814 765 { 815 766 mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100)); 767 + } 768 + 769 + static const struct efx_channel_type efx_default_channel_type = { 770 + .pre_probe = efx_channel_dummy_op_int, 771 + .get_name = efx_get_channel_name, 772 + .copy = efx_copy_channel, 773 + .keep_eventq = false, 774 + }; 775 + 776 + int efx_channel_dummy_op_int(struct efx_channel *channel) 777 + { 778 + return 0; 816 779 } 817 780 818 781 /************************************************************************** ··· 1169 1108 pci_disable_device(efx->pci_dev); 1170 1109 } 1171 1110 1172 - static int efx_wanted_parallelism(void) 1111 + static unsigned int efx_wanted_parallelism(struct efx_nic *efx) 1173 1112 { 1174 1113 cpumask_var_t thread_mask; 1175 - int count; 1114 + unsigned int count; 1176 1115 int cpu; 1177 1116 1178 - if 
(rss_cpus) 1179 - return rss_cpus; 1180 - 1181 - if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) { 1182 - printk(KERN_WARNING 1183 - "sfc: RSS disabled due to allocation failure\n"); 1184 - return 1; 1185 - } 1186 - 1187 - count = 0; 1188 - for_each_online_cpu(cpu) { 1189 - if (!cpumask_test_cpu(cpu, thread_mask)) { 1190 - ++count; 1191 - cpumask_or(thread_mask, thread_mask, 1192 - topology_thread_cpumask(cpu)); 1117 + if (rss_cpus) { 1118 + count = rss_cpus; 1119 + } else { 1120 + if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) { 1121 + netif_warn(efx, probe, efx->net_dev, 1122 + "RSS disabled due to allocation failure\n"); 1123 + return 1; 1193 1124 } 1125 + 1126 + count = 0; 1127 + for_each_online_cpu(cpu) { 1128 + if (!cpumask_test_cpu(cpu, thread_mask)) { 1129 + ++count; 1130 + cpumask_or(thread_mask, thread_mask, 1131 + topology_thread_cpumask(cpu)); 1132 + } 1133 + } 1134 + 1135 + free_cpumask_var(thread_mask); 1194 1136 } 1195 1137 1196 - free_cpumask_var(thread_mask); 1138 + /* If RSS is requested for the PF *and* VFs then we can't write RSS 1139 + * table entries that are inaccessible to VFs 1140 + */ 1141 + if (efx_sriov_wanted(efx) && efx_vf_size(efx) > 1 && 1142 + count > efx_vf_size(efx)) { 1143 + netif_warn(efx, probe, efx->net_dev, 1144 + "Reducing number of RSS channels from %u to %u for " 1145 + "VF support. 
Increase vf-msix-limit to use more " 1146 + "channels on the PF.\n", 1147 + count, efx_vf_size(efx)); 1148 + count = efx_vf_size(efx); 1149 + } 1150 + 1197 1151 return count; 1198 1152 } 1199 1153 ··· 1216 1140 efx_init_rx_cpu_rmap(struct efx_nic *efx, struct msix_entry *xentries) 1217 1141 { 1218 1142 #ifdef CONFIG_RFS_ACCEL 1219 - int i, rc; 1143 + unsigned int i; 1144 + int rc; 1220 1145 1221 1146 efx->net_dev->rx_cpu_rmap = alloc_irq_cpu_rmap(efx->n_rx_channels); 1222 1147 if (!efx->net_dev->rx_cpu_rmap) ··· 1240 1163 */ 1241 1164 static int efx_probe_interrupts(struct efx_nic *efx) 1242 1165 { 1243 - int max_channels = 1244 - min_t(int, efx->type->phys_addr_channels, EFX_MAX_CHANNELS); 1245 - int rc, i; 1166 + unsigned int max_channels = 1167 + min(efx->type->phys_addr_channels, EFX_MAX_CHANNELS); 1168 + unsigned int extra_channels = 0; 1169 + unsigned int i, j; 1170 + int rc; 1171 + 1172 + for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) 1173 + if (efx->extra_channel_type[i]) 1174 + ++extra_channels; 1246 1175 1247 1176 if (efx->interrupt_mode == EFX_INT_MODE_MSIX) { 1248 1177 struct msix_entry xentries[EFX_MAX_CHANNELS]; 1249 - int n_channels; 1178 + unsigned int n_channels; 1250 1179 1251 - n_channels = efx_wanted_parallelism(); 1180 + n_channels = efx_wanted_parallelism(efx); 1252 1181 if (separate_tx_channels) 1253 1182 n_channels *= 2; 1183 + n_channels += extra_channels; 1254 1184 n_channels = min(n_channels, max_channels); 1255 1185 1256 1186 for (i = 0; i < n_channels; i++) ··· 1266 1182 if (rc > 0) { 1267 1183 netif_err(efx, drv, efx->net_dev, 1268 1184 "WARNING: Insufficient MSI-X vectors" 1269 - " available (%d < %d).\n", rc, n_channels); 1185 + " available (%d < %u).\n", rc, n_channels); 1270 1186 netif_err(efx, drv, efx->net_dev, 1271 1187 "WARNING: Performance may be reduced.\n"); 1272 1188 EFX_BUG_ON_PARANOID(rc >= n_channels); ··· 1277 1193 1278 1194 if (rc == 0) { 1279 1195 efx->n_channels = n_channels; 1196 + if (n_channels > extra_channels) 
1197 + n_channels -= extra_channels; 1280 1198 if (separate_tx_channels) { 1281 - efx->n_tx_channels = 1282 - max(efx->n_channels / 2, 1U); 1283 - efx->n_rx_channels = 1284 - max(efx->n_channels - 1285 - efx->n_tx_channels, 1U); 1199 + efx->n_tx_channels = max(n_channels / 2, 1U); 1200 + efx->n_rx_channels = max(n_channels - 1201 + efx->n_tx_channels, 1202 + 1U); 1286 1203 } else { 1287 - efx->n_tx_channels = efx->n_channels; 1288 - efx->n_rx_channels = efx->n_channels; 1204 + efx->n_tx_channels = n_channels; 1205 + efx->n_rx_channels = n_channels; 1289 1206 } 1290 1207 rc = efx_init_rx_cpu_rmap(efx, xentries); 1291 1208 if (rc) { 1292 1209 pci_disable_msix(efx->pci_dev); 1293 1210 return rc; 1294 1211 } 1295 - for (i = 0; i < n_channels; i++) 1212 + for (i = 0; i < efx->n_channels; i++) 1296 1213 efx_get_channel(efx, i)->irq = 1297 1214 xentries[i].vector; 1298 1215 } else { ··· 1327 1242 efx->legacy_irq = efx->pci_dev->irq; 1328 1243 } 1329 1244 1245 + /* Assign extra channels if possible */ 1246 + j = efx->n_channels; 1247 + for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) { 1248 + if (!efx->extra_channel_type[i]) 1249 + continue; 1250 + if (efx->interrupt_mode != EFX_INT_MODE_MSIX || 1251 + efx->n_channels <= extra_channels) { 1252 + efx->extra_channel_type[i]->handle_no_channel(efx); 1253 + } else { 1254 + --j; 1255 + efx_get_channel(efx, j)->type = 1256 + efx->extra_channel_type[i]; 1257 + } 1258 + } 1259 + 1260 + /* RSS might be usable on VFs even if it is disabled on the PF */ 1261 + efx->rss_spread = (efx->n_rx_channels > 1 ? 
1262 + efx->n_rx_channels : efx_vf_size(efx)); 1263 + 1330 1264 return 0; 1265 + } 1266 + 1267 + /* Enable interrupts, then probe and start the event queues */ 1268 + static void efx_start_interrupts(struct efx_nic *efx, bool may_keep_eventq) 1269 + { 1270 + struct efx_channel *channel; 1271 + 1272 + if (efx->legacy_irq) 1273 + efx->legacy_irq_enabled = true; 1274 + efx_nic_enable_interrupts(efx); 1275 + 1276 + efx_for_each_channel(channel, efx) { 1277 + if (!channel->type->keep_eventq || !may_keep_eventq) 1278 + efx_init_eventq(channel); 1279 + efx_start_eventq(channel); 1280 + } 1281 + 1282 + efx_mcdi_mode_event(efx); 1283 + } 1284 + 1285 + static void efx_stop_interrupts(struct efx_nic *efx, bool may_keep_eventq) 1286 + { 1287 + struct efx_channel *channel; 1288 + 1289 + efx_mcdi_mode_poll(efx); 1290 + 1291 + efx_nic_disable_interrupts(efx); 1292 + if (efx->legacy_irq) { 1293 + synchronize_irq(efx->legacy_irq); 1294 + efx->legacy_irq_enabled = false; 1295 + } 1296 + 1297 + efx_for_each_channel(channel, efx) { 1298 + if (channel->irq) 1299 + synchronize_irq(channel->irq); 1300 + 1301 + efx_stop_eventq(channel); 1302 + if (!channel->type->keep_eventq || !may_keep_eventq) 1303 + efx_fini_eventq(channel); 1304 + } 1331 1305 } 1332 1306 1333 1307 static void efx_remove_interrupts(struct efx_nic *efx) ··· 1439 1295 if (rc) 1440 1296 goto fail; 1441 1297 1298 + efx->type->dimension_resources(efx); 1299 + 1442 1300 if (efx->n_channels > 1) 1443 1301 get_random_bytes(&efx->rx_hash_key, sizeof(efx->rx_hash_key)); 1444 1302 for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++) 1445 1303 efx->rx_indir_table[i] = 1446 - ethtool_rxfh_indir_default(i, efx->n_rx_channels); 1304 + ethtool_rxfh_indir_default(i, efx->rss_spread); 1447 1305 1448 1306 efx_set_channels(efx); 1449 1307 netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels); ··· 1493 1347 } 1494 1348 1495 1349 efx->rxq_entries = efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE; 1496 - rc = efx_probe_channels(efx); 1497 
- if (rc) 1498 - goto fail3; 1499 1350 1500 1351 rc = efx_probe_filters(efx); 1501 1352 if (rc) { 1502 1353 netif_err(efx, probe, efx->net_dev, 1503 1354 "failed to create filter tables\n"); 1504 - goto fail4; 1355 + goto fail3; 1505 1356 } 1357 + 1358 + rc = efx_probe_channels(efx); 1359 + if (rc) 1360 + goto fail4; 1506 1361 1507 1362 return 0; 1508 1363 1509 1364 fail4: 1510 - efx_remove_channels(efx); 1365 + efx_remove_filters(efx); 1511 1366 fail3: 1512 1367 efx_remove_port(efx); 1513 1368 fail2: ··· 1517 1370 return rc; 1518 1371 } 1519 1372 1520 - /* Called after previous invocation(s) of efx_stop_all, restarts the 1521 - * port, kernel transmit queue, NAPI processing and hardware interrupts, 1522 - * and ensures that the port is scheduled to be reconfigured. 1523 - * This function is safe to call multiple times when the NIC is in any 1524 - * state. */ 1373 + /* Called after previous invocation(s) of efx_stop_all, restarts the port, 1374 + * kernel transmit queues and NAPI processing, and ensures that the port is 1375 + * scheduled to be reconfigured. This function is safe to call multiple 1376 + * times when the NIC is in any state. 1377 + */ 1525 1378 static void efx_start_all(struct efx_nic *efx) 1526 1379 { 1527 - struct efx_channel *channel; 1528 - 1529 1380 EFX_ASSERT_RESET_SERIALISED(efx); 1530 1381 1531 1382 /* Check that it is appropriate to restart the interface. 
All ··· 1535 1390 if (!netif_running(efx->net_dev)) 1536 1391 return; 1537 1392 1538 - /* Mark the port as enabled so port reconfigurations can start, then 1539 - * restart the transmit interface early so the watchdog timer stops */ 1540 1393 efx_start_port(efx); 1541 - 1542 - if (netif_device_present(efx->net_dev)) 1543 - netif_tx_wake_all_queues(efx->net_dev); 1544 - 1545 - efx_for_each_channel(channel, efx) 1546 - efx_start_channel(channel); 1547 - 1548 - if (efx->legacy_irq) 1549 - efx->legacy_irq_enabled = true; 1550 - efx_nic_enable_interrupts(efx); 1551 - 1552 - /* Switch to event based MCDI completions after enabling interrupts. 1553 - * If a reset has been scheduled, then we need to stay in polled mode. 1554 - * Rather than serialising efx_mcdi_mode_event() [which sleeps] and 1555 - * reset_pending [modified from an atomic context], we instead guarantee 1556 - * that efx_mcdi_mode_poll() isn't reverted erroneously */ 1557 - efx_mcdi_mode_event(efx); 1558 - if (efx->reset_pending) 1559 - efx_mcdi_mode_poll(efx); 1394 + efx_start_datapath(efx); 1560 1395 1561 1396 /* Start the hardware monitor if there is one. Otherwise (we're link 1562 1397 * event driven), we have to poll the PHY because after an event queue ··· 1572 1447 * taking locks. 
*/ 1573 1448 static void efx_stop_all(struct efx_nic *efx) 1574 1449 { 1575 - struct efx_channel *channel; 1576 - 1577 1450 EFX_ASSERT_RESET_SERIALISED(efx); 1578 1451 1579 1452 /* port_enabled can be read safely under the rtnl lock */ ··· 1579 1456 return; 1580 1457 1581 1458 efx->type->stop_stats(efx); 1582 - 1583 - /* Switch to MCDI polling on Siena before disabling interrupts */ 1584 - efx_mcdi_mode_poll(efx); 1585 - 1586 - /* Disable interrupts and wait for ISR to complete */ 1587 - efx_nic_disable_interrupts(efx); 1588 - if (efx->legacy_irq) { 1589 - synchronize_irq(efx->legacy_irq); 1590 - efx->legacy_irq_enabled = false; 1591 - } 1592 - efx_for_each_channel(channel, efx) { 1593 - if (channel->irq) 1594 - synchronize_irq(channel->irq); 1595 - } 1596 - 1597 - /* Stop all NAPI processing and synchronous rx refills */ 1598 - efx_for_each_channel(channel, efx) 1599 - efx_stop_channel(channel); 1600 - 1601 - /* Stop all asynchronous port reconfigurations. Since all 1602 - * event processing has already been stopped, there is no 1603 - * window to loose phy events */ 1604 1459 efx_stop_port(efx); 1605 1460 1606 1461 /* Flush efx_mac_work(), refill_workqueue, monitor_work */ ··· 1586 1485 1587 1486 /* Stop the kernel transmit interface late, so the watchdog 1588 1487 * timer isn't ticking over the flush */ 1589 - netif_tx_stop_all_queues(efx->net_dev); 1590 - netif_tx_lock_bh(efx->net_dev); 1591 - netif_tx_unlock_bh(efx->net_dev); 1488 + netif_tx_disable(efx->net_dev); 1489 + 1490 + efx_stop_datapath(efx); 1592 1491 } 1593 1492 1594 1493 static void efx_remove_all(struct efx_nic *efx) 1595 1494 { 1596 - efx_remove_filters(efx); 1597 1495 efx_remove_channels(efx); 1496 + efx_remove_filters(efx); 1598 1497 efx_remove_port(efx); 1599 1498 efx_remove_nic(efx); 1600 1499 } ··· 1738 1637 * 1739 1638 **************************************************************************/ 1740 1639 1640 + static void efx_init_napi_channel(struct efx_channel *channel) 1641 + { 1642 + 
struct efx_nic *efx = channel->efx; 1643 + 1644 + channel->napi_dev = efx->net_dev; 1645 + netif_napi_add(channel->napi_dev, &channel->napi_str, 1646 + efx_poll, napi_weight); 1647 + } 1648 + 1741 1649 static void efx_init_napi(struct efx_nic *efx) 1742 1650 { 1743 1651 struct efx_channel *channel; 1744 1652 1745 - efx_for_each_channel(channel, efx) { 1746 - channel->napi_dev = efx->net_dev; 1747 - netif_napi_add(channel->napi_dev, &channel->napi_str, 1748 - efx_poll, napi_weight); 1749 - } 1653 + efx_for_each_channel(channel, efx) 1654 + efx_init_napi_channel(channel); 1750 1655 } 1751 1656 1752 1657 static void efx_fini_napi_channel(struct efx_channel *channel) ··· 1837 1730 if (efx->state != STATE_DISABLED) { 1838 1731 /* Stop the device and flush all the channels */ 1839 1732 efx_stop_all(efx); 1840 - efx_fini_channels(efx); 1841 - efx_init_channels(efx); 1842 1733 } 1843 1734 1844 1735 return 0; ··· 1907 1802 1908 1803 netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); 1909 1804 1910 - efx_fini_channels(efx); 1911 - 1912 1805 mutex_lock(&efx->mac_lock); 1913 1806 /* Reconfigure the MAC before enabling the dma queues so that 1914 1807 * the RX buffers don't overflow */ 1915 1808 net_dev->mtu = new_mtu; 1916 1809 efx->type->reconfigure_mac(efx); 1917 1810 mutex_unlock(&efx->mac_lock); 1918 - 1919 - efx_init_channels(efx); 1920 1811 1921 1812 efx_start_all(efx); 1922 1813 return 0; ··· 1934 1833 } 1935 1834 1936 1835 memcpy(net_dev->dev_addr, new_addr, net_dev->addr_len); 1836 + efx_sriov_mac_address_changed(efx); 1937 1837 1938 1838 /* Reconfigure the MAC */ 1939 1839 mutex_lock(&efx->mac_lock); ··· 2001 1899 .ndo_set_mac_address = efx_set_mac_address, 2002 1900 .ndo_set_rx_mode = efx_set_rx_mode, 2003 1901 .ndo_set_features = efx_set_features, 1902 + #ifdef CONFIG_SFC_SRIOV 1903 + .ndo_set_vf_mac = efx_sriov_set_vf_mac, 1904 + .ndo_set_vf_vlan = efx_sriov_set_vf_vlan, 1905 + .ndo_set_vf_spoofchk = efx_sriov_set_vf_spoofchk, 1906 + 
.ndo_get_vf_config = efx_sriov_get_vf_config, 1907 + #endif 2004 1908 #ifdef CONFIG_NET_POLL_CONTROLLER 2005 1909 .ndo_poll_controller = efx_netpoll, 2006 1910 #endif ··· 2137 2029 efx_stop_all(efx); 2138 2030 mutex_lock(&efx->mac_lock); 2139 2031 2140 - efx_fini_channels(efx); 2032 + efx_stop_interrupts(efx, false); 2141 2033 if (efx->port_initialized && method != RESET_TYPE_INVISIBLE) 2142 2034 efx->phy_op->fini(efx); 2143 2035 efx->type->fini(efx); ··· 2174 2066 2175 2067 efx->type->reconfigure_mac(efx); 2176 2068 2177 - efx_init_channels(efx); 2069 + efx_start_interrupts(efx, false); 2178 2070 efx_restore_filters(efx); 2071 + efx_sriov_reset(efx); 2179 2072 2180 2073 mutex_unlock(&efx->mac_lock); 2181 2074 ··· 2381 2272 efx->phy_op = &efx_dummy_phy_operations; 2382 2273 efx->mdio.dev = net_dev; 2383 2274 INIT_WORK(&efx->mac_work, efx_mac_work); 2275 + init_waitqueue_head(&efx->flush_wq); 2384 2276 2385 2277 for (i = 0; i < EFX_MAX_CHANNELS; i++) { 2386 2278 efx->channel[i] = efx_alloc_channel(efx, i, NULL); ··· 2439 2329 free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap); 2440 2330 efx->net_dev->rx_cpu_rmap = NULL; 2441 2331 #endif 2332 + efx_stop_interrupts(efx, false); 2442 2333 efx_nic_fini_interrupt(efx); 2443 - efx_fini_channels(efx); 2444 2334 efx_fini_port(efx); 2445 2335 efx->type->fini(efx); 2446 2336 efx_fini_napi(efx); ··· 2466 2356 /* Allow any queued efx_resets() to complete */ 2467 2357 rtnl_unlock(); 2468 2358 2359 + efx_stop_interrupts(efx, false); 2360 + efx_sriov_fini(efx); 2469 2361 efx_unregister_netdev(efx); 2470 2362 2471 2363 efx_mtd_remove(efx); ··· 2516 2404 goto fail4; 2517 2405 } 2518 2406 2519 - efx_init_channels(efx); 2520 - 2521 2407 rc = efx_nic_init_interrupt(efx); 2522 2408 if (rc) 2523 2409 goto fail5; 2410 + efx_start_interrupts(efx, false); 2524 2411 2525 2412 return 0; 2526 2413 2527 2414 fail5: 2528 - efx_fini_channels(efx); 2529 2415 efx_fini_port(efx); 2530 2416 fail4: 2531 2417 efx->type->fini(efx); ··· 2549 2439 const struct 
efx_nic_type *type = (const struct efx_nic_type *) entry->driver_data; 2550 2440 struct net_device *net_dev; 2551 2441 struct efx_nic *efx; 2552 - int i, rc; 2442 + int rc; 2553 2443 2554 2444 /* Allocate and initialise a struct net_device and struct efx_nic */ 2555 2445 net_dev = alloc_etherdev_mqs(sizeof(*efx), EFX_MAX_CORE_TX_QUEUES, ··· 2582 2472 if (rc) 2583 2473 goto fail2; 2584 2474 2585 - /* No serialisation is required with the reset path because 2586 - * we're in STATE_INIT. */ 2587 - for (i = 0; i < 5; i++) { 2588 - rc = efx_pci_probe_main(efx); 2475 + rc = efx_pci_probe_main(efx); 2589 2476 2590 - /* Serialise against efx_reset(). No more resets will be 2591 - * scheduled since efx_stop_all() has been called, and we 2592 - * have not and never have been registered with either 2593 - * the rtnetlink or driverlink layers. */ 2594 - cancel_work_sync(&efx->reset_work); 2477 + /* Serialise against efx_reset(). No more resets will be 2478 + * scheduled since efx_stop_all() has been called, and we have 2479 + * not and never have been registered. 2480 + */ 2481 + cancel_work_sync(&efx->reset_work); 2595 2482 2596 - if (rc == 0) { 2597 - if (efx->reset_pending) { 2598 - /* If there was a scheduled reset during 2599 - * probe, the NIC is probably hosed anyway */ 2600 - efx_pci_remove_main(efx); 2601 - rc = -EIO; 2602 - } else { 2603 - break; 2604 - } 2605 - } 2483 + if (rc) 2484 + goto fail3; 2606 2485 2607 - /* Retry if a recoverably reset event has been scheduled */ 2608 - if (efx->reset_pending & 2609 - ~(1 << RESET_TYPE_INVISIBLE | 1 << RESET_TYPE_ALL) || 2610 - !efx->reset_pending) 2611 - goto fail3; 2612 - 2613 - efx->reset_pending = 0; 2614 - } 2615 - 2616 - if (rc) { 2617 - netif_err(efx, probe, efx->net_dev, "Could not reset NIC\n"); 2486 + /* If there was a scheduled reset during probe, the NIC is 2487 + * probably hosed anyway. 
2488 + */ 2489 + if (efx->reset_pending) { 2490 + rc = -EIO; 2618 2491 goto fail4; 2619 2492 } 2620 2493 ··· 2607 2514 2608 2515 rc = efx_register_netdev(efx); 2609 2516 if (rc) 2610 - goto fail5; 2517 + goto fail4; 2518 + 2519 + rc = efx_sriov_init(efx); 2520 + if (rc) 2521 + netif_err(efx, probe, efx->net_dev, 2522 + "SR-IOV can't be enabled rc %d\n", rc); 2611 2523 2612 2524 netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n"); 2613 2525 2526 + /* Try to create MTDs, but allow this to fail */ 2614 2527 rtnl_lock(); 2615 - efx_mtd_probe(efx); /* allowed to fail */ 2528 + rc = efx_mtd_probe(efx); 2616 2529 rtnl_unlock(); 2530 + if (rc) 2531 + netif_warn(efx, probe, efx->net_dev, 2532 + "failed to create MTDs (%d)\n", rc); 2533 + 2617 2534 return 0; 2618 2535 2619 - fail5: 2620 - efx_pci_remove_main(efx); 2621 2536 fail4: 2537 + efx_pci_remove_main(efx); 2622 2538 fail3: 2623 2539 efx_fini_io(efx); 2624 2540 fail2: ··· 2648 2546 netif_device_detach(efx->net_dev); 2649 2547 2650 2548 efx_stop_all(efx); 2651 - efx_fini_channels(efx); 2549 + efx_stop_interrupts(efx, false); 2652 2550 2653 2551 return 0; 2654 2552 } ··· 2659 2557 2660 2558 efx->state = STATE_INIT; 2661 2559 2662 - efx_init_channels(efx); 2560 + efx_start_interrupts(efx, false); 2663 2561 2664 2562 mutex_lock(&efx->mac_lock); 2665 2563 efx->phy_op->reconfigure(efx); ··· 2765 2663 if (rc) 2766 2664 goto err_notifier; 2767 2665 2666 + rc = efx_init_sriov(); 2667 + if (rc) 2668 + goto err_sriov; 2669 + 2768 2670 reset_workqueue = create_singlethread_workqueue("sfc_reset"); 2769 2671 if (!reset_workqueue) { 2770 2672 rc = -ENOMEM; ··· 2784 2678 err_pci: 2785 2679 destroy_workqueue(reset_workqueue); 2786 2680 err_reset: 2681 + efx_fini_sriov(); 2682 + err_sriov: 2787 2683 unregister_netdevice_notifier(&efx_netdev_notifier); 2788 2684 err_notifier: 2789 2685 return rc; ··· 2797 2689 2798 2690 pci_unregister_driver(&efx_pci_driver); 2799 2691 destroy_workqueue(reset_workqueue); 2692 + 
efx_fini_sriov(); 2800 2693 unregister_netdevice_notifier(&efx_netdev_notifier); 2801 2694 2802 2695 }
+1
drivers/net/ethernet/sfc/efx.h
··· 95 95 #endif 96 96 97 97 /* Channels */ 98 + extern int efx_channel_dummy_op_int(struct efx_channel *channel); 98 99 extern void efx_process_channel_now(struct efx_channel *channel); 99 100 extern int 100 101 efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries);
+44 -18
drivers/net/ethernet/sfc/ethtool.c
··· 808 808 return efx_reset(efx, rc); 809 809 } 810 810 811 + /* MAC address mask including only MC flag */ 812 + static const u8 mac_addr_mc_mask[ETH_ALEN] = { 0x01, 0, 0, 0, 0, 0 }; 813 + 811 814 static int efx_ethtool_get_class_rule(struct efx_nic *efx, 812 815 struct ethtool_rx_flow_spec *rule) 813 816 { 814 817 struct ethtool_tcpip4_spec *ip_entry = &rule->h_u.tcp_ip4_spec; 815 818 struct ethtool_tcpip4_spec *ip_mask = &rule->m_u.tcp_ip4_spec; 819 + struct ethhdr *mac_entry = &rule->h_u.ether_spec; 820 + struct ethhdr *mac_mask = &rule->m_u.ether_spec; 816 821 struct efx_filter_spec spec; 817 822 u16 vid; 818 823 u8 proto; ··· 833 828 else 834 829 rule->ring_cookie = spec.dmaq_id; 835 830 836 - rc = efx_filter_get_eth_local(&spec, &vid, 837 - rule->h_u.ether_spec.h_dest); 831 + if (spec.type == EFX_FILTER_MC_DEF || spec.type == EFX_FILTER_UC_DEF) { 832 + rule->flow_type = ETHER_FLOW; 833 + memcpy(mac_mask->h_dest, mac_addr_mc_mask, ETH_ALEN); 834 + if (spec.type == EFX_FILTER_MC_DEF) 835 + memcpy(mac_entry->h_dest, mac_addr_mc_mask, ETH_ALEN); 836 + return 0; 837 + } 838 + 839 + rc = efx_filter_get_eth_local(&spec, &vid, mac_entry->h_dest); 838 840 if (rc == 0) { 839 841 rule->flow_type = ETHER_FLOW; 840 - memset(rule->m_u.ether_spec.h_dest, ~0, ETH_ALEN); 842 + memset(mac_mask->h_dest, ~0, ETH_ALEN); 841 843 if (vid != EFX_FILTER_VID_UNSPEC) { 842 844 rule->flow_type |= FLOW_EXT; 843 845 rule->h_ext.vlan_tci = htons(vid); ··· 1013 1001 } 1014 1002 1015 1003 case ETHER_FLOW | FLOW_EXT: 1016 - /* Must match all or none of VID */ 1017 - if (rule->m_ext.vlan_tci != htons(0xfff) && 1018 - rule->m_ext.vlan_tci != 0) 1019 - return -EINVAL; 1020 - case ETHER_FLOW: 1021 - /* Must match all of destination */ 1022 - if (!is_broadcast_ether_addr(mac_mask->h_dest)) 1023 - return -EINVAL; 1024 - /* and nothing else */ 1004 + case ETHER_FLOW: { 1005 + u16 vlan_tag_mask = (rule->flow_type & FLOW_EXT ? 
1006 + ntohs(rule->m_ext.vlan_tci) : 0); 1007 + 1008 + /* Must not match on source address or Ethertype */ 1025 1009 if (!is_zero_ether_addr(mac_mask->h_source) || 1026 1010 mac_mask->h_proto) 1027 1011 return -EINVAL; 1028 1012 1029 - rc = efx_filter_set_eth_local( 1030 - &spec, 1031 - (rule->flow_type & FLOW_EXT && rule->m_ext.vlan_tci) ? 1032 - ntohs(rule->h_ext.vlan_tci) : EFX_FILTER_VID_UNSPEC, 1033 - mac_entry->h_dest); 1013 + /* Is it a default UC or MC filter? */ 1014 + if (!compare_ether_addr(mac_mask->h_dest, mac_addr_mc_mask) && 1015 + vlan_tag_mask == 0) { 1016 + if (is_multicast_ether_addr(mac_entry->h_dest)) 1017 + rc = efx_filter_set_mc_def(&spec); 1018 + else 1019 + rc = efx_filter_set_uc_def(&spec); 1020 + } 1021 + /* Otherwise, it must match all of destination and all 1022 + * or none of VID. 1023 + */ 1024 + else if (is_broadcast_ether_addr(mac_mask->h_dest) && 1025 + (vlan_tag_mask == 0xfff || vlan_tag_mask == 0)) { 1026 + rc = efx_filter_set_eth_local( 1027 + &spec, 1028 + vlan_tag_mask ? 1029 + ntohs(rule->h_ext.vlan_tci) : EFX_FILTER_VID_UNSPEC, 1030 + mac_entry->h_dest); 1031 + } else { 1032 + rc = -EINVAL; 1033 + } 1034 1034 if (rc) 1035 1035 return rc; 1036 1036 break; 1037 + } 1037 1038 1038 1039 default: 1039 1040 return -EINVAL; ··· 1085 1060 { 1086 1061 struct efx_nic *efx = netdev_priv(net_dev); 1087 1062 1088 - return (efx_nic_rev(efx) < EFX_REV_FALCON_B0 ? 1063 + return ((efx_nic_rev(efx) < EFX_REV_FALCON_B0 || 1064 + efx->n_rx_channels == 1) ? 1089 1065 0 : ARRAY_SIZE(efx->rx_indir_table)); 1090 1066 } 1091 1067
+8 -4
drivers/net/ethernet/sfc/falcon.c
··· 1333 1333 return rc; 1334 1334 } 1335 1335 1336 + static void falcon_dimension_resources(struct efx_nic *efx) 1337 + { 1338 + efx->rx_dc_base = 0x20000; 1339 + efx->tx_dc_base = 0x26000; 1340 + } 1341 + 1336 1342 /* Probe all SPI devices on the NIC */ 1337 1343 static void falcon_probe_spi_devices(struct efx_nic *efx) 1338 1344 { ··· 1755 1749 .probe = falcon_probe_nic, 1756 1750 .remove = falcon_remove_nic, 1757 1751 .init = falcon_init_nic, 1752 + .dimension_resources = falcon_dimension_resources, 1758 1753 .fini = efx_port_dummy_op_void, 1759 1754 .monitor = falcon_monitor, 1760 1755 .map_reset_reason = falcon_map_reset_reason, ··· 1790 1783 .max_interrupt_mode = EFX_INT_MODE_MSI, 1791 1784 .phys_addr_channels = 4, 1792 1785 .timer_period_max = 1 << FRF_AB_TC_TIMER_VAL_WIDTH, 1793 - .tx_dc_base = 0x130000, 1794 - .rx_dc_base = 0x100000, 1795 1786 .offload_features = NETIF_F_IP_CSUM, 1796 1787 }; 1797 1788 ··· 1797 1792 .probe = falcon_probe_nic, 1798 1793 .remove = falcon_remove_nic, 1799 1794 .init = falcon_init_nic, 1795 + .dimension_resources = falcon_dimension_resources, 1800 1796 .fini = efx_port_dummy_op_void, 1801 1797 .monitor = falcon_monitor, 1802 1798 .map_reset_reason = falcon_map_reset_reason, ··· 1841 1835 * interrupt handler only supports 32 1842 1836 * channels */ 1843 1837 .timer_period_max = 1 << FRF_AB_TC_TIMER_VAL_WIDTH, 1844 - .tx_dc_base = 0x130000, 1845 - .rx_dc_base = 0x100000, 1846 1838 .offload_features = NETIF_F_IP_CSUM | NETIF_F_RXHASH | NETIF_F_NTUPLE, 1847 1839 }; 1848 1840
+223 -32
drivers/net/ethernet/sfc/filter.c
··· 35 35 enum efx_filter_table_id { 36 36 EFX_FILTER_TABLE_RX_IP = 0, 37 37 EFX_FILTER_TABLE_RX_MAC, 38 + EFX_FILTER_TABLE_RX_DEF, 39 + EFX_FILTER_TABLE_TX_MAC, 38 40 EFX_FILTER_TABLE_COUNT, 41 + }; 42 + 43 + enum efx_filter_index { 44 + EFX_FILTER_INDEX_UC_DEF, 45 + EFX_FILTER_INDEX_MC_DEF, 46 + EFX_FILTER_SIZE_RX_DEF, 39 47 }; 40 48 41 49 struct efx_filter_table { ··· 98 90 BUILD_BUG_ON(EFX_FILTER_TABLE_RX_IP != (EFX_FILTER_UDP_WILD >> 2)); 99 91 BUILD_BUG_ON(EFX_FILTER_TABLE_RX_MAC != (EFX_FILTER_MAC_FULL >> 2)); 100 92 BUILD_BUG_ON(EFX_FILTER_TABLE_RX_MAC != (EFX_FILTER_MAC_WILD >> 2)); 93 + BUILD_BUG_ON(EFX_FILTER_TABLE_TX_MAC != EFX_FILTER_TABLE_RX_MAC + 2); 101 94 EFX_BUG_ON_PARANOID(spec->type == EFX_FILTER_UNSPEC); 102 - return spec->type >> 2; 95 + return (spec->type >> 2) + ((spec->flags & EFX_FILTER_FLAG_TX) ? 2 : 0); 103 96 } 104 97 105 98 static struct efx_filter_table * ··· 118 109 memset(table->search_depth, 0, sizeof(table->search_depth)); 119 110 } 120 111 121 - static void efx_filter_push_rx_limits(struct efx_nic *efx) 112 + static void efx_filter_push_rx_config(struct efx_nic *efx) 122 113 { 123 114 struct efx_filter_state *state = efx->filter_state; 124 115 struct efx_filter_table *table; ··· 152 143 FILTER_CTL_SRCH_FUDGE_WILD); 153 144 } 154 145 146 + table = &state->table[EFX_FILTER_TABLE_RX_DEF]; 147 + if (table->size) { 148 + EFX_SET_OWORD_FIELD( 149 + filter_ctl, FRF_CZ_UNICAST_NOMATCH_Q_ID, 150 + table->spec[EFX_FILTER_INDEX_UC_DEF].dmaq_id); 151 + EFX_SET_OWORD_FIELD( 152 + filter_ctl, FRF_CZ_UNICAST_NOMATCH_RSS_ENABLED, 153 + !!(table->spec[EFX_FILTER_INDEX_UC_DEF].flags & 154 + EFX_FILTER_FLAG_RX_RSS)); 155 + EFX_SET_OWORD_FIELD( 156 + filter_ctl, FRF_CZ_UNICAST_NOMATCH_IP_OVERRIDE, 157 + !!(table->spec[EFX_FILTER_INDEX_UC_DEF].flags & 158 + EFX_FILTER_FLAG_RX_OVERRIDE_IP)); 159 + EFX_SET_OWORD_FIELD( 160 + filter_ctl, FRF_CZ_MULTICAST_NOMATCH_Q_ID, 161 + table->spec[EFX_FILTER_INDEX_MC_DEF].dmaq_id); 162 + EFX_SET_OWORD_FIELD( 163 + 
filter_ctl, FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED, 164 + !!(table->spec[EFX_FILTER_INDEX_MC_DEF].flags & 165 + EFX_FILTER_FLAG_RX_RSS)); 166 + EFX_SET_OWORD_FIELD( 167 + filter_ctl, FRF_CZ_MULTICAST_NOMATCH_IP_OVERRIDE, 168 + !!(table->spec[EFX_FILTER_INDEX_MC_DEF].flags & 169 + EFX_FILTER_FLAG_RX_OVERRIDE_IP)); 170 + } 171 + 155 172 efx_writeo(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL); 173 + } 174 + 175 + static void efx_filter_push_tx_limits(struct efx_nic *efx) 176 + { 177 + struct efx_filter_state *state = efx->filter_state; 178 + struct efx_filter_table *table; 179 + efx_oword_t tx_cfg; 180 + 181 + efx_reado(efx, &tx_cfg, FR_AZ_TX_CFG); 182 + 183 + table = &state->table[EFX_FILTER_TABLE_TX_MAC]; 184 + if (table->size) { 185 + EFX_SET_OWORD_FIELD( 186 + tx_cfg, FRF_CZ_TX_ETH_FILTER_FULL_SEARCH_RANGE, 187 + table->search_depth[EFX_FILTER_MAC_FULL] + 188 + FILTER_CTL_SRCH_FUDGE_FULL); 189 + EFX_SET_OWORD_FIELD( 190 + tx_cfg, FRF_CZ_TX_ETH_FILTER_WILD_SEARCH_RANGE, 191 + table->search_depth[EFX_FILTER_MAC_WILD] + 192 + FILTER_CTL_SRCH_FUDGE_WILD); 193 + } 194 + 195 + efx_writeo(efx, &tx_cfg, FR_AZ_TX_CFG); 156 196 } 157 197 158 198 static inline void __efx_filter_set_ipv4(struct efx_filter_spec *spec, ··· 358 300 int efx_filter_set_eth_local(struct efx_filter_spec *spec, 359 301 u16 vid, const u8 *addr) 360 302 { 361 - EFX_BUG_ON_PARANOID(!(spec->flags & EFX_FILTER_FLAG_RX)); 303 + EFX_BUG_ON_PARANOID(!(spec->flags & 304 + (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))); 362 305 363 306 /* This cannot currently be combined with other filtering */ 364 307 if (spec->type != EFX_FILTER_UNSPEC) ··· 376 317 spec->data[1] = addr[2] << 24 | addr[3] << 16 | addr[4] << 8 | addr[5]; 377 318 spec->data[2] = addr[0] << 8 | addr[1]; 378 319 return 0; 320 + } 321 + 322 + /** 323 + * efx_filter_set_uc_def - specify matching otherwise-unmatched unicast 324 + * @spec: Specification to initialise 325 + */ 326 + int efx_filter_set_uc_def(struct efx_filter_spec *spec) 327 + { 328 + 
EFX_BUG_ON_PARANOID(!(spec->flags & 329 + (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))); 330 + 331 + if (spec->type != EFX_FILTER_UNSPEC) 332 + return -EINVAL; 333 + 334 + spec->type = EFX_FILTER_UC_DEF; 335 + memset(spec->data, 0, sizeof(spec->data)); /* ensure equality */ 336 + return 0; 337 + } 338 + 339 + /** 340 + * efx_filter_set_mc_def - specify matching otherwise-unmatched multicast 341 + * @spec: Specification to initialise 342 + */ 343 + int efx_filter_set_mc_def(struct efx_filter_spec *spec) 344 + { 345 + EFX_BUG_ON_PARANOID(!(spec->flags & 346 + (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))); 347 + 348 + if (spec->type != EFX_FILTER_UNSPEC) 349 + return -EINVAL; 350 + 351 + spec->type = EFX_FILTER_MC_DEF; 352 + memset(spec->data, 0, sizeof(spec->data)); /* ensure equality */ 353 + return 0; 354 + } 355 + 356 + static void efx_filter_reset_rx_def(struct efx_nic *efx, unsigned filter_idx) 357 + { 358 + struct efx_filter_state *state = efx->filter_state; 359 + struct efx_filter_table *table = &state->table[EFX_FILTER_TABLE_RX_DEF]; 360 + struct efx_filter_spec *spec = &table->spec[filter_idx]; 361 + 362 + efx_filter_init_rx(spec, EFX_FILTER_PRI_MANUAL, 363 + EFX_FILTER_FLAG_RX_RSS, 0); 364 + spec->type = EFX_FILTER_UC_DEF + filter_idx; 365 + table->used_bitmap[0] |= 1 << filter_idx; 379 366 } 380 367 381 368 int efx_filter_get_eth_local(const struct efx_filter_spec *spec, ··· 471 366 break; 472 367 } 473 368 369 + case EFX_FILTER_TABLE_RX_DEF: 370 + /* One filter spec per type */ 371 + BUILD_BUG_ON(EFX_FILTER_INDEX_UC_DEF != 0); 372 + BUILD_BUG_ON(EFX_FILTER_INDEX_MC_DEF != 373 + EFX_FILTER_MC_DEF - EFX_FILTER_UC_DEF); 374 + return spec->type - EFX_FILTER_UC_DEF; 375 + 474 376 case EFX_FILTER_TABLE_RX_MAC: { 475 377 bool is_wild = spec->type == EFX_FILTER_MAC_WILD; 476 378 EFX_POPULATE_OWORD_8( ··· 497 385 break; 498 386 } 499 387 388 + case EFX_FILTER_TABLE_TX_MAC: { 389 + bool is_wild = spec->type == EFX_FILTER_MAC_WILD; 390 + EFX_POPULATE_OWORD_5(*filter, 
391 + FRF_CZ_TMFT_TXQ_ID, spec->dmaq_id, 392 + FRF_CZ_TMFT_WILDCARD_MATCH, is_wild, 393 + FRF_CZ_TMFT_SRC_MAC_HI, spec->data[2], 394 + FRF_CZ_TMFT_SRC_MAC_LO, spec->data[1], 395 + FRF_CZ_TMFT_VLAN_ID, spec->data[0]); 396 + data3 = is_wild | spec->dmaq_id << 1; 397 + break; 398 + } 399 + 500 400 default: 501 401 BUG(); 502 402 } ··· 521 397 { 522 398 if (left->type != right->type || 523 399 memcmp(left->data, right->data, sizeof(left->data))) 400 + return false; 401 + 402 + if (left->flags & EFX_FILTER_FLAG_TX && 403 + left->dmaq_id != right->dmaq_id) 524 404 return false; 525 405 526 406 return true; ··· 576 448 * MAC filters without overriding behaviour. 577 449 */ 578 450 451 + #define EFX_FILTER_MATCH_PRI_RX_MAC_OVERRIDE_IP 0 452 + #define EFX_FILTER_MATCH_PRI_RX_DEF_OVERRIDE_IP 1 453 + #define EFX_FILTER_MATCH_PRI_NORMAL_BASE 2 454 + 579 455 #define EFX_FILTER_INDEX_WIDTH 13 580 456 #define EFX_FILTER_INDEX_MASK ((1 << EFX_FILTER_INDEX_WIDTH) - 1) 581 457 582 458 static inline u32 efx_filter_make_id(enum efx_filter_table_id table_id, 583 459 unsigned int index, u8 flags) 584 460 { 585 - return (table_id == EFX_FILTER_TABLE_RX_MAC && 586 - flags & EFX_FILTER_FLAG_RX_OVERRIDE_IP) ? 587 - index : 588 - (table_id + 1) << EFX_FILTER_INDEX_WIDTH | index; 461 + unsigned int match_pri = EFX_FILTER_MATCH_PRI_NORMAL_BASE + table_id; 462 + 463 + if (flags & EFX_FILTER_FLAG_RX_OVERRIDE_IP) { 464 + if (table_id == EFX_FILTER_TABLE_RX_MAC) 465 + match_pri = EFX_FILTER_MATCH_PRI_RX_MAC_OVERRIDE_IP; 466 + else if (table_id == EFX_FILTER_TABLE_RX_DEF) 467 + match_pri = EFX_FILTER_MATCH_PRI_RX_DEF_OVERRIDE_IP; 468 + } 469 + 470 + return match_pri << EFX_FILTER_INDEX_WIDTH | index; 589 471 } 590 472 591 473 static inline enum efx_filter_table_id efx_filter_id_table_id(u32 id) 592 474 { 593 - return (id <= EFX_FILTER_INDEX_MASK) ? 
594 - EFX_FILTER_TABLE_RX_MAC : 595 - (id >> EFX_FILTER_INDEX_WIDTH) - 1; 475 + unsigned int match_pri = id >> EFX_FILTER_INDEX_WIDTH; 476 + 477 + switch (match_pri) { 478 + case EFX_FILTER_MATCH_PRI_RX_MAC_OVERRIDE_IP: 479 + return EFX_FILTER_TABLE_RX_MAC; 480 + case EFX_FILTER_MATCH_PRI_RX_DEF_OVERRIDE_IP: 481 + return EFX_FILTER_TABLE_RX_DEF; 482 + default: 483 + return match_pri - EFX_FILTER_MATCH_PRI_NORMAL_BASE; 484 + } 596 485 } 597 486 598 487 static inline unsigned int efx_filter_id_index(u32 id) ··· 619 474 620 475 static inline u8 efx_filter_id_flags(u32 id) 621 476 { 622 - return (id <= EFX_FILTER_INDEX_MASK) ? 623 - EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_RX_OVERRIDE_IP : 624 - EFX_FILTER_FLAG_RX; 477 + unsigned int match_pri = id >> EFX_FILTER_INDEX_WIDTH; 478 + 479 + if (match_pri < EFX_FILTER_MATCH_PRI_NORMAL_BASE) 480 + return EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_RX_OVERRIDE_IP; 481 + else if (match_pri <= 482 + EFX_FILTER_MATCH_PRI_NORMAL_BASE + EFX_FILTER_TABLE_RX_DEF) 483 + return EFX_FILTER_FLAG_RX; 484 + else 485 + return EFX_FILTER_FLAG_TX; 625 486 } 626 487 627 488 u32 efx_filter_get_rx_id_limit(struct efx_nic *efx) 628 489 { 629 490 struct efx_filter_state *state = efx->filter_state; 491 + unsigned int table_id = EFX_FILTER_TABLE_RX_DEF; 630 492 631 - if (state->table[EFX_FILTER_TABLE_RX_MAC].size != 0) 632 - return ((EFX_FILTER_TABLE_RX_MAC + 1) << EFX_FILTER_INDEX_WIDTH) 633 - + state->table[EFX_FILTER_TABLE_RX_MAC].size; 634 - else if (state->table[EFX_FILTER_TABLE_RX_IP].size != 0) 635 - return ((EFX_FILTER_TABLE_RX_IP + 1) << EFX_FILTER_INDEX_WIDTH) 636 - + state->table[EFX_FILTER_TABLE_RX_IP].size; 637 - else 638 - return 0; 493 + do { 494 + if (state->table[table_id].size != 0) 495 + return ((EFX_FILTER_MATCH_PRI_NORMAL_BASE + table_id) 496 + << EFX_FILTER_INDEX_WIDTH) + 497 + state->table[table_id].size; 498 + } while (table_id--); 499 + 500 + return 0; 639 501 } 640 502 641 503 /** ··· 700 548 } 701 549 *saved_spec = *spec; 702 550 
703 - if (table->search_depth[spec->type] < depth) { 704 - table->search_depth[spec->type] = depth; 705 - efx_filter_push_rx_limits(efx); 706 - } 551 + if (table->id == EFX_FILTER_TABLE_RX_DEF) { 552 + efx_filter_push_rx_config(efx); 553 + } else { 554 + if (table->search_depth[spec->type] < depth) { 555 + table->search_depth[spec->type] = depth; 556 + if (spec->flags & EFX_FILTER_FLAG_TX) 557 + efx_filter_push_tx_limits(efx); 558 + else 559 + efx_filter_push_rx_config(efx); 560 + } 707 561 708 - efx_writeo(efx, &filter, table->offset + table->step * filter_idx); 562 + efx_writeo(efx, &filter, 563 + table->offset + table->step * filter_idx); 564 + } 709 565 710 566 netif_vdbg(efx, hw, efx->net_dev, 711 567 "%s: filter type %d index %d rxq %u set", ··· 731 571 { 732 572 static efx_oword_t filter; 733 573 734 - if (test_bit(filter_idx, table->used_bitmap)) { 574 + if (table->id == EFX_FILTER_TABLE_RX_DEF) { 575 + /* RX default filters must always exist */ 576 + efx_filter_reset_rx_def(efx, filter_idx); 577 + efx_filter_push_rx_config(efx); 578 + } else if (test_bit(filter_idx, table->used_bitmap)) { 735 579 __clear_bit(filter_idx, table->used_bitmap); 736 580 --table->used; 737 581 memset(&table->spec[filter_idx], 0, sizeof(table->spec[0])); ··· 781 617 spin_lock_bh(&state->lock); 782 618 783 619 if (test_bit(filter_idx, table->used_bitmap) && 784 - spec->priority == priority && spec->flags == filter_flags) { 620 + spec->priority == priority && 621 + !((spec->flags ^ filter_flags) & EFX_FILTER_FLAG_RX_OVERRIDE_IP)) { 785 622 efx_filter_table_clear_entry(efx, table, filter_idx); 786 623 if (table->used == 0) 787 624 efx_filter_table_reset_search_depth(table); ··· 833 668 spin_lock_bh(&state->lock); 834 669 835 670 if (test_bit(filter_idx, table->used_bitmap) && 836 - spec->priority == priority && spec->flags == filter_flags) { 671 + spec->priority == priority && 672 + !((spec->flags ^ filter_flags) & EFX_FILTER_FLAG_RX_OVERRIDE_IP)) { 837 673 *spec_buf = *spec; 838 
674 rc = 0; 839 675 } else { ··· 888 722 spin_lock_bh(&state->lock); 889 723 890 724 for (table_id = EFX_FILTER_TABLE_RX_IP; 891 - table_id <= EFX_FILTER_TABLE_RX_MAC; 725 + table_id <= EFX_FILTER_TABLE_RX_DEF; 892 726 table_id++) { 893 727 table = &state->table[table_id]; 894 728 for (filter_idx = 0; filter_idx < table->size; filter_idx++) { ··· 916 750 spin_lock_bh(&state->lock); 917 751 918 752 for (table_id = EFX_FILTER_TABLE_RX_IP; 919 - table_id <= EFX_FILTER_TABLE_RX_MAC; 753 + table_id <= EFX_FILTER_TABLE_RX_DEF; 920 754 table_id++) { 921 755 table = &state->table[table_id]; 922 756 for (filter_idx = 0; filter_idx < table->size; filter_idx++) { ··· 951 785 952 786 for (table_id = 0; table_id < EFX_FILTER_TABLE_COUNT; table_id++) { 953 787 table = &state->table[table_id]; 788 + 789 + /* Check whether this is a regular register table */ 790 + if (table->step == 0) 791 + continue; 792 + 954 793 for (filter_idx = 0; filter_idx < table->size; filter_idx++) { 955 794 if (!test_bit(filter_idx, table->used_bitmap)) 956 795 continue; ··· 965 794 } 966 795 } 967 796 968 - efx_filter_push_rx_limits(efx); 797 + efx_filter_push_rx_config(efx); 798 + efx_filter_push_tx_limits(efx); 969 799 970 800 spin_unlock_bh(&state->lock); 971 801 } ··· 1005 833 table->offset = FR_CZ_RX_MAC_FILTER_TBL0; 1006 834 table->size = FR_CZ_RX_MAC_FILTER_TBL0_ROWS; 1007 835 table->step = FR_CZ_RX_MAC_FILTER_TBL0_STEP; 836 + 837 + table = &state->table[EFX_FILTER_TABLE_RX_DEF]; 838 + table->id = EFX_FILTER_TABLE_RX_DEF; 839 + table->size = EFX_FILTER_SIZE_RX_DEF; 840 + 841 + table = &state->table[EFX_FILTER_TABLE_TX_MAC]; 842 + table->id = EFX_FILTER_TABLE_TX_MAC; 843 + table->offset = FR_CZ_TX_MAC_FILTER_TBL0; 844 + table->size = FR_CZ_TX_MAC_FILTER_TBL0_ROWS; 845 + table->step = FR_CZ_TX_MAC_FILTER_TBL0_STEP; 1008 846 } 1009 847 1010 848 for (table_id = 0; table_id < EFX_FILTER_TABLE_COUNT; table_id++) { ··· 1030 848 if (!table->spec) 1031 849 goto fail; 1032 850 } 851 + 852 + if 
(state->table[EFX_FILTER_TABLE_RX_DEF].size) { 853 + /* RX default filters must always exist */ 854 + unsigned i; 855 + for (i = 0; i < EFX_FILTER_SIZE_RX_DEF; i++) 856 + efx_filter_reset_rx_def(efx, i); 857 + } 858 + 859 + efx_filter_push_rx_config(efx); 1033 860 1034 861 return 0; 1035 862
+19 -1
drivers/net/ethernet/sfc/filter.h
··· 20 20 * @EFX_FILTER_UDP_WILD: Matching UDP/IPv4 destination (host, port) 21 21 * @EFX_FILTER_MAC_FULL: Matching Ethernet destination MAC address, VID 22 22 * @EFX_FILTER_MAC_WILD: Matching Ethernet destination MAC address 23 + * @EFX_FILTER_UC_DEF: Matching all otherwise unmatched unicast 24 + * @EFX_FILTER_MC_DEF: Matching all otherwise unmatched multicast 23 25 * @EFX_FILTER_UNSPEC: Match type is unspecified 24 26 * 25 27 * Falcon NICs only support the TCP/IPv4 and UDP/IPv4 filter types. ··· 33 31 EFX_FILTER_UDP_WILD, 34 32 EFX_FILTER_MAC_FULL = 4, 35 33 EFX_FILTER_MAC_WILD, 34 + EFX_FILTER_UC_DEF = 8, 35 + EFX_FILTER_MC_DEF, 36 36 EFX_FILTER_TYPE_COUNT, /* number of specific types */ 37 37 EFX_FILTER_UNSPEC = 0xf, 38 38 }; ··· 43 39 * enum efx_filter_priority - priority of a hardware filter specification 44 40 * @EFX_FILTER_PRI_HINT: Performance hint 45 41 * @EFX_FILTER_PRI_MANUAL: Manually configured filter 46 - * @EFX_FILTER_PRI_REQUIRED: Required for correct behaviour 42 + * @EFX_FILTER_PRI_REQUIRED: Required for correct behaviour (user-level 43 + * networking and SR-IOV) 47 44 */ 48 45 enum efx_filter_priority { 49 46 EFX_FILTER_PRI_HINT = 0, ··· 65 60 * any IP filter that matches the same packet. By default, IP 66 61 * filters take precedence. 
67 62 * @EFX_FILTER_FLAG_RX: Filter is for RX 63 + * @EFX_FILTER_FLAG_TX: Filter is for TX 68 64 */ 69 65 enum efx_filter_flags { 70 66 EFX_FILTER_FLAG_RX_RSS = 0x01, 71 67 EFX_FILTER_FLAG_RX_SCATTER = 0x02, 72 68 EFX_FILTER_FLAG_RX_OVERRIDE_IP = 0x04, 73 69 EFX_FILTER_FLAG_RX = 0x08, 70 + EFX_FILTER_FLAG_TX = 0x10, 74 71 }; 75 72 76 73 /** ··· 110 103 spec->dmaq_id = rxq_id; 111 104 } 112 105 106 + static inline void efx_filter_init_tx(struct efx_filter_spec *spec, 107 + unsigned txq_id) 108 + { 109 + spec->type = EFX_FILTER_UNSPEC; 110 + spec->priority = EFX_FILTER_PRI_REQUIRED; 111 + spec->flags = EFX_FILTER_FLAG_TX; 112 + spec->dmaq_id = txq_id; 113 + } 114 + 113 115 extern int efx_filter_set_ipv4_local(struct efx_filter_spec *spec, u8 proto, 114 116 __be32 host, __be16 port); 115 117 extern int efx_filter_get_ipv4_local(const struct efx_filter_spec *spec, ··· 133 117 u16 vid, const u8 *addr); 134 118 extern int efx_filter_get_eth_local(const struct efx_filter_spec *spec, 135 119 u16 *vid, u8 *addr); 120 + extern int efx_filter_set_uc_def(struct efx_filter_spec *spec); 121 + extern int efx_filter_set_mc_def(struct efx_filter_spec *spec); 136 122 enum { 137 123 EFX_FILTER_VID_UNSPEC = 0xffff, 138 124 };
+34
drivers/net/ethernet/sfc/mcdi.c
··· 560 560 case MCDI_EVENT_CODE_MAC_STATS_DMA: 561 561 /* MAC stats are gather lazily. We can ignore this. */ 562 562 break; 563 + case MCDI_EVENT_CODE_FLR: 564 + efx_sriov_flr(efx, MCDI_EVENT_FIELD(*event, FLR_VF)); 565 + break; 563 566 564 567 default: 565 568 netif_err(efx, hw, efx->net_dev, "Unknown MCDI event 0x%x\n", ··· 1157 1154 return rc; 1158 1155 } 1159 1156 1157 + int efx_mcdi_flush_rxqs(struct efx_nic *efx) 1158 + { 1159 + struct efx_channel *channel; 1160 + struct efx_rx_queue *rx_queue; 1161 + __le32 *qid; 1162 + int rc, count; 1163 + 1164 + qid = kmalloc(EFX_MAX_CHANNELS * sizeof(*qid), GFP_KERNEL); 1165 + if (qid == NULL) 1166 + return -ENOMEM; 1167 + 1168 + count = 0; 1169 + efx_for_each_channel(channel, efx) { 1170 + efx_for_each_channel_rx_queue(rx_queue, channel) { 1171 + if (rx_queue->flush_pending) { 1172 + rx_queue->flush_pending = false; 1173 + atomic_dec(&efx->rxq_flush_pending); 1174 + qid[count++] = cpu_to_le32( 1175 + efx_rx_queue_index(rx_queue)); 1176 + } 1177 + } 1178 + } 1179 + 1180 + rc = efx_mcdi_rpc(efx, MC_CMD_FLUSH_RX_QUEUES, (u8 *)qid, 1181 + count * sizeof(*qid), NULL, 0, NULL); 1182 + WARN_ON(rc > 0); 1183 + 1184 + kfree(qid); 1185 + 1186 + return rc; 1187 + } 1160 1188 1161 1189 int efx_mcdi_wol_filter_reset(struct efx_nic *efx) 1162 1190 {
+2
drivers/net/ethernet/sfc/mcdi.h
··· 146 146 extern int efx_mcdi_wol_filter_get_magic(struct efx_nic *efx, int *id_out); 147 147 extern int efx_mcdi_wol_filter_remove(struct efx_nic *efx, int id); 148 148 extern int efx_mcdi_wol_filter_reset(struct efx_nic *efx); 149 + extern int efx_mcdi_flush_rxqs(struct efx_nic *efx); 150 + extern int efx_mcdi_set_mac(struct efx_nic *efx); 149 151 extern int efx_mcdi_mac_stats(struct efx_nic *efx, dma_addr_t dma_addr, 150 152 u32 dma_len, int enable, int clear); 151 153 extern int efx_mcdi_mac_reconfigure(struct efx_nic *efx);
+3 -1
drivers/net/ethernet/sfc/mcdi_mac.c
··· 12 12 #include "mcdi.h" 13 13 #include "mcdi_pcol.h" 14 14 15 - static int efx_mcdi_set_mac(struct efx_nic *efx) 15 + int efx_mcdi_set_mac(struct efx_nic *efx) 16 16 { 17 17 u32 reject, fcntl; 18 18 u8 cmdbytes[MC_CMD_SET_MAC_IN_LEN]; ··· 44 44 } 45 45 if (efx->wanted_fc & EFX_FC_AUTO) 46 46 fcntl = MC_CMD_FCNTL_AUTO; 47 + if (efx->fc_disable) 48 + fcntl = MC_CMD_FCNTL_OFF; 47 49 48 50 MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_FCNTL, fcntl); 49 51
+1 -1
drivers/net/ethernet/sfc/mtd.c
··· 280 280 --part; 281 281 efx_mtd_remove_partition(part); 282 282 } 283 - /* mtd_device_register() returns 1 if the MTD table is full */ 283 + /* Failure is unlikely here, but probably means we're out of memory */ 284 284 return -ENOMEM; 285 285 } 286 286
+97 -26
drivers/net/ethernet/sfc/net_driver.h
··· 24 24 #include <linux/device.h> 25 25 #include <linux/highmem.h> 26 26 #include <linux/workqueue.h> 27 + #include <linux/mutex.h> 27 28 #include <linux/vmalloc.h> 28 29 #include <linux/i2c.h> 29 30 ··· 53 52 * 54 53 **************************************************************************/ 55 54 56 - #define EFX_MAX_CHANNELS 32 55 + #define EFX_MAX_CHANNELS 32U 57 56 #define EFX_MAX_RX_QUEUES EFX_MAX_CHANNELS 57 + #define EFX_EXTRA_CHANNEL_IOV 0 58 + #define EFX_MAX_EXTRA_CHANNELS 1U 58 59 59 60 /* Checksum generation is a per-queue option in hardware, so each 60 61 * queue visible to the networking core is backed by two hardware TX ··· 84 81 void *addr; 85 82 dma_addr_t dma_addr; 86 83 unsigned int len; 87 - int index; 88 - int entries; 89 - }; 90 - 91 - enum efx_flush_state { 92 - FLUSH_NONE, 93 - FLUSH_PENDING, 94 - FLUSH_FAILED, 95 - FLUSH_DONE, 84 + unsigned int index; 85 + unsigned int entries; 96 86 }; 97 87 98 88 /** ··· 134 138 * @txd: The hardware descriptor ring 135 139 * @ptr_mask: The size of the ring minus 1. 136 140 * @initialised: Has hardware queue been initialised? 137 - * @flushed: Used when handling queue flushing 138 141 * @read_count: Current read pointer. 139 142 * This is the number of buffers that have been removed from both rings. 140 143 * @old_write_count: The value of @write_count when last checked. ··· 176 181 struct efx_special_buffer txd; 177 182 unsigned int ptr_mask; 178 183 bool initialised; 179 - enum efx_flush_state flushed; 180 184 181 185 /* Members used mainly on the completion path */ 182 186 unsigned int read_count ____cacheline_aligned_in_smp; ··· 243 249 * @buffer: The software buffer ring 244 250 * @rxd: The hardware descriptor ring 245 251 * @ptr_mask: The size of the ring minus 1. 252 + * @enabled: Receive queue enabled indicator. 253 + * @flush_pending: Set when a RX flush is pending. Has the same lifetime as 254 + * @rxq_flush_pending. 246 255 * @added_count: Number of buffers added to the receive queue. 
247 256 * @notified_count: Number of buffers given to NIC (<= @added_count). 248 257 * @removed_count: Number of buffers removed from the receive queue. ··· 260 263 * @alloc_page_count: RX allocation strategy counter. 261 264 * @alloc_skb_count: RX allocation strategy counter. 262 265 * @slow_fill: Timer used to defer efx_nic_generate_fill_event(). 263 - * @flushed: Use when handling queue flushing 264 266 */ 265 267 struct efx_rx_queue { 266 268 struct efx_nic *efx; 267 269 struct efx_rx_buffer *buffer; 268 270 struct efx_special_buffer rxd; 269 271 unsigned int ptr_mask; 272 + bool enabled; 273 + bool flush_pending; 270 274 271 275 int added_count; 272 276 int notified_count; ··· 281 283 unsigned int alloc_skb_count; 282 284 struct timer_list slow_fill; 283 285 unsigned int slow_fill_count; 284 - 285 - enum efx_flush_state flushed; 286 286 }; 287 287 288 288 /** ··· 314 318 * 315 319 * @efx: Associated Efx NIC 316 320 * @channel: Channel instance number 321 + * @type: Channel type definition 317 322 * @enabled: Channel enabled indicator 318 323 * @irq: IRQ number (MSI and MSI-X only) 319 324 * @irq_moderation: IRQ moderation value (in hardware ticks) ··· 345 348 struct efx_channel { 346 349 struct efx_nic *efx; 347 350 int channel; 351 + const struct efx_channel_type *type; 348 352 bool enabled; 349 353 int irq; 350 354 unsigned int irq_moderation; ··· 382 384 383 385 struct efx_rx_queue rx_queue; 384 386 struct efx_tx_queue tx_queue[EFX_TXQ_TYPES]; 387 + }; 388 + 389 + /** 390 + * struct efx_channel_type - distinguishes traffic and extra channels 391 + * @handle_no_channel: Handle failure to allocate an extra channel 392 + * @pre_probe: Set up extra state prior to initialisation 393 + * @post_remove: Tear down extra state after finalisation, if allocated. 394 + * May be called on channels that have not been probed. 395 + * @get_name: Generate the channel's name (used for its IRQ handler) 396 + * @copy: Copy the channel state prior to reallocation. 
May be %NULL if 397 + * reallocation is not supported. 398 + * @keep_eventq: Flag for whether event queue should be kept initialised 399 + * while the device is stopped 400 + */ 401 + struct efx_channel_type { 402 + void (*handle_no_channel)(struct efx_nic *); 403 + int (*pre_probe)(struct efx_channel *); 404 + void (*get_name)(struct efx_channel *, char *buf, size_t len); 405 + struct efx_channel *(*copy)(const struct efx_channel *); 406 + bool keep_eventq; 385 407 }; 386 408 387 409 enum efx_led_mode { ··· 631 613 }; 632 614 633 615 struct efx_filter_state; 616 + struct efx_vf; 617 + struct vfdi_status; 634 618 635 619 /** 636 620 * struct efx_nic - an Efx NIC ··· 658 638 * @rx_queue: RX DMA queues 659 639 * @channel: Channels 660 640 * @channel_name: Names for channels and their IRQs 641 + * @extra_channel_types: Types of extra (non-traffic) channels that 642 + * should be allocated for this NIC 661 643 * @rxq_entries: Size of receive queues requested by user. 662 644 * @txq_entries: Size of transmit queues requested by user. 645 + * @tx_dc_base: Base qword address in SRAM of TX queue descriptor caches 646 + * @rx_dc_base: Base qword address in SRAM of RX queue descriptor caches 647 + * @sram_lim_qw: Qword address limit of SRAM 663 648 * @next_buffer_table: First available buffer table id 664 649 * @n_channels: Number of channels in use 665 650 * @n_rx_channels: Number of channels used for RX (= number of RX queues) ··· 702 677 * @promiscuous: Promiscuous flag. Protected by netif_tx_lock. 703 678 * @multicast_hash: Multicast hash table 704 679 * @wanted_fc: Wanted flow control flags 680 + * @fc_disable: When non-zero flow control is disabled. Typically used to 681 + * ensure that network back pressure doesn't delay dma queue flushes. 682 + * Serialised by the rtnl lock. 
705 683 * @mac_work: Work item for changing MAC promiscuity and multicast hash 706 684 * @loopback_mode: Loopback status 707 685 * @loopback_modes: Supported loopback mode bitmask 708 686 * @loopback_selftest: Offline self-test private state 687 + * @drain_pending: Count of RX and TX queues that haven't been flushed and drained. 688 + * @rxq_flush_pending: Count of number of receive queues that need to be flushed. 689 + * Decremented when the efx_flush_rx_queue() is called. 690 + * @rxq_flush_outstanding: Count of number of RX flushes started but not yet 691 + * completed (either success or failure). Not used when MCDI is used to 692 + * flush receive queues. 693 + * @flush_wq: wait queue used by efx_nic_flush_queues() to wait for flush completions. 694 + * @vf: Array of &struct efx_vf objects. 695 + * @vf_count: Number of VFs intended to be enabled. 696 + * @vf_init_count: Number of VFs that have been fully initialised. 697 + * @vi_scale: log2 number of vnics per VF. 698 + * @vf_buftbl_base: The zeroth buffer table index used to back VF queues. 699 + * @vfdi_status: Common VFDI status page to be dmad to VF address space. 700 + * @local_addr_list: List of local addresses. Protected by %local_lock. 701 + * @local_page_list: List of DMA addressable pages used to broadcast 702 + * %local_addr_list. Protected by %local_lock. 703 + * @local_lock: Mutex protecting %local_addr_list and %local_page_list. 704 + * @peer_work: Work item to broadcast peer addresses to VMs. 709 705 * @monitor_work: Hardware monitor workitem 710 706 * @biu_lock: BIU (bus interface unit) lock 711 707 * @last_irq_cpu: Last CPU to handle a possible test interrupt. 
This ··· 766 720 767 721 struct efx_channel *channel[EFX_MAX_CHANNELS]; 768 722 char channel_name[EFX_MAX_CHANNELS][IFNAMSIZ + 6]; 723 + const struct efx_channel_type * 724 + extra_channel_type[EFX_MAX_EXTRA_CHANNELS]; 769 725 770 726 unsigned rxq_entries; 771 727 unsigned txq_entries; 728 + unsigned tx_dc_base; 729 + unsigned rx_dc_base; 730 + unsigned sram_lim_qw; 772 731 unsigned next_buffer_table; 773 732 unsigned n_channels; 774 733 unsigned n_rx_channels; 734 + unsigned rss_spread; 775 735 unsigned tx_channel_offset; 776 736 unsigned n_tx_channels; 777 737 unsigned int rx_buffer_len; ··· 821 769 bool promiscuous; 822 770 union efx_multicast_hash multicast_hash; 823 771 u8 wanted_fc; 772 + unsigned fc_disable; 824 773 825 774 atomic_t rx_reset; 826 775 enum efx_loopback_mode loopback_mode; ··· 830 777 void *loopback_selftest; 831 778 832 779 struct efx_filter_state *filter_state; 780 + 781 + atomic_t drain_pending; 782 + atomic_t rxq_flush_pending; 783 + atomic_t rxq_flush_outstanding; 784 + wait_queue_head_t flush_wq; 785 + 786 + #ifdef CONFIG_SFC_SRIOV 787 + struct efx_channel *vfdi_channel; 788 + struct efx_vf *vf; 789 + unsigned vf_count; 790 + unsigned vf_init_count; 791 + unsigned vi_scale; 792 + unsigned vf_buftbl_base; 793 + struct efx_buffer vfdi_status; 794 + struct list_head local_addr_list; 795 + struct list_head local_page_list; 796 + struct mutex local_lock; 797 + struct work_struct peer_work; 798 + #endif 833 799 834 800 /* The following fields may be written more often */ 835 801 ··· 875 803 * @probe: Probe the controller 876 804 * @remove: Free resources allocated by probe() 877 805 * @init: Initialise the controller 806 + * @dimension_resources: Dimension controller resources (buffer table, 807 + * and VIs once the available interrupt resources are clear) 878 808 * @fini: Shut down the controller 879 809 * @monitor: Periodic function for polling link state and hardware monitor 880 810 * @map_reset_reason: Map ethtool reset reason to a reset 
method ··· 916 842 * @phys_addr_channels: Number of channels with physically addressed 917 843 * descriptors 918 844 * @timer_period_max: Maximum period of interrupt timer (in ticks) 919 - * @tx_dc_base: Base address in SRAM of TX queue descriptor caches 920 - * @rx_dc_base: Base address in SRAM of RX queue descriptor caches 921 845 * @offload_features: net_device feature flags for protocol offload 922 846 * features implemented in hardware 923 847 */ ··· 923 851 int (*probe)(struct efx_nic *efx); 924 852 void (*remove)(struct efx_nic *efx); 925 853 int (*init)(struct efx_nic *efx); 854 + void (*dimension_resources)(struct efx_nic *efx); 926 855 void (*fini)(struct efx_nic *efx); 927 856 void (*monitor)(struct efx_nic *efx); 928 857 enum reset_type (*map_reset_reason)(enum reset_type reason); ··· 960 887 unsigned int max_interrupt_mode; 961 888 unsigned int phys_addr_channels; 962 889 unsigned int timer_period_max; 963 - unsigned int tx_dc_base; 964 - unsigned int rx_dc_base; 965 890 netdev_features_t offload_features; 966 891 }; 967 892 ··· 982 911 _channel; \ 983 912 _channel = (_channel->channel + 1 < (_efx)->n_channels) ? \ 984 913 (_efx)->channel[_channel->channel + 1] : NULL) 914 + 915 + /* Iterate over all used channels in reverse */ 916 + #define efx_for_each_channel_rev(_channel, _efx) \ 917 + for (_channel = (_efx)->channel[(_efx)->n_channels - 1]; \ 918 + _channel; \ 919 + _channel = _channel->channel ? 
\ 920 + (_efx)->channel[_channel->channel - 1] : NULL) 985 921 986 922 static inline struct efx_tx_queue * 987 923 efx_get_tx_queue(struct efx_nic *efx, unsigned index, unsigned type) ··· 1033 955 for (_tx_queue = (_channel)->tx_queue; \ 1034 956 _tx_queue < (_channel)->tx_queue + EFX_TXQ_TYPES; \ 1035 957 _tx_queue++) 1036 - 1037 - static inline struct efx_rx_queue * 1038 - efx_get_rx_queue(struct efx_nic *efx, unsigned index) 1039 - { 1040 - EFX_BUG_ON_PARANOID(index >= efx->n_rx_channels); 1041 - return &efx->channel[index]->rx_queue; 1042 - } 1043 958 1044 959 static inline bool efx_channel_has_rx_queue(struct efx_channel *channel) 1045 960 {
+308 -214
drivers/net/ethernet/sfc/nic.c
··· 49 49 #define EFX_INT_ERROR_EXPIRE 3600 50 50 #define EFX_MAX_INT_ERRORS 5 51 51 52 - /* We poll for events every FLUSH_INTERVAL ms, and check FLUSH_POLL_COUNT times 53 - */ 54 - #define EFX_FLUSH_INTERVAL 10 55 - #define EFX_FLUSH_POLL_COUNT 100 56 - 57 - /* Size and alignment of special buffers (4KB) */ 58 - #define EFX_BUF_SIZE 4096 59 - 60 52 /* Depth of RX flush request fifo */ 61 53 #define EFX_RX_FLUSH_COUNT 4 62 54 63 - /* Generated event code for efx_generate_test_event() */ 64 - #define EFX_CHANNEL_MAGIC_TEST(_channel) \ 65 - (0x00010100 + (_channel)->channel) 55 + /* Driver generated events */ 56 + #define _EFX_CHANNEL_MAGIC_TEST 0x000101 57 + #define _EFX_CHANNEL_MAGIC_FILL 0x000102 58 + #define _EFX_CHANNEL_MAGIC_RX_DRAIN 0x000103 59 + #define _EFX_CHANNEL_MAGIC_TX_DRAIN 0x000104 66 60 67 - /* Generated event code for efx_generate_fill_event() */ 68 - #define EFX_CHANNEL_MAGIC_FILL(_channel) \ 69 - (0x00010200 + (_channel)->channel) 61 + #define _EFX_CHANNEL_MAGIC(_code, _data) ((_code) << 8 | (_data)) 62 + #define _EFX_CHANNEL_MAGIC_CODE(_magic) ((_magic) >> 8) 63 + 64 + #define EFX_CHANNEL_MAGIC_TEST(_channel) \ 65 + _EFX_CHANNEL_MAGIC(_EFX_CHANNEL_MAGIC_TEST, (_channel)->channel) 66 + #define EFX_CHANNEL_MAGIC_FILL(_rx_queue) \ 67 + _EFX_CHANNEL_MAGIC(_EFX_CHANNEL_MAGIC_FILL, \ 68 + efx_rx_queue_index(_rx_queue)) 69 + #define EFX_CHANNEL_MAGIC_RX_DRAIN(_rx_queue) \ 70 + _EFX_CHANNEL_MAGIC(_EFX_CHANNEL_MAGIC_RX_DRAIN, \ 71 + efx_rx_queue_index(_rx_queue)) 72 + #define EFX_CHANNEL_MAGIC_TX_DRAIN(_tx_queue) \ 73 + _EFX_CHANNEL_MAGIC(_EFX_CHANNEL_MAGIC_TX_DRAIN, \ 74 + (_tx_queue)->queue) 70 75 71 76 /************************************************************************** 72 77 * ··· 192 187 efx_init_special_buffer(struct efx_nic *efx, struct efx_special_buffer *buffer) 193 188 { 194 189 efx_qword_t buf_desc; 195 - int index; 190 + unsigned int index; 196 191 dma_addr_t dma_addr; 197 192 int i; 198 193 ··· 201 196 /* Write buffer descriptors to 
NIC */ 202 197 for (i = 0; i < buffer->entries; i++) { 203 198 index = buffer->index + i; 204 - dma_addr = buffer->dma_addr + (i * 4096); 199 + dma_addr = buffer->dma_addr + (i * EFX_BUF_SIZE); 205 200 netif_dbg(efx, probe, efx->net_dev, 206 201 "mapping special buffer %d at %llx\n", 207 202 index, (unsigned long long)dma_addr); ··· 264 259 /* Select new buffer ID */ 265 260 buffer->index = efx->next_buffer_table; 266 261 efx->next_buffer_table += buffer->entries; 262 + #ifdef CONFIG_SFC_SRIOV 263 + BUG_ON(efx_sriov_enabled(efx) && 264 + efx->vf_buftbl_base < efx->next_buffer_table); 265 + #endif 267 266 268 267 netif_dbg(efx, probe, efx->net_dev, 269 268 "allocating special buffers %d-%d at %llx+%x " ··· 439 430 struct efx_nic *efx = tx_queue->efx; 440 431 efx_oword_t reg; 441 432 442 - tx_queue->flushed = FLUSH_NONE; 443 - 444 433 /* Pin TX descriptor ring */ 445 434 efx_init_special_buffer(efx, &tx_queue->txd); 446 435 ··· 495 488 struct efx_nic *efx = tx_queue->efx; 496 489 efx_oword_t tx_flush_descq; 497 490 498 - tx_queue->flushed = FLUSH_PENDING; 499 - 500 - /* Post a flush command */ 501 491 EFX_POPULATE_OWORD_2(tx_flush_descq, 502 492 FRF_AZ_TX_FLUSH_DESCQ_CMD, 1, 503 493 FRF_AZ_TX_FLUSH_DESCQ, tx_queue->queue); ··· 505 501 { 506 502 struct efx_nic *efx = tx_queue->efx; 507 503 efx_oword_t tx_desc_ptr; 508 - 509 - /* The queue should have been flushed */ 510 - WARN_ON(tx_queue->flushed != FLUSH_DONE); 511 504 512 505 /* Remove TX descriptor ring from card */ 513 506 EFX_ZERO_OWORD(tx_desc_ptr); ··· 596 595 efx_rx_queue_index(rx_queue), rx_queue->rxd.index, 597 596 rx_queue->rxd.index + rx_queue->rxd.entries - 1); 598 597 599 - rx_queue->flushed = FLUSH_NONE; 600 - 601 598 /* Pin RX descriptor ring */ 602 599 efx_init_special_buffer(efx, &rx_queue->rxd); 603 600 ··· 624 625 struct efx_nic *efx = rx_queue->efx; 625 626 efx_oword_t rx_flush_descq; 626 627 627 - rx_queue->flushed = FLUSH_PENDING; 628 - 629 - /* Post a flush command */ 630 628 
EFX_POPULATE_OWORD_2(rx_flush_descq, 631 629 FRF_AZ_RX_FLUSH_DESCQ_CMD, 1, 632 630 FRF_AZ_RX_FLUSH_DESCQ, ··· 635 639 { 636 640 efx_oword_t rx_desc_ptr; 637 641 struct efx_nic *efx = rx_queue->efx; 638 - 639 - /* The queue should already have been flushed */ 640 - WARN_ON(rx_queue->flushed != FLUSH_DONE); 641 642 642 643 /* Remove RX descriptor ring from card */ 643 644 EFX_ZERO_OWORD(rx_desc_ptr); ··· 649 656 void efx_nic_remove_rx(struct efx_rx_queue *rx_queue) 650 657 { 651 658 efx_free_special_buffer(rx_queue->efx, &rx_queue->rxd); 659 + } 660 + 661 + /************************************************************************** 662 + * 663 + * Flush handling 664 + * 665 + **************************************************************************/ 666 + 667 + /* efx_nic_flush_queues() must be woken up when all flushes are completed, 668 + * or more RX flushes can be kicked off. 669 + */ 670 + static bool efx_flush_wake(struct efx_nic *efx) 671 + { 672 + /* Ensure that all updates are visible to efx_nic_flush_queues() */ 673 + smp_mb(); 674 + 675 + return (atomic_read(&efx->drain_pending) == 0 || 676 + (atomic_read(&efx->rxq_flush_outstanding) < EFX_RX_FLUSH_COUNT 677 + && atomic_read(&efx->rxq_flush_pending) > 0)); 678 + } 679 + 680 + /* Flush all the transmit queues, and continue flushing receive queues until 681 + * they're all flushed. Wait for the DRAIN events to be recieved so that there 682 + * are no more RX and TX events left on any channel. 
*/ 683 + int efx_nic_flush_queues(struct efx_nic *efx) 684 + { 685 + unsigned timeout = msecs_to_jiffies(5000); /* 5s for all flushes and drains */ 686 + struct efx_channel *channel; 687 + struct efx_rx_queue *rx_queue; 688 + struct efx_tx_queue *tx_queue; 689 + int rc = 0; 690 + 691 + efx->fc_disable++; 692 + efx->type->prepare_flush(efx); 693 + 694 + efx_for_each_channel(channel, efx) { 695 + efx_for_each_channel_tx_queue(tx_queue, channel) { 696 + atomic_inc(&efx->drain_pending); 697 + efx_flush_tx_queue(tx_queue); 698 + } 699 + efx_for_each_channel_rx_queue(rx_queue, channel) { 700 + atomic_inc(&efx->drain_pending); 701 + rx_queue->flush_pending = true; 702 + atomic_inc(&efx->rxq_flush_pending); 703 + } 704 + } 705 + 706 + while (timeout && atomic_read(&efx->drain_pending) > 0) { 707 + /* If SRIOV is enabled, then offload receive queue flushing to 708 + * the firmware (though we will still have to poll for 709 + * completion). If that fails, fall back to the old scheme. 710 + */ 711 + if (efx_sriov_enabled(efx)) { 712 + rc = efx_mcdi_flush_rxqs(efx); 713 + if (!rc) 714 + goto wait; 715 + } 716 + 717 + /* The hardware supports four concurrent rx flushes, each of 718 + * which may need to be retried if there is an outstanding 719 + * descriptor fetch 720 + */ 721 + efx_for_each_channel(channel, efx) { 722 + efx_for_each_channel_rx_queue(rx_queue, channel) { 723 + if (atomic_read(&efx->rxq_flush_outstanding) >= 724 + EFX_RX_FLUSH_COUNT) 725 + break; 726 + 727 + if (rx_queue->flush_pending) { 728 + rx_queue->flush_pending = false; 729 + atomic_dec(&efx->rxq_flush_pending); 730 + atomic_inc(&efx->rxq_flush_outstanding); 731 + efx_flush_rx_queue(rx_queue); 732 + } 733 + } 734 + } 735 + 736 + wait: 737 + timeout = wait_event_timeout(efx->flush_wq, efx_flush_wake(efx), 738 + timeout); 739 + } 740 + 741 + if (atomic_read(&efx->drain_pending)) { 742 + netif_err(efx, hw, efx->net_dev, "failed to flush %d queues " 743 + "(rx %d+%d)\n", atomic_read(&efx->drain_pending), 744 
+ atomic_read(&efx->rxq_flush_outstanding), 745 + atomic_read(&efx->rxq_flush_pending)); 746 + rc = -ETIMEDOUT; 747 + 748 + atomic_set(&efx->drain_pending, 0); 749 + atomic_set(&efx->rxq_flush_pending, 0); 750 + atomic_set(&efx->rxq_flush_outstanding, 0); 751 + } 752 + 753 + efx->fc_disable--; 754 + 755 + return rc; 652 756 } 653 757 654 758 /************************************************************************** ··· 772 682 } 773 683 774 684 /* Use HW to insert a SW defined event */ 775 - static void efx_generate_event(struct efx_channel *channel, efx_qword_t *event) 685 + void efx_generate_event(struct efx_nic *efx, unsigned int evq, 686 + efx_qword_t *event) 776 687 { 777 688 efx_oword_t drv_ev_reg; 778 689 ··· 783 692 drv_ev_reg.u32[1] = event->u32[1]; 784 693 drv_ev_reg.u32[2] = 0; 785 694 drv_ev_reg.u32[3] = 0; 786 - EFX_SET_OWORD_FIELD(drv_ev_reg, FRF_AZ_DRV_EV_QID, channel->channel); 787 - efx_writeo(channel->efx, &drv_ev_reg, FR_AZ_DRV_EV); 695 + EFX_SET_OWORD_FIELD(drv_ev_reg, FRF_AZ_DRV_EV_QID, evq); 696 + efx_writeo(efx, &drv_ev_reg, FR_AZ_DRV_EV); 697 + } 698 + 699 + static void efx_magic_event(struct efx_channel *channel, u32 magic) 700 + { 701 + efx_qword_t event; 702 + 703 + EFX_POPULATE_QWORD_2(event, FSF_AZ_EV_CODE, 704 + FSE_AZ_EV_CODE_DRV_GEN_EV, 705 + FSF_AZ_DRV_GEN_EV_MAGIC, magic); 706 + efx_generate_event(channel->efx, channel->channel, &event); 788 707 } 789 708 790 709 /* Handle a transmit completion event ··· 810 709 struct efx_tx_queue *tx_queue; 811 710 struct efx_nic *efx = channel->efx; 812 711 int tx_packets = 0; 712 + 713 + if (unlikely(ACCESS_ONCE(efx->reset_pending))) 714 + return 0; 813 715 814 716 if (likely(EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_COMP))) { 815 717 /* Transmit completion */ ··· 955 851 bool rx_ev_pkt_ok; 956 852 u16 flags; 957 853 struct efx_rx_queue *rx_queue; 854 + struct efx_nic *efx = channel->efx; 855 + 856 + if (unlikely(ACCESS_ONCE(efx->reset_pending))) 857 + return; 958 858 959 859 /* Basic packet 
information */ 960 860 rx_ev_byte_cnt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_BYTE_CNT); ··· 1005 897 efx_rx_packet(rx_queue, rx_ev_desc_ptr, rx_ev_byte_cnt, flags); 1006 898 } 1007 899 900 + /* If this flush done event corresponds to a &struct efx_tx_queue, then 901 + * send an %EFX_CHANNEL_MAGIC_TX_DRAIN event to drain the event queue 902 + * of all transmit completions. 903 + */ 904 + static void 905 + efx_handle_tx_flush_done(struct efx_nic *efx, efx_qword_t *event) 906 + { 907 + struct efx_tx_queue *tx_queue; 908 + int qid; 909 + 910 + qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA); 911 + if (qid < EFX_TXQ_TYPES * efx->n_tx_channels) { 912 + tx_queue = efx_get_tx_queue(efx, qid / EFX_TXQ_TYPES, 913 + qid % EFX_TXQ_TYPES); 914 + 915 + efx_magic_event(tx_queue->channel, 916 + EFX_CHANNEL_MAGIC_TX_DRAIN(tx_queue)); 917 + } 918 + } 919 + 920 + /* If this flush done event corresponds to a &struct efx_rx_queue: If the flush 921 + * was succesful then send an %EFX_CHANNEL_MAGIC_RX_DRAIN, otherwise add 922 + * the RX queue back to the mask of RX queues in need of flushing. 
923 + */ 924 + static void 925 + efx_handle_rx_flush_done(struct efx_nic *efx, efx_qword_t *event) 926 + { 927 + struct efx_channel *channel; 928 + struct efx_rx_queue *rx_queue; 929 + int qid; 930 + bool failed; 931 + 932 + qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_DESCQ_ID); 933 + failed = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL); 934 + if (qid >= efx->n_channels) 935 + return; 936 + channel = efx_get_channel(efx, qid); 937 + if (!efx_channel_has_rx_queue(channel)) 938 + return; 939 + rx_queue = efx_channel_get_rx_queue(channel); 940 + 941 + if (failed) { 942 + netif_info(efx, hw, efx->net_dev, 943 + "RXQ %d flush retry\n", qid); 944 + rx_queue->flush_pending = true; 945 + atomic_inc(&efx->rxq_flush_pending); 946 + } else { 947 + efx_magic_event(efx_rx_queue_channel(rx_queue), 948 + EFX_CHANNEL_MAGIC_RX_DRAIN(rx_queue)); 949 + } 950 + atomic_dec(&efx->rxq_flush_outstanding); 951 + if (efx_flush_wake(efx)) 952 + wake_up(&efx->flush_wq); 953 + } 954 + 955 + static void 956 + efx_handle_drain_event(struct efx_channel *channel) 957 + { 958 + struct efx_nic *efx = channel->efx; 959 + 960 + WARN_ON(atomic_read(&efx->drain_pending) == 0); 961 + atomic_dec(&efx->drain_pending); 962 + if (efx_flush_wake(efx)) 963 + wake_up(&efx->flush_wq); 964 + } 965 + 1008 966 static void 1009 967 efx_handle_generated_event(struct efx_channel *channel, efx_qword_t *event) 1010 968 { 1011 969 struct efx_nic *efx = channel->efx; 1012 - unsigned code; 970 + struct efx_rx_queue *rx_queue = 971 + efx_channel_has_rx_queue(channel) ? 
972 + efx_channel_get_rx_queue(channel) : NULL; 973 + unsigned magic, code; 1013 974 1014 - code = EFX_QWORD_FIELD(*event, FSF_AZ_DRV_GEN_EV_MAGIC); 1015 - if (code == EFX_CHANNEL_MAGIC_TEST(channel)) 1016 - ; /* ignore */ 1017 - else if (code == EFX_CHANNEL_MAGIC_FILL(channel)) 975 + magic = EFX_QWORD_FIELD(*event, FSF_AZ_DRV_GEN_EV_MAGIC); 976 + code = _EFX_CHANNEL_MAGIC_CODE(magic); 977 + 978 + if (magic == EFX_CHANNEL_MAGIC_TEST(channel)) { 979 + /* ignore */ 980 + } else if (rx_queue && magic == EFX_CHANNEL_MAGIC_FILL(rx_queue)) { 1018 981 /* The queue must be empty, so we won't receive any rx 1019 982 * events, so efx_process_channel() won't refill the 1020 983 * queue. Refill it here */ 1021 - efx_fast_push_rx_descriptors(efx_channel_get_rx_queue(channel)); 1022 - else 984 + efx_fast_push_rx_descriptors(rx_queue); 985 + } else if (rx_queue && magic == EFX_CHANNEL_MAGIC_RX_DRAIN(rx_queue)) { 986 + rx_queue->enabled = false; 987 + efx_handle_drain_event(channel); 988 + } else if (code == _EFX_CHANNEL_MAGIC_TX_DRAIN) { 989 + efx_handle_drain_event(channel); 990 + } else { 1023 991 netif_dbg(efx, hw, efx->net_dev, "channel %d received " 1024 992 "generated event "EFX_QWORD_FMT"\n", 1025 993 channel->channel, EFX_QWORD_VAL(*event)); 994 + } 1026 995 } 1027 996 1028 997 static void ··· 1116 931 case FSE_AZ_TX_DESCQ_FLS_DONE_EV: 1117 932 netif_vdbg(efx, hw, efx->net_dev, "channel %d TXQ %d flushed\n", 1118 933 channel->channel, ev_sub_data); 934 + efx_handle_tx_flush_done(efx, event); 935 + efx_sriov_tx_flush_done(efx, event); 1119 936 break; 1120 937 case FSE_AZ_RX_DESCQ_FLS_DONE_EV: 1121 938 netif_vdbg(efx, hw, efx->net_dev, "channel %d RXQ %d flushed\n", 1122 939 channel->channel, ev_sub_data); 940 + efx_handle_rx_flush_done(efx, event); 941 + efx_sriov_rx_flush_done(efx, event); 1123 942 break; 1124 943 case FSE_AZ_EVQ_INIT_DONE_EV: 1125 944 netif_dbg(efx, hw, efx->net_dev, ··· 1155 966 RESET_TYPE_DISABLE); 1156 967 break; 1157 968 case FSE_BZ_RX_DSC_ERROR_EV: 
1158 - netif_err(efx, rx_err, efx->net_dev, 1159 - "RX DMA Q %d reports descriptor fetch error." 1160 - " RX Q %d is disabled.\n", ev_sub_data, ev_sub_data); 1161 - efx_schedule_reset(efx, RESET_TYPE_RX_DESC_FETCH); 969 + if (ev_sub_data < EFX_VI_BASE) { 970 + netif_err(efx, rx_err, efx->net_dev, 971 + "RX DMA Q %d reports descriptor fetch error." 972 + " RX Q %d is disabled.\n", ev_sub_data, 973 + ev_sub_data); 974 + efx_schedule_reset(efx, RESET_TYPE_RX_DESC_FETCH); 975 + } else 976 + efx_sriov_desc_fetch_err(efx, ev_sub_data); 1162 977 break; 1163 978 case FSE_BZ_TX_DSC_ERROR_EV: 1164 - netif_err(efx, tx_err, efx->net_dev, 1165 - "TX DMA Q %d reports descriptor fetch error." 1166 - " TX Q %d is disabled.\n", ev_sub_data, ev_sub_data); 1167 - efx_schedule_reset(efx, RESET_TYPE_TX_DESC_FETCH); 979 + if (ev_sub_data < EFX_VI_BASE) { 980 + netif_err(efx, tx_err, efx->net_dev, 981 + "TX DMA Q %d reports descriptor fetch error." 982 + " TX Q %d is disabled.\n", ev_sub_data, 983 + ev_sub_data); 984 + efx_schedule_reset(efx, RESET_TYPE_TX_DESC_FETCH); 985 + } else 986 + efx_sriov_desc_fetch_err(efx, ev_sub_data); 1168 987 break; 1169 988 default: 1170 989 netif_vdbg(efx, hw, efx->net_dev, ··· 1231 1034 break; 1232 1035 case FSE_AZ_EV_CODE_DRIVER_EV: 1233 1036 efx_handle_driver_event(channel, &event); 1037 + break; 1038 + case FSE_CZ_EV_CODE_USER_EV: 1039 + efx_sriov_event(channel, &event); 1234 1040 break; 1235 1041 case FSE_CZ_EV_CODE_MCDI_EV: 1236 1042 efx_mcdi_process_event(channel, &event); ··· 1335 1135 1336 1136 void efx_nic_generate_test_event(struct efx_channel *channel) 1337 1137 { 1338 - unsigned int magic = EFX_CHANNEL_MAGIC_TEST(channel); 1339 - efx_qword_t test_event; 1340 - 1341 - EFX_POPULATE_QWORD_2(test_event, FSF_AZ_EV_CODE, 1342 - FSE_AZ_EV_CODE_DRV_GEN_EV, 1343 - FSF_AZ_DRV_GEN_EV_MAGIC, magic); 1344 - efx_generate_event(channel, &test_event); 1138 + efx_magic_event(channel, EFX_CHANNEL_MAGIC_TEST(channel)); 1345 1139 } 1346 1140 1347 - void 
efx_nic_generate_fill_event(struct efx_channel *channel) 1141 + void efx_nic_generate_fill_event(struct efx_rx_queue *rx_queue) 1348 1142 { 1349 - unsigned int magic = EFX_CHANNEL_MAGIC_FILL(channel); 1350 - efx_qword_t test_event; 1351 - 1352 - EFX_POPULATE_QWORD_2(test_event, FSF_AZ_EV_CODE, 1353 - FSE_AZ_EV_CODE_DRV_GEN_EV, 1354 - FSF_AZ_DRV_GEN_EV_MAGIC, magic); 1355 - efx_generate_event(channel, &test_event); 1356 - } 1357 - 1358 - /************************************************************************** 1359 - * 1360 - * Flush handling 1361 - * 1362 - **************************************************************************/ 1363 - 1364 - 1365 - static void efx_poll_flush_events(struct efx_nic *efx) 1366 - { 1367 - struct efx_channel *channel = efx_get_channel(efx, 0); 1368 - struct efx_tx_queue *tx_queue; 1369 - struct efx_rx_queue *rx_queue; 1370 - unsigned int read_ptr = channel->eventq_read_ptr; 1371 - unsigned int end_ptr = read_ptr + channel->eventq_mask - 1; 1372 - 1373 - do { 1374 - efx_qword_t *event = efx_event(channel, read_ptr); 1375 - int ev_code, ev_sub_code, ev_queue; 1376 - bool ev_failed; 1377 - 1378 - if (!efx_event_present(event)) 1379 - break; 1380 - 1381 - ev_code = EFX_QWORD_FIELD(*event, FSF_AZ_EV_CODE); 1382 - ev_sub_code = EFX_QWORD_FIELD(*event, 1383 - FSF_AZ_DRIVER_EV_SUBCODE); 1384 - if (ev_code == FSE_AZ_EV_CODE_DRIVER_EV && 1385 - ev_sub_code == FSE_AZ_TX_DESCQ_FLS_DONE_EV) { 1386 - ev_queue = EFX_QWORD_FIELD(*event, 1387 - FSF_AZ_DRIVER_EV_SUBDATA); 1388 - if (ev_queue < EFX_TXQ_TYPES * efx->n_tx_channels) { 1389 - tx_queue = efx_get_tx_queue( 1390 - efx, ev_queue / EFX_TXQ_TYPES, 1391 - ev_queue % EFX_TXQ_TYPES); 1392 - tx_queue->flushed = FLUSH_DONE; 1393 - } 1394 - } else if (ev_code == FSE_AZ_EV_CODE_DRIVER_EV && 1395 - ev_sub_code == FSE_AZ_RX_DESCQ_FLS_DONE_EV) { 1396 - ev_queue = EFX_QWORD_FIELD( 1397 - *event, FSF_AZ_DRIVER_EV_RX_DESCQ_ID); 1398 - ev_failed = EFX_QWORD_FIELD( 1399 - *event, 
FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL); 1400 - if (ev_queue < efx->n_rx_channels) { 1401 - rx_queue = efx_get_rx_queue(efx, ev_queue); 1402 - rx_queue->flushed = 1403 - ev_failed ? FLUSH_FAILED : FLUSH_DONE; 1404 - } 1405 - } 1406 - 1407 - /* We're about to destroy the queue anyway, so 1408 - * it's ok to throw away every non-flush event */ 1409 - EFX_SET_QWORD(*event); 1410 - 1411 - ++read_ptr; 1412 - } while (read_ptr != end_ptr); 1413 - 1414 - channel->eventq_read_ptr = read_ptr; 1415 - } 1416 - 1417 - /* Handle tx and rx flushes at the same time, since they run in 1418 - * parallel in the hardware and there's no reason for us to 1419 - * serialise them */ 1420 - int efx_nic_flush_queues(struct efx_nic *efx) 1421 - { 1422 - struct efx_channel *channel; 1423 - struct efx_rx_queue *rx_queue; 1424 - struct efx_tx_queue *tx_queue; 1425 - int i, tx_pending, rx_pending; 1426 - 1427 - /* If necessary prepare the hardware for flushing */ 1428 - efx->type->prepare_flush(efx); 1429 - 1430 - /* Flush all tx queues in parallel */ 1431 - efx_for_each_channel(channel, efx) { 1432 - efx_for_each_possible_channel_tx_queue(tx_queue, channel) { 1433 - if (tx_queue->initialised) 1434 - efx_flush_tx_queue(tx_queue); 1435 - } 1436 - } 1437 - 1438 - /* The hardware supports four concurrent rx flushes, each of which may 1439 - * need to be retried if there is an outstanding descriptor fetch */ 1440 - for (i = 0; i < EFX_FLUSH_POLL_COUNT; ++i) { 1441 - rx_pending = tx_pending = 0; 1442 - efx_for_each_channel(channel, efx) { 1443 - efx_for_each_channel_rx_queue(rx_queue, channel) { 1444 - if (rx_queue->flushed == FLUSH_PENDING) 1445 - ++rx_pending; 1446 - } 1447 - } 1448 - efx_for_each_channel(channel, efx) { 1449 - efx_for_each_channel_rx_queue(rx_queue, channel) { 1450 - if (rx_pending == EFX_RX_FLUSH_COUNT) 1451 - break; 1452 - if (rx_queue->flushed == FLUSH_FAILED || 1453 - rx_queue->flushed == FLUSH_NONE) { 1454 - efx_flush_rx_queue(rx_queue); 1455 - ++rx_pending; 1456 - } 1457 - } 1458 - 
efx_for_each_possible_channel_tx_queue(tx_queue, channel) { 1459 - if (tx_queue->initialised && 1460 - tx_queue->flushed != FLUSH_DONE) 1461 - ++tx_pending; 1462 - } 1463 - } 1464 - 1465 - if (rx_pending == 0 && tx_pending == 0) 1466 - return 0; 1467 - 1468 - msleep(EFX_FLUSH_INTERVAL); 1469 - efx_poll_flush_events(efx); 1470 - } 1471 - 1472 - /* Mark the queues as all flushed. We're going to return failure 1473 - * leading to a reset, or fake up success anyway */ 1474 - efx_for_each_channel(channel, efx) { 1475 - efx_for_each_possible_channel_tx_queue(tx_queue, channel) { 1476 - if (tx_queue->initialised && 1477 - tx_queue->flushed != FLUSH_DONE) 1478 - netif_err(efx, hw, efx->net_dev, 1479 - "tx queue %d flush command timed out\n", 1480 - tx_queue->queue); 1481 - tx_queue->flushed = FLUSH_DONE; 1482 - } 1483 - efx_for_each_channel_rx_queue(rx_queue, channel) { 1484 - if (rx_queue->flushed != FLUSH_DONE) 1485 - netif_err(efx, hw, efx->net_dev, 1486 - "rx queue %d flush command timed out\n", 1487 - efx_rx_queue_index(rx_queue)); 1488 - rx_queue->flushed = FLUSH_DONE; 1489 - } 1490 - } 1491 - 1492 - return -ETIMEDOUT; 1143 + efx_magic_event(efx_rx_queue_channel(rx_queue), 1144 + EFX_CHANNEL_MAGIC_FILL(rx_queue)); 1493 1145 } 1494 1146 1495 1147 /************************************************************************** ··· 1367 1315 1368 1316 void efx_nic_enable_interrupts(struct efx_nic *efx) 1369 1317 { 1370 - struct efx_channel *channel; 1371 - 1372 1318 EFX_ZERO_OWORD(*((efx_oword_t *) efx->irq_status.addr)); 1373 1319 wmb(); /* Ensure interrupt vector is clear before interrupts enabled */ 1374 1320 1375 - /* Enable interrupts */ 1376 1321 efx_nic_interrupts(efx, true, false); 1377 - 1378 - /* Force processing of all the channels to get the EVQ RPTRs up to 1379 - date */ 1380 - efx_for_each_channel(channel, efx) 1381 - efx_schedule_channel(channel); 1382 1322 } 1383 1323 1384 1324 void efx_nic_disable_interrupts(struct efx_nic *efx) ··· 1637 1593 
free_irq(efx->legacy_irq, efx); 1638 1594 } 1639 1595 } 1596 + /* Looks at available SRAM resources and works out how many queues we 1597 + * can support, and where things like descriptor caches should live. 1598 + * 1599 + * SRAM is split up as follows: 1600 + * 0 buftbl entries for channels 1601 + * efx->vf_buftbl_base buftbl entries for SR-IOV 1602 + * efx->rx_dc_base RX descriptor caches 1603 + * efx->tx_dc_base TX descriptor caches 1604 + */ 1605 + void efx_nic_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw) 1606 + { 1607 + unsigned vi_count, buftbl_min; 1608 + 1609 + /* Account for the buffer table entries backing the datapath channels 1610 + * and the descriptor caches for those channels. 1611 + */ 1612 + buftbl_min = ((efx->n_rx_channels * EFX_MAX_DMAQ_SIZE + 1613 + efx->n_tx_channels * EFX_TXQ_TYPES * EFX_MAX_DMAQ_SIZE + 1614 + efx->n_channels * EFX_MAX_EVQ_SIZE) 1615 + * sizeof(efx_qword_t) / EFX_BUF_SIZE); 1616 + vi_count = max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES); 1617 + 1618 + #ifdef CONFIG_SFC_SRIOV 1619 + if (efx_sriov_wanted(efx)) { 1620 + unsigned vi_dc_entries, buftbl_free, entries_per_vf, vf_limit; 1621 + 1622 + efx->vf_buftbl_base = buftbl_min; 1623 + 1624 + vi_dc_entries = RX_DC_ENTRIES + TX_DC_ENTRIES; 1625 + vi_count = max(vi_count, EFX_VI_BASE); 1626 + buftbl_free = (sram_lim_qw - buftbl_min - 1627 + vi_count * vi_dc_entries); 1628 + 1629 + entries_per_vf = ((vi_dc_entries + EFX_VF_BUFTBL_PER_VI) * 1630 + efx_vf_size(efx)); 1631 + vf_limit = min(buftbl_free / entries_per_vf, 1632 + (1024U - EFX_VI_BASE) >> efx->vi_scale); 1633 + 1634 + if (efx->vf_count > vf_limit) { 1635 + netif_err(efx, probe, efx->net_dev, 1636 + "Reducing VF count from %d to %d\n", 1637 + efx->vf_count, vf_limit); 1638 + efx->vf_count = vf_limit; 1639 + } 1640 + vi_count += efx->vf_count * efx_vf_size(efx); 1641 + } 1642 + #endif 1643 + 1644 + efx->tx_dc_base = sram_lim_qw - vi_count * TX_DC_ENTRIES; 1645 + efx->rx_dc_base = 
efx->tx_dc_base - vi_count * RX_DC_ENTRIES; 1646 + } 1647 + 1640 1648 u32 efx_nic_fpga_ver(struct efx_nic *efx) 1641 1649 { 1642 1650 efx_oword_t altera_build; ··· 1701 1605 efx_oword_t temp; 1702 1606 1703 1607 /* Set positions of descriptor caches in SRAM. */ 1704 - EFX_POPULATE_OWORD_1(temp, FRF_AZ_SRM_TX_DC_BASE_ADR, 1705 - efx->type->tx_dc_base / 8); 1608 + EFX_POPULATE_OWORD_1(temp, FRF_AZ_SRM_TX_DC_BASE_ADR, efx->tx_dc_base); 1706 1609 efx_writeo(efx, &temp, FR_AZ_SRM_TX_DC_CFG); 1707 - EFX_POPULATE_OWORD_1(temp, FRF_AZ_SRM_RX_DC_BASE_ADR, 1708 - efx->type->rx_dc_base / 8); 1610 + EFX_POPULATE_OWORD_1(temp, FRF_AZ_SRM_RX_DC_BASE_ADR, efx->rx_dc_base); 1709 1611 efx_writeo(efx, &temp, FR_AZ_SRM_RX_DC_CFG); 1710 1612 1711 1613 /* Set TX descriptor cache size. */
+99 -3
drivers/net/ethernet/sfc/nic.h
··· 65 65 #define FALCON_GMAC_LOOPBACKS \ 66 66 (1 << LOOPBACK_GMAC) 67 67 68 + /* Alignment of PCIe DMA boundaries (4KB) */ 69 + #define EFX_PAGE_SIZE 4096 70 + /* Size and alignment of buffer table entries (same) */ 71 + #define EFX_BUF_SIZE EFX_PAGE_SIZE 72 + 68 73 /** 69 74 * struct falcon_board_type - board operations and type information 70 75 * @id: Board type id, as found in NVRAM ··· 169 164 } 170 165 #endif 171 166 167 + /* 168 + * On the SFC9000 family each port is associated with 1 PCI physical 169 + * function (PF) handled by sfc and a configurable number of virtual 170 + * functions (VFs) that may be handled by some other driver, often in 171 + * a VM guest. The queue pointer registers are mapped in both PF and 172 + * VF BARs such that an 8K region provides access to a single RX, TX 173 + * and event queue (collectively a Virtual Interface, VI or VNIC). 174 + * 175 + * The PF has access to all 1024 VIs while VFs are mapped to VIs 176 + * according to VI_BASE and VI_SCALE: VF i has access to VIs numbered 177 + * in range [VI_BASE + (i << VI_SCALE), VI_BASE + ((i + 1) << VI_SCALE)). 178 + * The number of VIs and the VI_SCALE value are configurable but must 179 + * be established at boot time by firmware. 180 + */ 181 + 182 + /* Maximum VI_SCALE parameter supported by Siena */ 183 + #define EFX_VI_SCALE_MAX 6 184 + /* Base VI to use for SR-IOV. Must be aligned to (1 << EFX_VI_SCALE_MAX), 185 + * so this is the smallest allowed value. 
*/ 186 + #define EFX_VI_BASE 128U 187 + /* Maximum number of VFs allowed */ 188 + #define EFX_VF_COUNT_MAX 127 189 + /* Limit EVQs on VFs to be only 8k to reduce buffer table reservation */ 190 + #define EFX_MAX_VF_EVQ_SIZE 8192UL 191 + /* The number of buffer table entries reserved for each VI on a VF */ 192 + #define EFX_VF_BUFTBL_PER_VI \ 193 + ((EFX_MAX_VF_EVQ_SIZE + 2 * EFX_MAX_DMAQ_SIZE) * \ 194 + sizeof(efx_qword_t) / EFX_BUF_SIZE) 195 + 196 + #ifdef CONFIG_SFC_SRIOV 197 + 198 + static inline bool efx_sriov_wanted(struct efx_nic *efx) 199 + { 200 + return efx->vf_count != 0; 201 + } 202 + static inline bool efx_sriov_enabled(struct efx_nic *efx) 203 + { 204 + return efx->vf_init_count != 0; 205 + } 206 + static inline unsigned int efx_vf_size(struct efx_nic *efx) 207 + { 208 + return 1 << efx->vi_scale; 209 + } 210 + 211 + extern int efx_init_sriov(void); 212 + extern void efx_sriov_probe(struct efx_nic *efx); 213 + extern int efx_sriov_init(struct efx_nic *efx); 214 + extern void efx_sriov_mac_address_changed(struct efx_nic *efx); 215 + extern void efx_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event); 216 + extern void efx_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event); 217 + extern void efx_sriov_event(struct efx_channel *channel, efx_qword_t *event); 218 + extern void efx_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq); 219 + extern void efx_sriov_flr(struct efx_nic *efx, unsigned flr); 220 + extern void efx_sriov_reset(struct efx_nic *efx); 221 + extern void efx_sriov_fini(struct efx_nic *efx); 222 + extern void efx_fini_sriov(void); 223 + 224 + #else 225 + 226 + static inline bool efx_sriov_wanted(struct efx_nic *efx) { return false; } 227 + static inline bool efx_sriov_enabled(struct efx_nic *efx) { return false; } 228 + static inline unsigned int efx_vf_size(struct efx_nic *efx) { return 0; } 229 + 230 + static inline int efx_init_sriov(void) { return 0; } 231 + static inline void efx_sriov_probe(struct efx_nic *efx) 
{} 232 + static inline int efx_sriov_init(struct efx_nic *efx) { return -EOPNOTSUPP; } 233 + static inline void efx_sriov_mac_address_changed(struct efx_nic *efx) {} 234 + static inline void efx_sriov_tx_flush_done(struct efx_nic *efx, 235 + efx_qword_t *event) {} 236 + static inline void efx_sriov_rx_flush_done(struct efx_nic *efx, 237 + efx_qword_t *event) {} 238 + static inline void efx_sriov_event(struct efx_channel *channel, 239 + efx_qword_t *event) {} 240 + static inline void efx_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq) {} 241 + static inline void efx_sriov_flr(struct efx_nic *efx, unsigned flr) {} 242 + static inline void efx_sriov_reset(struct efx_nic *efx) {} 243 + static inline void efx_sriov_fini(struct efx_nic *efx) {} 244 + static inline void efx_fini_sriov(void) {} 245 + 246 + #endif 247 + 248 + extern int efx_sriov_set_vf_mac(struct net_device *dev, int vf, u8 *mac); 249 + extern int efx_sriov_set_vf_vlan(struct net_device *dev, int vf, 250 + u16 vlan, u8 qos); 251 + extern int efx_sriov_get_vf_config(struct net_device *dev, int vf, 252 + struct ifla_vf_info *ivf); 253 + extern int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf, 254 + bool spoofchk); 255 + 172 256 extern const struct efx_nic_type falcon_a1_nic_type; 173 257 extern const struct efx_nic_type falcon_b0_nic_type; 174 258 extern const struct efx_nic_type siena_a0_nic_type; ··· 284 190 extern void efx_nic_fini_rx(struct efx_rx_queue *rx_queue); 285 191 extern void efx_nic_remove_rx(struct efx_rx_queue *rx_queue); 286 192 extern void efx_nic_notify_rx_desc(struct efx_rx_queue *rx_queue); 193 + extern void efx_nic_generate_fill_event(struct efx_rx_queue *rx_queue); 287 194 288 195 /* Event data path */ 289 196 extern int efx_nic_probe_eventq(struct efx_channel *channel); ··· 306 211 extern int efx_nic_init_interrupt(struct efx_nic *efx); 307 212 extern void efx_nic_enable_interrupts(struct efx_nic *efx); 308 213 extern void efx_nic_generate_test_event(struct 
efx_channel *channel); 309 - extern void efx_nic_generate_fill_event(struct efx_channel *channel); 310 214 extern void efx_nic_generate_interrupt(struct efx_nic *efx); 311 215 extern void efx_nic_disable_interrupts(struct efx_nic *efx); 312 216 extern void efx_nic_fini_interrupt(struct efx_nic *efx); ··· 319 225 extern void falcon_stop_nic_stats(struct efx_nic *efx); 320 226 extern void falcon_setup_xaui(struct efx_nic *efx); 321 227 extern int falcon_reset_xaui(struct efx_nic *efx); 228 + extern void 229 + efx_nic_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw); 322 230 extern void efx_nic_init_common(struct efx_nic *efx); 323 231 extern void efx_nic_push_rx_indir_table(struct efx_nic *efx); 324 232 ··· 374 278 #define MAC_DATA_LBN 0 375 279 #define MAC_DATA_WIDTH 32 376 280 377 - extern void efx_nic_generate_event(struct efx_channel *channel, 378 - efx_qword_t *event); 281 + extern void efx_generate_event(struct efx_nic *efx, unsigned int evq, 282 + efx_qword_t *event); 379 283 380 284 extern void falcon_poll_xmac(struct efx_nic *efx); 381 285
+10 -10
drivers/net/ethernet/sfc/regs.h
··· 2446 2446 #define FRF_CZ_RMFT_RXQ_ID_WIDTH 12 2447 2447 #define FRF_CZ_RMFT_WILDCARD_MATCH_LBN 60 2448 2448 #define FRF_CZ_RMFT_WILDCARD_MATCH_WIDTH 1 2449 - #define FRF_CZ_RMFT_DEST_MAC_LBN 16 2450 - #define FRF_CZ_RMFT_DEST_MAC_WIDTH 44 2449 + #define FRF_CZ_RMFT_DEST_MAC_LBN 12 2450 + #define FRF_CZ_RMFT_DEST_MAC_WIDTH 48 2451 2451 #define FRF_CZ_RMFT_VLAN_ID_LBN 0 2452 2452 #define FRF_CZ_RMFT_VLAN_ID_WIDTH 12 2453 2453 ··· 2523 2523 #define FRF_CZ_TMFT_TXQ_ID_WIDTH 12 2524 2524 #define FRF_CZ_TMFT_WILDCARD_MATCH_LBN 60 2525 2525 #define FRF_CZ_TMFT_WILDCARD_MATCH_WIDTH 1 2526 - #define FRF_CZ_TMFT_SRC_MAC_LBN 16 2527 - #define FRF_CZ_TMFT_SRC_MAC_WIDTH 44 2526 + #define FRF_CZ_TMFT_SRC_MAC_LBN 12 2527 + #define FRF_CZ_TMFT_SRC_MAC_WIDTH 48 2528 2528 #define FRF_CZ_TMFT_VLAN_ID_LBN 0 2529 2529 #define FRF_CZ_TMFT_VLAN_ID_WIDTH 12 2530 2530 ··· 2895 2895 2896 2896 /* RX_MAC_FILTER_TBL0 */ 2897 2897 /* RMFT_DEST_MAC is wider than 32 bits */ 2898 - #define FRF_CZ_RMFT_DEST_MAC_LO_LBN 12 2898 + #define FRF_CZ_RMFT_DEST_MAC_LO_LBN FRF_CZ_RMFT_DEST_MAC_LBN 2899 2899 #define FRF_CZ_RMFT_DEST_MAC_LO_WIDTH 32 2900 - #define FRF_CZ_RMFT_DEST_MAC_HI_LBN 44 2901 - #define FRF_CZ_RMFT_DEST_MAC_HI_WIDTH 16 2900 + #define FRF_CZ_RMFT_DEST_MAC_HI_LBN (FRF_CZ_RMFT_DEST_MAC_LBN + 32) 2901 + #define FRF_CZ_RMFT_DEST_MAC_HI_WIDTH (FRF_CZ_RMFT_DEST_MAC_WIDTH - 32) 2902 2902 2903 2903 /* TX_MAC_FILTER_TBL0 */ 2904 2904 /* TMFT_SRC_MAC is wider than 32 bits */ 2905 - #define FRF_CZ_TMFT_SRC_MAC_LO_LBN 12 2905 + #define FRF_CZ_TMFT_SRC_MAC_LO_LBN FRF_CZ_TMFT_SRC_MAC_LBN 2906 2906 #define FRF_CZ_TMFT_SRC_MAC_LO_WIDTH 32 2907 - #define FRF_CZ_TMFT_SRC_MAC_HI_LBN 44 2908 - #define FRF_CZ_TMFT_SRC_MAC_HI_WIDTH 16 2907 + #define FRF_CZ_TMFT_SRC_MAC_HI_LBN (FRF_CZ_TMFT_SRC_MAC_LBN + 32) 2908 + #define FRF_CZ_TMFT_SRC_MAC_HI_WIDTH (FRF_CZ_TMFT_SRC_MAC_WIDTH - 32) 2909 2909 2910 2910 /* TX_PACE_TBL */ 2911 2911 /* Values >20 are documented as reserved, but will result in a queue going
+5 -2
drivers/net/ethernet/sfc/rx.c
··· 405 405 void efx_rx_slow_fill(unsigned long context) 406 406 { 407 407 struct efx_rx_queue *rx_queue = (struct efx_rx_queue *)context; 408 - struct efx_channel *channel = efx_rx_queue_channel(rx_queue); 409 408 410 409 /* Post an event to cause NAPI to run and refill the queue */ 411 - efx_nic_generate_fill_event(channel); 410 + efx_nic_generate_fill_event(rx_queue); 412 411 ++rx_queue->slow_fill_count; 413 412 } 414 413 ··· 705 706 rx_queue->fast_fill_limit = limit; 706 707 707 708 /* Set up RX descriptor ring */ 709 + rx_queue->enabled = true; 708 710 efx_nic_init_rx(rx_queue); 709 711 } 710 712 ··· 716 716 717 717 netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, 718 718 "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue)); 719 + 720 + /* A flush failure might have left rx_queue->enabled */ 721 + rx_queue->enabled = false; 719 722 720 723 del_timer_sync(&rx_queue->slow_fill); 721 724 efx_nic_fini_rx(rx_queue);
+12 -2
drivers/net/ethernet/sfc/siena.c
··· 225 225 return rc; 226 226 } 227 227 228 + static void siena_dimension_resources(struct efx_nic *efx) 229 + { 230 + /* Each port has a small block of internal SRAM dedicated to 231 + * the buffer table and descriptor caches. In theory we can 232 + * map both blocks to one port, but we don't. 233 + */ 234 + efx_nic_dimension_resources(efx, FR_CZ_BUF_FULL_TBL_ROWS / 2); 235 + } 236 + 228 237 static int siena_probe_nic(struct efx_nic *efx) 229 238 { 230 239 struct siena_nic_data *nic_data; ··· 312 303 rc = efx_mcdi_mon_probe(efx); 313 304 if (rc) 314 305 goto fail5; 306 + 307 + efx_sriov_probe(efx); 315 308 316 309 return 0; 317 310 ··· 630 619 .probe = siena_probe_nic, 631 620 .remove = siena_remove_nic, 632 621 .init = siena_init_nic, 622 + .dimension_resources = siena_dimension_resources, 633 623 .fini = efx_port_dummy_op_void, 634 624 .monitor = NULL, 635 625 .map_reset_reason = siena_map_reset_reason, ··· 669 657 * interrupt handler only supports 32 670 658 * channels */ 671 659 .timer_period_max = 1 << FRF_CZ_TC_TIMER_VAL_WIDTH, 672 - .tx_dc_base = 0x88000, 673 - .rx_dc_base = 0x68000, 674 660 .offload_features = (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | 675 661 NETIF_F_RXHASH | NETIF_F_NTUPLE), 676 662 };
+1642
drivers/net/ethernet/sfc/siena_sriov.c
··· 1 + /**************************************************************************** 2 + * Driver for Solarflare Solarstorm network controllers and boards 3 + * Copyright 2010-2011 Solarflare Communications Inc. 4 + * 5 + * This program is free software; you can redistribute it and/or modify it 6 + * under the terms of the GNU General Public License version 2 as published 7 + * by the Free Software Foundation, incorporated herein by reference. 8 + */ 9 + #include <linux/pci.h> 10 + #include <linux/module.h> 11 + #include "net_driver.h" 12 + #include "efx.h" 13 + #include "nic.h" 14 + #include "io.h" 15 + #include "mcdi.h" 16 + #include "filter.h" 17 + #include "mcdi_pcol.h" 18 + #include "regs.h" 19 + #include "vfdi.h" 20 + 21 + /* Number of longs required to track all the VIs in a VF */ 22 + #define VI_MASK_LENGTH BITS_TO_LONGS(1 << EFX_VI_SCALE_MAX) 23 + 24 + /** 25 + * enum efx_vf_tx_filter_mode - TX MAC filtering behaviour 26 + * @VF_TX_FILTER_OFF: Disabled 27 + * @VF_TX_FILTER_AUTO: Enabled if MAC address assigned to VF and only 28 + * 2 TX queues allowed per VF. 29 + * @VF_TX_FILTER_ON: Enabled 30 + */ 31 + enum efx_vf_tx_filter_mode { 32 + VF_TX_FILTER_OFF, 33 + VF_TX_FILTER_AUTO, 34 + VF_TX_FILTER_ON, 35 + }; 36 + 37 + /** 38 + * struct efx_vf - Back-end resource and protocol state for a PCI VF 39 + * @efx: The Efx NIC owning this VF 40 + * @pci_rid: The PCI requester ID for this VF 41 + * @pci_name: The PCI name (formatted address) of this VF 42 + * @index: Index of VF within its port and PF. 43 + * @req: VFDI incoming request work item. Incoming USR_EV events are received 44 + * by the NAPI handler, but must be handled by executing MCDI requests 45 + * inside a work item. 46 + * @req_addr: VFDI incoming request DMA address (in VF's PCI address space). 47 + * @req_type: Expected next incoming (from VF) %VFDI_EV_TYPE member. 48 + * @req_seqno: Expected next incoming (from VF) %VFDI_EV_SEQ member. 49 + * @msg_seqno: Next %VFDI_EV_SEQ member to reply to VF. 
Protected by 50 + * @status_lock 51 + * @busy: VFDI request queued to be processed or being processed. Receiving 52 + * a VFDI request when @busy is set is an error condition. 53 + * @buf: Incoming VFDI requests are DMA from the VF into this buffer. 54 + * @buftbl_base: Buffer table entries for this VF start at this index. 55 + * @rx_filtering: Receive filtering has been requested by the VF driver. 56 + * @rx_filter_flags: The flags sent in the %VFDI_OP_INSERT_FILTER request. 57 + * @rx_filter_qid: VF relative qid for RX filter requested by VF. 58 + * @rx_filter_id: Receive MAC filter ID. Only one filter per VF is supported. 59 + * @tx_filter_mode: Transmit MAC filtering mode. 60 + * @tx_filter_id: Transmit MAC filter ID. 61 + * @addr: The MAC address and outer vlan tag of the VF. 62 + * @status_addr: VF DMA address of page for &struct vfdi_status updates. 63 + * @status_lock: Mutex protecting @msg_seqno, @status_addr, @addr, 64 + * @peer_page_addrs and @peer_page_count from simultaneous 65 + * updates by the VM and consumption by 66 + * efx_sriov_update_vf_addr() 67 + * @peer_page_addrs: Pointer to an array of guest pages for local addresses. 68 + * @peer_page_count: Number of entries in @peer_page_addrs. 69 + * @evq0_addrs: Array of guest pages backing evq0. 70 + * @evq0_count: Number of entries in @evq0_addrs. 71 + * @flush_waitq: wait queue used by %VFDI_OP_FINI_ALL_QUEUES handler 72 + * to wait for flush completions. 73 + * @txq_lock: Mutex for TX queue allocation. 74 + * @txq_mask: Mask of initialized transmit queues. 75 + * @txq_count: Number of initialized transmit queues. 76 + * @rxq_mask: Mask of initialized receive queues. 77 + * @rxq_count: Number of initialized receive queues. 78 + * @rxq_retry_mask: Mask of receive queues that need to be flushed again 79 + * due to flush failure. 80 + * @rxq_retry_count: Number of receive queues in @rxq_retry_mask. 81 + * @reset_work: Work item to schedule a VF reset. 
82 + */ 83 + struct efx_vf { 84 + struct efx_nic *efx; 85 + unsigned int pci_rid; 86 + char pci_name[13]; /* dddd:bb:dd.f */ 87 + unsigned int index; 88 + struct work_struct req; 89 + u64 req_addr; 90 + int req_type; 91 + unsigned req_seqno; 92 + unsigned msg_seqno; 93 + bool busy; 94 + struct efx_buffer buf; 95 + unsigned buftbl_base; 96 + bool rx_filtering; 97 + enum efx_filter_flags rx_filter_flags; 98 + unsigned rx_filter_qid; 99 + int rx_filter_id; 100 + enum efx_vf_tx_filter_mode tx_filter_mode; 101 + int tx_filter_id; 102 + struct vfdi_endpoint addr; 103 + u64 status_addr; 104 + struct mutex status_lock; 105 + u64 *peer_page_addrs; 106 + unsigned peer_page_count; 107 + u64 evq0_addrs[EFX_MAX_VF_EVQ_SIZE * sizeof(efx_qword_t) / 108 + EFX_BUF_SIZE]; 109 + unsigned evq0_count; 110 + wait_queue_head_t flush_waitq; 111 + struct mutex txq_lock; 112 + unsigned long txq_mask[VI_MASK_LENGTH]; 113 + unsigned txq_count; 114 + unsigned long rxq_mask[VI_MASK_LENGTH]; 115 + unsigned rxq_count; 116 + unsigned long rxq_retry_mask[VI_MASK_LENGTH]; 117 + atomic_t rxq_retry_count; 118 + struct work_struct reset_work; 119 + }; 120 + 121 + struct efx_memcpy_req { 122 + unsigned int from_rid; 123 + void *from_buf; 124 + u64 from_addr; 125 + unsigned int to_rid; 126 + u64 to_addr; 127 + unsigned length; 128 + }; 129 + 130 + /** 131 + * struct efx_local_addr - A MAC address on the vswitch without a VF. 132 + * 133 + * Siena does not have a switch, so VFs can't transmit data to each 134 + * other. Instead the VFs must be made aware of the local addresses 135 + * on the vswitch, so that they can arrange for an alternative 136 + * software datapath to be used. 137 + * 138 + * @link: List head for insertion into efx->local_addr_list. 
139 + * @addr: Ethernet address 140 + */ 141 + struct efx_local_addr { 142 + struct list_head link; 143 + u8 addr[ETH_ALEN]; 144 + }; 145 + 146 + /** 147 + * struct efx_endpoint_page - Page of vfdi_endpoint structures 148 + * 149 + * @link: List head for insertion into efx->local_page_list. 150 + * @ptr: Pointer to page. 151 + * @addr: DMA address of page. 152 + */ 153 + struct efx_endpoint_page { 154 + struct list_head link; 155 + void *ptr; 156 + dma_addr_t addr; 157 + }; 158 + 159 + /* Buffer table entries are reserved txq0,rxq0,evq0,txq1,rxq1,evq1 */ 160 + #define EFX_BUFTBL_TXQ_BASE(_vf, _qid) \ 161 + ((_vf)->buftbl_base + EFX_VF_BUFTBL_PER_VI * (_qid)) 162 + #define EFX_BUFTBL_RXQ_BASE(_vf, _qid) \ 163 + (EFX_BUFTBL_TXQ_BASE(_vf, _qid) + \ 164 + (EFX_MAX_DMAQ_SIZE * sizeof(efx_qword_t) / EFX_BUF_SIZE)) 165 + #define EFX_BUFTBL_EVQ_BASE(_vf, _qid) \ 166 + (EFX_BUFTBL_TXQ_BASE(_vf, _qid) + \ 167 + (2 * EFX_MAX_DMAQ_SIZE * sizeof(efx_qword_t) / EFX_BUF_SIZE)) 168 + 169 + #define EFX_FIELD_MASK(_field) \ 170 + ((1 << _field ## _WIDTH) - 1) 171 + 172 + /* VFs can only use this many transmit channels */ 173 + static unsigned int vf_max_tx_channels = 2; 174 + module_param(vf_max_tx_channels, uint, 0444); 175 + MODULE_PARM_DESC(vf_max_tx_channels, 176 + "Limit the number of TX channels VFs can use"); 177 + 178 + static int max_vfs = -1; 179 + module_param(max_vfs, int, 0444); 180 + MODULE_PARM_DESC(max_vfs, 181 + "Reduce the number of VFs initialized by the driver"); 182 + 183 + /* Workqueue used by VFDI communication. We can't use the global 184 + * workqueue because it may be running the VF driver's probe() 185 + * routine, which will be blocked there waiting for a VFDI response. 
186 + */ 187 + static struct workqueue_struct *vfdi_workqueue; 188 + 189 + static unsigned abs_index(struct efx_vf *vf, unsigned index) 190 + { 191 + return EFX_VI_BASE + vf->index * efx_vf_size(vf->efx) + index; 192 + } 193 + 194 + static int efx_sriov_cmd(struct efx_nic *efx, bool enable, 195 + unsigned *vi_scale_out, unsigned *vf_total_out) 196 + { 197 + u8 inbuf[MC_CMD_SRIOV_IN_LEN]; 198 + u8 outbuf[MC_CMD_SRIOV_OUT_LEN]; 199 + unsigned vi_scale, vf_total; 200 + size_t outlen; 201 + int rc; 202 + 203 + MCDI_SET_DWORD(inbuf, SRIOV_IN_ENABLE, enable ? 1 : 0); 204 + MCDI_SET_DWORD(inbuf, SRIOV_IN_VI_BASE, EFX_VI_BASE); 205 + MCDI_SET_DWORD(inbuf, SRIOV_IN_VF_COUNT, efx->vf_count); 206 + 207 + rc = efx_mcdi_rpc(efx, MC_CMD_SRIOV, inbuf, MC_CMD_SRIOV_IN_LEN, 208 + outbuf, MC_CMD_SRIOV_OUT_LEN, &outlen); 209 + if (rc) 210 + return rc; 211 + if (outlen < MC_CMD_SRIOV_OUT_LEN) 212 + return -EIO; 213 + 214 + vf_total = MCDI_DWORD(outbuf, SRIOV_OUT_VF_TOTAL); 215 + vi_scale = MCDI_DWORD(outbuf, SRIOV_OUT_VI_SCALE); 216 + if (vi_scale > EFX_VI_SCALE_MAX) 217 + return -EOPNOTSUPP; 218 + 219 + if (vi_scale_out) 220 + *vi_scale_out = vi_scale; 221 + if (vf_total_out) 222 + *vf_total_out = vf_total; 223 + 224 + return 0; 225 + } 226 + 227 + static void efx_sriov_usrev(struct efx_nic *efx, bool enabled) 228 + { 229 + efx_oword_t reg; 230 + 231 + EFX_POPULATE_OWORD_2(reg, 232 + FRF_CZ_USREV_DIS, enabled ? 
0 : 1, 233 + FRF_CZ_DFLT_EVQ, efx->vfdi_channel->channel); 234 + efx_writeo(efx, &reg, FR_CZ_USR_EV_CFG); 235 + } 236 + 237 + static int efx_sriov_memcpy(struct efx_nic *efx, struct efx_memcpy_req *req, 238 + unsigned int count) 239 + { 240 + u8 *inbuf, *record; 241 + unsigned int used; 242 + u32 from_rid, from_hi, from_lo; 243 + int rc; 244 + 245 + mb(); /* Finish writing source/reading dest before DMA starts */ 246 + 247 + used = MC_CMD_MEMCPY_IN_LEN(count); 248 + if (WARN_ON(used > MCDI_CTL_SDU_LEN_MAX)) 249 + return -ENOBUFS; 250 + 251 + /* Allocate room for the largest request */ 252 + inbuf = kzalloc(MCDI_CTL_SDU_LEN_MAX, GFP_KERNEL); 253 + if (inbuf == NULL) 254 + return -ENOMEM; 255 + 256 + record = inbuf; 257 + MCDI_SET_DWORD(record, MEMCPY_IN_RECORD, count); 258 + while (count-- > 0) { 259 + MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_TO_RID, 260 + req->to_rid); 261 + MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_TO_ADDR_LO, 262 + (u32)req->to_addr); 263 + MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_TO_ADDR_HI, 264 + (u32)(req->to_addr >> 32)); 265 + if (req->from_buf == NULL) { 266 + from_rid = req->from_rid; 267 + from_lo = (u32)req->from_addr; 268 + from_hi = (u32)(req->from_addr >> 32); 269 + } else { 270 + if (WARN_ON(used + req->length > MCDI_CTL_SDU_LEN_MAX)) { 271 + rc = -ENOBUFS; 272 + goto out; 273 + } 274 + 275 + from_rid = MC_CMD_MEMCPY_RECORD_TYPEDEF_RID_INLINE; 276 + from_lo = used; 277 + from_hi = 0; 278 + memcpy(inbuf + used, req->from_buf, req->length); 279 + used += req->length; 280 + } 281 + 282 + MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_RID, from_rid); 283 + MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_ADDR_LO, 284 + from_lo); 285 + MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_ADDR_HI, 286 + from_hi); 287 + MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_LENGTH, 288 + req->length); 289 + 290 + ++req; 291 + record += MC_CMD_MEMCPY_IN_RECORD_LEN; 292 + } 293 + 294 + rc = efx_mcdi_rpc(efx, MC_CMD_MEMCPY, inbuf, used, 
NULL, 0, NULL); 295 + out: 296 + kfree(inbuf); 297 + 298 + mb(); /* Don't write source/read dest before DMA is complete */ 299 + 300 + return rc; 301 + } 302 + 303 + /* The TX filter is entirely controlled by this driver, and is modified 304 + * underneath the feet of the VF 305 + */ 306 + static void efx_sriov_reset_tx_filter(struct efx_vf *vf) 307 + { 308 + struct efx_nic *efx = vf->efx; 309 + struct efx_filter_spec filter; 310 + u16 vlan; 311 + int rc; 312 + 313 + if (vf->tx_filter_id != -1) { 314 + efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, 315 + vf->tx_filter_id); 316 + netif_dbg(efx, hw, efx->net_dev, "Removed vf %s tx filter %d\n", 317 + vf->pci_name, vf->tx_filter_id); 318 + vf->tx_filter_id = -1; 319 + } 320 + 321 + if (is_zero_ether_addr(vf->addr.mac_addr)) 322 + return; 323 + 324 + /* Turn on TX filtering automatically if not explicitly 325 + * enabled or disabled. 326 + */ 327 + if (vf->tx_filter_mode == VF_TX_FILTER_AUTO && vf_max_tx_channels <= 2) 328 + vf->tx_filter_mode = VF_TX_FILTER_ON; 329 + 330 + vlan = ntohs(vf->addr.tci) & VLAN_VID_MASK; 331 + efx_filter_init_tx(&filter, abs_index(vf, 0)); 332 + rc = efx_filter_set_eth_local(&filter, 333 + vlan ? 
vlan : EFX_FILTER_VID_UNSPEC, 334 + vf->addr.mac_addr); 335 + BUG_ON(rc); 336 + 337 + rc = efx_filter_insert_filter(efx, &filter, true); 338 + if (rc < 0) { 339 + netif_warn(efx, hw, efx->net_dev, 340 + "Unable to migrate tx filter for vf %s\n", 341 + vf->pci_name); 342 + } else { 343 + netif_dbg(efx, hw, efx->net_dev, "Inserted vf %s tx filter %d\n", 344 + vf->pci_name, rc); 345 + vf->tx_filter_id = rc; 346 + } 347 + } 348 + 349 + /* The RX filter is managed here on behalf of the VF driver */ 350 + static void efx_sriov_reset_rx_filter(struct efx_vf *vf) 351 + { 352 + struct efx_nic *efx = vf->efx; 353 + struct efx_filter_spec filter; 354 + u16 vlan; 355 + int rc; 356 + 357 + if (vf->rx_filter_id != -1) { 358 + efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, 359 + vf->rx_filter_id); 360 + netif_dbg(efx, hw, efx->net_dev, "Removed vf %s rx filter %d\n", 361 + vf->pci_name, vf->rx_filter_id); 362 + vf->rx_filter_id = -1; 363 + } 364 + 365 + if (!vf->rx_filtering || is_zero_ether_addr(vf->addr.mac_addr)) 366 + return; 367 + 368 + vlan = ntohs(vf->addr.tci) & VLAN_VID_MASK; 369 + efx_filter_init_rx(&filter, EFX_FILTER_PRI_REQUIRED, 370 + vf->rx_filter_flags, 371 + abs_index(vf, vf->rx_filter_qid)); 372 + rc = efx_filter_set_eth_local(&filter, 373 + vlan ? vlan : EFX_FILTER_VID_UNSPEC, 374 + vf->addr.mac_addr); 375 + BUG_ON(rc); 376 + 377 + rc = efx_filter_insert_filter(efx, &filter, true); 378 + if (rc < 0) { 379 + netif_warn(efx, hw, efx->net_dev, 380 + "Unable to insert rx filter for vf %s\n", 381 + vf->pci_name); 382 + } else { 383 + netif_dbg(efx, hw, efx->net_dev, "Inserted vf %s rx filter %d\n", 384 + vf->pci_name, rc); 385 + vf->rx_filter_id = rc; 386 + } 387 + } 388 + 389 + static void __efx_sriov_update_vf_addr(struct efx_vf *vf) 390 + { 391 + efx_sriov_reset_tx_filter(vf); 392 + efx_sriov_reset_rx_filter(vf); 393 + queue_work(vfdi_workqueue, &vf->efx->peer_work); 394 + } 395 + 396 + /* Push the peer list to this VF. 
The caller must hold status_lock to interlock 397 + * with VFDI requests, and they must be serialised against manipulation of 398 + * local_page_list, either by acquiring local_lock or by running from 399 + * efx_sriov_peer_work() 400 + */ 401 + static void __efx_sriov_push_vf_status(struct efx_vf *vf) 402 + { 403 + struct efx_nic *efx = vf->efx; 404 + struct vfdi_status *status = efx->vfdi_status.addr; 405 + struct efx_memcpy_req copy[4]; 406 + struct efx_endpoint_page *epp; 407 + unsigned int pos, count; 408 + unsigned data_offset; 409 + efx_qword_t event; 410 + 411 + WARN_ON(!mutex_is_locked(&vf->status_lock)); 412 + WARN_ON(!vf->status_addr); 413 + 414 + status->local = vf->addr; 415 + status->generation_end = ++status->generation_start; 416 + 417 + memset(copy, '\0', sizeof(copy)); 418 + /* Write generation_start */ 419 + copy[0].from_buf = &status->generation_start; 420 + copy[0].to_rid = vf->pci_rid; 421 + copy[0].to_addr = vf->status_addr + offsetof(struct vfdi_status, 422 + generation_start); 423 + copy[0].length = sizeof(status->generation_start); 424 + /* DMA the rest of the structure (excluding the generations). This 425 + * assumes that the non-generation portion of vfdi_status is in 426 + * one chunk starting at the version member. 427 + */ 428 + data_offset = offsetof(struct vfdi_status, version); 429 + copy[1].from_rid = efx->pci_dev->devfn; 430 + copy[1].from_addr = efx->vfdi_status.dma_addr + data_offset; 431 + copy[1].to_rid = vf->pci_rid; 432 + copy[1].to_addr = vf->status_addr + data_offset; 433 + copy[1].length = status->length - data_offset; 434 + 435 + /* Copy the peer pages */ 436 + pos = 2; 437 + count = 0; 438 + list_for_each_entry(epp, &efx->local_page_list, link) { 439 + if (count == vf->peer_page_count) { 440 + /* The VF driver will know they need to provide more 441 + * pages because peer_addr_count is too large. 
442 + */ 443 + break; 444 + } 445 + copy[pos].from_buf = NULL; 446 + copy[pos].from_rid = efx->pci_dev->devfn; 447 + copy[pos].from_addr = epp->addr; 448 + copy[pos].to_rid = vf->pci_rid; 449 + copy[pos].to_addr = vf->peer_page_addrs[count]; 450 + copy[pos].length = EFX_PAGE_SIZE; 451 + 452 + if (++pos == ARRAY_SIZE(copy)) { 453 + efx_sriov_memcpy(efx, copy, ARRAY_SIZE(copy)); 454 + pos = 0; 455 + } 456 + ++count; 457 + } 458 + 459 + /* Write generation_end */ 460 + copy[pos].from_buf = &status->generation_end; 461 + copy[pos].to_rid = vf->pci_rid; 462 + copy[pos].to_addr = vf->status_addr + offsetof(struct vfdi_status, 463 + generation_end); 464 + copy[pos].length = sizeof(status->generation_end); 465 + efx_sriov_memcpy(efx, copy, pos + 1); 466 + 467 + /* Notify the guest */ 468 + EFX_POPULATE_QWORD_3(event, 469 + FSF_AZ_EV_CODE, FSE_CZ_EV_CODE_USER_EV, 470 + VFDI_EV_SEQ, (vf->msg_seqno & 0xff), 471 + VFDI_EV_TYPE, VFDI_EV_TYPE_STATUS); 472 + ++vf->msg_seqno; 473 + efx_generate_event(efx, EFX_VI_BASE + vf->index * efx_vf_size(efx), 474 + &event); 475 + } 476 + 477 + static void efx_sriov_bufs(struct efx_nic *efx, unsigned offset, 478 + u64 *addr, unsigned count) 479 + { 480 + efx_qword_t buf; 481 + unsigned pos; 482 + 483 + for (pos = 0; pos < count; ++pos) { 484 + EFX_POPULATE_QWORD_3(buf, 485 + FRF_AZ_BUF_ADR_REGION, 0, 486 + FRF_AZ_BUF_ADR_FBUF, 487 + addr ? 
addr[pos] >> 12 : 0, 488 + FRF_AZ_BUF_OWNER_ID_FBUF, 0); 489 + efx_sram_writeq(efx, efx->membase + FR_BZ_BUF_FULL_TBL, 490 + &buf, offset + pos); 491 + } 492 + } 493 + 494 + static bool bad_vf_index(struct efx_nic *efx, unsigned index) 495 + { 496 + return index >= efx_vf_size(efx); 497 + } 498 + 499 + static bool bad_buf_count(unsigned buf_count, unsigned max_entry_count) 500 + { 501 + unsigned max_buf_count = max_entry_count * 502 + sizeof(efx_qword_t) / EFX_BUF_SIZE; 503 + 504 + return ((buf_count & (buf_count - 1)) || buf_count > max_buf_count); 505 + } 506 + 507 + /* Check that VI specified by per-port index belongs to a VF. 508 + * Optionally set VF index and VI index within the VF. 509 + */ 510 + static bool map_vi_index(struct efx_nic *efx, unsigned abs_index, 511 + struct efx_vf **vf_out, unsigned *rel_index_out) 512 + { 513 + unsigned vf_i; 514 + 515 + if (abs_index < EFX_VI_BASE) 516 + return true; 517 + vf_i = (abs_index - EFX_VI_BASE) * efx_vf_size(efx); 518 + if (vf_i >= efx->vf_init_count) 519 + return true; 520 + 521 + if (vf_out) 522 + *vf_out = efx->vf + vf_i; 523 + if (rel_index_out) 524 + *rel_index_out = abs_index % efx_vf_size(efx); 525 + return false; 526 + } 527 + 528 + static int efx_vfdi_init_evq(struct efx_vf *vf) 529 + { 530 + struct efx_nic *efx = vf->efx; 531 + struct vfdi_req *req = vf->buf.addr; 532 + unsigned vf_evq = req->u.init_evq.index; 533 + unsigned buf_count = req->u.init_evq.buf_count; 534 + unsigned abs_evq = abs_index(vf, vf_evq); 535 + unsigned buftbl = EFX_BUFTBL_EVQ_BASE(vf, vf_evq); 536 + efx_oword_t reg; 537 + 538 + if (bad_vf_index(efx, vf_evq) || 539 + bad_buf_count(buf_count, EFX_MAX_VF_EVQ_SIZE)) { 540 + if (net_ratelimit()) 541 + netif_err(efx, hw, efx->net_dev, 542 + "ERROR: Invalid INIT_EVQ from %s: evq %d bufs %d\n", 543 + vf->pci_name, vf_evq, buf_count); 544 + return VFDI_RC_EINVAL; 545 + } 546 + 547 + efx_sriov_bufs(efx, buftbl, req->u.init_evq.addr, buf_count); 548 + 549 + EFX_POPULATE_OWORD_3(reg, 550 + 
FRF_CZ_TIMER_Q_EN, 1, 551 + FRF_CZ_HOST_NOTIFY_MODE, 0, 552 + FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS); 553 + efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, abs_evq); 554 + EFX_POPULATE_OWORD_3(reg, 555 + FRF_AZ_EVQ_EN, 1, 556 + FRF_AZ_EVQ_SIZE, __ffs(buf_count), 557 + FRF_AZ_EVQ_BUF_BASE_ID, buftbl); 558 + efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL, abs_evq); 559 + 560 + if (vf_evq == 0) { 561 + memcpy(vf->evq0_addrs, req->u.init_evq.addr, 562 + buf_count * sizeof(u64)); 563 + vf->evq0_count = buf_count; 564 + } 565 + 566 + return VFDI_RC_SUCCESS; 567 + } 568 + 569 + static int efx_vfdi_init_rxq(struct efx_vf *vf) 570 + { 571 + struct efx_nic *efx = vf->efx; 572 + struct vfdi_req *req = vf->buf.addr; 573 + unsigned vf_rxq = req->u.init_rxq.index; 574 + unsigned vf_evq = req->u.init_rxq.evq; 575 + unsigned buf_count = req->u.init_rxq.buf_count; 576 + unsigned buftbl = EFX_BUFTBL_RXQ_BASE(vf, vf_rxq); 577 + unsigned label; 578 + efx_oword_t reg; 579 + 580 + if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_rxq) || 581 + bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) { 582 + if (net_ratelimit()) 583 + netif_err(efx, hw, efx->net_dev, 584 + "ERROR: Invalid INIT_RXQ from %s: rxq %d evq %d " 585 + "buf_count %d\n", vf->pci_name, vf_rxq, 586 + vf_evq, buf_count); 587 + return VFDI_RC_EINVAL; 588 + } 589 + if (__test_and_set_bit(req->u.init_rxq.index, vf->rxq_mask)) 590 + ++vf->rxq_count; 591 + efx_sriov_bufs(efx, buftbl, req->u.init_rxq.addr, buf_count); 592 + 593 + label = req->u.init_rxq.label & EFX_FIELD_MASK(FRF_AZ_RX_DESCQ_LABEL); 594 + EFX_POPULATE_OWORD_6(reg, 595 + FRF_AZ_RX_DESCQ_BUF_BASE_ID, buftbl, 596 + FRF_AZ_RX_DESCQ_EVQ_ID, abs_index(vf, vf_evq), 597 + FRF_AZ_RX_DESCQ_LABEL, label, 598 + FRF_AZ_RX_DESCQ_SIZE, __ffs(buf_count), 599 + FRF_AZ_RX_DESCQ_JUMBO, 600 + !!(req->u.init_rxq.flags & 601 + VFDI_RXQ_FLAG_SCATTER_EN), 602 + FRF_AZ_RX_DESCQ_EN, 1); 603 + efx_writeo_table(efx, &reg, FR_BZ_RX_DESC_PTR_TBL, 604 + abs_index(vf, vf_rxq)); 605 + 606 + 
return VFDI_RC_SUCCESS; 607 + } 608 + 609 + static int efx_vfdi_init_txq(struct efx_vf *vf) 610 + { 611 + struct efx_nic *efx = vf->efx; 612 + struct vfdi_req *req = vf->buf.addr; 613 + unsigned vf_txq = req->u.init_txq.index; 614 + unsigned vf_evq = req->u.init_txq.evq; 615 + unsigned buf_count = req->u.init_txq.buf_count; 616 + unsigned buftbl = EFX_BUFTBL_TXQ_BASE(vf, vf_txq); 617 + unsigned label, eth_filt_en; 618 + efx_oword_t reg; 619 + 620 + if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_txq) || 621 + vf_txq >= vf_max_tx_channels || 622 + bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) { 623 + if (net_ratelimit()) 624 + netif_err(efx, hw, efx->net_dev, 625 + "ERROR: Invalid INIT_TXQ from %s: txq %d evq %d " 626 + "buf_count %d\n", vf->pci_name, vf_txq, 627 + vf_evq, buf_count); 628 + return VFDI_RC_EINVAL; 629 + } 630 + 631 + mutex_lock(&vf->txq_lock); 632 + if (__test_and_set_bit(req->u.init_txq.index, vf->txq_mask)) 633 + ++vf->txq_count; 634 + mutex_unlock(&vf->txq_lock); 635 + efx_sriov_bufs(efx, buftbl, req->u.init_txq.addr, buf_count); 636 + 637 + eth_filt_en = vf->tx_filter_mode == VF_TX_FILTER_ON; 638 + 639 + label = req->u.init_txq.label & EFX_FIELD_MASK(FRF_AZ_TX_DESCQ_LABEL); 640 + EFX_POPULATE_OWORD_8(reg, 641 + FRF_CZ_TX_DPT_Q_MASK_WIDTH, min(efx->vi_scale, 1U), 642 + FRF_CZ_TX_DPT_ETH_FILT_EN, eth_filt_en, 643 + FRF_AZ_TX_DESCQ_EN, 1, 644 + FRF_AZ_TX_DESCQ_BUF_BASE_ID, buftbl, 645 + FRF_AZ_TX_DESCQ_EVQ_ID, abs_index(vf, vf_evq), 646 + FRF_AZ_TX_DESCQ_LABEL, label, 647 + FRF_AZ_TX_DESCQ_SIZE, __ffs(buf_count), 648 + FRF_BZ_TX_NON_IP_DROP_DIS, 1); 649 + efx_writeo_table(efx, &reg, FR_BZ_TX_DESC_PTR_TBL, 650 + abs_index(vf, vf_txq)); 651 + 652 + return VFDI_RC_SUCCESS; 653 + } 654 + 655 + /* Returns true when efx_vfdi_fini_all_queues should wake */ 656 + static bool efx_vfdi_flush_wake(struct efx_vf *vf) 657 + { 658 + /* Ensure that all updates are visible to efx_vfdi_fini_all_queues() */ 659 + smp_mb(); 660 + 661 + return (!vf->txq_count && 
/* Forget all TX/RX queue flush state for @vf.  Called once all queues
 * have been flushed, or when an FLR makes the outstanding state moot.
 */
static void efx_vfdi_flush_clear(struct efx_vf *vf)
{
	memset(vf->txq_mask, 0, sizeof(vf->txq_mask));
	vf->txq_count = 0;
	memset(vf->rxq_mask, 0, sizeof(vf->rxq_mask));
	vf->rxq_count = 0;
	memset(vf->rxq_retry_mask, 0, sizeof(vf->rxq_retry_mask));
	atomic_set(&vf->rxq_retry_count, 0);
}

/* VFDI request: flush and de-initialise every queue owned by @vf.
 *
 * TX queues are flushed by direct register writes; RX flushes go
 * through the MC (MC_CMD_FLUSH_RX_QUEUES) and are retried for queues
 * that report a failed flush (rxq_retry_mask), for up to ~1 jiffy-HZ
 * of wall time.  Flow control is disabled for the duration
 * (fc_disable is a nesting count; presumably this lets flushes
 * complete promptly - TODO confirm against efx_mcdi_set_mac()).
 * Whatever the outcome, all of the VF's descriptor/event/timer table
 * entries and buffer-table entries are then zeroed.
 *
 * Returns 0 on success or VFDI_RC_ETIMEDOUT if the flushes did not
 * complete in time.
 */
static int efx_vfdi_fini_all_queues(struct efx_vf *vf)
{
	struct efx_nic *efx = vf->efx;
	efx_oword_t reg;
	unsigned count = efx_vf_size(efx);
	unsigned vf_offset = EFX_VI_BASE + vf->index * efx_vf_size(efx);
	unsigned timeout = HZ;
	unsigned index, rxqs_count;
	__le32 *rxqs;
	int rc;

	rxqs = kmalloc(count * sizeof(*rxqs), GFP_KERNEL);
	if (rxqs == NULL)
		return VFDI_RC_ENOMEM;

	rtnl_lock();
	if (efx->fc_disable++ == 0)
		efx_mcdi_set_mac(efx);
	rtnl_unlock();

	/* Flush all the initialized queues */
	rxqs_count = 0;
	for (index = 0; index < count; ++index) {
		if (test_bit(index, vf->txq_mask)) {
			EFX_POPULATE_OWORD_2(reg,
					     FRF_AZ_TX_FLUSH_DESCQ_CMD, 1,
					     FRF_AZ_TX_FLUSH_DESCQ,
					     vf_offset + index);
			efx_writeo(efx, &reg, FR_AZ_TX_FLUSH_DESCQ);
		}
		if (test_bit(index, vf->rxq_mask))
			rxqs[rxqs_count++] = cpu_to_le32(vf_offset + index);
	}

	atomic_set(&vf->rxq_retry_count, 0);
	/* Wait for completions (which decrement rxq_count/txq_count via
	 * efx_sriov_*_flush_done()), re-issuing the MC flush request for
	 * any RX queues flagged for retry.
	 */
	while (timeout && (vf->rxq_count || vf->txq_count)) {
		rc = efx_mcdi_rpc(efx, MC_CMD_FLUSH_RX_QUEUES, (u8 *)rxqs,
				  rxqs_count * sizeof(*rxqs), NULL, 0, NULL);
		WARN_ON(rc < 0);

		timeout = wait_event_timeout(vf->flush_waitq,
					     efx_vfdi_flush_wake(vf),
					     timeout);
		rxqs_count = 0;
		for (index = 0; index < count; ++index) {
			if (test_and_clear_bit(index, vf->rxq_retry_mask)) {
				atomic_dec(&vf->rxq_retry_count);
				rxqs[rxqs_count++] =
					cpu_to_le32(vf_offset + index);
			}
		}
	}

	rtnl_lock();
	if (--efx->fc_disable == 0)
		efx_mcdi_set_mac(efx);
	rtnl_unlock();

	/* Irrespective of success/failure, fini the queues */
	EFX_ZERO_OWORD(reg);
	for (index = 0; index < count; ++index) {
		efx_writeo_table(efx, &reg, FR_BZ_RX_DESC_PTR_TBL,
				 vf_offset + index);
		efx_writeo_table(efx, &reg, FR_BZ_TX_DESC_PTR_TBL,
				 vf_offset + index);
		efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL,
				 vf_offset + index);
		efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL,
				 vf_offset + index);
	}
	efx_sriov_bufs(efx, vf->buftbl_base, NULL,
		       EFX_VF_BUFTBL_PER_VI * efx_vf_size(efx));
	kfree(rxqs);
	efx_vfdi_flush_clear(vf);

	vf->evq0_count = 0;

	return timeout ? 0 : VFDI_RC_ETIMEDOUT;
}

/* VFDI request: install the VF's unicast MAC filter.  Only one filter
 * per VF is supported (rx_filtering already set is rejected).  The
 * actual hardware filter insertion happens in efx_sriov_reset_rx_filter();
 * the peer list is then rebroadcast from the work item.
 */
static int efx_vfdi_insert_filter(struct efx_vf *vf)
{
	struct efx_nic *efx = vf->efx;
	struct vfdi_req *req = vf->buf.addr;
	unsigned vf_rxq = req->u.mac_filter.rxq;
	unsigned flags;

	if (bad_vf_index(efx, vf_rxq) || vf->rx_filtering) {
		if (net_ratelimit())
			netif_err(efx, hw, efx->net_dev,
				  "ERROR: Invalid INSERT_FILTER from %s: rxq %d "
				  "flags 0x%x\n", vf->pci_name, vf_rxq,
				  req->u.mac_filter.flags);
		return VFDI_RC_EINVAL;
	}

	flags = 0;
	if (req->u.mac_filter.flags & VFDI_MAC_FILTER_FLAG_RSS)
		flags |= EFX_FILTER_FLAG_RX_RSS;
	if (req->u.mac_filter.flags & VFDI_MAC_FILTER_FLAG_SCATTER)
		flags |= EFX_FILTER_FLAG_RX_SCATTER;
	vf->rx_filter_flags = flags;
	vf->rx_filter_qid = vf_rxq;
	vf->rx_filtering = true;

	efx_sriov_reset_rx_filter(vf);
	queue_work(vfdi_workqueue, &efx->peer_work);

	return VFDI_RC_SUCCESS;
}

/* VFDI request: remove the VF's MAC filter and rebroadcast peers */
static int efx_vfdi_remove_all_filters(struct efx_vf *vf)
{
	vf->rx_filtering = false;
	efx_sriov_reset_rx_filter(vf);
	queue_work(vfdi_workqueue, &vf->efx->peer_work);

	return VFDI_RC_SUCCESS;
}

/* VFDI request: record where in VF memory the status page and peer
 * pages live, then immediately push the current status to the VF.
 * The length check rejects a peer_page_count whose address array
 * would not fit inside the single request page.
 */
static int efx_vfdi_set_status_page(struct efx_vf *vf)
{
	struct efx_nic *efx = vf->efx;
	struct vfdi_req *req = vf->buf.addr;
	unsigned int page_count;

	page_count = req->u.set_status_page.peer_page_count;
	if (!req->u.set_status_page.dma_addr || EFX_PAGE_SIZE <
	    offsetof(struct vfdi_req,
		     u.set_status_page.peer_page_addr[page_count])) {
		if (net_ratelimit())
			netif_err(efx, hw, efx->net_dev,
				  "ERROR: Invalid SET_STATUS_PAGE from %s\n",
				  vf->pci_name);
		return VFDI_RC_EINVAL;
	}

	/* local_lock before status_lock, matching __efx_sriov_push_vf_status()
	 * requirements on local_page_list.
	 */
	mutex_lock(&efx->local_lock);
	mutex_lock(&vf->status_lock);
	vf->status_addr = req->u.set_status_page.dma_addr;

	kfree(vf->peer_page_addrs);
	vf->peer_page_addrs = NULL;
	vf->peer_page_count = 0;

	if (page_count) {
		vf->peer_page_addrs = kcalloc(page_count, sizeof(u64),
					      GFP_KERNEL);
		if (vf->peer_page_addrs) {
			memcpy(vf->peer_page_addrs,
			       req->u.set_status_page.peer_page_addr,
			       page_count * sizeof(u64));
			vf->peer_page_count = page_count;
		}
	}

	__efx_sriov_push_vf_status(vf);
	mutex_unlock(&vf->status_lock);
	mutex_unlock(&efx->local_lock);

	return VFDI_RC_SUCCESS;
}

/* VFDI request: stop pushing status updates to this VF */
static int efx_vfdi_clear_status_page(struct efx_vf *vf)
{
	mutex_lock(&vf->status_lock);
	vf->status_addr = 0;
	mutex_unlock(&vf->status_lock);

	return VFDI_RC_SUCCESS;
}

typedef int (*efx_vfdi_op_t)(struct efx_vf *vf);

/* Dispatch table for VFDI requests, indexed by VFDI opcode */
static const efx_vfdi_op_t vfdi_ops[VFDI_OP_LIMIT] = {
	[VFDI_OP_INIT_EVQ] = efx_vfdi_init_evq,
	[VFDI_OP_INIT_TXQ] = efx_vfdi_init_txq,
	[VFDI_OP_INIT_RXQ] = efx_vfdi_init_rxq,
	[VFDI_OP_FINI_ALL_QUEUES] = efx_vfdi_fini_all_queues,
	[VFDI_OP_INSERT_FILTER] = efx_vfdi_insert_filter,
	[VFDI_OP_REMOVE_ALL_FILTERS] = efx_vfdi_remove_all_filters,
	[VFDI_OP_SET_STATUS_PAGE] = efx_vfdi_set_status_page,
	[VFDI_OP_CLEAR_STATUS_PAGE] = efx_vfdi_clear_status_page,
};
efx_vfdi_insert_filter, 855 + [VFDI_OP_REMOVE_ALL_FILTERS] = efx_vfdi_remove_all_filters, 856 + [VFDI_OP_SET_STATUS_PAGE] = efx_vfdi_set_status_page, 857 + [VFDI_OP_CLEAR_STATUS_PAGE] = efx_vfdi_clear_status_page, 858 + }; 859 + 860 + static void efx_sriov_vfdi(struct work_struct *work) 861 + { 862 + struct efx_vf *vf = container_of(work, struct efx_vf, req); 863 + struct efx_nic *efx = vf->efx; 864 + struct vfdi_req *req = vf->buf.addr; 865 + struct efx_memcpy_req copy[2]; 866 + int rc; 867 + 868 + /* Copy this page into the local address space */ 869 + memset(copy, '\0', sizeof(copy)); 870 + copy[0].from_rid = vf->pci_rid; 871 + copy[0].from_addr = vf->req_addr; 872 + copy[0].to_rid = efx->pci_dev->devfn; 873 + copy[0].to_addr = vf->buf.dma_addr; 874 + copy[0].length = EFX_PAGE_SIZE; 875 + rc = efx_sriov_memcpy(efx, copy, 1); 876 + if (rc) { 877 + /* If we can't get the request, we can't reply to the caller */ 878 + if (net_ratelimit()) 879 + netif_err(efx, hw, efx->net_dev, 880 + "ERROR: Unable to fetch VFDI request from %s rc %d\n", 881 + vf->pci_name, -rc); 882 + vf->busy = false; 883 + return; 884 + } 885 + 886 + if (req->op < VFDI_OP_LIMIT && vfdi_ops[req->op] != NULL) { 887 + rc = vfdi_ops[req->op](vf); 888 + if (rc == 0) { 889 + netif_dbg(efx, hw, efx->net_dev, 890 + "vfdi request %d from %s ok\n", 891 + req->op, vf->pci_name); 892 + } 893 + } else { 894 + netif_dbg(efx, hw, efx->net_dev, 895 + "ERROR: Unrecognised request %d from VF %s addr " 896 + "%llx\n", req->op, vf->pci_name, 897 + (unsigned long long)vf->req_addr); 898 + rc = VFDI_RC_EOPNOTSUPP; 899 + } 900 + 901 + /* Allow subsequent VF requests */ 902 + vf->busy = false; 903 + smp_wmb(); 904 + 905 + /* Respond to the request */ 906 + req->rc = rc; 907 + req->op = VFDI_OP_RESPONSE; 908 + 909 + memset(copy, '\0', sizeof(copy)); 910 + copy[0].from_buf = &req->rc; 911 + copy[0].to_rid = vf->pci_rid; 912 + copy[0].to_addr = vf->req_addr + offsetof(struct vfdi_req, rc); 913 + copy[0].length = 
sizeof(req->rc); 914 + copy[1].from_buf = &req->op; 915 + copy[1].to_rid = vf->pci_rid; 916 + copy[1].to_addr = vf->req_addr + offsetof(struct vfdi_req, op); 917 + copy[1].length = sizeof(req->op); 918 + 919 + (void) efx_sriov_memcpy(efx, copy, ARRAY_SIZE(copy)); 920 + } 921 + 922 + 923 + 924 + /* After a reset the event queues inside the guests no longer exist. Fill the 925 + * event ring in guest memory with VFDI reset events, then (re-initialise) the 926 + * event queue to raise an interrupt. The guest driver will then recover. 927 + */ 928 + static void efx_sriov_reset_vf(struct efx_vf *vf, struct efx_buffer *buffer) 929 + { 930 + struct efx_nic *efx = vf->efx; 931 + struct efx_memcpy_req copy_req[4]; 932 + efx_qword_t event; 933 + unsigned int pos, count, k, buftbl, abs_evq; 934 + efx_oword_t reg; 935 + efx_dword_t ptr; 936 + int rc; 937 + 938 + BUG_ON(buffer->len != EFX_PAGE_SIZE); 939 + 940 + if (!vf->evq0_count) 941 + return; 942 + BUG_ON(vf->evq0_count & (vf->evq0_count - 1)); 943 + 944 + mutex_lock(&vf->status_lock); 945 + EFX_POPULATE_QWORD_3(event, 946 + FSF_AZ_EV_CODE, FSE_CZ_EV_CODE_USER_EV, 947 + VFDI_EV_SEQ, vf->msg_seqno, 948 + VFDI_EV_TYPE, VFDI_EV_TYPE_RESET); 949 + vf->msg_seqno++; 950 + for (pos = 0; pos < EFX_PAGE_SIZE; pos += sizeof(event)) 951 + memcpy(buffer->addr + pos, &event, sizeof(event)); 952 + 953 + for (pos = 0; pos < vf->evq0_count; pos += count) { 954 + count = min_t(unsigned, vf->evq0_count - pos, 955 + ARRAY_SIZE(copy_req)); 956 + for (k = 0; k < count; k++) { 957 + copy_req[k].from_buf = NULL; 958 + copy_req[k].from_rid = efx->pci_dev->devfn; 959 + copy_req[k].from_addr = buffer->dma_addr; 960 + copy_req[k].to_rid = vf->pci_rid; 961 + copy_req[k].to_addr = vf->evq0_addrs[pos + k]; 962 + copy_req[k].length = EFX_PAGE_SIZE; 963 + } 964 + rc = efx_sriov_memcpy(efx, copy_req, count); 965 + if (rc) { 966 + if (net_ratelimit()) 967 + netif_err(efx, hw, efx->net_dev, 968 + "ERROR: Unable to notify %s of reset" 969 + ": %d\n", 
vf->pci_name, -rc); 970 + break; 971 + } 972 + } 973 + 974 + /* Reinitialise, arm and trigger evq0 */ 975 + abs_evq = abs_index(vf, 0); 976 + buftbl = EFX_BUFTBL_EVQ_BASE(vf, 0); 977 + efx_sriov_bufs(efx, buftbl, vf->evq0_addrs, vf->evq0_count); 978 + 979 + EFX_POPULATE_OWORD_3(reg, 980 + FRF_CZ_TIMER_Q_EN, 1, 981 + FRF_CZ_HOST_NOTIFY_MODE, 0, 982 + FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS); 983 + efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, abs_evq); 984 + EFX_POPULATE_OWORD_3(reg, 985 + FRF_AZ_EVQ_EN, 1, 986 + FRF_AZ_EVQ_SIZE, __ffs(vf->evq0_count), 987 + FRF_AZ_EVQ_BUF_BASE_ID, buftbl); 988 + efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL, abs_evq); 989 + EFX_POPULATE_DWORD_1(ptr, FRF_AZ_EVQ_RPTR, 0); 990 + efx_writed_table(efx, &ptr, FR_BZ_EVQ_RPTR, abs_evq); 991 + 992 + mutex_unlock(&vf->status_lock); 993 + } 994 + 995 + static void efx_sriov_reset_vf_work(struct work_struct *work) 996 + { 997 + struct efx_vf *vf = container_of(work, struct efx_vf, req); 998 + struct efx_nic *efx = vf->efx; 999 + struct efx_buffer buf; 1000 + 1001 + if (!efx_nic_alloc_buffer(efx, &buf, EFX_PAGE_SIZE)) { 1002 + efx_sriov_reset_vf(vf, &buf); 1003 + efx_nic_free_buffer(efx, &buf); 1004 + } 1005 + } 1006 + 1007 + static void efx_sriov_handle_no_channel(struct efx_nic *efx) 1008 + { 1009 + netif_err(efx, drv, efx->net_dev, 1010 + "ERROR: IOV requires MSI-X and 1 additional interrupt" 1011 + "vector. 
IOV disabled\n"); 1012 + efx->vf_count = 0; 1013 + } 1014 + 1015 + static int efx_sriov_probe_channel(struct efx_channel *channel) 1016 + { 1017 + channel->efx->vfdi_channel = channel; 1018 + return 0; 1019 + } 1020 + 1021 + static void 1022 + efx_sriov_get_channel_name(struct efx_channel *channel, char *buf, size_t len) 1023 + { 1024 + snprintf(buf, len, "%s-iov", channel->efx->name); 1025 + } 1026 + 1027 + static const struct efx_channel_type efx_sriov_channel_type = { 1028 + .handle_no_channel = efx_sriov_handle_no_channel, 1029 + .pre_probe = efx_sriov_probe_channel, 1030 + .get_name = efx_sriov_get_channel_name, 1031 + /* no copy operation; channel must not be reallocated */ 1032 + .keep_eventq = true, 1033 + }; 1034 + 1035 + void efx_sriov_probe(struct efx_nic *efx) 1036 + { 1037 + unsigned count; 1038 + 1039 + if (!max_vfs) 1040 + return; 1041 + 1042 + if (efx_sriov_cmd(efx, false, &efx->vi_scale, &count)) 1043 + return; 1044 + if (count > 0 && count > max_vfs) 1045 + count = max_vfs; 1046 + 1047 + /* efx_nic_dimension_resources() will reduce vf_count as appopriate */ 1048 + efx->vf_count = count; 1049 + 1050 + efx->extra_channel_type[EFX_EXTRA_CHANNEL_IOV] = &efx_sriov_channel_type; 1051 + } 1052 + 1053 + /* Copy the list of individual addresses into the vfdi_status.peers 1054 + * array and auxillary pages, protected by %local_lock. Drop that lock 1055 + * and then broadcast the address list to every VF. 
/* Copy the list of individual addresses into the vfdi_status.peers
 * array and auxiliary pages, protected by %local_lock. Drop that lock
 * and then broadcast the address list to every VF.
 */
static void efx_sriov_peer_work(struct work_struct *data)
{
	struct efx_nic *efx = container_of(data, struct efx_nic, peer_work);
	struct vfdi_status *vfdi_status = efx->vfdi_status.addr;
	struct efx_vf *vf;
	struct efx_local_addr *local_addr;
	struct vfdi_endpoint *peer;
	struct efx_endpoint_page *epp;
	struct list_head pages;
	unsigned int peer_space;
	unsigned int peer_count;
	unsigned int pos;

	mutex_lock(&efx->local_lock);

	/* Move the existing peer pages off %local_page_list */
	INIT_LIST_HEAD(&pages);
	list_splice_tail_init(&efx->local_page_list, &pages);

	/* Populate the VF addresses starting from entry 1 (entry 0 is
	 * the PF address)
	 */
	peer = vfdi_status->peers + 1;
	peer_space = ARRAY_SIZE(vfdi_status->peers) - 1;
	peer_count = 1;
	for (pos = 0; pos < efx->vf_count; ++pos) {
		vf = efx->vf + pos;

		mutex_lock(&vf->status_lock);
		if (vf->rx_filtering && !is_zero_ether_addr(vf->addr.mac_addr)) {
			*peer++ = vf->addr;
			++peer_count;
			--peer_space;
			/* All VF addresses must fit in the status page itself */
			BUG_ON(peer_space == 0);
		}
		mutex_unlock(&vf->status_lock);
	}

	/* Fill the remaining addresses, spilling into extra endpoint
	 * pages (reusing the old ones where possible) once the status
	 * page's peers array is full.
	 */
	list_for_each_entry(local_addr, &efx->local_addr_list, link) {
		memcpy(peer->mac_addr, local_addr->addr, ETH_ALEN);
		peer->tci = 0;
		++peer;
		++peer_count;
		if (--peer_space == 0) {
			if (list_empty(&pages)) {
				epp = kmalloc(sizeof(*epp), GFP_KERNEL);
				if (!epp)
					break;
				epp->ptr = dma_alloc_coherent(
					&efx->pci_dev->dev, EFX_PAGE_SIZE,
					&epp->addr, GFP_KERNEL);
				if (!epp->ptr) {
					kfree(epp);
					break;
				}
			} else {
				epp = list_first_entry(
					&pages, struct efx_endpoint_page, link);
				list_del(&epp->link);
			}

			list_add_tail(&epp->link, &efx->local_page_list);
			peer = (struct vfdi_endpoint *)epp->ptr;
			peer_space = EFX_PAGE_SIZE / sizeof(struct vfdi_endpoint);
		}
	}
	vfdi_status->peer_count = peer_count;
	mutex_unlock(&efx->local_lock);

	/* Free any now unused endpoint pages */
	while (!list_empty(&pages)) {
		epp = list_first_entry(
			&pages, struct efx_endpoint_page, link);
		list_del(&epp->link);
		dma_free_coherent(&efx->pci_dev->dev, EFX_PAGE_SIZE,
				  epp->ptr, epp->addr);
		kfree(epp);
	}

	/* Finally, push the pages */
	for (pos = 0; pos < efx->vf_count; ++pos) {
		vf = efx->vf + pos;

		mutex_lock(&vf->status_lock);
		if (vf->status_addr)
			__efx_sriov_push_vf_status(vf);
		mutex_unlock(&vf->status_lock);
	}
}

/* Release the local address list and all endpoint pages */
static void efx_sriov_free_local(struct efx_nic *efx)
{
	struct efx_local_addr *local_addr;
	struct efx_endpoint_page *epp;

	while (!list_empty(&efx->local_addr_list)) {
		local_addr = list_first_entry(&efx->local_addr_list,
					      struct efx_local_addr, link);
		list_del(&local_addr->link);
		kfree(local_addr);
	}

	while (!list_empty(&efx->local_page_list)) {
		epp = list_first_entry(&efx->local_page_list,
				       struct efx_endpoint_page, link);
		list_del(&epp->link);
		dma_free_coherent(&efx->pci_dev->dev, EFX_PAGE_SIZE,
				  epp->ptr, epp->addr);
		kfree(epp);
	}
}

/* Allocate and initialise the efx->vf array (one entry per VF).
 * Returns 0 or -ENOMEM.
 */
static int efx_sriov_vf_alloc(struct efx_nic *efx)
{
	unsigned index;
	struct efx_vf *vf;

	efx->vf = kzalloc(sizeof(struct efx_vf) * efx->vf_count, GFP_KERNEL);
	if (!efx->vf)
		return -ENOMEM;

	for (index = 0; index < efx->vf_count; ++index) {
		vf = efx->vf + index;

		vf->efx = efx;
		vf->index = index;
		vf->rx_filter_id = -1;	/* no RX filter installed yet */
		vf->tx_filter_mode = VF_TX_FILTER_AUTO;
		vf->tx_filter_id = -1;	/* no TX filter installed yet */
		INIT_WORK(&vf->req, efx_sriov_vfdi);
		INIT_WORK(&vf->reset_work, efx_sriov_reset_vf_work);
		init_waitqueue_head(&vf->flush_waitq);
		mutex_init(&vf->status_lock);
		mutex_init(&vf->txq_lock);
	}

	return 0;
}

/* Free per-VF buffers and peer page state (inverse of efx_sriov_vfs_init()) */
static void efx_sriov_vfs_fini(struct efx_nic *efx)
{
	struct efx_vf *vf;
	unsigned int pos;

	for (pos = 0; pos < efx->vf_count; ++pos) {
		vf = efx->vf + pos;

		efx_nic_free_buffer(efx, &vf->buf);
		kfree(vf->peer_page_addrs);
		vf->peer_page_addrs = NULL;
		vf->peer_page_count = 0;

		vf->evq0_count = 0;
	}
}

/* Assign each VF its PCI requester ID (from the SR-IOV capability's
 * VF offset/stride), a buffer-table region and a request page.
 * Returns 0 or a negative error.
 */
static int efx_sriov_vfs_init(struct efx_nic *efx)
{
	struct pci_dev *pci_dev = efx->pci_dev;
	unsigned index, devfn, sriov, buftbl_base;
	u16 offset, stride;
	struct efx_vf *vf;
	int rc;

	sriov = pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV);
	if (!sriov)
		return -ENOENT;

	pci_read_config_word(pci_dev, sriov + PCI_SRIOV_VF_OFFSET, &offset);
	pci_read_config_word(pci_dev, sriov + PCI_SRIOV_VF_STRIDE, &stride);

	buftbl_base = efx->vf_buftbl_base;
	devfn = pci_dev->devfn + offset;
	for (index = 0; index < efx->vf_count; ++index) {
		vf = efx->vf + index;

		/* Reserve buffer entries */
		vf->buftbl_base = buftbl_base;
		buftbl_base += EFX_VF_BUFTBL_PER_VI * efx_vf_size(efx);

		vf->pci_rid = devfn;
		/* NOTE(review): assumes all VFs land on the PF's bus
		 * (bus->number); devfn overflow into a higher bus is not
		 * handled here - confirm against platform topology.
		 */
		snprintf(vf->pci_name, sizeof(vf->pci_name),
			 "%04x:%02x:%02x.%d",
			 pci_domain_nr(pci_dev->bus), pci_dev->bus->number,
			 PCI_SLOT(devfn), PCI_FUNC(devfn));

		rc = efx_nic_alloc_buffer(efx, &vf->buf, EFX_PAGE_SIZE);
		if (rc)
			goto fail;

		devfn += stride;
	}

	return 0;

fail:
	efx_sriov_vfs_fini(efx);
	return rc;
}

/* Bring up SR-IOV: enable it in firmware, build the shared vfdi_status
 * page and per-VF state, then enable the VFs via pci_enable_sriov().
 * Unwinds fully on any failure.  Returns 0 or a negative error.
 */
int efx_sriov_init(struct efx_nic *efx)
{
	struct net_device *net_dev = efx->net_dev;
	struct vfdi_status *vfdi_status;
	int rc;

	/* Ensure there's room for vf_channel */
	BUILD_BUG_ON(EFX_MAX_CHANNELS + 1 >= EFX_VI_BASE);
	/* Ensure that VI_BASE is aligned on VI_SCALE */
	BUILD_BUG_ON(EFX_VI_BASE & ((1 << EFX_VI_SCALE_MAX) - 1));

	if (efx->vf_count == 0)
		return 0;

	rc = efx_sriov_cmd(efx, true, NULL, NULL);
	if (rc)
		goto fail_cmd;

	rc = efx_nic_alloc_buffer(efx, &efx->vfdi_status, sizeof(*vfdi_status));
	if (rc)
		goto fail_status;
	vfdi_status = efx->vfdi_status.addr;
	memset(vfdi_status, 0, sizeof(*vfdi_status));
	vfdi_status->version = 1;
	vfdi_status->length = sizeof(*vfdi_status);
	vfdi_status->max_tx_channels = vf_max_tx_channels;
	vfdi_status->vi_scale = efx->vi_scale;
	vfdi_status->rss_rxq_count = efx->rss_spread;
	vfdi_status->peer_count = 1 + efx->vf_count;
	vfdi_status->timer_quantum_ns = efx->timer_quantum_ns;

	rc = efx_sriov_vf_alloc(efx);
	if (rc)
		goto fail_alloc;

	mutex_init(&efx->local_lock);
	INIT_WORK(&efx->peer_work, efx_sriov_peer_work);
	INIT_LIST_HEAD(&efx->local_addr_list);
	INIT_LIST_HEAD(&efx->local_page_list);

	rc = efx_sriov_vfs_init(efx);
	if (rc)
		goto fail_vfs;

	rtnl_lock();
	memcpy(vfdi_status->peers[0].mac_addr,
	       net_dev->dev_addr, ETH_ALEN);
	efx->vf_init_count = efx->vf_count;
	rtnl_unlock();

	efx_sriov_usrev(efx, true);

	/* At this point we must be ready to accept VFDI requests */

	rc = pci_enable_sriov(efx->pci_dev, efx->vf_count);
	if (rc)
		goto fail_pci;

	netif_info(efx, probe, net_dev,
		   "enabled SR-IOV for %d VFs, %d VI per VF\n",
		   efx->vf_count, efx_vf_size(efx));
	return 0;

fail_pci:
	efx_sriov_usrev(efx, false);
	rtnl_lock();
	efx->vf_init_count = 0;
	rtnl_unlock();
	efx_sriov_vfs_fini(efx);
fail_vfs:
	cancel_work_sync(&efx->peer_work);
	efx_sriov_free_local(efx);
	kfree(efx->vf);
fail_alloc:
	efx_nic_free_buffer(efx, &efx->vfdi_status);
fail_status:
	efx_sriov_cmd(efx, false, NULL, NULL);
fail_cmd:
	return rc;
}
SR-IOV for %d VFs, %d VI per VF\n", 1318 + efx->vf_count, efx_vf_size(efx)); 1319 + return 0; 1320 + 1321 + fail_pci: 1322 + efx_sriov_usrev(efx, false); 1323 + rtnl_lock(); 1324 + efx->vf_init_count = 0; 1325 + rtnl_unlock(); 1326 + efx_sriov_vfs_fini(efx); 1327 + fail_vfs: 1328 + cancel_work_sync(&efx->peer_work); 1329 + efx_sriov_free_local(efx); 1330 + kfree(efx->vf); 1331 + fail_alloc: 1332 + efx_nic_free_buffer(efx, &efx->vfdi_status); 1333 + fail_status: 1334 + efx_sriov_cmd(efx, false, NULL, NULL); 1335 + fail_cmd: 1336 + return rc; 1337 + } 1338 + 1339 + void efx_sriov_fini(struct efx_nic *efx) 1340 + { 1341 + struct efx_vf *vf; 1342 + unsigned int pos; 1343 + 1344 + if (efx->vf_init_count == 0) 1345 + return; 1346 + 1347 + /* Disable all interfaces to reconfiguration */ 1348 + BUG_ON(efx->vfdi_channel->enabled); 1349 + efx_sriov_usrev(efx, false); 1350 + rtnl_lock(); 1351 + efx->vf_init_count = 0; 1352 + rtnl_unlock(); 1353 + 1354 + /* Flush all reconfiguration work */ 1355 + for (pos = 0; pos < efx->vf_count; ++pos) { 1356 + vf = efx->vf + pos; 1357 + cancel_work_sync(&vf->req); 1358 + cancel_work_sync(&vf->reset_work); 1359 + } 1360 + cancel_work_sync(&efx->peer_work); 1361 + 1362 + pci_disable_sriov(efx->pci_dev); 1363 + 1364 + /* Tear down back-end state */ 1365 + efx_sriov_vfs_fini(efx); 1366 + efx_sriov_free_local(efx); 1367 + kfree(efx->vf); 1368 + efx_nic_free_buffer(efx, &efx->vfdi_status); 1369 + efx_sriov_cmd(efx, false, NULL, NULL); 1370 + } 1371 + 1372 + void efx_sriov_event(struct efx_channel *channel, efx_qword_t *event) 1373 + { 1374 + struct efx_nic *efx = channel->efx; 1375 + struct efx_vf *vf; 1376 + unsigned qid, seq, type, data; 1377 + 1378 + qid = EFX_QWORD_FIELD(*event, FSF_CZ_USER_QID); 1379 + 1380 + /* USR_EV_REG_VALUE is dword0, so access the VFDI_EV fields directly */ 1381 + BUILD_BUG_ON(FSF_CZ_USER_EV_REG_VALUE_LBN != 0); 1382 + seq = EFX_QWORD_FIELD(*event, VFDI_EV_SEQ); 1383 + type = EFX_QWORD_FIELD(*event, VFDI_EV_TYPE); 
1384 + data = EFX_QWORD_FIELD(*event, VFDI_EV_DATA); 1385 + 1386 + netif_vdbg(efx, hw, efx->net_dev, 1387 + "USR_EV event from qid %d seq 0x%x type %d data 0x%x\n", 1388 + qid, seq, type, data); 1389 + 1390 + if (map_vi_index(efx, qid, &vf, NULL)) 1391 + return; 1392 + if (vf->busy) 1393 + goto error; 1394 + 1395 + if (type == VFDI_EV_TYPE_REQ_WORD0) { 1396 + /* Resynchronise */ 1397 + vf->req_type = VFDI_EV_TYPE_REQ_WORD0; 1398 + vf->req_seqno = seq + 1; 1399 + vf->req_addr = 0; 1400 + } else if (seq != (vf->req_seqno++ & 0xff) || type != vf->req_type) 1401 + goto error; 1402 + 1403 + switch (vf->req_type) { 1404 + case VFDI_EV_TYPE_REQ_WORD0: 1405 + case VFDI_EV_TYPE_REQ_WORD1: 1406 + case VFDI_EV_TYPE_REQ_WORD2: 1407 + vf->req_addr |= (u64)data << (vf->req_type << 4); 1408 + ++vf->req_type; 1409 + return; 1410 + 1411 + case VFDI_EV_TYPE_REQ_WORD3: 1412 + vf->req_addr |= (u64)data << 48; 1413 + vf->req_type = VFDI_EV_TYPE_REQ_WORD0; 1414 + vf->busy = true; 1415 + queue_work(vfdi_workqueue, &vf->req); 1416 + return; 1417 + } 1418 + 1419 + error: 1420 + if (net_ratelimit()) 1421 + netif_err(efx, hw, efx->net_dev, 1422 + "ERROR: Screaming VFDI request from %s\n", 1423 + vf->pci_name); 1424 + /* Reset the request and sequence number */ 1425 + vf->req_type = VFDI_EV_TYPE_REQ_WORD0; 1426 + vf->req_seqno = seq + 1; 1427 + } 1428 + 1429 + void efx_sriov_flr(struct efx_nic *efx, unsigned vf_i) 1430 + { 1431 + struct efx_vf *vf; 1432 + 1433 + if (vf_i > efx->vf_init_count) 1434 + return; 1435 + vf = efx->vf + vf_i; 1436 + netif_info(efx, hw, efx->net_dev, 1437 + "FLR on VF %s\n", vf->pci_name); 1438 + 1439 + vf->status_addr = 0; 1440 + efx_vfdi_remove_all_filters(vf); 1441 + efx_vfdi_flush_clear(vf); 1442 + 1443 + vf->evq0_count = 0; 1444 + } 1445 + 1446 + void efx_sriov_mac_address_changed(struct efx_nic *efx) 1447 + { 1448 + struct vfdi_status *vfdi_status = efx->vfdi_status.addr; 1449 + 1450 + if (!efx->vf_init_count) 1451 + return; 1452 + 
memcpy(vfdi_status->peers[0].mac_addr, 1453 + efx->net_dev->dev_addr, ETH_ALEN); 1454 + queue_work(vfdi_workqueue, &efx->peer_work); 1455 + } 1456 + 1457 + void efx_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event) 1458 + { 1459 + struct efx_vf *vf; 1460 + unsigned queue, qid; 1461 + 1462 + queue = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA); 1463 + if (map_vi_index(efx, queue, &vf, &qid)) 1464 + return; 1465 + /* Ignore flush completions triggered by an FLR */ 1466 + if (!test_bit(qid, vf->txq_mask)) 1467 + return; 1468 + 1469 + __clear_bit(qid, vf->txq_mask); 1470 + --vf->txq_count; 1471 + 1472 + if (efx_vfdi_flush_wake(vf)) 1473 + wake_up(&vf->flush_waitq); 1474 + } 1475 + 1476 + void efx_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event) 1477 + { 1478 + struct efx_vf *vf; 1479 + unsigned ev_failed, queue, qid; 1480 + 1481 + queue = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_DESCQ_ID); 1482 + ev_failed = EFX_QWORD_FIELD(*event, 1483 + FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL); 1484 + if (map_vi_index(efx, queue, &vf, &qid)) 1485 + return; 1486 + if (!test_bit(qid, vf->rxq_mask)) 1487 + return; 1488 + 1489 + if (ev_failed) { 1490 + set_bit(qid, vf->rxq_retry_mask); 1491 + atomic_inc(&vf->rxq_retry_count); 1492 + } else { 1493 + __clear_bit(qid, vf->rxq_mask); 1494 + --vf->rxq_count; 1495 + } 1496 + if (efx_vfdi_flush_wake(vf)) 1497 + wake_up(&vf->flush_waitq); 1498 + } 1499 + 1500 + /* Called from napi. 
Schedule the reset work item */ 1501 + void efx_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq) 1502 + { 1503 + struct efx_vf *vf; 1504 + unsigned int rel; 1505 + 1506 + if (map_vi_index(efx, dmaq, &vf, &rel)) 1507 + return; 1508 + 1509 + if (net_ratelimit()) 1510 + netif_err(efx, hw, efx->net_dev, 1511 + "VF %d DMA Q %d reports descriptor fetch error.\n", 1512 + vf->index, rel); 1513 + queue_work(vfdi_workqueue, &vf->reset_work); 1514 + } 1515 + 1516 + /* Reset all VFs */ 1517 + void efx_sriov_reset(struct efx_nic *efx) 1518 + { 1519 + unsigned int vf_i; 1520 + struct efx_buffer buf; 1521 + struct efx_vf *vf; 1522 + 1523 + ASSERT_RTNL(); 1524 + 1525 + if (efx->vf_init_count == 0) 1526 + return; 1527 + 1528 + efx_sriov_usrev(efx, true); 1529 + (void)efx_sriov_cmd(efx, true, NULL, NULL); 1530 + 1531 + if (efx_nic_alloc_buffer(efx, &buf, EFX_PAGE_SIZE)) 1532 + return; 1533 + 1534 + for (vf_i = 0; vf_i < efx->vf_init_count; ++vf_i) { 1535 + vf = efx->vf + vf_i; 1536 + efx_sriov_reset_vf(vf, &buf); 1537 + } 1538 + 1539 + efx_nic_free_buffer(efx, &buf); 1540 + } 1541 + 1542 + int efx_init_sriov(void) 1543 + { 1544 + /* A single threaded workqueue is sufficient. 
efx_sriov_vfdi() and 1545 + * efx_sriov_peer_work() spend almost all their time sleeping for 1546 + * MCDI to complete anyway 1547 + */ 1548 + vfdi_workqueue = create_singlethread_workqueue("sfc_vfdi"); 1549 + if (!vfdi_workqueue) 1550 + return -ENOMEM; 1551 + 1552 + return 0; 1553 + } 1554 + 1555 + void efx_fini_sriov(void) 1556 + { 1557 + destroy_workqueue(vfdi_workqueue); 1558 + } 1559 + 1560 + int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac) 1561 + { 1562 + struct efx_nic *efx = netdev_priv(net_dev); 1563 + struct efx_vf *vf; 1564 + 1565 + if (vf_i >= efx->vf_init_count) 1566 + return -EINVAL; 1567 + vf = efx->vf + vf_i; 1568 + 1569 + mutex_lock(&vf->status_lock); 1570 + memcpy(vf->addr.mac_addr, mac, ETH_ALEN); 1571 + __efx_sriov_update_vf_addr(vf); 1572 + mutex_unlock(&vf->status_lock); 1573 + 1574 + return 0; 1575 + } 1576 + 1577 + int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, 1578 + u16 vlan, u8 qos) 1579 + { 1580 + struct efx_nic *efx = netdev_priv(net_dev); 1581 + struct efx_vf *vf; 1582 + u16 tci; 1583 + 1584 + if (vf_i >= efx->vf_init_count) 1585 + return -EINVAL; 1586 + vf = efx->vf + vf_i; 1587 + 1588 + mutex_lock(&vf->status_lock); 1589 + tci = (vlan & VLAN_VID_MASK) | ((qos & 0x7) << VLAN_PRIO_SHIFT); 1590 + vf->addr.tci = htons(tci); 1591 + __efx_sriov_update_vf_addr(vf); 1592 + mutex_unlock(&vf->status_lock); 1593 + 1594 + return 0; 1595 + } 1596 + 1597 + int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i, 1598 + bool spoofchk) 1599 + { 1600 + struct efx_nic *efx = netdev_priv(net_dev); 1601 + struct efx_vf *vf; 1602 + int rc; 1603 + 1604 + if (vf_i >= efx->vf_init_count) 1605 + return -EINVAL; 1606 + vf = efx->vf + vf_i; 1607 + 1608 + mutex_lock(&vf->txq_lock); 1609 + if (vf->txq_count == 0) { 1610 + vf->tx_filter_mode = 1611 + spoofchk ? 
VF_TX_FILTER_ON : VF_TX_FILTER_OFF; 1612 + rc = 0; 1613 + } else { 1614 + /* This cannot be changed while TX queues are running */ 1615 + rc = -EBUSY; 1616 + } 1617 + mutex_unlock(&vf->txq_lock); 1618 + return rc; 1619 + } 1620 + 1621 + int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i, 1622 + struct ifla_vf_info *ivi) 1623 + { 1624 + struct efx_nic *efx = netdev_priv(net_dev); 1625 + struct efx_vf *vf; 1626 + u16 tci; 1627 + 1628 + if (vf_i >= efx->vf_init_count) 1629 + return -EINVAL; 1630 + vf = efx->vf + vf_i; 1631 + 1632 + ivi->vf = vf_i; 1633 + memcpy(ivi->mac, vf->addr.mac_addr, ETH_ALEN); 1634 + ivi->tx_rate = 0; 1635 + tci = ntohs(vf->addr.tci); 1636 + ivi->vlan = tci & VLAN_VID_MASK; 1637 + ivi->qos = (tci >> VLAN_PRIO_SHIFT) & 0x7; 1638 + ivi->spoofchk = vf->tx_filter_mode == VF_TX_FILTER_ON; 1639 + 1640 + return 0; 1641 + } 1642 +
+1 -1
drivers/net/ethernet/sfc/tx.c
··· 110 110 * little benefit from using descriptors that cross those 111 111 * boundaries and we keep things simple by not doing so. 112 112 */ 113 - unsigned len = (~dma_addr & 0xfff) + 1; 113 + unsigned len = (~dma_addr & (EFX_PAGE_SIZE - 1)) + 1; 114 114 115 115 /* Work around hardware bug for unaligned buffers. */ 116 116 if (EFX_WORKAROUND_5391(efx) && (dma_addr & 0xf))
+254
drivers/net/ethernet/sfc/vfdi.h
/****************************************************************************
 * Driver for Solarflare Solarstorm network controllers and boards
 * Copyright 2010-2012 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */
#ifndef _VFDI_H
#define _VFDI_H

/**
 * DOC: Virtual Function Driver Interface
 *
 * This file contains software structures used to form a two way
 * communication channel between the VF driver and the PF driver,
 * named Virtual Function Driver Interface (VFDI).
 *
 * For the purposes of VFDI, a page is a memory region with size and
 * alignment of 4K.  All addresses are DMA addresses to be used within
 * the domain of the relevant VF.
 *
 * The only hardware-defined channels for a VF driver to communicate
 * with the PF driver are the event mailboxes (%FR_CZ_USR_EV
 * registers).  Writing to these registers generates an event with
 * EV_CODE = EV_CODE_USR_EV, USER_QID set to the index of the mailbox
 * and USER_EV_REG_VALUE set to the value written.  The PF driver may
 * direct or disable delivery of these events by setting
 * %FR_CZ_USR_EV_CFG.
 *
 * The PF driver can send arbitrary events to arbitrary event queues.
 * However, for consistency, VFDI events from the PF are defined to
 * follow the same form and be sent to the first event queue assigned
 * to the VF while that queue is enabled by the VF driver.
 *
 * The general form of the variable bits of VFDI events is:
 *
 *   0           16          24         31
 *   | DATA      | TYPE      | SEQ      |
 *
 * SEQ is a sequence number which should be incremented by 1 (modulo
 * 256) for each event.  The sequence numbers used in each direction
 * are independent.
 *
 * The VF submits requests of type &struct vfdi_req by sending the
 * address of the request (ADDR) in a series of 4 events:
 *
 *   0           16                    24   31
 *   | ADDR[0:15]  | VFDI_EV_TYPE_REQ_WORD0 | SEQ   |
 *   | ADDR[16:31] | VFDI_EV_TYPE_REQ_WORD1 | SEQ+1 |
 *   | ADDR[32:47] | VFDI_EV_TYPE_REQ_WORD2 | SEQ+2 |
 *   | ADDR[48:63] | VFDI_EV_TYPE_REQ_WORD3 | SEQ+3 |
 *
 * The address must be page-aligned.  After receiving such a valid
 * series of events, the PF driver will attempt to read the request
 * and write a response to the same address.  In case of an invalid
 * sequence of events or a DMA error, there will be no response.
 *
 * The VF driver may request that the PF driver writes status
 * information into its domain asynchronously.  After writing the
 * status, the PF driver will send an event of the form:
 *
 *   0           16                    24   31
 *   | reserved    | VFDI_EV_TYPE_STATUS    | SEQ   |
 *
 * In case the VF must be reset for any reason, the PF driver will
 * send an event of the form:
 *
 *   0           16                    24   31
 *   | reserved    | VFDI_EV_TYPE_RESET     | SEQ   |
 *
 * It is then the responsibility of the VF driver to request
 * reinitialisation of its queues.
 */
#define VFDI_EV_SEQ_LBN 24
#define VFDI_EV_SEQ_WIDTH 8
#define VFDI_EV_TYPE_LBN 16
#define VFDI_EV_TYPE_WIDTH 8
#define VFDI_EV_TYPE_REQ_WORD0 0
#define VFDI_EV_TYPE_REQ_WORD1 1
#define VFDI_EV_TYPE_REQ_WORD2 2
#define VFDI_EV_TYPE_REQ_WORD3 3
#define VFDI_EV_TYPE_STATUS 4
#define VFDI_EV_TYPE_RESET 5
#define VFDI_EV_DATA_LBN 0
#define VFDI_EV_DATA_WIDTH 16

/* A peer (PF or VF) address: MAC plus outer VLAN tag (network order). */
struct vfdi_endpoint {
	u8 mac_addr[ETH_ALEN];
	__be16 tci;
};

/**
 * enum vfdi_op - VFDI operation enumeration
 * @VFDI_OP_RESPONSE: Indicates a response to the request.
 * @VFDI_OP_INIT_EVQ: Initialize SRAM entries and initialize an EVQ.
 * @VFDI_OP_INIT_RXQ: Initialize SRAM entries and initialize an RXQ.
 * @VFDI_OP_INIT_TXQ: Initialize SRAM entries and initialize a TXQ.
 * @VFDI_OP_FINI_ALL_QUEUES: Flush all queues, finalize all queues, then
 *	finalize the SRAM entries.
 * @VFDI_OP_INSERT_FILTER: Insert a MAC filter targeting the given RXQ.
 * @VFDI_OP_REMOVE_ALL_FILTERS: Remove all filters.
 * @VFDI_OP_SET_STATUS_PAGE: Set the DMA page(s) used for status updates
 *	from PF and write the initial status.
 * @VFDI_OP_CLEAR_STATUS_PAGE: Clear the DMA page(s) used for status
 *	updates from PF.
 */
enum vfdi_op {
	VFDI_OP_RESPONSE = 0,
	VFDI_OP_INIT_EVQ = 1,
	VFDI_OP_INIT_RXQ = 2,
	VFDI_OP_INIT_TXQ = 3,
	VFDI_OP_FINI_ALL_QUEUES = 4,
	VFDI_OP_INSERT_FILTER = 5,
	VFDI_OP_REMOVE_ALL_FILTERS = 6,
	VFDI_OP_SET_STATUS_PAGE = 7,
	VFDI_OP_CLEAR_STATUS_PAGE = 8,
	VFDI_OP_LIMIT,
};

/* Response codes for VFDI operations. Other values may be used in future. */
#define VFDI_RC_SUCCESS 0
#define VFDI_RC_ENOMEM (-12)
#define VFDI_RC_EINVAL (-22)
#define VFDI_RC_EOPNOTSUPP (-95)
#define VFDI_RC_ETIMEDOUT (-110)

/**
 * struct vfdi_req - Request from VF driver to PF driver
 * @op: Operation code or response indicator, taken from &enum vfdi_op.
 * @rc: Response code.  Set to 0 on success or a negative error code on failure.
 * @u.init_evq.index: Index of event queue to create.
 * @u.init_evq.buf_count: Number of 4k buffers backing event queue.
 * @u.init_evq.addr: Array of length %u.init_evq.buf_count containing DMA
 *	address of each page backing the event queue.
 * @u.init_rxq.index: Index of receive queue to create.
 * @u.init_rxq.buf_count: Number of 4k buffers backing receive queue.
 * @u.init_rxq.evq: Instance of event queue to target receive events at.
 * @u.init_rxq.label: Label used in receive events.
 * @u.init_rxq.flags: Unused.
 * @u.init_rxq.addr: Array of length %u.init_rxq.buf_count containing DMA
 *	address of each page backing the receive queue.
 * @u.init_txq.index: Index of transmit queue to create.
 * @u.init_txq.buf_count: Number of 4k buffers backing transmit queue.
 * @u.init_txq.evq: Instance of event queue to target transmit completion
 *	events at.
 * @u.init_txq.label: Label used in transmit completion events.
 * @u.init_txq.flags: Checksum offload flags.
 * @u.init_txq.addr: Array of length %u.init_txq.buf_count containing DMA
 *	address of each page backing the transmit queue.
 * @u.mac_filter.rxq: Insert MAC filter at VF local address/VLAN targeting
 *	all traffic at this receive queue.
 * @u.mac_filter.flags: MAC filter flags.
 * @u.set_status_page.dma_addr: Base address for the &struct vfdi_status.
 *	This address must be such that the structure fits within a page.
 * @u.set_status_page.peer_page_count: Number of additional pages the VF
 *	has provided into which peer addresses may be DMAd.
 * @u.set_status_page.peer_page_addr: Array of DMA addresses of pages.
 *	If the number of peers exceeds 256, then the VF must provide
 *	additional pages in this array.  The PF will then DMA up to
 *	512 vfdi_endpoint structures into each page.  These addresses
 *	must be page-aligned.
 */
struct vfdi_req {
	u32 op;
	u32 reserved1;
	s32 rc;
	u32 reserved2;
	union {
		struct {
			u32 index;
			u32 buf_count;
			u64 addr[];
		} init_evq;
		struct {
			u32 index;
			u32 buf_count;
			u32 evq;
			u32 label;
			u32 flags;
#define VFDI_RXQ_FLAG_SCATTER_EN 1
			u32 reserved;
			u64 addr[];
		} init_rxq;
		struct {
			u32 index;
			u32 buf_count;
			u32 evq;
			u32 label;
			u32 flags;
#define VFDI_TXQ_FLAG_IP_CSUM_DIS 1
#define VFDI_TXQ_FLAG_TCPUDP_CSUM_DIS 2
			u32 reserved;
			u64 addr[];
		} init_txq;
		struct {
			u32 rxq;
			u32 flags;
#define VFDI_MAC_FILTER_FLAG_RSS 1
#define VFDI_MAC_FILTER_FLAG_SCATTER 2
		} mac_filter;
		struct {
			u64 dma_addr;
			u64 peer_page_count;
			u64 peer_page_addr[];
		} set_status_page;
	} u;
};

/**
 * struct vfdi_status - Status provided by PF driver to VF driver
 * @generation_start: A generation count DMA'd to VF *before* the
 *	rest of the structure.
 * @generation_end: A generation count DMA'd to VF *after* the
 *	rest of the structure.
 * @version: Version of this structure; currently set to 1.  Later
 *	versions must either be layout-compatible or only be sent to VFs
 *	that specifically request them.
 * @length: Total length of this structure including embedded tables
 * @vi_scale: log2 the number of VIs available on this VF.  This quantity
 *	is used by the hardware for register decoding.
 * @max_tx_channels: The maximum number of transmit queues the VF can use.
 * @rss_rxq_count: The number of receive queues present in the shared RSS
 *	indirection table.
 * @peer_count: Total number of peers in the complete peer list.  If larger
 *	than ARRAY_SIZE(%peers), then the VF must provide sufficient
 *	additional pages each of which is filled with vfdi_endpoint structures.
 * @local: The MAC address and outer VLAN tag of *this* VF
 * @peers: Table of peer addresses.  The @tci fields in these structures
 *	are currently unused and must be ignored.  Additional peers are
 *	written into any additional pages provided by the VF.
 * @timer_quantum_ns: Timer quantum (nominal period between timer ticks)
 *	for interrupt moderation timers, in nanoseconds.  This member is only
 *	present if @length is sufficiently large.
 */
struct vfdi_status {
	u32 generation_start;
	u32 generation_end;
	u32 version;
	u32 length;
	u8 vi_scale;
	u8 max_tx_channels;
	u8 rss_rxq_count;
	u8 reserved1;
	u16 peer_count;
	u16 reserved2;
	struct vfdi_endpoint local;
	struct vfdi_endpoint peers[256];

	/* Members below here extend version 1 of this structure */
	u32 timer_quantum_ns;
};

#endif