
Merge branch 'hns3-VF-reset'

Salil Mehta says:

====================
Add support of VF Reset to HNS3 VF driver

This patch set adds support for VF reset to the existing VF driver.
A VF reset can be triggered by the TX watchdog firing when the TX
data path stops working. It can also be the result of an internal
configuration change that requires a reset, or of a PF/Core/Global/IMP
(Integrated Management Processor) reset happening in the PF.

Summary of Patches:
* Watchdog timer trigger changes are present in Patch 1.
* Reset service task and related event handling are present in Patches {2,3}.
* Changes to send the reset request to the PF, reset the stack and
re-initialize the hclge device are present in Patches {4,5,6}.
* Changes related to the ARQ (Asynchronous Receive Queue) and its event
handling are present in Patches {7,8}.
* Changes required in the PF to handle the VF reset request and actually
perform the hardware VF reset are present in Patch 9.

NOTE: This patch depends upon "[PATCH net-next 00/11] fix some bugs for HNS3 driver"
Link: https://lkml.org/lkml/2018/3/21/72
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+534 -71
+16
drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
···
 
 enum HCLGE_MBX_OPCODE {
     HCLGE_MBX_RESET = 0x01,         /* (VF -> PF) assert reset */
+    HCLGE_MBX_ASSERTING_RESET,      /* (PF -> VF) PF is asserting reset*/
     HCLGE_MBX_SET_UNICAST,          /* (VF -> PF) set UC addr */
     HCLGE_MBX_SET_MULTICAST,        /* (VF -> PF) set MC addr */
     HCLGE_MBX_SET_VLAN,             /* (VF -> PF) set VLAN */
···
     u16 msg[8];
 };
 
+/* used by VF to store the received Async responses from PF */
+struct hclgevf_mbx_arq_ring {
+#define HCLGE_MBX_MAX_ARQ_MSG_SIZE  8
+#define HCLGE_MBX_MAX_ARQ_MSG_NUM   1024
+    struct hclgevf_dev *hdev;
+    u32 head;
+    u32 tail;
+    u32 count;
+    u16 msg_q[HCLGE_MBX_MAX_ARQ_MSG_NUM][HCLGE_MBX_MAX_ARQ_MSG_SIZE];
+};
+
 #define hclge_mbx_ring_ptr_move_crq(crq) \
     (crq->next_to_use = (crq->next_to_use + 1) % crq->desc_num)
+#define hclge_mbx_tail_ptr_move_arq(arq) \
+    (arq.tail = (arq.tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_SIZE)
+#define hclge_mbx_head_ptr_move_arq(arq) \
+    (arq.head = (arq.head + 1) % HCLGE_MBX_MAX_ARQ_MSG_SIZE)
 #endif
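As a minimal standalone model of the ring arithmetic above (hypothetical
arq_push()/arq_pop() helpers, not driver code). Note that the committed
hclge_mbx_tail/head_ptr_move_arq macros wrap the indices modulo
HCLGE_MBX_MAX_ARQ_MSG_SIZE rather than HCLGE_MBX_MAX_ARQ_MSG_NUM; the model
below wraps at the ring depth, which is what the count bookkeeping assumes:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define ARQ_MSG_SIZE 8     /* u16 words per slot, cf. HCLGE_MBX_MAX_ARQ_MSG_SIZE */
#define ARQ_MSG_NUM  1024  /* ring depth, cf. HCLGE_MBX_MAX_ARQ_MSG_NUM */

struct arq_model {
    uint32_t head;   /* consumer index, advanced by the mailbox task */
    uint32_t tail;   /* producer index, advanced from IRQ context */
    uint32_t count;  /* occupancy, used to detect a full ring */
    uint16_t msg_q[ARQ_MSG_NUM][ARQ_MSG_SIZE];
};

/* producer side: mirrors the "drop if full, else tail the message" logic */
static bool arq_push(struct arq_model *q, const uint16_t msg[ARQ_MSG_SIZE])
{
    if (q->count >= ARQ_MSG_NUM)
        return false;                      /* Async Q full: drop the message */
    memcpy(q->msg_q[q->tail], msg, sizeof(q->msg_q[0]));
    q->tail = (q->tail + 1) % ARQ_MSG_NUM; /* wrap at the ring depth */
    q->count++;
    return true;
}

/* consumer side: mirrors the while (tail != head) drain loop */
static bool arq_pop(struct arq_model *q, uint16_t msg[ARQ_MSG_SIZE])
{
    if (q->head == q->tail)
        return false;                      /* ring empty */
    memcpy(msg, q->msg_q[q->head], sizeof(q->msg_q[0]));
    q->head = (q->head + 1) % ARQ_MSG_NUM;
    q->count--;
    return true;
}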
+6 -2
drivers/net/ethernet/hisilicon/hns3/hnae3.h
···
 };
 
 enum hnae3_reset_type {
+    HNAE3_VF_RESET,
+    HNAE3_VF_FULL_RESET,
     HNAE3_FUNC_RESET,
     HNAE3_CORE_RESET,
     HNAE3_GLOBAL_RESET,
···
     int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid,
                               u16 vlan, u8 qos, __be16 proto);
     int (*enable_hw_strip_rxvtag)(struct hnae3_handle *handle, bool enable);
-    void (*reset_event)(struct hnae3_handle *handle,
-                        enum hnae3_reset_type reset);
+    void (*reset_event)(struct hnae3_handle *handle);
     void (*get_channels)(struct hnae3_handle *handle,
                          struct ethtool_channels *ch);
     void (*get_tqps_and_rss_info)(struct hnae3_handle *h,
···
     void *priv;
     struct hnae3_ae_algo *ae_algo; /* the class who provides this handle */
     u64 flags; /* Indicate the capabilities for this handle*/
+
+    unsigned long last_reset_time;
+    enum hnae3_reset_type reset_level;
 
     union {
         struct net_device *netdev; /* first member */
+7 -23
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
···
         return ret;
     }
 
-    priv->last_reset_time = jiffies;
+    priv->ae_handle->last_reset_time = jiffies;
     return 0;
 }
···
 static void hns3_nic_net_timeout(struct net_device *ndev)
 {
     struct hns3_nic_priv *priv = netdev_priv(ndev);
-    unsigned long last_reset_time = priv->last_reset_time;
     struct hnae3_handle *h = priv->ae_handle;
 
     if (!hns3_get_tx_timeo_queue_info(ndev))
···
     priv->tx_timeout_count++;
 
-    /* This timeout is far away enough from last timeout,
-     * if timeout again,set the reset type to PF reset
-     */
-    if (time_after(jiffies, (last_reset_time + 20 * HZ)))
-        priv->reset_level = HNAE3_FUNC_RESET;
-
-    /* Don't do any new action before the next timeout */
-    else if (time_before(jiffies, (last_reset_time + ndev->watchdog_timeo)))
+    if (time_before(jiffies, (h->last_reset_time + ndev->watchdog_timeo)))
         return;
-
-    priv->last_reset_time = jiffies;
-
+    /* request the reset */
     if (h->ae_algo->ops->reset_event)
-        h->ae_algo->ops->reset_event(h, priv->reset_level);
-
-    priv->reset_level++;
-    if (priv->reset_level > HNAE3_GLOBAL_RESET)
-        priv->reset_level = HNAE3_GLOBAL_RESET;
+        h->ae_algo->ops->reset_event(h);
 }
 
 static const struct net_device_ops hns3_nic_netdev_ops = {
···
     priv->dev = &pdev->dev;
     priv->netdev = netdev;
     priv->ae_handle = handle;
-    priv->last_reset_time = jiffies;
-    priv->reset_level = HNAE3_FUNC_RESET;
+    priv->ae_handle->reset_level = HNAE3_NONE_RESET;
+    priv->ae_handle->last_reset_time = jiffies;
     priv->tx_timeout_count = 0;
 
     handle->kinfo.netdev = netdev;
···
 static int hns3_reset_notify_up_enet(struct hnae3_handle *handle)
 {
     struct hnae3_knic_private_info *kinfo = &handle->kinfo;
-    struct hns3_nic_priv *priv = netdev_priv(kinfo->netdev);
     int ret = 0;
 
     if (netif_running(kinfo->netdev)) {
···
                        "hns net up fail, ret=%d!\n", ret);
             return ret;
         }
-
-        priv->last_reset_time = jiffies;
+        handle->last_reset_time = jiffies;
     }
 
     return ret;
···
     struct hns3_nic_priv *priv = netdev_priv(netdev);
     int ret;
 
-    priv->reset_level = 1;
     hns3_init_mac_addr(netdev);
     hns3_nic_set_rx_mode(netdev);
     hns3_recover_hw_addr(netdev);
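The enet-side change reduces hns3_nic_net_timeout() to a pure rate limiter;
all escalation policy now lives behind the reset_event() callback. A minimal
sketch of the guard, assuming a hypothetical request_reset() hook in place of
h->ae_algo->ops->reset_event:

#include <linux/jiffies.h>
#include <linux/netdevice.h>

/* hypothetical helper: ask for at most one reset per watchdog period */
static void tx_timeout_request_reset(struct net_device *ndev,
                                     unsigned long last_reset_time,
                                     void (*request_reset)(struct net_device *))
{
    /* a reset requested less than watchdog_timeo ago is still in
     * flight; let it finish instead of piling up requests
     */
    if (time_before(jiffies, last_reset_time + ndev->watchdog_timeo))
        return;

    if (request_reset)
        request_reset(ndev);
}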
-2
drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
···
     /* The most recently read link state */
     int link;
     u64 tx_timeout_count;
-    enum hnae3_reset_type reset_level;
-    unsigned long last_reset_time;
 
     unsigned long state;
 
+21 -17
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
···
     return 0;
 }
 
-static int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id)
+int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id)
 {
     struct hclge_desc desc;
     struct hclge_reset_cmd *req = (struct hclge_reset_cmd *)desc.data;
···
     hclge_notify_client(hdev, HNAE3_UP_CLIENT);
 }
 
-static void hclge_reset_event(struct hnae3_handle *handle,
-                              enum hnae3_reset_type reset)
+static void hclge_reset_event(struct hnae3_handle *handle)
 {
     struct hclge_vport *vport = hclge_get_vport(handle);
     struct hclge_dev *hdev = vport->back;
 
-    dev_info(&hdev->pdev->dev,
-             "Receive reset event , reset_type is %d", reset);
+    /* check if this is a new reset request and we are not here just because
+     * last reset attempt did not succeed and watchdog hit us again. We will
+     * know this if last reset request did not occur very recently (watchdog
+     * timer = 5*HZ, let us check after sufficiently large time, say 4*5*Hz)
+     * In case of new request we reset the "reset level" to PF reset.
+     */
+    if (time_after(jiffies, (handle->last_reset_time + 4 * 5 * HZ)))
+        handle->reset_level = HNAE3_FUNC_RESET;
 
-    switch (reset) {
-    case HNAE3_FUNC_RESET:
-    case HNAE3_CORE_RESET:
-    case HNAE3_GLOBAL_RESET:
-        /* request reset & schedule reset task */
-        set_bit(reset, &hdev->reset_request);
-        hclge_reset_task_schedule(hdev);
-        break;
-    default:
-        dev_warn(&hdev->pdev->dev, "Unsupported reset event:%d", reset);
-        break;
-    }
+    dev_info(&hdev->pdev->dev, "received reset event , reset type is %d",
+             handle->reset_level);
+
+    /* request reset & schedule reset task */
+    set_bit(handle->reset_level, &hdev->reset_request);
+    hclge_reset_task_schedule(hdev);
+
+    if (handle->reset_level < HNAE3_GLOBAL_RESET)
+        handle->reset_level++;
+
+    handle->last_reset_time = jiffies;
 }
 
 static void hclge_reset_subtask(struct hclge_dev *hdev)
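The escalation in hclge_reset_event() works in two parts: a request arriving
more than four watchdog periods (4 * 5 * HZ) after the previous one is treated
as fresh and restarts at PF (FUNC) level; otherwise the stored level, which was
pre-bumped after the previous request, is used as-is, so repeated failures step
up FUNC -> CORE -> GLOBAL. A simplified sketch of that order of operations
(plain arithmetic in place of the jiffies-wrap-safe time_after()):

enum level { FUNC, CORE, GLOBAL };  /* mirrors hnae3_reset_type ordering */

/* returns the value kept in handle->reset_level for the next event;
 * *use_now receives the level requested right now
 */
static enum level on_reset_event(enum level stored, unsigned long now,
                                 unsigned long last_req, unsigned long hz,
                                 enum level *use_now)
{
    if (now - last_req > 4 * 5 * hz)
        stored = FUNC;      /* stale history: fresh request, restart at PF */

    *use_now = stored;      /* level requested for this event */

    if (stored < GLOBAL)    /* pre-escalate for a possible repeat */
        stored++;

    return stored;
}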
+1
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
···
 void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);
 void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id);
 int hclge_cfg_flowctrl(struct hclge_dev *hdev);
+int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id);
 #endif
+42
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
···
     return status;
 }
 
+int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport)
+{
+    u8 msg_data[2];
+    u8 dest_vfid;
+
+    dest_vfid = (u8)vport->vport_id;
+
+    /* send this requested info to VF */
+    return hclge_send_mbx_msg(vport, msg_data, sizeof(u8),
+                              HCLGE_MBX_ASSERTING_RESET, dest_vfid);
+}
+
 static void hclge_free_vector_ring_chain(struct hnae3_ring_chain_node *head)
 {
     struct hnae3_ring_chain_node *chain_tmp, *chain;
···
     hclge_gen_resp_to_vf(vport, mbx_req, 0, NULL, 0);
 }
 
+static void hclge_reset_vf(struct hclge_vport *vport,
+                           struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+    struct hclge_dev *hdev = vport->back;
+    int ret;
+
+    dev_warn(&hdev->pdev->dev, "PF received VF reset request from VF %d!",
+             mbx_req->mbx_src_vfid);
+
+    /* Acknowledge VF that PF is now about to assert the reset for the VF.
+     * On receiving this message VF will get into pending state and will
+     * start polling for the hardware reset completion status.
+     */
+    ret = hclge_inform_reset_assert_to_vf(vport);
+    if (ret) {
+        dev_err(&hdev->pdev->dev,
+                "PF fail(%d) to inform VF(%d)of reset, reset failed!\n",
+                ret, vport->vport_id);
+        return;
+    }
+
+    dev_warn(&hdev->pdev->dev, "PF is now resetting VF %d.\n",
+             mbx_req->mbx_src_vfid);
+    /* reset this virtual function */
+    hclge_func_reset_cmd(hdev, mbx_req->mbx_src_vfid);
+}
+
 void hclge_mbx_handler(struct hclge_dev *hdev)
 {
     struct hclge_cmq_ring *crq = &hdev->hw.cmq.crq;
···
             break;
         case HCLGE_MBX_QUEUE_RESET:
             hclge_mbx_reset_vf_queue(vport, req);
             break;
+        case HCLGE_MBX_RESET:
+            hclge_reset_vf(vport, req);
+            break;
         default:
             dev_err(&hdev->pdev->dev,
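On the PF side the handler enforces a strict ordering: the VF must be told a
reset is coming (so it enters its polling state) before the function reset is
actually asserted, and if the ack cannot be delivered the reset is abandoned.
A condensed view of hclge_reset_vf() above, using the same calls:

static void pf_handle_vf_reset(struct hclge_vport *vport, u8 vfid)
{
    /* step 1: ack the VF; it then starts polling HCLGEVF_FUN_RST_ING */
    if (hclge_inform_reset_assert_to_vf(vport))
        return;  /* VF unreachable: do not reset it behind its back */

    /* step 2: only now assert the function-level reset in hardware */
    hclge_func_reset_cmd(vport->back, vfid);
}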
+6
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
···
         goto err_csq;
     }
 
+    /* initialize the pointers of async rx queue of mailbox */
+    hdev->arq.hdev = hdev;
+    hdev->arq.head = 0;
+    hdev->arq.tail = 0;
+    hdev->arq.count = 0;
+
     /* get firmware version */
     ret = hclgevf_cmd_query_firmware_version(&hdev->hw, &version);
     if (ret) {
+318 -18
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
···
 // Copyright (c) 2016-2017 Hisilicon Limited.
 
 #include <linux/etherdevice.h>
+#include <net/rtnetlink.h>
 #include "hclgevf_cmd.h"
 #include "hclgevf_main.h"
 #include "hclge_mbx.h"
···
 #define HCLGEVF_NAME	"hclgevf"
 
+static int hclgevf_init_hdev(struct hclgevf_dev *hdev);
+static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev);
 static struct hnae3_ae_algo ae_algovf;
 
 static const struct pci_device_id ae_algovf_pci_tbl[] = {
···
     struct hclgevf_tqp *tqp;
     int i;
 
+    /* if this is on going reset then we need to re-allocate the TPQs
+     * since we cannot assume we would get same number of TPQs back from PF
+     */
+    if (hclgevf_dev_ongoing_reset(hdev))
+        devm_kfree(&hdev->pdev->dev, hdev->htqp);
+
     hdev->htqp = devm_kcalloc(&hdev->pdev->dev, hdev->num_tqps,
                               sizeof(struct hclgevf_tqp), GFP_KERNEL);
     if (!hdev->htqp)
···
         = min_t(u16, hdev->rss_size_max, new_tqps / kinfo->num_tc);
     new_tqps = kinfo->rss_size * kinfo->num_tc;
     kinfo->num_tqps = min(new_tqps, hdev->num_tqps);
+
+    /* if this is on going reset then we need to re-allocate the hnae queues
+     * as well since number of TPQs from PF might have changed.
+     */
+    if (hclgevf_dev_ongoing_reset(hdev))
+        devm_kfree(&hdev->pdev->dev, kinfo->tqp);
 
     kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, kinfo->num_tqps,
                               sizeof(struct hnae3_queue *), GFP_KERNEL);
···
                                 2, true, NULL, 0);
 }
 
+static int hclgevf_notify_client(struct hclgevf_dev *hdev,
+                                 enum hnae3_reset_notify_type type)
+{
+    struct hnae3_client *client = hdev->nic_client;
+    struct hnae3_handle *handle = &hdev->nic;
+
+    if (!client->ops->reset_notify)
+        return -EOPNOTSUPP;
+
+    return client->ops->reset_notify(handle, type);
+}
+
+static int hclgevf_reset_wait(struct hclgevf_dev *hdev)
+{
+#define HCLGEVF_RESET_WAIT_MS   500
+#define HCLGEVF_RESET_WAIT_CNT  20
+    u32 val, cnt = 0;
+
+    /* wait to check the hardware reset completion status */
+    val = hclgevf_read_dev(&hdev->hw, HCLGEVF_FUN_RST_ING);
+    while (hnae_get_bit(val, HCLGEVF_FUN_RST_ING_B) &&
+           (cnt < HCLGEVF_RESET_WAIT_CNT)) {
+        msleep(HCLGEVF_RESET_WAIT_MS);
+        val = hclgevf_read_dev(&hdev->hw, HCLGEVF_FUN_RST_ING);
+        cnt++;
+    }
+
+    /* hardware completion status should be available by this time */
+    if (cnt >= HCLGEVF_RESET_WAIT_CNT) {
+        dev_warn(&hdev->pdev->dev,
+                 "could'nt get reset done status from h/w, timeout!\n");
+        return -EBUSY;
+    }
+
+    /* we will wait a bit more to let reset of the stack to complete. This
+     * might happen in case reset assertion was made by PF. Yes, this also
+     * means we might end up waiting bit more even for VF reset.
+     */
+    msleep(5000);
+
+    return 0;
+}
+
+static int hclgevf_reset_stack(struct hclgevf_dev *hdev)
+{
+    int ret;
+
+    /* uninitialize the nic client */
+    hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+
+    /* re-initialize the hclge device */
+    ret = hclgevf_init_hdev(hdev);
+    if (ret) {
+        dev_err(&hdev->pdev->dev,
+                "hclge device re-init failed, VF is disabled!\n");
+        return ret;
+    }
+
+    /* bring up the nic client again */
+    hclgevf_notify_client(hdev, HNAE3_INIT_CLIENT);
+
+    return 0;
+}
+
+static int hclgevf_reset(struct hclgevf_dev *hdev)
+{
+    int ret;
+
+    rtnl_lock();
+
+    /* bring down the nic to stop any ongoing TX/RX */
+    hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT);
+
+    /* check if VF could successfully fetch the hardware reset completion
+     * status from the hardware
+     */
+    ret = hclgevf_reset_wait(hdev);
+    if (ret) {
+        /* can't do much in this situation, will disable VF */
+        dev_err(&hdev->pdev->dev,
+                "VF failed(=%d) to fetch H/W reset completion status\n",
+                ret);
+
+        dev_warn(&hdev->pdev->dev, "VF reset failed, disabling VF!\n");
+        hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+
+        rtnl_unlock();
+        return ret;
+    }
+
+    /* now, re-initialize the nic client and ae device*/
+    ret = hclgevf_reset_stack(hdev);
+    if (ret)
+        dev_err(&hdev->pdev->dev, "failed to reset VF stack\n");
+
+    /* bring up the nic to enable TX/RX again */
+    hclgevf_notify_client(hdev, HNAE3_UP_CLIENT);
+
+    rtnl_unlock();
+
+    return ret;
+}
+
+static int hclgevf_do_reset(struct hclgevf_dev *hdev)
+{
+    int status;
+    u8 respmsg;
+
+    status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_RESET, 0, NULL,
+                                  0, false, &respmsg, sizeof(u8));
+    if (status)
+        dev_err(&hdev->pdev->dev,
+                "VF reset request to PF failed(=%d)\n", status);
+
+    return status;
+}
+
+static void hclgevf_reset_event(struct hnae3_handle *handle)
+{
+    struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+    dev_info(&hdev->pdev->dev, "received reset request from VF enet\n");
+
+    handle->reset_level = HNAE3_VF_RESET;
+
+    /* reset of this VF requested */
+    set_bit(HCLGEVF_RESET_REQUESTED, &hdev->reset_state);
+    hclgevf_reset_task_schedule(hdev);
+
+    handle->last_reset_time = jiffies;
+}
+
 static u32 hclgevf_get_fw_version(struct hnae3_handle *handle)
 {
     struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
···
     hdev->num_msi_used += 1;
 }
 
-static void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev)
+void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev)
 {
-    if (!test_and_set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state))
+    if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) &&
+        !test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) {
+        set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
+        schedule_work(&hdev->rst_service_task);
+    }
+}
+
+void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev)
+{
+    if (!test_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state) &&
+        !test_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state)) {
+        set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
         schedule_work(&hdev->mbx_service_task);
+    }
 }
 
 static void hclgevf_task_schedule(struct hclgevf_dev *hdev)
···
         schedule_work(&hdev->service_task);
 }
 
+static void hclgevf_deferred_task_schedule(struct hclgevf_dev *hdev)
+{
+    /* if we have any pending mailbox event then schedule the mbx task */
+    if (hdev->mbx_event_pending)
+        hclgevf_mbx_task_schedule(hdev);
+
+    if (test_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state))
+        hclgevf_reset_task_schedule(hdev);
+}
+
 static void hclgevf_service_timer(struct timer_list *t)
 {
     struct hclgevf_dev *hdev = from_timer(hdev, t, service_timer);
···
     mod_timer(&hdev->service_timer, jiffies + 5 * HZ);
 
     hclgevf_task_schedule(hdev);
+}
+
+static void hclgevf_reset_service_task(struct work_struct *work)
+{
+    struct hclgevf_dev *hdev =
+        container_of(work, struct hclgevf_dev, rst_service_task);
+    int ret;
+
+    if (test_and_set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state))
+        return;
+
+    clear_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
+
+    if (test_and_clear_bit(HCLGEVF_RESET_PENDING,
+                           &hdev->reset_state)) {
+        /* PF has initmated that it is about to reset the hardware.
+         * We now have to poll & check if harware has actually completed
+         * the reset sequence. On hardware reset completion, VF needs to
+         * reset the client and ae device.
+         */
+        hdev->reset_attempts = 0;
+
+        ret = hclgevf_reset(hdev);
+        if (ret)
+            dev_err(&hdev->pdev->dev, "VF stack reset failed.\n");
+    } else if (test_and_clear_bit(HCLGEVF_RESET_REQUESTED,
+                                  &hdev->reset_state)) {
+        /* we could be here when either of below happens:
+         * 1. reset was initiated due to watchdog timeout due to
+         *    a. IMP was earlier reset and our TX got choked down and
+         *       which resulted in watchdog reacting and inducing VF
+         *       reset. This also means our cmdq would be unreliable.
+         *    b. problem in TX due to other lower layer(example link
+         *       layer not functioning properly etc.)
+         * 2. VF reset might have been initiated due to some config
+         *    change.
+         *
+         * NOTE: Theres no clear way to detect above cases than to react
+         * to the response of PF for this reset request. PF will ack the
+         * 1b and 2. cases but we will not get any intimation about 1a
+         * from PF as cmdq would be in unreliable state i.e. mailbox
+         * communication between PF and VF would be broken.
+         */
+
+        /* if we are never geting into pending state it means either:
+         * 1. PF is not receiving our request which could be due to IMP
+         *    reset
+         * 2. PF is screwed
+         * We cannot do much for 2. but to check first we can try reset
+         * our PCIe + stack and see if it alleviates the problem.
+         */
+        if (hdev->reset_attempts > 3) {
+            /* prepare for full reset of stack + pcie interface */
+            hdev->nic.reset_level = HNAE3_VF_FULL_RESET;
+
+            /* "defer" schedule the reset task again */
+            set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
+        } else {
+            hdev->reset_attempts++;
+
+            /* request PF for resetting this VF via mailbox */
+            ret = hclgevf_do_reset(hdev);
+            if (ret)
+                dev_warn(&hdev->pdev->dev,
+                         "VF rst fail, stack will call\n");
+        }
+    }
+
+    clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
 }
 
 static void hclgevf_mailbox_service_task(struct work_struct *work)
···
 
     clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
 
-    hclgevf_mbx_handler(hdev);
+    hclgevf_mbx_async_handler(hdev);
 
     clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state);
 }
···
      * about such updates in future so we might remove this later
      */
     hclgevf_request_link_info(hdev);
+
+    hclgevf_deferred_task_schedule(hdev);
 
     clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
 }
···
     if (!hclgevf_check_event_cause(hdev, &clearval))
         goto skip_sched;
 
-    /* schedule the VF mailbox service task, if not already scheduled */
-    hclgevf_mbx_task_schedule(hdev);
+    hclgevf_mbx_handler(hdev);
 
     hclgevf_clear_event_cause(hdev, clearval);
···
         return ret;
     /* get tc configuration from PF */
     return hclgevf_get_tc_info(hdev);
+}
+
+static int hclgevf_alloc_hdev(struct hnae3_ae_dev *ae_dev)
+{
+    struct pci_dev *pdev = ae_dev->pdev;
+    struct hclgevf_dev *hdev = ae_dev->priv;
+
+    hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
+    if (!hdev)
+        return -ENOMEM;
+
+    hdev->pdev = pdev;
+    hdev->ae_dev = ae_dev;
+    ae_dev->priv = hdev;
+
+    return 0;
 }
 
 static int hclgevf_init_roce_base_info(struct hclgevf_dev *hdev)
···
 
 static void hclgevf_state_init(struct hclgevf_dev *hdev)
 {
+    /* if this is on going reset then skip this initialization */
+    if (hclgevf_dev_ongoing_reset(hdev))
+        return;
+
     /* setup tasks for the MBX */
     INIT_WORK(&hdev->mbx_service_task, hclgevf_mailbox_service_task);
     clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
···
 
     INIT_WORK(&hdev->service_task, hclgevf_service_task);
     clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
+
+    INIT_WORK(&hdev->rst_service_task, hclgevf_reset_service_task);
 
     mutex_init(&hdev->mbx_resp.mbx_mutex);
···
         cancel_work_sync(&hdev->service_task);
     if (hdev->mbx_service_task.func)
         cancel_work_sync(&hdev->mbx_service_task);
+    if (hdev->rst_service_task.func)
+        cancel_work_sync(&hdev->rst_service_task);
 
     mutex_destroy(&hdev->mbx_resp.mbx_mutex);
 }
···
     struct pci_dev *pdev = hdev->pdev;
     int vectors;
     int i;
+
+    /* if this is on going reset then skip this initialization */
+    if (hclgevf_dev_ongoing_reset(hdev))
+        return 0;
 
     hdev->num_msi = HCLGEVF_MAX_VF_VECTOR_NUM;
 
···
 static int hclgevf_misc_irq_init(struct hclgevf_dev *hdev)
 {
     int ret = 0;
+
+    /* if this is on going reset then skip this initialization */
+    if (hclgevf_dev_ongoing_reset(hdev))
+        return 0;
 
     hclgevf_get_misc_vector(hdev);
 
···
     struct hclgevf_hw *hw;
     int ret;
 
+    /* check if we need to skip initialization of pci. This will happen if
+     * device is undergoing VF reset. Otherwise, we would need to
+     * re-initialize pci interface again i.e. when device is not going
+     * through *any* reset or actually undergoing full reset.
+     */
+    if (hclgevf_dev_ongoing_reset(hdev))
+        return 0;
+
     ret = pci_enable_device(pdev);
     if (ret) {
         dev_err(&pdev->dev, "failed to enable PCI device\n");
···
     pci_set_drvdata(pdev, NULL);
 }
 
-static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
 {
-    struct pci_dev *pdev = ae_dev->pdev;
-    struct hclgevf_dev *hdev;
+    struct pci_dev *pdev = hdev->pdev;
     int ret;
 
-    hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
-    if (!hdev)
-        return -ENOMEM;
-
-    hdev->pdev = pdev;
-    hdev->ae_dev = ae_dev;
-    ae_dev->priv = hdev;
+    /* check if device is on-going full reset(i.e. pcie as well) */
+    if (hclgevf_dev_ongoing_full_reset(hdev)) {
+        dev_warn(&pdev->dev, "device is going full reset\n");
+        hclgevf_uninit_hdev(hdev);
+    }
 
     ret = hclgevf_pci_init(hdev);
     if (ret) {
···
     return ret;
 }
 
-static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
+static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
 {
-    struct hclgevf_dev *hdev = ae_dev->priv;
-
     hclgevf_cmd_uninit(hdev);
     hclgevf_misc_irq_uninit(hdev);
     hclgevf_state_uninit(hdev);
     hclgevf_uninit_msi(hdev);
     hclgevf_pci_uninit(hdev);
+}
+
+static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+    struct pci_dev *pdev = ae_dev->pdev;
+    int ret;
+
+    ret = hclgevf_alloc_hdev(ae_dev);
+    if (ret) {
+        dev_err(&pdev->dev, "hclge device allocation failed\n");
+        return ret;
+    }
+
+    ret = hclgevf_init_hdev(ae_dev->priv);
+    if (ret)
+        dev_err(&pdev->dev, "hclge device initialization failed\n");
+
+    return ret;
+}
+
+static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+    struct hclgevf_dev *hdev = ae_dev->priv;
+
+    hclgevf_uninit_hdev(hdev);
     ae_dev->priv = NULL;
 }
···
     .get_tc_size = hclgevf_get_tc_size,
     .get_fw_version = hclgevf_get_fw_version,
     .set_vlan_filter = hclgevf_set_vlan_filter,
+    .reset_event = hclgevf_reset_event,
     .get_channels = hclgevf_get_channels,
     .get_tqps_and_rss_info = hclgevf_get_tqps_and_rss_info,
     .get_status = hclgevf_get_status,
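hclgevf_reset_wait() above is a textbook bounded poll: read the
HCLGEVF_FUN_RST_ING register until the in-progress bit clears, sleeping 500 ms
between reads and giving up after 20 tries (10 s total). The pattern in
isolation, with a hypothetical read_status() callback standing in for
hclgevf_read_dev():

#include <linux/bits.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/types.h>

static int poll_reset_done(u32 (*read_status)(void *ctx), void *ctx,
                           unsigned int wait_ms, unsigned int max_tries)
{
    unsigned int cnt = 0;

    /* bit 0 models HCLGEVF_FUN_RST_ING_B: set while reset is in progress */
    while ((read_status(ctx) & BIT(0)) && cnt < max_tries) {
        msleep(wait_ms);
        cnt++;
    }

    return (cnt >= max_tries) ? -EBUSY : 0;
}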
+31
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
···
 #define HCLGEVF_VECTOR0_RX_CMDQ_INT_B	1
 
 #define HCLGEVF_TQP_RESET_TRY_TIMES	10
+/* Reset related Registers */
+#define HCLGEVF_FUN_RST_ING		0x20C00
+#define HCLGEVF_FUN_RST_ING_B		0
 
 #define HCLGEVF_RSS_IND_TBL_SIZE	512
 #define HCLGEVF_RSS_SET_BITMAP_MSK	0xffff
···
     HCLGEVF_STATE_DISABLED,
     /* task states */
     HCLGEVF_STATE_SERVICE_SCHED,
+    HCLGEVF_STATE_RST_SERVICE_SCHED,
+    HCLGEVF_STATE_RST_HANDLING,
     HCLGEVF_STATE_MBX_SERVICE_SCHED,
     HCLGEVF_STATE_MBX_HANDLING,
 };
···
     struct hclgevf_rss_cfg rss_cfg;
     unsigned long state;
 
+#define HCLGEVF_RESET_REQUESTED		0
+#define HCLGEVF_RESET_PENDING		1
+    unsigned long reset_state;	/* requested, pending */
+    u32 reset_attempts;
+
     u32 fw_version;
     u16 num_tqps;		/* num task queue pairs of this PF */
···
     int *vector_irq;
 
     bool accept_mta_mc; /* whether to accept mta filter multicast */
+    bool mbx_event_pending;
     struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */
+    struct hclgevf_mbx_arq_ring arq; /* mailbox async rx queue */
 
     struct timer_list service_timer;
     struct work_struct service_task;
+    struct work_struct rst_service_task;
     struct work_struct mbx_service_task;
 
     struct hclgevf_tqp *htqp;
···
     u32 flag;
 };
 
+static inline bool hclgevf_dev_ongoing_reset(struct hclgevf_dev *hdev)
+{
+    return (hdev &&
+            (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) &&
+            (hdev->nic.reset_level == HNAE3_VF_RESET));
+}
+
+static inline bool hclgevf_dev_ongoing_full_reset(struct hclgevf_dev *hdev)
+{
+    return (hdev &&
+            (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) &&
+            (hdev->nic.reset_level == HNAE3_VF_FULL_RESET));
+}
+
 int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode,
                          const u8 *msg_data, u8 msg_len, bool need_resp,
                          u8 *resp_data, u16 resp_len);
 void hclgevf_mbx_handler(struct hclgevf_dev *hdev);
+void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev);
+
 void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state);
 void hclgevf_update_speed_duplex(struct hclgevf_dev *hdev, u32 speed,
                                  u8 duplex);
+void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev);
+void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev);
 #endif
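These two inline predicates are what let most of the init path be safely
re-entered during a reset: each stage checks hclgevf_dev_ongoing_reset() and
early-outs, so resources that survive a VF-level reset (PCI, MSI, state, work
items) are reused, while a full reset tears everything down first. The pattern
each init routine follows, sketched with a hypothetical stage:

/* illustrative only; mirrors hclgevf_pci_init()/hclgevf_init_msi() above */
static int example_stage_init(struct hclgevf_dev *hdev)
{
    /* VF-level reset: this stage's resources are still valid, skip */
    if (hclgevf_dev_ongoing_reset(hdev))
        return 0;

    /* first probe or HNAE3_VF_FULL_RESET: do the real initialization */
    return 0;
}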
+86 -9
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
···
     struct hclge_mbx_pf_to_vf_cmd *req;
     struct hclgevf_cmq_ring *crq;
     struct hclgevf_desc *desc;
-    u16 link_status, flag;
-    u32 speed;
-    u8 duplex;
+    u16 *msg_q;
+    u16 flag;
     u8 *temp;
     int i;
 
···
         desc = &crq->desc[crq->next_to_use];
         req = (struct hclge_mbx_pf_to_vf_cmd *)desc->data;
 
+        /* synchronous messages are time critical and need preferential
+         * treatment. Therefore, we need to acknowledge all the sync
+         * responses as quickly as possible so that waiting tasks do not
+         * timeout and simultaneously queue the async messages for later
+         * prcessing in context of mailbox task i.e. the slow path.
+         */
         switch (req->msg[0]) {
         case HCLGE_MBX_PF_VF_RESP:
             if (resp->received_resp)
···
             }
             break;
         case HCLGE_MBX_LINK_STAT_CHANGE:
-            link_status = le16_to_cpu(req->msg[1]);
-            memcpy(&speed, &req->msg[2], sizeof(speed));
-            duplex = (u8)le16_to_cpu(req->msg[4]);
+        case HCLGE_MBX_ASSERTING_RESET:
+            /* set this mbx event as pending. This is required as we
+             * might loose interrupt event when mbx task is busy
+             * handling. This shall be cleared when mbx task just
+             * enters handling state.
+             */
+            hdev->mbx_event_pending = true;
 
-            /* update upper layer with new link link status */
-            hclgevf_update_link_status(hdev, link_status);
-            hclgevf_update_speed_duplex(hdev, speed, duplex);
+            /* we will drop the async msg if we find ARQ as full
+             * and continue with next message
+             */
+            if (hdev->arq.count >= HCLGE_MBX_MAX_ARQ_MSG_NUM) {
+                dev_warn(&hdev->pdev->dev,
+                         "Async Q full, dropping msg(%d)\n",
+                         req->msg[1]);
+                break;
+            }
+
+            /* tail the async message in arq */
+            msg_q = hdev->arq.msg_q[hdev->arq.tail];
+            memcpy(&msg_q[0], req->msg, HCLGE_MBX_MAX_ARQ_MSG_SIZE);
+            hclge_mbx_tail_ptr_move_arq(hdev->arq);
+            hdev->arq.count++;
+
+            hclgevf_mbx_task_schedule(hdev);
 
             break;
         default:
···
     /* Write back CMDQ_RQ header pointer, M7 need this pointer */
     hclgevf_write_dev(&hdev->hw, HCLGEVF_NIC_CRQ_HEAD_REG,
                       crq->next_to_use);
+}
+
+void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
+{
+    u16 link_status;
+    u16 *msg_q;
+    u8 duplex;
+    u32 speed;
+    u32 tail;
+
+    /* we can safely clear it now as we are at start of the async message
+     * processing
+     */
+    hdev->mbx_event_pending = false;
+
+    tail = hdev->arq.tail;
+
+    /* process all the async queue messages */
+    while (tail != hdev->arq.head) {
+        msg_q = hdev->arq.msg_q[hdev->arq.head];
+
+        switch (msg_q[0]) {
+        case HCLGE_MBX_LINK_STAT_CHANGE:
+            link_status = le16_to_cpu(msg_q[1]);
+            memcpy(&speed, &msg_q[2], sizeof(speed));
+            duplex = (u8)le16_to_cpu(msg_q[4]);
+
+            /* update upper layer with new link link status */
+            hclgevf_update_link_status(hdev, link_status);
+            hclgevf_update_speed_duplex(hdev, speed, duplex);
+
+            break;
+        case HCLGE_MBX_ASSERTING_RESET:
+            /* PF has asserted reset hence VF should go in pending
+             * state and poll for the hardware reset status till it
+             * has been completely reset. After this stack should
+             * eventually be re-initialized.
+             */
+            hdev->nic.reset_level = HNAE3_VF_RESET;
+            set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
+            hclgevf_reset_task_schedule(hdev);
+
+            break;
+        default:
+            dev_err(&hdev->pdev->dev,
+                    "fetched unsupported(%d) message from arq\n",
+                    msg_q[0]);
+            break;
+        }
+
+        hclge_mbx_head_ptr_move_arq(hdev->arq);
+        hdev->arq.count--;
+        msg_q = hdev->arq.msg_q[hdev->arq.head];
+    }
 }
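The split between hclgevf_mbx_handler() (IRQ context) and
hclgevf_mbx_async_handler() (task context) relies on the mbx_event_pending
flag to avoid losing events: the producer sets it before queueing, the
consumer clears it before draining, so a message landing mid-drain leaves
either a non-empty ring or a set flag for hclgevf_deferred_task_schedule() to
act on. In outline (hypothetical wrapper names, real fields and helpers):

/* producer side, as in hclgevf_mbx_handler() from IRQ context */
static void async_msg_arrived(struct hclgevf_dev *hdev)
{
    hdev->mbx_event_pending = true;   /* noticed even if the task is busy */
    /* ... enqueue into hdev->arq, then hclgevf_mbx_task_schedule() ... */
}

/* consumer side, as in hclgevf_mbx_async_handler() from task context */
static void drain_async_msgs(struct hclgevf_dev *hdev)
{
    hdev->mbx_event_pending = false;  /* cleared before sampling arq.tail */
    /* ... while (tail != head) process one message, advance head ... */
}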