Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/mlx5: Add steering support in optional flow counters

Add steering infrastructure for adding and removing optional counters.
This allows the counters to be added and removed dynamically, so as not
to hurt performance.

Link: https://lore.kernel.org/r/20211008122439.166063-12-markzhang@nvidia.com
Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

authored by

Aharon Landau and committed by
Jason Gunthorpe
ffa501ef 886773d2

+212
+187
drivers/infiniband/hw/mlx5/fs.c
··· 10 10 #include <rdma/uverbs_std_types.h> 11 11 #include <rdma/mlx5_user_ioctl_cmds.h> 12 12 #include <rdma/mlx5_user_ioctl_verbs.h> 13 + #include <rdma/ib_hdrs.h> 13 14 #include <rdma/ib_umem.h> 14 15 #include <linux/mlx5/driver.h> 15 16 #include <linux/mlx5/fs.h> 16 17 #include <linux/mlx5/fs_helpers.h> 17 18 #include <linux/mlx5/accel.h> 18 19 #include <linux/mlx5/eswitch.h> 20 + #include <net/inet_ecn.h> 19 21 #include "mlx5_ib.h" 20 22 #include "counters.h" 21 23 #include "devx.h" ··· 847 845 flags); 848 846 849 847 return prio; 848 + } 849 + 850 + enum { 851 + RDMA_RX_ECN_OPCOUNTER_PRIO, 852 + RDMA_RX_CNP_OPCOUNTER_PRIO, 853 + }; 854 + 855 + enum { 856 + RDMA_TX_CNP_OPCOUNTER_PRIO, 857 + }; 858 + 859 + static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num, 860 + struct mlx5_flow_spec *spec) 861 + { 862 + if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, 863 + ft_field_support.source_vhca_port) || 864 + !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, 865 + ft_field_support.source_vhca_port)) 866 + return -EOPNOTSUPP; 867 + 868 + MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria, 869 + misc_parameters.source_vhca_port); 870 + MLX5_SET(fte_match_param, &spec->match_value, 871 + misc_parameters.source_vhca_port, port_num); 872 + 873 + return 0; 874 + } 875 + 876 + static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num, 877 + struct mlx5_flow_spec *spec, int ipv) 878 + { 879 + if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, 880 + ft_field_support.outer_ip_version)) 881 + return -EOPNOTSUPP; 882 + 883 + if (mlx5_core_mp_enabled(dev->mdev) && 884 + set_vhca_port_spec(dev, port_num, spec)) 885 + return -EOPNOTSUPP; 886 + 887 + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, 888 + outer_headers.ip_ecn); 889 + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn, 890 + INET_ECN_CE); 891 + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, 892 + outer_headers.ip_version); 893 + MLX5_SET(fte_match_param, spec->match_value, 
outer_headers.ip_version, 894 + ipv); 895 + 896 + spec->match_criteria_enable = 897 + get_match_criteria_enable(spec->match_criteria); 898 + 899 + return 0; 900 + } 901 + 902 + static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num, 903 + struct mlx5_flow_spec *spec) 904 + { 905 + if (mlx5_core_mp_enabled(dev->mdev) && 906 + set_vhca_port_spec(dev, port_num, spec)) 907 + return -EOPNOTSUPP; 908 + 909 + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, 910 + misc_parameters.bth_opcode); 911 + MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode, 912 + IB_BTH_OPCODE_CNP); 913 + 914 + spec->match_criteria_enable = 915 + get_match_criteria_enable(spec->match_criteria); 916 + 917 + return 0; 918 + } 919 + 920 + int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, 921 + struct mlx5_ib_op_fc *opfc, 922 + enum mlx5_ib_optional_counter_type type) 923 + { 924 + enum mlx5_flow_namespace_type fn_type; 925 + int priority, i, err, spec_num; 926 + struct mlx5_flow_act flow_act = {}; 927 + struct mlx5_flow_destination dst; 928 + struct mlx5_flow_namespace *ns; 929 + struct mlx5_ib_flow_prio *prio; 930 + struct mlx5_flow_spec *spec; 931 + 932 + spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL); 933 + if (!spec) 934 + return -ENOMEM; 935 + 936 + switch (type) { 937 + case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS: 938 + if (set_ecn_ce_spec(dev, port_num, &spec[0], 939 + MLX5_FS_IPV4_VERSION) || 940 + set_ecn_ce_spec(dev, port_num, &spec[1], 941 + MLX5_FS_IPV6_VERSION)) { 942 + err = -EOPNOTSUPP; 943 + goto free; 944 + } 945 + spec_num = 2; 946 + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; 947 + priority = RDMA_RX_ECN_OPCOUNTER_PRIO; 948 + break; 949 + 950 + case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS: 951 + if (!MLX5_CAP_FLOWTABLE(dev->mdev, 952 + ft_field_support_2_nic_receive_rdma.bth_opcode) || 953 + set_cnp_spec(dev, port_num, &spec[0])) { 954 + err = -EOPNOTSUPP; 955 + goto free; 956 + } 957 + spec_num = 1; 958 + fn_type = 
MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; 959 + priority = RDMA_RX_CNP_OPCOUNTER_PRIO; 960 + break; 961 + 962 + case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS: 963 + if (!MLX5_CAP_FLOWTABLE(dev->mdev, 964 + ft_field_support_2_nic_transmit_rdma.bth_opcode) || 965 + set_cnp_spec(dev, port_num, &spec[0])) { 966 + err = -EOPNOTSUPP; 967 + goto free; 968 + } 969 + spec_num = 1; 970 + fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS; 971 + priority = RDMA_TX_CNP_OPCOUNTER_PRIO; 972 + break; 973 + 974 + default: 975 + err = -EOPNOTSUPP; 976 + goto free; 977 + } 978 + 979 + ns = mlx5_get_flow_namespace(dev->mdev, fn_type); 980 + if (!ns) { 981 + err = -EOPNOTSUPP; 982 + goto free; 983 + } 984 + 985 + prio = &dev->flow_db->opfcs[type]; 986 + if (!prio->flow_table) { 987 + prio = _get_prio(ns, prio, priority, 988 + dev->num_ports * MAX_OPFC_RULES, 1, 0); 989 + if (IS_ERR(prio)) { 990 + err = PTR_ERR(prio); 991 + goto free; 992 + } 993 + } 994 + 995 + dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; 996 + dst.counter_id = mlx5_fc_id(opfc->fc); 997 + 998 + flow_act.action = 999 + MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW; 1000 + 1001 + for (i = 0; i < spec_num; i++) { 1002 + opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i], 1003 + &flow_act, &dst, 1); 1004 + if (IS_ERR(opfc->rule[i])) { 1005 + err = PTR_ERR(opfc->rule[i]); 1006 + goto del_rules; 1007 + } 1008 + } 1009 + prio->refcount += spec_num; 1010 + kfree(spec); 1011 + 1012 + return 0; 1013 + 1014 + del_rules: 1015 + for (i -= 1; i >= 0; i--) 1016 + mlx5_del_flow_rules(opfc->rule[i]); 1017 + put_flow_table(dev, prio, false); 1018 + free: 1019 + kfree(spec); 1020 + return err; 1021 + } 1022 + 1023 + void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev, 1024 + struct mlx5_ib_op_fc *opfc, 1025 + enum mlx5_ib_optional_counter_type type) 1026 + { 1027 + int i; 1028 + 1029 + for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) { 1030 + mlx5_del_flow_rules(opfc->rule[i]); 1031 + put_flow_table(dev, 
&dev->flow_db->opfcs[type], true); 1032 + } 850 1033 } 851 1034 852 1035 static void set_underlay_qp(struct mlx5_ib_dev *dev,
+24
drivers/infiniband/hw/mlx5/mlx5_ib.h
··· 263 263 struct mlx5_core_dev *mdev; 264 264 }; 265 265 266 + enum mlx5_ib_optional_counter_type { 267 + MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS, 268 + MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS, 269 + MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS, 270 + 271 + MLX5_IB_OPCOUNTER_MAX, 272 + }; 273 + 266 274 struct mlx5_ib_flow_db { 267 275 struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; 268 276 struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT]; ··· 279 271 struct mlx5_ib_flow_prio fdb; 280 272 struct mlx5_ib_flow_prio rdma_rx[MLX5_IB_NUM_FLOW_FT]; 281 273 struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT]; 274 + struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX]; 282 275 struct mlx5_flow_table *lag_demux_ft; 283 276 /* Protect flow steering bypass flow tables 284 277 * when add/del flow rules. ··· 806 797 struct mlx5_ib_port_resources ports[2]; 807 798 }; 808 799 800 + #define MAX_OPFC_RULES 2 801 + 802 + struct mlx5_ib_op_fc { 803 + struct mlx5_fc *fc; 804 + struct mlx5_flow_handle *rule[MAX_OPFC_RULES]; 805 + }; 806 + 809 807 struct mlx5_ib_counters { 810 808 struct rdma_stat_desc *descs; 811 809 size_t *offsets; ··· 822 806 u32 num_op_counters; 823 807 u16 set_id; 824 808 }; 809 + 810 + int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, 811 + struct mlx5_ib_op_fc *opfc, 812 + enum mlx5_ib_optional_counter_type type); 813 + 814 + void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev, 815 + struct mlx5_ib_op_fc *opfc, 816 + enum mlx5_ib_optional_counter_type type); 825 817 826 818 struct mlx5_ib_multiport_info; 827 819
+1
include/rdma/ib_hdrs.h
··· 232 232 #define IB_BTH_SE_SHIFT 23 233 233 #define IB_BTH_TVER_MASK 0xf 234 234 #define IB_BTH_TVER_SHIFT 16 235 + #define IB_BTH_OPCODE_CNP 0x81 235 236 236 237 static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr) 237 238 {