Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'sfc-pedit-offloads'

Pieter Jansen van Vuuren says:

====================
sfc: introduce eth, ipv4 and ipv6 pedit offloads

This set introduces MAC source and destination pedit set action offloads.
It also adds offload for IPv4 TTL and IPv6 hop limit pedit set actions, as
well as pedit add actions that would result in the same semantics as
decrementing the TTL and hop limit.

v2:
- fix 'efx_tc_mangle' kdoc which was orphaned when adding 'efx_tc_pedit_add'.
- add description of 'match' in 'efx_tc_mangle' kdoc.
- correct some inconsistent kdoc indentation.

v1: https://lore.kernel.org/netdev/20230823111725.28090-1-pieter.jansen-van-vuuren@amd.com/
====================

Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

+614 -13
+83 -6
drivers/net/ethernet/sfc/mae.c
··· 1219 1219 return rc; 1220 1220 } 1221 1221 1222 + /** 1223 + * efx_mae_allocate_pedit_mac() - allocate pedit MAC address in HW. 1224 + * @efx: NIC we're installing a pedit MAC address on 1225 + * @ped: pedit MAC action to be installed 1226 + * 1227 + * Attempts to install @ped in HW and populates its id with an index of this 1228 + * entry in the firmware MAC address table on success. 1229 + * 1230 + * Return: negative value on error, 0 in success. 1231 + */ 1232 + int efx_mae_allocate_pedit_mac(struct efx_nic *efx, 1233 + struct efx_tc_mac_pedit_action *ped) 1234 + { 1235 + MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_LEN); 1236 + MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_MAC_ADDR_ALLOC_IN_LEN); 1237 + size_t outlen; 1238 + int rc; 1239 + 1240 + BUILD_BUG_ON(MC_CMD_MAE_MAC_ADDR_ALLOC_IN_MAC_ADDR_LEN != 1241 + sizeof(ped->h_addr)); 1242 + memcpy(MCDI_PTR(inbuf, MAE_MAC_ADDR_ALLOC_IN_MAC_ADDR), ped->h_addr, 1243 + sizeof(ped->h_addr)); 1244 + rc = efx_mcdi_rpc(efx, MC_CMD_MAE_MAC_ADDR_ALLOC, inbuf, sizeof(inbuf), 1245 + outbuf, sizeof(outbuf), &outlen); 1246 + if (rc) 1247 + return rc; 1248 + if (outlen < sizeof(outbuf)) 1249 + return -EIO; 1250 + ped->fw_id = MCDI_DWORD(outbuf, MAE_MAC_ADDR_ALLOC_OUT_MAC_ID); 1251 + return 0; 1252 + } 1253 + 1254 + /** 1255 + * efx_mae_free_pedit_mac() - free pedit MAC address in HW. 1256 + * @efx: NIC we're installing a pedit MAC address on 1257 + * @ped: pedit MAC action that needs to be freed 1258 + * 1259 + * Frees @ped in HW, check that firmware did not free a different one and clears 1260 + * the id (which denotes the index of the entry in the MAC address table). 
1261 + */ 1262 + void efx_mae_free_pedit_mac(struct efx_nic *efx, 1263 + struct efx_tc_mac_pedit_action *ped) 1264 + { 1265 + MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_MAC_ADDR_FREE_OUT_LEN(1)); 1266 + MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_MAC_ADDR_FREE_IN_LEN(1)); 1267 + size_t outlen; 1268 + int rc; 1269 + 1270 + MCDI_SET_DWORD(inbuf, MAE_MAC_ADDR_FREE_IN_MAC_ID, ped->fw_id); 1271 + rc = efx_mcdi_rpc(efx, MC_CMD_MAE_MAC_ADDR_FREE, inbuf, 1272 + sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); 1273 + if (rc || outlen < sizeof(outbuf)) 1274 + return; 1275 + /* FW freed a different ID than we asked for, should also never happen. 1276 + * Warn because it means we've now got a different idea to the FW of 1277 + * what MAC addresses exist, which could cause mayhem later. 1278 + */ 1279 + if (WARN_ON(MCDI_DWORD(outbuf, MAE_MAC_ADDR_FREE_OUT_FREED_MAC_ID) != ped->fw_id)) 1280 + return; 1281 + /* We're probably about to free @ped, but let's just make sure its 1282 + * fw_id is blatted so that it won't look valid if it leaks out. 
1283 + */ 1284 + ped->fw_id = MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL; 1285 + } 1286 + 1222 1287 int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act) 1223 1288 { 1224 1289 MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_ACTION_SET_ALLOC_OUT_LEN); ··· 1291 1226 size_t outlen; 1292 1227 int rc; 1293 1228 1294 - MCDI_POPULATE_DWORD_3(inbuf, MAE_ACTION_SET_ALLOC_IN_FLAGS, 1229 + MCDI_POPULATE_DWORD_4(inbuf, MAE_ACTION_SET_ALLOC_IN_FLAGS, 1295 1230 MAE_ACTION_SET_ALLOC_IN_VLAN_PUSH, act->vlan_push, 1296 1231 MAE_ACTION_SET_ALLOC_IN_VLAN_POP, act->vlan_pop, 1297 - MAE_ACTION_SET_ALLOC_IN_DECAP, act->decap); 1232 + MAE_ACTION_SET_ALLOC_IN_DECAP, act->decap, 1233 + MAE_ACTION_SET_ALLOC_IN_DO_DECR_IP_TTL, 1234 + act->do_ttl_dec); 1298 1235 1299 - MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_SRC_MAC_ID, 1300 - MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL); 1301 - MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_DST_MAC_ID, 1302 - MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL); 1236 + if (act->src_mac) 1237 + MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_SRC_MAC_ID, 1238 + act->src_mac->fw_id); 1239 + else 1240 + MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_SRC_MAC_ID, 1241 + MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL); 1242 + 1243 + if (act->dst_mac) 1244 + MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_DST_MAC_ID, 1245 + act->dst_mac->fw_id); 1246 + else 1247 + MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_DST_MAC_ID, 1248 + MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL); 1249 + 1303 1250 if (act->count && !WARN_ON(!act->count->cnt)) 1304 1251 MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_COUNTER_ID, 1305 1252 act->count->cnt->fw_id);
+4
drivers/net/ethernet/sfc/mae.h
··· 103 103 int efx_mae_free_encap_md(struct efx_nic *efx, 104 104 struct efx_tc_encap_action *encap); 105 105 106 + int efx_mae_allocate_pedit_mac(struct efx_nic *efx, 107 + struct efx_tc_mac_pedit_action *ped); 108 + void efx_mae_free_pedit_mac(struct efx_nic *efx, 109 + struct efx_tc_mac_pedit_action *ped); 106 110 int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act); 107 111 int efx_mae_free_action_set(struct efx_nic *efx, u32 fw_id); 108 112
+476
drivers/net/ethernet/sfc/tc.c
··· 31 31 return EFX_ENCAP_TYPE_NONE; 32 32 } 33 33 34 + #define EFX_TC_HDR_TYPE_TTL_MASK ((u32)0xff) 35 + /* Hoplimit is stored in the most significant byte in the pedit ipv6 header action */ 36 + #define EFX_TC_HDR_TYPE_HLIMIT_MASK ~((u32)0xff000000) 34 37 #define EFX_EFV_PF NULL 35 38 /* Look up the representor information (efv) for a device. 36 39 * May return NULL for the PF (us), or an error pointer for a device that ··· 89 86 return mport; 90 87 } 91 88 89 + static const struct rhashtable_params efx_tc_mac_ht_params = { 90 + .key_len = offsetofend(struct efx_tc_mac_pedit_action, h_addr), 91 + .key_offset = 0, 92 + .head_offset = offsetof(struct efx_tc_mac_pedit_action, linkage), 93 + }; 94 + 92 95 static const struct rhashtable_params efx_tc_encap_match_ht_params = { 93 96 .key_len = offsetof(struct efx_tc_encap_match, linkage), 94 97 .key_offset = 0, ··· 118 109 .key_offset = 0, 119 110 .head_offset = offsetof(struct efx_tc_recirc_id, linkage), 120 111 }; 112 + 113 + static struct efx_tc_mac_pedit_action *efx_tc_flower_get_mac(struct efx_nic *efx, 114 + unsigned char h_addr[ETH_ALEN], 115 + struct netlink_ext_ack *extack) 116 + { 117 + struct efx_tc_mac_pedit_action *ped, *old; 118 + int rc; 119 + 120 + ped = kzalloc(sizeof(*ped), GFP_USER); 121 + if (!ped) 122 + return ERR_PTR(-ENOMEM); 123 + memcpy(ped->h_addr, h_addr, ETH_ALEN); 124 + old = rhashtable_lookup_get_insert_fast(&efx->tc->mac_ht, 125 + &ped->linkage, 126 + efx_tc_mac_ht_params); 127 + if (old) { 128 + /* don't need our new entry */ 129 + kfree(ped); 130 + if (!refcount_inc_not_zero(&old->ref)) 131 + return ERR_PTR(-EAGAIN); 132 + /* existing entry found, ref taken */ 133 + return old; 134 + } 135 + 136 + rc = efx_mae_allocate_pedit_mac(efx, ped); 137 + if (rc < 0) { 138 + NL_SET_ERR_MSG_MOD(extack, "Failed to store pedit MAC address in hw"); 139 + goto out_remove; 140 + } 141 + 142 + /* ref and return */ 143 + refcount_set(&ped->ref, 1); 144 + return ped; 145 + out_remove: 146 + 
rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage, 147 + efx_tc_mac_ht_params); 148 + kfree(ped); 149 + return ERR_PTR(rc); 150 + } 151 + 152 + static void efx_tc_flower_put_mac(struct efx_nic *efx, 153 + struct efx_tc_mac_pedit_action *ped) 154 + { 155 + if (!refcount_dec_and_test(&ped->ref)) 156 + return; /* still in use */ 157 + rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage, 158 + efx_tc_mac_ht_params); 159 + efx_mae_free_pedit_mac(efx, ped); 160 + kfree(ped); 161 + } 121 162 122 163 static void efx_tc_free_action_set(struct efx_nic *efx, 123 164 struct efx_tc_action_set *act, bool in_hw) ··· 194 135 list_del(&act->encap_user); 195 136 efx_tc_flower_release_encap_md(efx, act->encap_md); 196 137 } 138 + if (act->src_mac) 139 + efx_tc_flower_put_mac(efx, act->src_mac); 140 + if (act->dst_mac) 141 + efx_tc_flower_put_mac(efx, act->dst_mac); 197 142 kfree(act); 198 143 } 199 144 ··· 760 697 /* For details of action order constraints refer to SF-123102-TC-1§12.6.1 */ 761 698 enum efx_tc_action_order { 762 699 EFX_TC_AO_DECAP, 700 + EFX_TC_AO_DEC_TTL, 701 + EFX_TC_AO_PEDIT_MAC_ADDRS, 763 702 EFX_TC_AO_VLAN_POP, 764 703 EFX_TC_AO_VLAN_PUSH, 765 704 EFX_TC_AO_COUNT, ··· 775 710 switch (new) { 776 711 case EFX_TC_AO_DECAP: 777 712 if (act->decap) 713 + return false; 714 + /* PEDIT_MAC_ADDRS must not happen before DECAP, though it 715 + * can wait until much later 716 + */ 717 + if (act->dst_mac || act->src_mac) 718 + return false; 719 + 720 + /* Decrementing ttl must not happen before DECAP */ 721 + if (act->do_ttl_dec) 778 722 return false; 779 723 fallthrough; 780 724 case EFX_TC_AO_VLAN_POP: ··· 804 730 if (act->count) 805 731 return false; 806 732 fallthrough; 733 + case EFX_TC_AO_PEDIT_MAC_ADDRS: 807 734 case EFX_TC_AO_ENCAP: 808 735 if (act->encap_md) 809 736 return false; 810 737 fallthrough; 811 738 case EFX_TC_AO_DELIVER: 812 739 return !act->deliver; 740 + case EFX_TC_AO_DEC_TTL: 741 + if (act->encap_md) 742 + return false; 743 + return 
!act->do_ttl_dec; 813 744 default: 814 745 /* Bad caller. Whatever they wanted to do, say they can't. */ 815 746 WARN_ON_ONCE(1); ··· 977 898 efx_tc_ct_unregister_zone(efx, act->zone); 978 899 if (act->count) 979 900 efx_tc_flower_put_counter_index(efx, act->count); 901 + } 902 + 903 + /** 904 + * struct efx_tc_mangler_state - accumulates 32-bit pedits into fields 905 + * 906 + * @dst_mac_32: dst_mac[0:3] has been populated 907 + * @dst_mac_16: dst_mac[4:5] has been populated 908 + * @src_mac_16: src_mac[0:1] has been populated 909 + * @src_mac_32: src_mac[2:5] has been populated 910 + * @dst_mac: h_dest field of ethhdr 911 + * @src_mac: h_source field of ethhdr 912 + * 913 + * Since FLOW_ACTION_MANGLE comes in 32-bit chunks that do not 914 + * necessarily equate to whole fields of the packet header, this 915 + * structure is used to hold the cumulative effect of the partial 916 + * field pedits that have been processed so far. 917 + */ 918 + struct efx_tc_mangler_state { 919 + u8 dst_mac_32:1; /* eth->h_dest[0:3] */ 920 + u8 dst_mac_16:1; /* eth->h_dest[4:5] */ 921 + u8 src_mac_16:1; /* eth->h_source[0:1] */ 922 + u8 src_mac_32:1; /* eth->h_source[2:5] */ 923 + unsigned char dst_mac[ETH_ALEN]; 924 + unsigned char src_mac[ETH_ALEN]; 925 + }; 926 + 927 + /** efx_tc_complete_mac_mangle() - pull complete field pedits out of @mung 928 + * @efx: NIC we're installing a flow rule on 929 + * @act: action set (cursor) to update 930 + * @mung: accumulated partial mangles 931 + * @extack: netlink extended ack for reporting errors 932 + * 933 + * Check @mung to find any combinations of partial mangles that can be 934 + * combined into a complete packet field edit, add that edit to @act, 935 + * and consume the partial mangles from @mung. 
936 + */ 937 + 938 + static int efx_tc_complete_mac_mangle(struct efx_nic *efx, 939 + struct efx_tc_action_set *act, 940 + struct efx_tc_mangler_state *mung, 941 + struct netlink_ext_ack *extack) 942 + { 943 + struct efx_tc_mac_pedit_action *ped; 944 + 945 + if (mung->dst_mac_32 && mung->dst_mac_16) { 946 + ped = efx_tc_flower_get_mac(efx, mung->dst_mac, extack); 947 + if (IS_ERR(ped)) 948 + return PTR_ERR(ped); 949 + 950 + /* Check that we have not already populated dst_mac */ 951 + if (act->dst_mac) 952 + efx_tc_flower_put_mac(efx, act->dst_mac); 953 + 954 + act->dst_mac = ped; 955 + 956 + /* consume the incomplete state */ 957 + mung->dst_mac_32 = 0; 958 + mung->dst_mac_16 = 0; 959 + } 960 + if (mung->src_mac_16 && mung->src_mac_32) { 961 + ped = efx_tc_flower_get_mac(efx, mung->src_mac, extack); 962 + if (IS_ERR(ped)) 963 + return PTR_ERR(ped); 964 + 965 + /* Check that we have not already populated src_mac */ 966 + if (act->src_mac) 967 + efx_tc_flower_put_mac(efx, act->src_mac); 968 + 969 + act->src_mac = ped; 970 + 971 + /* consume the incomplete state */ 972 + mung->src_mac_32 = 0; 973 + mung->src_mac_16 = 0; 974 + } 975 + return 0; 976 + } 977 + 978 + static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act, 979 + const struct flow_action_entry *fa, 980 + struct netlink_ext_ack *extack) 981 + { 982 + switch (fa->mangle.htype) { 983 + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: 984 + switch (fa->mangle.offset) { 985 + case offsetof(struct iphdr, ttl): 986 + /* check that pedit applies to ttl only */ 987 + if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK) 988 + break; 989 + 990 + /* Adding 0xff is equivalent to decrementing the ttl. 991 + * Other added values are not supported. 
992 + */ 993 + if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) != U8_MAX) 994 + break; 995 + 996 + /* check that we do not decrement ttl twice */ 997 + if (!efx_tc_flower_action_order_ok(act, 998 + EFX_TC_AO_DEC_TTL)) { 999 + NL_SET_ERR_MSG_MOD(extack, "Unsupported: multiple dec ttl"); 1000 + return -EOPNOTSUPP; 1001 + } 1002 + act->do_ttl_dec = 1; 1003 + return 0; 1004 + default: 1005 + break; 1006 + } 1007 + break; 1008 + case FLOW_ACT_MANGLE_HDR_TYPE_IP6: 1009 + switch (fa->mangle.offset) { 1010 + case round_down(offsetof(struct ipv6hdr, hop_limit), 4): 1011 + /* check that pedit applies to hoplimit only */ 1012 + if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK) 1013 + break; 1014 + 1015 + /* Adding 0xff is equivalent to decrementing the hoplimit. 1016 + * Other added values are not supported. 1017 + */ 1018 + if ((fa->mangle.val >> 24) != U8_MAX) 1019 + break; 1020 + 1021 + /* check that we do not decrement hoplimit twice */ 1022 + if (!efx_tc_flower_action_order_ok(act, 1023 + EFX_TC_AO_DEC_TTL)) { 1024 + NL_SET_ERR_MSG_MOD(extack, "Unsupported: multiple dec ttl"); 1025 + return -EOPNOTSUPP; 1026 + } 1027 + act->do_ttl_dec = 1; 1028 + return 0; 1029 + default: 1030 + break; 1031 + } 1032 + break; 1033 + default: 1034 + break; 1035 + } 1036 + 1037 + NL_SET_ERR_MSG_FMT_MOD(extack, 1038 + "Unsupported: ttl add action type %x %x %x/%x", 1039 + fa->mangle.htype, fa->mangle.offset, 1040 + fa->mangle.val, fa->mangle.mask); 1041 + return -EOPNOTSUPP; 1042 + } 1043 + 1044 + /** 1045 + * efx_tc_mangle() - handle a single 32-bit (or less) pedit 1046 + * @efx: NIC we're installing a flow rule on 1047 + * @act: action set (cursor) to update 1048 + * @fa: FLOW_ACTION_MANGLE action metadata 1049 + * @mung: accumulator for partial mangles 1050 + * @extack: netlink extended ack for reporting errors 1051 + * @match: original match used along with the mangle action 1052 + * 1053 + * Identify the fields written by a FLOW_ACTION_MANGLE, and record 1054 + * the partial mangle 
state in @mung. If this mangle completes an 1055 + * earlier partial mangle, consume and apply to @act by calling 1056 + * efx_tc_complete_mac_mangle(). 1057 + */ 1058 + 1059 + static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, 1060 + const struct flow_action_entry *fa, 1061 + struct efx_tc_mangler_state *mung, 1062 + struct netlink_ext_ack *extack, 1063 + struct efx_tc_match *match) 1064 + { 1065 + __le32 mac32; 1066 + __le16 mac16; 1067 + u8 tr_ttl; 1068 + 1069 + switch (fa->mangle.htype) { 1070 + case FLOW_ACT_MANGLE_HDR_TYPE_ETH: 1071 + BUILD_BUG_ON(offsetof(struct ethhdr, h_dest) != 0); 1072 + BUILD_BUG_ON(offsetof(struct ethhdr, h_source) != 6); 1073 + if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_PEDIT_MAC_ADDRS)) { 1074 + NL_SET_ERR_MSG_MOD(extack, 1075 + "Pedit mangle mac action violates action order"); 1076 + return -EOPNOTSUPP; 1077 + } 1078 + switch (fa->mangle.offset) { 1079 + case 0: 1080 + if (fa->mangle.mask) { 1081 + NL_SET_ERR_MSG_FMT_MOD(extack, 1082 + "Unsupported: mask (%#x) of eth.dst32 mangle", 1083 + fa->mangle.mask); 1084 + return -EOPNOTSUPP; 1085 + } 1086 + /* Ethernet address is little-endian */ 1087 + mac32 = cpu_to_le32(fa->mangle.val); 1088 + memcpy(mung->dst_mac, &mac32, sizeof(mac32)); 1089 + mung->dst_mac_32 = 1; 1090 + return efx_tc_complete_mac_mangle(efx, act, mung, extack); 1091 + case 4: 1092 + if (fa->mangle.mask == 0xffff) { 1093 + mac16 = cpu_to_le16(fa->mangle.val >> 16); 1094 + memcpy(mung->src_mac, &mac16, sizeof(mac16)); 1095 + mung->src_mac_16 = 1; 1096 + } else if (fa->mangle.mask == 0xffff0000) { 1097 + mac16 = cpu_to_le16((u16)fa->mangle.val); 1098 + memcpy(mung->dst_mac + 4, &mac16, sizeof(mac16)); 1099 + mung->dst_mac_16 = 1; 1100 + } else { 1101 + NL_SET_ERR_MSG_FMT_MOD(extack, 1102 + "Unsupported: mask (%#x) of eth+4 mangle is not high or low 16b", 1103 + fa->mangle.mask); 1104 + return -EOPNOTSUPP; 1105 + } 1106 + return efx_tc_complete_mac_mangle(efx, act, mung, extack); 1107 + 
case 8: 1108 + if (fa->mangle.mask) { 1109 + NL_SET_ERR_MSG_FMT_MOD(extack, 1110 + "Unsupported: mask (%#x) of eth.src32 mangle", 1111 + fa->mangle.mask); 1112 + return -EOPNOTSUPP; 1113 + } 1114 + mac32 = cpu_to_le32(fa->mangle.val); 1115 + memcpy(mung->src_mac + 2, &mac32, sizeof(mac32)); 1116 + mung->src_mac_32 = 1; 1117 + return efx_tc_complete_mac_mangle(efx, act, mung, extack); 1118 + default: 1119 + NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported: mangle eth+%u %x/%x", 1120 + fa->mangle.offset, fa->mangle.val, fa->mangle.mask); 1121 + return -EOPNOTSUPP; 1122 + } 1123 + break; 1124 + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: 1125 + switch (fa->mangle.offset) { 1126 + case offsetof(struct iphdr, ttl): 1127 + /* we currently only support pedit IP4 when it applies 1128 + * to TTL and then only when it can be achieved with a 1129 + * decrement ttl action 1130 + */ 1131 + 1132 + /* check that pedit applies to ttl only */ 1133 + if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK) { 1134 + NL_SET_ERR_MSG_FMT_MOD(extack, 1135 + "Unsupported: mask (%#x) out of range, only support mangle action on ipv4.ttl", 1136 + fa->mangle.mask); 1137 + return -EOPNOTSUPP; 1138 + } 1139 + 1140 + /* we can only convert to a dec ttl when we have an 1141 + * exact match on the ttl field 1142 + */ 1143 + if (match->mask.ip_ttl != U8_MAX) { 1144 + NL_SET_ERR_MSG_FMT_MOD(extack, 1145 + "Unsupported: only support mangle ipv4.ttl when we have an exact match on ttl, mask used for match (%#x)", 1146 + match->mask.ip_ttl); 1147 + return -EOPNOTSUPP; 1148 + } 1149 + 1150 + /* check that we don't try to decrement 0, which equates 1151 + * to setting the ttl to 0xff 1152 + */ 1153 + if (match->value.ip_ttl == 0) { 1154 + NL_SET_ERR_MSG_MOD(extack, 1155 + "Unsupported: we cannot decrement ttl past 0"); 1156 + return -EOPNOTSUPP; 1157 + } 1158 + 1159 + /* check that we do not decrement ttl twice */ 1160 + if (!efx_tc_flower_action_order_ok(act, 1161 + EFX_TC_AO_DEC_TTL)) { 1162 + NL_SET_ERR_MSG_MOD(extack, 
1163 + "Unsupported: multiple dec ttl"); 1164 + return -EOPNOTSUPP; 1165 + } 1166 + 1167 + /* check pedit can be achieved with decrement action */ 1168 + tr_ttl = match->value.ip_ttl - 1; 1169 + if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) == tr_ttl) { 1170 + act->do_ttl_dec = 1; 1171 + return 0; 1172 + } 1173 + 1174 + fallthrough; 1175 + default: 1176 + NL_SET_ERR_MSG_FMT_MOD(extack, 1177 + "Unsupported: only support mangle on the ttl field (offset is %u)", 1178 + fa->mangle.offset); 1179 + return -EOPNOTSUPP; 1180 + } 1181 + break; 1182 + case FLOW_ACT_MANGLE_HDR_TYPE_IP6: 1183 + switch (fa->mangle.offset) { 1184 + case round_down(offsetof(struct ipv6hdr, hop_limit), 4): 1185 + /* we currently only support pedit IP6 when it applies 1186 + * to the hoplimit and then only when it can be achieved 1187 + * with a decrement hoplimit action 1188 + */ 1189 + 1190 + /* check that pedit applies to ttl only */ 1191 + if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK) { 1192 + NL_SET_ERR_MSG_FMT_MOD(extack, 1193 + "Unsupported: mask (%#x) out of range, only support mangle action on ipv6.hop_limit", 1194 + fa->mangle.mask); 1195 + 1196 + return -EOPNOTSUPP; 1197 + } 1198 + 1199 + /* we can only convert to a dec ttl when we have an 1200 + * exact match on the ttl field 1201 + */ 1202 + if (match->mask.ip_ttl != U8_MAX) { 1203 + NL_SET_ERR_MSG_FMT_MOD(extack, 1204 + "Unsupported: only support mangle ipv6.hop_limit when we have an exact match on ttl, mask used for match (%#x)", 1205 + match->mask.ip_ttl); 1206 + return -EOPNOTSUPP; 1207 + } 1208 + 1209 + /* check that we don't try to decrement 0, which equates 1210 + * to setting the ttl to 0xff 1211 + */ 1212 + if (match->value.ip_ttl == 0) { 1213 + NL_SET_ERR_MSG_MOD(extack, 1214 + "Unsupported: we cannot decrement hop_limit past 0"); 1215 + return -EOPNOTSUPP; 1216 + } 1217 + 1218 + /* check that we do not decrement hoplimit twice */ 1219 + if (!efx_tc_flower_action_order_ok(act, 1220 + EFX_TC_AO_DEC_TTL)) { 1221 + 
NL_SET_ERR_MSG_MOD(extack, 1222 + "Unsupported: multiple dec ttl"); 1223 + return -EOPNOTSUPP; 1224 + } 1225 + 1226 + /* check pedit can be achieved with decrement action */ 1227 + tr_ttl = match->value.ip_ttl - 1; 1228 + if ((fa->mangle.val >> 24) == tr_ttl) { 1229 + act->do_ttl_dec = 1; 1230 + return 0; 1231 + } 1232 + 1233 + fallthrough; 1234 + default: 1235 + NL_SET_ERR_MSG_FMT_MOD(extack, 1236 + "Unsupported: only support mangle on the hop_limit field"); 1237 + return -EOPNOTSUPP; 1238 + } 1239 + default: 1240 + NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled mangle htype %u for action rule", 1241 + fa->mangle.htype); 1242 + return -EOPNOTSUPP; 1243 + } 1244 + return 0; 1245 + } 1246 + 1247 + /** 1248 + * efx_tc_incomplete_mangle() - check for leftover partial pedits 1249 + * @mung: accumulator for partial mangles 1250 + * @extack: netlink extended ack for reporting errors 1251 + * 1252 + * Since the MAE can only overwrite whole fields, any partial 1253 + * field mangle left over on reaching packet delivery (mirred or 1254 + * end of TC actions) cannot be offloaded. Check for any such 1255 + * and reject them with -%EOPNOTSUPP. 
1256 + */ 1257 + 1258 + static int efx_tc_incomplete_mangle(struct efx_tc_mangler_state *mung, 1259 + struct netlink_ext_ack *extack) 1260 + { 1261 + if (mung->dst_mac_32 || mung->dst_mac_16) { 1262 + NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of destination MAC address"); 1263 + return -EOPNOTSUPP; 1264 + } 1265 + if (mung->src_mac_16 || mung->src_mac_32) { 1266 + NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of source MAC address"); 1267 + return -EOPNOTSUPP; 1268 + } 1269 + return 0; 980 1270 } 981 1271 982 1272 static int efx_tc_flower_replace_foreign(struct efx_nic *efx, ··· 1743 1295 struct netlink_ext_ack *extack = tc->common.extack; 1744 1296 const struct ip_tunnel_info *encap_info = NULL; 1745 1297 struct efx_tc_flow_rule *rule = NULL, *old; 1298 + struct efx_tc_mangler_state mung = {}; 1746 1299 struct efx_tc_action_set *act = NULL; 1747 1300 const struct flow_action_entry *fa; 1748 1301 struct efx_rep *from_efv, *to_efv; ··· 2080 1631 act->vlan_proto[act->vlan_push] = fa->vlan.proto; 2081 1632 act->vlan_push++; 2082 1633 break; 1634 + case FLOW_ACTION_ADD: 1635 + rc = efx_tc_pedit_add(efx, act, fa, extack); 1636 + if (rc < 0) 1637 + goto release; 1638 + break; 1639 + case FLOW_ACTION_MANGLE: 1640 + rc = efx_tc_mangle(efx, act, fa, &mung, extack, &match); 1641 + if (rc < 0) 1642 + goto release; 1643 + break; 2083 1644 case FLOW_ACTION_TUNNEL_ENCAP: 2084 1645 if (encap_info) { 2085 1646 /* Can't specify encap multiple times. 
··· 2129 1670 } 2130 1671 } 2131 1672 1673 + rc = efx_tc_incomplete_mangle(&mung, extack); 1674 + if (rc < 0) 1675 + goto release; 2132 1676 if (act) { 2133 1677 /* Not shot/redirected, so deliver to default dest */ 2134 1678 if (from_efv == EFX_EFV_PF) ··· 2618 2156 kfree(rule); 2619 2157 } 2620 2158 2159 + static void efx_tc_mac_free(void *ptr, void *__unused) 2160 + { 2161 + struct efx_tc_mac_pedit_action *ped = ptr; 2162 + 2163 + WARN_ON(refcount_read(&ped->ref)); 2164 + kfree(ped); 2165 + } 2166 + 2621 2167 static void efx_tc_flow_free(void *ptr, void *arg) 2622 2168 { 2623 2169 struct efx_tc_flow_rule *rule = ptr; ··· 2666 2196 rc = efx_tc_init_counters(efx); 2667 2197 if (rc < 0) 2668 2198 goto fail_counters; 2199 + rc = rhashtable_init(&efx->tc->mac_ht, &efx_tc_mac_ht_params); 2200 + if (rc < 0) 2201 + goto fail_mac_ht; 2669 2202 rc = rhashtable_init(&efx->tc->encap_match_ht, &efx_tc_encap_match_ht_params); 2670 2203 if (rc < 0) 2671 2204 goto fail_encap_match_ht; ··· 2706 2233 fail_match_action_ht: 2707 2234 rhashtable_destroy(&efx->tc->encap_match_ht); 2708 2235 fail_encap_match_ht: 2236 + rhashtable_destroy(&efx->tc->mac_ht); 2237 + fail_mac_ht: 2709 2238 efx_tc_destroy_counters(efx); 2710 2239 fail_counters: 2711 2240 efx_tc_destroy_encap_actions(efx); ··· 2743 2268 rhashtable_free_and_destroy(&efx->tc->recirc_ht, efx_tc_recirc_free, efx); 2744 2269 WARN_ON(!ida_is_empty(&efx->tc->recirc_ida)); 2745 2270 ida_destroy(&efx->tc->recirc_ida); 2271 + rhashtable_free_and_destroy(&efx->tc->mac_ht, efx_tc_mac_free, NULL); 2746 2272 efx_tc_fini_counters(efx); 2747 2273 efx_tc_fini_encap_actions(efx); 2748 2274 mutex_unlock(&efx->tc->mutex);
+51 -7
drivers/net/ethernet/sfc/tc.h
··· 18 18 19 19 #define IS_ALL_ONES(v) (!(typeof (v))~(v)) 20 20 21 + /** 22 + * struct efx_tc_mac_pedit_action - mac pedit action fields 23 + * 24 + * @h_addr: mac address field of ethernet header 25 + * @linkage: rhashtable reference 26 + * @ref: reference count 27 + * @fw_id: index of this entry in firmware MAC address table 28 + * 29 + * MAC address edits are indirected through a table in the hardware 30 + */ 31 + struct efx_tc_mac_pedit_action { 32 + u8 h_addr[ETH_ALEN]; 33 + struct rhash_head linkage; 34 + refcount_t ref; 35 + u32 fw_id; /* index of this entry in firmware MAC address table */ 36 + }; 37 + 21 38 static inline bool efx_ipv6_addr_all_ones(struct in6_addr *addr) 22 39 { 23 40 return !memchr_inv(addr, 0xff, sizeof(*addr)); ··· 42 25 43 26 struct efx_tc_encap_action; /* see tc_encap_actions.h */ 44 27 28 + /** 29 + * struct efx_tc_action_set - collection of tc action fields 30 + * 31 + * @vlan_push: the number of vlan headers to push 32 + * @vlan_pop: the number of vlan headers to pop 33 + * @decap: used to indicate a tunnel header decapsulation should take place 34 + * @do_ttl_dec: used to indicate IP TTL / Hop Limit should be decremented 35 + * @deliver: used to indicate a deliver action should take place 36 + * @vlan_tci: tci fields for vlan push actions 37 + * @vlan_proto: ethernet types for vlan push actions 38 + * @count: counter mapping 39 + * @encap_md: encap entry in tc_encap_ht table 40 + * @encap_user: linked list of encap users (encap_md->users) 41 + * @user: owning action-set-list. 
Only populated if @encap_md is; used by efx_tc_update_encap() fallback handling 42 + * @count_user: linked list of counter users (counter->users) 43 + * @dest_mport: destination mport 44 + * @src_mac: source mac entry in tc_mac_ht table 45 + * @dst_mac: destination mac entry in tc_mac_ht table 46 + * @fw_id: index of this entry in firmware actions table 47 + * @list: linked list of tc actions 48 + * 49 + */ 45 50 struct efx_tc_action_set { 46 51 u16 vlan_push:2; 47 52 u16 vlan_pop:2; 48 53 u16 decap:1; 54 + u16 do_ttl_dec:1; 49 55 u16 deliver:1; 50 - __be16 vlan_tci[2]; /* TCIs for vlan_push */ 51 - __be16 vlan_proto[2]; /* Ethertypes for vlan_push */ 56 + __be16 vlan_tci[2]; 57 + __be16 vlan_proto[2]; 52 58 struct efx_tc_counter_index *count; 53 - struct efx_tc_encap_action *encap_md; /* entry in tc_encap_ht table */ 54 - struct list_head encap_user; /* entry on encap_md->users list */ 55 - struct efx_tc_action_set_list *user; /* Only populated if encap_md */ 56 - struct list_head count_user; /* entry on counter->users list, if encap */ 59 + struct efx_tc_encap_action *encap_md; 60 + struct list_head encap_user; 61 + struct efx_tc_action_set_list *user; 62 + struct list_head count_user; 57 63 u32 dest_mport; 58 - u32 fw_id; /* index of this entry in firmware actions table */ 64 + struct efx_tc_mac_pedit_action *src_mac; 65 + struct efx_tc_mac_pedit_action *dst_mac; 66 + u32 fw_id; 59 67 struct list_head list; 60 68 }; 61 69 ··· 262 220 * @counter_ht: Hashtable of TC counters (FW IDs and counter values) 263 221 * @counter_id_ht: Hashtable mapping TC counter cookies to counters 264 222 * @encap_ht: Hashtable of TC encap actions 223 + * @mac_ht: Hashtable of MAC address entries (for pedits) 265 224 * @encap_match_ht: Hashtable of TC encap matches 266 225 * @match_action_ht: Hashtable of TC match-action rules 267 226 * @lhs_rule_ht: Hashtable of TC left-hand (act ct & goto chain) rules ··· 300 257 struct rhashtable counter_ht; 301 258 struct rhashtable counter_id_ht; 
302 259 struct rhashtable encap_ht; 260 + struct rhashtable mac_ht; 303 261 struct rhashtable encap_match_ht; 304 262 struct rhashtable match_action_ht; 305 263 struct rhashtable lhs_rule_ht;