Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: openvswitch: Add a new action check_pkt_len

This patch adds a new action, 'check_pkt_len', which checks the
packet length and executes one set of actions if the packet
length is greater than the specified length, or another set of
actions if the packet length is less than or equal to it.

This action takes the following nlattrs:
* OVS_CHECK_PKT_LEN_ATTR_PKT_LEN - 'pkt_len' to check for

* OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER - Nested actions
to apply if the packet length is greater than the specified 'pkt_len'

* OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL - Nested
actions to apply if the packet length is less than or equal to the
specified 'pkt_len'.

The main use case for adding this action is to solve packet
drops caused by an MTU mismatch in the OVN virtual networking solution.
When a VM (which belongs to a logical switch of OVN) sends a packet
destined to go via the gateway router, and the NIC which provides
external connectivity has a smaller MTU, OVS drops the packet
if the packet length is greater than this MTU.

With the help of this action, OVN will check the packet length
and, if it is greater than the MTU size, generate an
ICMP packet (type 3, code 4) that includes the next-hop MTU
so that the sender can fragment the packets.

Reported-at:
https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/047039.html
Suggested-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
CC: Gregory Rose <gvrose8192@gmail.com>
CC: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Tested-by: Greg Rose <gvrose8192@gmail.com>
Reviewed-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Numan Siddique and committed by
David S. Miller
4d5ec89f d7aa0338

+261
+42
include/uapi/linux/openvswitch.h
··· 798 798 struct ovs_key_ethernet addresses; 799 799 }; 800 800 801 + /* 802 + * enum ovs_check_pkt_len_attr - Attributes for %OVS_ACTION_ATTR_CHECK_PKT_LEN. 803 + * 804 + * @OVS_CHECK_PKT_LEN_ATTR_PKT_LEN: u16 Packet length to check for. 805 + * @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER: Nested OVS_ACTION_ATTR_* 806 + * actions to apply if the packer length is greater than the specified 807 + * length in the attr - OVS_CHECK_PKT_LEN_ATTR_PKT_LEN. 808 + * @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL - Nested OVS_ACTION_ATTR_* 809 + * actions to apply if the packer length is lesser or equal to the specified 810 + * length in the attr - OVS_CHECK_PKT_LEN_ATTR_PKT_LEN. 811 + */ 812 + enum ovs_check_pkt_len_attr { 813 + OVS_CHECK_PKT_LEN_ATTR_UNSPEC, 814 + OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, 815 + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER, 816 + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, 817 + __OVS_CHECK_PKT_LEN_ATTR_MAX, 818 + 819 + #ifdef __KERNEL__ 820 + OVS_CHECK_PKT_LEN_ATTR_ARG /* struct check_pkt_len_arg */ 821 + #endif 822 + }; 823 + 824 + #define OVS_CHECK_PKT_LEN_ATTR_MAX (__OVS_CHECK_PKT_LEN_ATTR_MAX - 1) 825 + 826 + #ifdef __KERNEL__ 827 + struct check_pkt_len_arg { 828 + u16 pkt_len; /* Same value as OVS_CHECK_PKT_LEN_ATTR_PKT_LEN'. */ 829 + bool exec_for_greater; /* When true, actions in IF_GREATER will 830 + * not change flow keys. False otherwise. 831 + */ 832 + bool exec_for_lesser_equal; /* When true, actions in IF_LESS_EQUAL 833 + * will not change flow keys. False 834 + * otherwise. 835 + */ 836 + }; 837 + #endif 838 + 801 839 /** 802 840 * enum ovs_action_attr - Action types. 803 841 * ··· 880 842 * packet, or modify the packet (e.g., change the DSCP field). 881 843 * @OVS_ACTION_ATTR_CLONE: make a copy of the packet and execute a list of 882 844 * actions without affecting the original packet and key. 
845 + * @OVS_ACTION_ATTR_CHECK_PKT_LEN: Check the packet length and execute a set 846 + * of actions if greater than the specified packet length, else execute 847 + * another set of actions. 883 848 * 884 849 * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all 885 850 * fields within a header are modifiable, e.g. the IPv4 protocol and fragment ··· 917 876 OVS_ACTION_ATTR_POP_NSH, /* No argument. */ 918 877 OVS_ACTION_ATTR_METER, /* u32 meter ID. */ 919 878 OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*. */ 879 + OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */ 920 880 921 881 __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted 922 882 * from userspace. */
+48
net/openvswitch/actions.c
··· 169 169 const struct nlattr *actions, int len, 170 170 bool last, bool clone_flow_key); 171 171 172 + static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, 173 + struct sw_flow_key *key, 174 + const struct nlattr *attr, int len); 175 + 172 176 static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, 173 177 __be16 ethertype) 174 178 { ··· 1217 1213 return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true); 1218 1214 } 1219 1215 1216 + static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb, 1217 + struct sw_flow_key *key, 1218 + const struct nlattr *attr, bool last) 1219 + { 1220 + const struct nlattr *actions, *cpl_arg; 1221 + const struct check_pkt_len_arg *arg; 1222 + int rem = nla_len(attr); 1223 + bool clone_flow_key; 1224 + 1225 + /* The first netlink attribute in 'attr' is always 1226 + * 'OVS_CHECK_PKT_LEN_ATTR_ARG'. 1227 + */ 1228 + cpl_arg = nla_data(attr); 1229 + arg = nla_data(cpl_arg); 1230 + 1231 + if (skb->len <= arg->pkt_len) { 1232 + /* Second netlink attribute in 'attr' is always 1233 + * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'. 1234 + */ 1235 + actions = nla_next(cpl_arg, &rem); 1236 + clone_flow_key = !arg->exec_for_lesser_equal; 1237 + } else { 1238 + /* Third netlink attribute in 'attr' is always 1239 + * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER'. 1240 + */ 1241 + actions = nla_next(cpl_arg, &rem); 1242 + actions = nla_next(actions, &rem); 1243 + clone_flow_key = !arg->exec_for_greater; 1244 + } 1245 + 1246 + return clone_execute(dp, skb, key, 0, nla_data(actions), 1247 + nla_len(actions), last, clone_flow_key); 1248 + } 1249 + 1220 1250 /* Execute a list of actions against 'skb'. 
*/ 1221 1251 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, 1222 1252 struct sw_flow_key *key, ··· 1407 1369 bool last = nla_is_last(a, rem); 1408 1370 1409 1371 err = clone(dp, skb, key, a, last); 1372 + if (last) 1373 + return err; 1374 + 1375 + break; 1376 + } 1377 + 1378 + case OVS_ACTION_ATTR_CHECK_PKT_LEN: { 1379 + bool last = nla_is_last(a, rem); 1380 + 1381 + err = execute_check_pkt_len(dp, skb, key, a, last); 1410 1382 if (last) 1411 1383 return err; 1412 1384
+171
net/openvswitch/flow_netlink.c
··· 91 91 case OVS_ACTION_ATTR_SET: 92 92 case OVS_ACTION_ATTR_SET_MASKED: 93 93 case OVS_ACTION_ATTR_METER: 94 + case OVS_ACTION_ATTR_CHECK_PKT_LEN: 94 95 default: 95 96 return true; 96 97 } ··· 2839 2838 return 0; 2840 2839 } 2841 2840 2841 + static const struct nla_policy cpl_policy[OVS_CHECK_PKT_LEN_ATTR_MAX + 1] = { 2842 + [OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] = {.type = NLA_U16 }, 2843 + [OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER] = {.type = NLA_NESTED }, 2844 + [OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL] = {.type = NLA_NESTED }, 2845 + }; 2846 + 2847 + static int validate_and_copy_check_pkt_len(struct net *net, 2848 + const struct nlattr *attr, 2849 + const struct sw_flow_key *key, 2850 + struct sw_flow_actions **sfa, 2851 + __be16 eth_type, __be16 vlan_tci, 2852 + bool log, bool last) 2853 + { 2854 + const struct nlattr *acts_if_greater, *acts_if_lesser_eq; 2855 + struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1]; 2856 + struct check_pkt_len_arg arg; 2857 + int nested_acts_start; 2858 + int start, err; 2859 + 2860 + err = nla_parse_strict(a, OVS_CHECK_PKT_LEN_ATTR_MAX, nla_data(attr), 2861 + nla_len(attr), cpl_policy, NULL); 2862 + if (err) 2863 + return err; 2864 + 2865 + if (!a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] || 2866 + !nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN])) 2867 + return -EINVAL; 2868 + 2869 + acts_if_lesser_eq = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL]; 2870 + acts_if_greater = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER]; 2871 + 2872 + /* Both the nested action should be present. */ 2873 + if (!acts_if_greater || !acts_if_lesser_eq) 2874 + return -EINVAL; 2875 + 2876 + /* validation done, copy the nested actions. 
*/ 2877 + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CHECK_PKT_LEN, 2878 + log); 2879 + if (start < 0) 2880 + return start; 2881 + 2882 + arg.pkt_len = nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]); 2883 + arg.exec_for_lesser_equal = 2884 + last || !actions_may_change_flow(acts_if_lesser_eq); 2885 + arg.exec_for_greater = 2886 + last || !actions_may_change_flow(acts_if_greater); 2887 + 2888 + err = ovs_nla_add_action(sfa, OVS_CHECK_PKT_LEN_ATTR_ARG, &arg, 2889 + sizeof(arg), log); 2890 + if (err) 2891 + return err; 2892 + 2893 + nested_acts_start = add_nested_action_start(sfa, 2894 + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, log); 2895 + if (nested_acts_start < 0) 2896 + return nested_acts_start; 2897 + 2898 + err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa, 2899 + eth_type, vlan_tci, log); 2900 + 2901 + if (err) 2902 + return err; 2903 + 2904 + add_nested_action_end(*sfa, nested_acts_start); 2905 + 2906 + nested_acts_start = add_nested_action_start(sfa, 2907 + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER, log); 2908 + if (nested_acts_start < 0) 2909 + return nested_acts_start; 2910 + 2911 + err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa, 2912 + eth_type, vlan_tci, log); 2913 + 2914 + if (err) 2915 + return err; 2916 + 2917 + add_nested_action_end(*sfa, nested_acts_start); 2918 + add_nested_action_end(*sfa, start); 2919 + return 0; 2920 + } 2921 + 2842 2922 static int copy_action(const struct nlattr *from, 2843 2923 struct sw_flow_actions **sfa, bool log) 2844 2924 { ··· 2966 2884 [OVS_ACTION_ATTR_POP_NSH] = 0, 2967 2885 [OVS_ACTION_ATTR_METER] = sizeof(u32), 2968 2886 [OVS_ACTION_ATTR_CLONE] = (u32)-1, 2887 + [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1, 2969 2888 }; 2970 2889 const struct ovs_action_push_vlan *vlan; 2971 2890 int type = nla_type(a); ··· 3168 3085 break; 3169 3086 } 3170 3087 3088 + case OVS_ACTION_ATTR_CHECK_PKT_LEN: { 3089 + bool last = nla_is_last(a, rem); 3090 + 3091 + err = 
validate_and_copy_check_pkt_len(net, a, key, sfa, 3092 + eth_type, 3093 + vlan_tci, log, 3094 + last); 3095 + if (err) 3096 + return err; 3097 + skip_copy = true; 3098 + break; 3099 + } 3100 + 3171 3101 default: 3172 3102 OVS_NLERR(log, "Unknown Action type %d", type); 3173 3103 return -EINVAL; ··· 3279 3183 return err; 3280 3184 } 3281 3185 3186 + static int check_pkt_len_action_to_attr(const struct nlattr *attr, 3187 + struct sk_buff *skb) 3188 + { 3189 + struct nlattr *start, *ac_start = NULL; 3190 + const struct check_pkt_len_arg *arg; 3191 + const struct nlattr *a, *cpl_arg; 3192 + int err = 0, rem = nla_len(attr); 3193 + 3194 + start = nla_nest_start(skb, OVS_ACTION_ATTR_CHECK_PKT_LEN); 3195 + if (!start) 3196 + return -EMSGSIZE; 3197 + 3198 + /* The first nested attribute in 'attr' is always 3199 + * 'OVS_CHECK_PKT_LEN_ATTR_ARG'. 3200 + */ 3201 + cpl_arg = nla_data(attr); 3202 + arg = nla_data(cpl_arg); 3203 + 3204 + if (nla_put_u16(skb, OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, arg->pkt_len)) { 3205 + err = -EMSGSIZE; 3206 + goto out; 3207 + } 3208 + 3209 + /* Second nested attribute in 'attr' is always 3210 + * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'. 3211 + */ 3212 + a = nla_next(cpl_arg, &rem); 3213 + ac_start = nla_nest_start(skb, 3214 + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL); 3215 + if (!ac_start) { 3216 + err = -EMSGSIZE; 3217 + goto out; 3218 + } 3219 + 3220 + err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); 3221 + if (err) { 3222 + nla_nest_cancel(skb, ac_start); 3223 + goto out; 3224 + } else { 3225 + nla_nest_end(skb, ac_start); 3226 + } 3227 + 3228 + /* Third nested attribute in 'attr' is always 3229 + * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER. 
3230 + */ 3231 + a = nla_next(a, &rem); 3232 + ac_start = nla_nest_start(skb, 3233 + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER); 3234 + if (!ac_start) { 3235 + err = -EMSGSIZE; 3236 + goto out; 3237 + } 3238 + 3239 + err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); 3240 + if (err) { 3241 + nla_nest_cancel(skb, ac_start); 3242 + goto out; 3243 + } else { 3244 + nla_nest_end(skb, ac_start); 3245 + } 3246 + 3247 + nla_nest_end(skb, start); 3248 + return 0; 3249 + 3250 + out: 3251 + nla_nest_cancel(skb, start); 3252 + return err; 3253 + } 3254 + 3282 3255 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) 3283 3256 { 3284 3257 const struct nlattr *ovs_key = nla_data(a); ··· 3438 3273 3439 3274 case OVS_ACTION_ATTR_CLONE: 3440 3275 err = clone_action_to_attr(a, skb); 3276 + if (err) 3277 + return err; 3278 + break; 3279 + 3280 + case OVS_ACTION_ATTR_CHECK_PKT_LEN: 3281 + err = check_pkt_len_action_to_attr(a, skb); 3441 3282 if (err) 3442 3283 return err; 3443 3284 break;