Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * lwtunnel Infrastructure for light weight tunnels like mpls
4 *
5 * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com>
6 */
7
8#include <linux/capability.h>
9#include <linux/module.h>
10#include <linux/types.h>
11#include <linux/kernel.h>
12#include <linux/slab.h>
13#include <linux/uaccess.h>
14#include <linux/skbuff.h>
15#include <linux/netdevice.h>
16#include <linux/lwtunnel.h>
17#include <linux/in.h>
18#include <linux/init.h>
19#include <linux/err.h>
20
21#include <net/lwtunnel.h>
22#include <net/rtnetlink.h>
23#include <net/ip6_fib.h>
24#include <net/rtnh.h>
25
26#include "dev.h"
27
28DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
29EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);
30
31#ifdef CONFIG_MODULES
32
33static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
34{
35 /* Only lwt encaps implemented without using an interface for
36 * the encap need to return a string here.
37 */
38 switch (encap_type) {
39 case LWTUNNEL_ENCAP_MPLS:
40 return "MPLS";
41 case LWTUNNEL_ENCAP_ILA:
42 return "ILA";
43 case LWTUNNEL_ENCAP_SEG6:
44 return "SEG6";
45 case LWTUNNEL_ENCAP_BPF:
46 return "BPF";
47 case LWTUNNEL_ENCAP_SEG6_LOCAL:
48 return "SEG6LOCAL";
49 case LWTUNNEL_ENCAP_RPL:
50 return "RPL";
51 case LWTUNNEL_ENCAP_IOAM6:
52 return "IOAM6";
53 case LWTUNNEL_ENCAP_XFRM:
54 /* module autoload not supported for encap type */
55 return NULL;
56 case LWTUNNEL_ENCAP_IP6:
57 case LWTUNNEL_ENCAP_IP:
58 case LWTUNNEL_ENCAP_NONE:
59 case __LWTUNNEL_ENCAP_MAX:
60 /* should not have got here */
61 WARN_ON(1);
62 break;
63 }
64 return NULL;
65}
66
67#endif /* CONFIG_MODULES */
68
69struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
70{
71 struct lwtunnel_state *lws;
72
73 lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC);
74
75 return lws;
76}
77EXPORT_SYMBOL_GPL(lwtunnel_state_alloc);
78
79static const struct lwtunnel_encap_ops __rcu *
80 lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly;
81
82int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
83 unsigned int num)
84{
85 if (num > LWTUNNEL_ENCAP_MAX)
86 return -ERANGE;
87
88 return !cmpxchg((const struct lwtunnel_encap_ops **)
89 &lwtun_encaps[num],
90 NULL, ops) ? 0 : -1;
91}
92EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops);
93
94int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
95 unsigned int encap_type)
96{
97 int ret;
98
99 if (encap_type == LWTUNNEL_ENCAP_NONE ||
100 encap_type > LWTUNNEL_ENCAP_MAX)
101 return -ERANGE;
102
103 ret = (cmpxchg((const struct lwtunnel_encap_ops **)
104 &lwtun_encaps[encap_type],
105 ops, NULL) == ops) ? 0 : -1;
106
107 synchronize_net();
108
109 return ret;
110}
111EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops);
112
/* Build encap state for @encap_type from the netlink attribute @encap by
 * dispatching to the registered handler's ->build_state().  On success
 * *@lws holds the new state and a reference on the handler's module is
 * kept (dropped later by lwtstate_free()).  Returns 0 or a negative
 * errno, with @extack set on failure.
 */
int lwtunnel_build_state(struct net *net, u16 encap_type,
			 struct nlattr *encap, unsigned int family,
			 const void *cfg, struct lwtunnel_state **lws,
			 struct netlink_ext_ack *extack)
{
	const struct lwtunnel_encap_ops *ops;
	bool found = false;
	int ret = -EINVAL;

	if (encap_type == LWTUNNEL_ENCAP_NONE ||
	    encap_type > LWTUNNEL_ENCAP_MAX) {
		NL_SET_ERR_MSG_ATTR(extack, encap,
				    "Unknown LWT encapsulation type");
		return ret;
	}

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[encap_type]);
	/* Pin the handler's module while still inside the RCU read side so
	 * it cannot go away between this lookup and the ->build_state()
	 * call made after rcu_read_unlock().
	 */
	if (likely(ops && ops->build_state && try_module_get(ops->owner)))
		found = true;
	rcu_read_unlock();

	if (found) {
		ret = ops->build_state(net, encap, family, cfg, lws, extack);
		/* On failure release the module reference taken above; on
		 * success it is kept until lwtstate_free().
		 */
		if (ret)
			module_put(ops->owner);
	} else {
		/* don't rely on -EOPNOTSUPP to detect match as build_state
		 * handlers could return it
		 */
		NL_SET_ERR_MSG_ATTR(extack, encap,
				    "LWT encapsulation type not supported");
	}

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_build_state);
151
/* Validate that @encap_type names a supported encap, autoloading the
 * implementing module when possible.  Returns 0 if a handler is (now)
 * registered, -EINVAL for an out-of-range type, -EOPNOTSUPP otherwise;
 * @extack is set on failure.
 */
int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack,
			      bool rtnl_is_held)
{
	const struct lwtunnel_encap_ops *ops;
	int ret = -EINVAL;

	if (encap_type == LWTUNNEL_ENCAP_NONE ||
	    encap_type > LWTUNNEL_ENCAP_MAX) {
		NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type");
		return ret;
	}

	/* Only a NULL/non-NULL test below, no dereference, so
	 * rcu_access_pointer() suffices here.
	 */
	ops = rcu_access_pointer(lwtun_encaps[encap_type]);
#ifdef CONFIG_MODULES
	if (!ops) {
		const char *encap_type_str = lwtunnel_encap_str(encap_type);

		if (encap_type_str) {
			/* request_module() can block, and the loaded
			 * module's init may itself need RTNL, so drop the
			 * lock around the load when the caller holds it.
			 */
			if (rtnl_is_held)
				__rtnl_unlock();
			request_module("rtnl-lwt-%s", encap_type_str);
			if (rtnl_is_held)
				rtnl_lock();

			/* Re-check: the module load may have registered
			 * the handler in the meantime.
			 */
			ops = rcu_access_pointer(lwtun_encaps[encap_type]);
		}
	}
#endif
	ret = ops ? 0 : -EOPNOTSUPP;
	if (ret < 0)
		NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported");

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type);
187
188int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining,
189 struct netlink_ext_ack *extack,
190 bool rtnl_is_held)
191{
192 struct rtnexthop *rtnh = (struct rtnexthop *)attr;
193 struct nlattr *nla_entype;
194 struct nlattr *attrs;
195 u16 encap_type;
196 int attrlen;
197
198 while (rtnh_ok(rtnh, remaining)) {
199 attrlen = rtnh_attrlen(rtnh);
200 if (attrlen > 0) {
201 attrs = rtnh_attrs(rtnh);
202 nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
203
204 if (nla_entype) {
205 if (nla_len(nla_entype) < sizeof(u16)) {
206 NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE");
207 return -EINVAL;
208 }
209 encap_type = nla_get_u16(nla_entype);
210
211 if (lwtunnel_valid_encap_type(encap_type,
212 extack,
213 rtnl_is_held) != 0)
214 return -EOPNOTSUPP;
215 }
216 }
217 rtnh = rtnh_next(rtnh, &remaining);
218 }
219
220 return 0;
221}
222EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr);
223
/* Release @lws.  If the encap provides a destructor, run it and defer the
 * free past an RCU grace period; otherwise free immediately.  Drops the
 * module reference taken in lwtunnel_build_state().
 */
void lwtstate_free(struct lwtunnel_state *lws)
{
	const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type];

	if (ops->destroy_state) {
		ops->destroy_state(lws);
		/* RCU-deferred free: readers may still reference lws. */
		kfree_rcu(lws, rcu);
	} else {
		kfree(lws);
	}
	module_put(ops->owner);
}
EXPORT_SYMBOL_GPL(lwtstate_free);
237
/* Dump @lwtstate into @skb as a nested @encap_attr followed by a u16
 * @encap_type_attr.  Returns 0 on success or when there is nothing to
 * dump, a negative errno (e.g. -EMSGSIZE) on failure.
 */
int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate,
			int encap_attr, int encap_type_attr)
{
	const struct lwtunnel_encap_ops *ops;
	struct nlattr *nest;
	int ret;

	/* Absent or typeless state: nothing to dump. */
	if (!lwtstate)
		return 0;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	nest = nla_nest_start_noflag(skb, encap_attr);
	if (!nest)
		return -EMSGSIZE;

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->fill_encap))
		ret = ops->fill_encap(skb, lwtstate);
	rcu_read_unlock();

	if (ret)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	ret = nla_put_u16(skb, encap_type_attr, lwtstate->type);
	if (ret)
		goto nla_put_failure;

	return 0;

nla_put_failure:
	/* Undo the partially built nest so the message stays well formed. */
	nla_nest_cancel(skb, nest);

	/* A missing ->fill_encap handler (-EOPNOTSUPP) is not an error for
	 * the caller; report success with nothing dumped.
	 */
	return (ret == -EOPNOTSUPP ? 0 : ret);
}
EXPORT_SYMBOL_GPL(lwtunnel_fill_encap);
278
279int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
280{
281 const struct lwtunnel_encap_ops *ops;
282 int ret = 0;
283
284 if (!lwtstate)
285 return 0;
286
287 if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
288 lwtstate->type > LWTUNNEL_ENCAP_MAX)
289 return 0;
290
291 rcu_read_lock();
292 ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
293 if (likely(ops && ops->get_encap_size))
294 ret = nla_total_size(ops->get_encap_size(lwtstate));
295 rcu_read_unlock();
296
297 return ret;
298}
299EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size);
300
301int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
302{
303 const struct lwtunnel_encap_ops *ops;
304 int ret = 0;
305
306 if (!a && !b)
307 return 0;
308
309 if (!a || !b)
310 return 1;
311
312 if (a->type != b->type)
313 return 1;
314
315 if (a->type == LWTUNNEL_ENCAP_NONE ||
316 a->type > LWTUNNEL_ENCAP_MAX)
317 return 0;
318
319 rcu_read_lock();
320 ops = rcu_dereference(lwtun_encaps[a->type]);
321 if (likely(ops && ops->cmp_encap))
322 ret = ops->cmp_encap(a, b);
323 rcu_read_unlock();
324
325 return ret;
326}
327EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap);
328
/* Hand @skb to the ->output() handler registered for the lwtunnel state
 * on the skb's dst.  Frees the skb on the error paths detected here;
 * otherwise ownership passes to the handler.
 */
int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	const struct lwtunnel_encap_ops *ops;
	struct lwtunnel_state *lwtstate;
	struct dst_entry *dst;
	int ret;

	/* Guard against encap handlers re-entering the xmit path without
	 * bound (e.g. a tunnel routed over itself).
	 */
	if (dev_xmit_recursion()) {
		net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
				     __func__);
		ret = -ENETDOWN;
		goto drop;
	}

	dst = skb_dst(skb);
	if (!dst) {
		ret = -EINVAL;
		goto drop;
	}
	lwtstate = dst->lwtstate;

	/* No encap to apply: report success without touching the skb. */
	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->output)) {
		/* Account this nesting level while the handler runs. */
		dev_xmit_recursion_inc();
		ret = ops->output(net, sk, skb);
		dev_xmit_recursion_dec();
	}
	rcu_read_unlock();

	/* No ->output handler registered for this type: drop the skb. */
	if (ret == -EOPNOTSUPP)
		goto drop;

	return ret;

drop:
	kfree_skb(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_output);
375
/* Hand @skb to the ->xmit() handler registered for the lwtunnel state on
 * the skb's dst.  Frees the skb on the error paths detected here;
 * otherwise ownership passes to the handler.
 */
int lwtunnel_xmit(struct sk_buff *skb)
{
	const struct lwtunnel_encap_ops *ops;
	struct lwtunnel_state *lwtstate;
	struct dst_entry *dst;
	int ret;

	/* Guard against encap handlers re-entering the xmit path without
	 * bound (e.g. a tunnel routed over itself).
	 */
	if (dev_xmit_recursion()) {
		net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
				     __func__);
		ret = -ENETDOWN;
		goto drop;
	}

	dst = skb_dst(skb);
	if (!dst) {
		ret = -EINVAL;
		goto drop;
	}

	lwtstate = dst->lwtstate;

	/* No encap to apply: report success without touching the skb. */
	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->xmit)) {
		/* Account this nesting level while the handler runs. */
		dev_xmit_recursion_inc();
		ret = ops->xmit(skb);
		dev_xmit_recursion_dec();
	}
	rcu_read_unlock();

	/* No ->xmit handler registered for this type: drop the skb. */
	if (ret == -EOPNOTSUPP)
		goto drop;

	return ret;

drop:
	kfree_skb(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_xmit);
423
/* Hand @skb to the ->input() handler registered for the lwtunnel state
 * on the skb's dst.  Frees the skb on the error paths detected here;
 * otherwise ownership passes to the handler.
 */
int lwtunnel_input(struct sk_buff *skb)
{
	const struct lwtunnel_encap_ops *ops;
	struct lwtunnel_state *lwtstate;
	struct dst_entry *dst;
	int ret;

	/* Guard against encap handlers re-entering this path without
	 * bound (e.g. a tunnel routed over itself).
	 */
	if (dev_xmit_recursion()) {
		net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
				     __func__);
		ret = -ENETDOWN;
		goto drop;
	}

	dst = skb_dst(skb);
	if (!dst) {
		ret = -EINVAL;
		goto drop;
	}
	lwtstate = dst->lwtstate;

	/* No encap to handle: report success without touching the skb. */
	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->input)) {
		/* Account this nesting level while the handler runs. */
		dev_xmit_recursion_inc();
		ret = ops->input(skb);
		dev_xmit_recursion_dec();
	}
	rcu_read_unlock();

	/* No ->input handler registered for this type: drop the skb. */
	if (ret == -EOPNOTSUPP)
		goto drop;

	return ret;

drop:
	kfree_skb(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_input);