Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/*
2 * Userspace interface
3 * Linux ethernet bridge
4 *
5 * Authors:
6 * Lennert Buytenhek <buytenh@gnu.org>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#include <linux/kernel.h>
15#include <linux/netdevice.h>
16#include <linux/etherdevice.h>
17#include <linux/netpoll.h>
18#include <linux/ethtool.h>
19#include <linux/if_arp.h>
20#include <linux/module.h>
21#include <linux/init.h>
22#include <linux/rtnetlink.h>
23#include <linux/if_ether.h>
24#include <linux/slab.h>
25#include <net/dsa.h>
26#include <net/sock.h>
27#include <linux/if_vlan.h>
28#include <net/switchdev.h>
29
30#include "br_private.h"
31
32/*
33 * Determine initial path cost based on speed.
34 * using recommendations from 802.1d standard
35 *
36 * Since driver might sleep need to not be holding any locks.
37 */
38static int port_cost(struct net_device *dev)
39{
40 struct ethtool_link_ksettings ecmd;
41
42 if (!__ethtool_get_link_ksettings(dev, &ecmd)) {
43 switch (ecmd.base.speed) {
44 case SPEED_10000:
45 return 2;
46 case SPEED_1000:
47 return 4;
48 case SPEED_100:
49 return 19;
50 case SPEED_10:
51 return 100;
52 }
53 }
54
55 /* Old silly heuristics based on name */
56 if (!strncmp(dev->name, "lec", 3))
57 return 7;
58
59 if (!strncmp(dev->name, "plip", 4))
60 return 2500;
61
62 return 100; /* assume old 10Mbps */
63}
64
65
66/* Check for port carrier transitions. */
67void br_port_carrier_check(struct net_bridge_port *p, bool *notified)
68{
69 struct net_device *dev = p->dev;
70 struct net_bridge *br = p->br;
71
72 if (!(p->flags & BR_ADMIN_COST) &&
73 netif_running(dev) && netif_oper_up(dev))
74 p->path_cost = port_cost(dev);
75
76 *notified = false;
77 if (!netif_running(br->dev))
78 return;
79
80 spin_lock_bh(&br->lock);
81 if (netif_running(dev) && netif_oper_up(dev)) {
82 if (p->state == BR_STATE_DISABLED) {
83 br_stp_enable_port(p);
84 *notified = true;
85 }
86 } else {
87 if (p->state != BR_STATE_DISABLED) {
88 br_stp_disable_port(p);
89 *notified = true;
90 }
91 }
92 spin_unlock_bh(&br->lock);
93}
94
95static void br_port_set_promisc(struct net_bridge_port *p)
96{
97 int err = 0;
98
99 if (br_promisc_port(p))
100 return;
101
102 err = dev_set_promiscuity(p->dev, 1);
103 if (err)
104 return;
105
106 br_fdb_unsync_static(p->br, p);
107 p->flags |= BR_PROMISC;
108}
109
110static void br_port_clear_promisc(struct net_bridge_port *p)
111{
112 int err;
113
114 /* Check if the port is already non-promisc or if it doesn't
115 * support UNICAST filtering. Without unicast filtering support
116 * we'll end up re-enabling promisc mode anyway, so just check for
117 * it here.
118 */
119 if (!br_promisc_port(p) || !(p->dev->priv_flags & IFF_UNICAST_FLT))
120 return;
121
122 /* Since we'll be clearing the promisc mode, program the port
123 * first so that we don't have interruption in traffic.
124 */
125 err = br_fdb_sync_static(p->br, p);
126 if (err)
127 return;
128
129 dev_set_promiscuity(p->dev, -1);
130 p->flags &= ~BR_PROMISC;
131}
132
133/* When a port is added or removed or when certain port flags
134 * change, this function is called to automatically manage
135 * promiscuity setting of all the bridge ports. We are always called
136 * under RTNL so can skip using rcu primitives.
137 */
138void br_manage_promisc(struct net_bridge *br)
139{
140 struct net_bridge_port *p;
141 bool set_all = false;
142
143 /* If vlan filtering is disabled or bridge interface is placed
144 * into promiscuous mode, place all ports in promiscuous mode.
145 */
146 if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br->dev))
147 set_all = true;
148
149 list_for_each_entry(p, &br->port_list, list) {
150 if (set_all) {
151 br_port_set_promisc(p);
152 } else {
153 /* If the number of auto-ports is <= 1, then all other
154 * ports will have their output configuration
155 * statically specified through fdbs. Since ingress
156 * on the auto-port becomes forwarding/egress to other
157 * ports and egress configuration is statically known,
158 * we can say that ingress configuration of the
159 * auto-port is also statically known.
160 * This lets us disable promiscuous mode and write
161 * this config to hw.
162 */
163 if (br->auto_cnt == 0 ||
164 (br->auto_cnt == 1 && br_auto_port(p)))
165 br_port_clear_promisc(p);
166 else
167 br_port_set_promisc(p);
168 }
169 }
170}
171
172static void nbp_update_port_count(struct net_bridge *br)
173{
174 struct net_bridge_port *p;
175 u32 cnt = 0;
176
177 list_for_each_entry(p, &br->port_list, list) {
178 if (br_auto_port(p))
179 cnt++;
180 }
181 if (br->auto_cnt != cnt) {
182 br->auto_cnt = cnt;
183 br_manage_promisc(br);
184 }
185}
186
187static void nbp_delete_promisc(struct net_bridge_port *p)
188{
189 /* If port is currently promiscuous, unset promiscuity.
190 * Otherwise, it is a static port so remove all addresses
191 * from it.
192 */
193 dev_set_allmulti(p->dev, -1);
194 if (br_promisc_port(p))
195 dev_set_promiscuity(p->dev, -1);
196 else
197 br_fdb_unsync_static(p->br, p);
198}
199
200static void release_nbp(struct kobject *kobj)
201{
202 struct net_bridge_port *p
203 = container_of(kobj, struct net_bridge_port, kobj);
204 kfree(p);
205}
206
207static struct kobj_type brport_ktype = {
208#ifdef CONFIG_SYSFS
209 .sysfs_ops = &brport_sysfs_ops,
210#endif
211 .release = release_nbp,
212};
213
214static void destroy_nbp(struct net_bridge_port *p)
215{
216 struct net_device *dev = p->dev;
217
218 p->br = NULL;
219 p->dev = NULL;
220 dev_put(dev);
221
222 kobject_put(&p->kobj);
223}
224
225static void destroy_nbp_rcu(struct rcu_head *head)
226{
227 struct net_bridge_port *p =
228 container_of(head, struct net_bridge_port, rcu);
229 destroy_nbp(p);
230}
231
232static unsigned get_max_headroom(struct net_bridge *br)
233{
234 unsigned max_headroom = 0;
235 struct net_bridge_port *p;
236
237 list_for_each_entry(p, &br->port_list, list) {
238 unsigned dev_headroom = netdev_get_fwd_headroom(p->dev);
239
240 if (dev_headroom > max_headroom)
241 max_headroom = dev_headroom;
242 }
243
244 return max_headroom;
245}
246
247static void update_headroom(struct net_bridge *br, int new_hr)
248{
249 struct net_bridge_port *p;
250
251 list_for_each_entry(p, &br->port_list, list)
252 netdev_set_rx_headroom(p->dev, new_hr);
253
254 br->dev->needed_headroom = new_hr;
255}
256
257/* Delete port(interface) from bridge is done in two steps.
258 * via RCU. First step, marks device as down. That deletes
259 * all the timers and stops new packets from flowing through.
260 *
261 * Final cleanup doesn't occur until after all CPU's finished
262 * processing packets.
263 *
264 * Protected from multiple admin operations by RTNL mutex
265 */
266static void del_nbp(struct net_bridge_port *p)
267{
268 struct net_bridge *br = p->br;
269 struct net_device *dev = p->dev;
270
271 sysfs_remove_link(br->ifobj, p->dev->name);
272
273 nbp_delete_promisc(p);
274
275 spin_lock_bh(&br->lock);
276 br_stp_disable_port(p);
277 spin_unlock_bh(&br->lock);
278
279 br_ifinfo_notify(RTM_DELLINK, NULL, p);
280
281 list_del_rcu(&p->list);
282 if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom)
283 update_headroom(br, get_max_headroom(br));
284 netdev_reset_rx_headroom(dev);
285
286 nbp_vlan_flush(p);
287 br_fdb_delete_by_port(br, p, 0, 1);
288 switchdev_deferred_process();
289
290 nbp_update_port_count(br);
291
292 netdev_upper_dev_unlink(dev, br->dev);
293
294 dev->priv_flags &= ~IFF_BRIDGE_PORT;
295
296 netdev_rx_handler_unregister(dev);
297
298 br_multicast_del_port(p);
299
300 kobject_uevent(&p->kobj, KOBJ_REMOVE);
301 kobject_del(&p->kobj);
302
303 br_netpoll_disable(p);
304
305 call_rcu(&p->rcu, destroy_nbp_rcu);
306}
307
308/* Delete bridge device */
309void br_dev_delete(struct net_device *dev, struct list_head *head)
310{
311 struct net_bridge *br = netdev_priv(dev);
312 struct net_bridge_port *p, *n;
313
314 list_for_each_entry_safe(p, n, &br->port_list, list) {
315 del_nbp(p);
316 }
317
318 br_recalculate_neigh_suppress_enabled(br);
319
320 br_fdb_delete_by_port(br, NULL, 0, 1);
321
322 cancel_delayed_work_sync(&br->gc_work);
323
324 br_sysfs_delbr(br->dev);
325 unregister_netdevice_queue(br->dev, head);
326}
327
328/* find an available port number */
329static int find_portno(struct net_bridge *br)
330{
331 int index;
332 struct net_bridge_port *p;
333 unsigned long *inuse;
334
335 inuse = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long),
336 GFP_KERNEL);
337 if (!inuse)
338 return -ENOMEM;
339
340 set_bit(0, inuse); /* zero is reserved */
341 list_for_each_entry(p, &br->port_list, list) {
342 set_bit(p->port_no, inuse);
343 }
344 index = find_first_zero_bit(inuse, BR_MAX_PORTS);
345 kfree(inuse);
346
347 return (index >= BR_MAX_PORTS) ? -EXFULL : index;
348}
349
350/* called with RTNL but without bridge lock */
351static struct net_bridge_port *new_nbp(struct net_bridge *br,
352 struct net_device *dev)
353{
354 struct net_bridge_port *p;
355 int index, err;
356
357 index = find_portno(br);
358 if (index < 0)
359 return ERR_PTR(index);
360
361 p = kzalloc(sizeof(*p), GFP_KERNEL);
362 if (p == NULL)
363 return ERR_PTR(-ENOMEM);
364
365 p->br = br;
366 dev_hold(dev);
367 p->dev = dev;
368 p->path_cost = port_cost(dev);
369 p->priority = 0x8000 >> BR_PORT_BITS;
370 p->port_no = index;
371 p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
372 br_init_port(p);
373 br_set_state(p, BR_STATE_DISABLED);
374 br_stp_port_timer_init(p);
375 err = br_multicast_add_port(p);
376 if (err) {
377 dev_put(dev);
378 kfree(p);
379 p = ERR_PTR(err);
380 }
381
382 return p;
383}
384
385int br_add_bridge(struct net *net, const char *name)
386{
387 struct net_device *dev;
388 int res;
389
390 dev = alloc_netdev(sizeof(struct net_bridge), name, NET_NAME_UNKNOWN,
391 br_dev_setup);
392
393 if (!dev)
394 return -ENOMEM;
395
396 dev_net_set(dev, net);
397 dev->rtnl_link_ops = &br_link_ops;
398
399 res = register_netdev(dev);
400 if (res)
401 free_netdev(dev);
402 return res;
403}
404
405int br_del_bridge(struct net *net, const char *name)
406{
407 struct net_device *dev;
408 int ret = 0;
409
410 rtnl_lock();
411 dev = __dev_get_by_name(net, name);
412 if (dev == NULL)
413 ret = -ENXIO; /* Could not find device */
414
415 else if (!(dev->priv_flags & IFF_EBRIDGE)) {
416 /* Attempt to delete non bridge device! */
417 ret = -EPERM;
418 }
419
420 else if (dev->flags & IFF_UP) {
421 /* Not shutdown yet. */
422 ret = -EBUSY;
423 }
424
425 else
426 br_dev_delete(dev, NULL);
427
428 rtnl_unlock();
429 return ret;
430}
431
432/* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
433static int br_mtu_min(const struct net_bridge *br)
434{
435 const struct net_bridge_port *p;
436 int ret_mtu = 0;
437
438 list_for_each_entry(p, &br->port_list, list)
439 if (!ret_mtu || ret_mtu > p->dev->mtu)
440 ret_mtu = p->dev->mtu;
441
442 return ret_mtu ? ret_mtu : ETH_DATA_LEN;
443}
444
445void br_mtu_auto_adjust(struct net_bridge *br)
446{
447 ASSERT_RTNL();
448
449 /* if the bridge MTU was manually configured don't mess with it */
450 if (br->mtu_set_by_user)
451 return;
452
453 /* change to the minimum MTU and clear the flag which was set by
454 * the bridge ndo_change_mtu callback
455 */
456 dev_set_mtu(br->dev, br_mtu_min(br));
457 br->mtu_set_by_user = false;
458}
459
460static void br_set_gso_limits(struct net_bridge *br)
461{
462 unsigned int gso_max_size = GSO_MAX_SIZE;
463 u16 gso_max_segs = GSO_MAX_SEGS;
464 const struct net_bridge_port *p;
465
466 list_for_each_entry(p, &br->port_list, list) {
467 gso_max_size = min(gso_max_size, p->dev->gso_max_size);
468 gso_max_segs = min(gso_max_segs, p->dev->gso_max_segs);
469 }
470 br->dev->gso_max_size = gso_max_size;
471 br->dev->gso_max_segs = gso_max_segs;
472}
473
474/*
475 * Recomputes features using slave's features
476 */
477netdev_features_t br_features_recompute(struct net_bridge *br,
478 netdev_features_t features)
479{
480 struct net_bridge_port *p;
481 netdev_features_t mask;
482
483 if (list_empty(&br->port_list))
484 return features;
485
486 mask = features;
487 features &= ~NETIF_F_ONE_FOR_ALL;
488
489 list_for_each_entry(p, &br->port_list, list) {
490 features = netdev_increment_features(features,
491 p->dev->features, mask);
492 }
493 features = netdev_add_tso_features(features, mask);
494
495 return features;
496}
497
498/* called with RTNL */
499int br_add_if(struct net_bridge *br, struct net_device *dev,
500 struct netlink_ext_ack *extack)
501{
502 struct net_bridge_port *p;
503 int err = 0;
504 unsigned br_hr, dev_hr;
505 bool changed_addr;
506
507 /* Don't allow bridging non-ethernet like devices, or DSA-enabled
508 * master network devices since the bridge layer rx_handler prevents
509 * the DSA fake ethertype handler to be invoked, so we do not strip off
510 * the DSA switch tag protocol header and the bridge layer just return
511 * RX_HANDLER_CONSUMED, stopping RX processing for these frames.
512 */
513 if ((dev->flags & IFF_LOOPBACK) ||
514 dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN ||
515 !is_valid_ether_addr(dev->dev_addr) ||
516 netdev_uses_dsa(dev))
517 return -EINVAL;
518
519 /* No bridging of bridges */
520 if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) {
521 NL_SET_ERR_MSG(extack,
522 "Can not enslave a bridge to a bridge");
523 return -ELOOP;
524 }
525
526 /* Device has master upper dev */
527 if (netdev_master_upper_dev_get(dev))
528 return -EBUSY;
529
530 /* No bridging devices that dislike that (e.g. wireless) */
531 if (dev->priv_flags & IFF_DONT_BRIDGE) {
532 NL_SET_ERR_MSG(extack,
533 "Device does not allow enslaving to a bridge");
534 return -EOPNOTSUPP;
535 }
536
537 p = new_nbp(br, dev);
538 if (IS_ERR(p))
539 return PTR_ERR(p);
540
541 call_netdevice_notifiers(NETDEV_JOIN, dev);
542
543 err = dev_set_allmulti(dev, 1);
544 if (err)
545 goto put_back;
546
547 err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
548 SYSFS_BRIDGE_PORT_ATTR);
549 if (err)
550 goto err1;
551
552 err = br_sysfs_addif(p);
553 if (err)
554 goto err2;
555
556 err = br_netpoll_enable(p);
557 if (err)
558 goto err3;
559
560 err = netdev_rx_handler_register(dev, br_handle_frame, p);
561 if (err)
562 goto err4;
563
564 dev->priv_flags |= IFF_BRIDGE_PORT;
565
566 err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, extack);
567 if (err)
568 goto err5;
569
570 err = nbp_switchdev_mark_set(p);
571 if (err)
572 goto err6;
573
574 dev_disable_lro(dev);
575
576 list_add_rcu(&p->list, &br->port_list);
577
578 nbp_update_port_count(br);
579
580 netdev_update_features(br->dev);
581
582 br_hr = br->dev->needed_headroom;
583 dev_hr = netdev_get_fwd_headroom(dev);
584 if (br_hr < dev_hr)
585 update_headroom(br, dev_hr);
586 else
587 netdev_set_rx_headroom(dev, br_hr);
588
589 if (br_fdb_insert(br, p, dev->dev_addr, 0))
590 netdev_err(dev, "failed insert local address bridge forwarding table\n");
591
592 err = nbp_vlan_init(p);
593 if (err) {
594 netdev_err(dev, "failed to initialize vlan filtering on this port\n");
595 goto err7;
596 }
597
598 spin_lock_bh(&br->lock);
599 changed_addr = br_stp_recalculate_bridge_id(br);
600
601 if (netif_running(dev) && netif_oper_up(dev) &&
602 (br->dev->flags & IFF_UP))
603 br_stp_enable_port(p);
604 spin_unlock_bh(&br->lock);
605
606 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
607
608 if (changed_addr)
609 call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
610
611 br_mtu_auto_adjust(br);
612 br_set_gso_limits(br);
613
614 kobject_uevent(&p->kobj, KOBJ_ADD);
615
616 return 0;
617
618err7:
619 list_del_rcu(&p->list);
620 br_fdb_delete_by_port(br, p, 0, 1);
621 nbp_update_port_count(br);
622err6:
623 netdev_upper_dev_unlink(dev, br->dev);
624err5:
625 dev->priv_flags &= ~IFF_BRIDGE_PORT;
626 netdev_rx_handler_unregister(dev);
627err4:
628 br_netpoll_disable(p);
629err3:
630 sysfs_remove_link(br->ifobj, p->dev->name);
631err2:
632 kobject_put(&p->kobj);
633 p = NULL; /* kobject_put frees */
634err1:
635 dev_set_allmulti(dev, -1);
636put_back:
637 dev_put(dev);
638 kfree(p);
639 return err;
640}
641
642/* called with RTNL */
643int br_del_if(struct net_bridge *br, struct net_device *dev)
644{
645 struct net_bridge_port *p;
646 bool changed_addr;
647
648 p = br_port_get_rtnl(dev);
649 if (!p || p->br != br)
650 return -EINVAL;
651
652 /* Since more than one interface can be attached to a bridge,
653 * there still maybe an alternate path for netconsole to use;
654 * therefore there is no reason for a NETDEV_RELEASE event.
655 */
656 del_nbp(p);
657
658 br_mtu_auto_adjust(br);
659 br_set_gso_limits(br);
660
661 spin_lock_bh(&br->lock);
662 changed_addr = br_stp_recalculate_bridge_id(br);
663 spin_unlock_bh(&br->lock);
664
665 if (changed_addr)
666 call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
667
668 netdev_update_features(br->dev);
669
670 return 0;
671}
672
673void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
674{
675 struct net_bridge *br = p->br;
676
677 if (mask & BR_AUTO_MASK)
678 nbp_update_port_count(br);
679
680 if (mask & BR_NEIGH_SUPPRESS)
681 br_recalculate_neigh_suppress_enabled(br);
682}