Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xen/netfront: harden netfront against event channel storms

The Xen netfront driver is still vulnerable to an attack via an excessive
number of events sent by the backend. Fix that by using lateeoi event
channels.

To be able to detect the case of no rx responses being added while the
carrier is down, a new lock is needed in order to update and test
rsp_cons and the number of seen unconsumed responses atomically.

This is part of XSA-391

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
V2:
- don't eoi irq in case of interface set broken (Jan Beulich)
- handle carrier off + no new responses added (Jan Beulich)
V3:
- add rx_ prefix to rsp_unconsumed (Jan Beulich)
- correct xennet_set_rx_rsp_cons() spelling (Jan Beulich)

+95 -32
drivers/net/xen-netfront.c
··· 148 148 grant_ref_t gref_rx_head; 149 149 grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; 150 150 151 + unsigned int rx_rsp_unconsumed; 152 + spinlock_t rx_cons_lock; 153 + 151 154 struct page_pool *page_pool; 152 155 struct xdp_rxq_info xdp_rxq; 153 156 }; ··· 379 376 return 0; 380 377 } 381 378 382 - static void xennet_tx_buf_gc(struct netfront_queue *queue) 379 + static bool xennet_tx_buf_gc(struct netfront_queue *queue) 383 380 { 384 381 RING_IDX cons, prod; 385 382 unsigned short id; 386 383 struct sk_buff *skb; 387 384 bool more_to_do; 385 + bool work_done = false; 388 386 const struct device *dev = &queue->info->netdev->dev; 389 387 390 388 BUG_ON(!netif_carrier_ok(queue->info->netdev)); ··· 401 397 402 398 for (cons = queue->tx.rsp_cons; cons != prod; cons++) { 403 399 struct xen_netif_tx_response txrsp; 400 + 401 + work_done = true; 404 402 405 403 RING_COPY_RESPONSE(&queue->tx, cons, &txrsp); 406 404 if (txrsp.status == XEN_NETIF_RSP_NULL) ··· 447 441 448 442 xennet_maybe_wake_tx(queue); 449 443 450 - return; 444 + return work_done; 451 445 452 446 err: 453 447 queue->info->broken = true; 454 448 dev_alert(dev, "Disabled for further use\n"); 449 + 450 + return work_done; 455 451 } 456 452 457 453 struct xennet_gnttab_make_txreq { ··· 842 834 return 0; 843 835 } 844 836 837 + static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val) 838 + { 839 + unsigned long flags; 840 + 841 + spin_lock_irqsave(&queue->rx_cons_lock, flags); 842 + queue->rx.rsp_cons = val; 843 + queue->rx_rsp_unconsumed = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx); 844 + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); 845 + } 846 + 845 847 static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb, 846 848 grant_ref_t ref) 847 849 { ··· 903 885 xennet_move_rx_slot(queue, skb, ref); 904 886 } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); 905 887 906 - queue->rx.rsp_cons = cons; 888 + xennet_set_rx_rsp_cons(queue, cons); 907 889 return err; 
908 890 } 909 891 ··· 1057 1039 } 1058 1040 1059 1041 if (unlikely(err)) 1060 - queue->rx.rsp_cons = cons + slots; 1042 + xennet_set_rx_rsp_cons(queue, cons + slots); 1061 1043 1062 1044 return err; 1063 1045 } ··· 1111 1093 __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); 1112 1094 } 1113 1095 if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) { 1114 - queue->rx.rsp_cons = ++cons + skb_queue_len(list); 1096 + xennet_set_rx_rsp_cons(queue, 1097 + ++cons + skb_queue_len(list)); 1115 1098 kfree_skb(nskb); 1116 1099 return -ENOENT; 1117 1100 } ··· 1125 1106 kfree_skb(nskb); 1126 1107 } 1127 1108 1128 - queue->rx.rsp_cons = cons; 1109 + xennet_set_rx_rsp_cons(queue, cons); 1129 1110 1130 1111 return 0; 1131 1112 } ··· 1248 1229 1249 1230 if (unlikely(xennet_set_skb_gso(skb, gso))) { 1250 1231 __skb_queue_head(&tmpq, skb); 1251 - queue->rx.rsp_cons += skb_queue_len(&tmpq); 1232 + xennet_set_rx_rsp_cons(queue, 1233 + queue->rx.rsp_cons + 1234 + skb_queue_len(&tmpq)); 1252 1235 goto err; 1253 1236 } 1254 1237 } ··· 1274 1253 1275 1254 __skb_queue_tail(&rxq, skb); 1276 1255 1277 - i = ++queue->rx.rsp_cons; 1256 + i = queue->rx.rsp_cons + 1; 1257 + xennet_set_rx_rsp_cons(queue, i); 1278 1258 work_done++; 1279 1259 } 1280 1260 if (need_xdp_flush) ··· 1439 1417 return 0; 1440 1418 } 1441 1419 1442 - static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id) 1420 + static bool xennet_handle_tx(struct netfront_queue *queue, unsigned int *eoi) 1443 1421 { 1444 - struct netfront_queue *queue = dev_id; 1445 1422 unsigned long flags; 1446 1423 1447 - if (queue->info->broken) 1448 - return IRQ_HANDLED; 1424 + if (unlikely(queue->info->broken)) 1425 + return false; 1449 1426 1450 1427 spin_lock_irqsave(&queue->tx_lock, flags); 1451 - xennet_tx_buf_gc(queue); 1428 + if (xennet_tx_buf_gc(queue)) 1429 + *eoi = 0; 1452 1430 spin_unlock_irqrestore(&queue->tx_lock, flags); 1431 + 1432 + return true; 1433 + } 1434 + 1435 + static irqreturn_t xennet_tx_interrupt(int irq, void 
*dev_id) 1436 + { 1437 + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; 1438 + 1439 + if (likely(xennet_handle_tx(dev_id, &eoiflag))) 1440 + xen_irq_lateeoi(irq, eoiflag); 1453 1441 1454 1442 return IRQ_HANDLED; 1455 1443 } 1456 1444 1445 + static bool xennet_handle_rx(struct netfront_queue *queue, unsigned int *eoi) 1446 + { 1447 + unsigned int work_queued; 1448 + unsigned long flags; 1449 + 1450 + if (unlikely(queue->info->broken)) 1451 + return false; 1452 + 1453 + spin_lock_irqsave(&queue->rx_cons_lock, flags); 1454 + work_queued = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx); 1455 + if (work_queued > queue->rx_rsp_unconsumed) { 1456 + queue->rx_rsp_unconsumed = work_queued; 1457 + *eoi = 0; 1458 + } else if (unlikely(work_queued < queue->rx_rsp_unconsumed)) { 1459 + const struct device *dev = &queue->info->netdev->dev; 1460 + 1461 + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); 1462 + dev_alert(dev, "RX producer index going backwards\n"); 1463 + dev_alert(dev, "Disabled for further use\n"); 1464 + queue->info->broken = true; 1465 + return false; 1466 + } 1467 + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); 1468 + 1469 + if (likely(netif_carrier_ok(queue->info->netdev) && work_queued)) 1470 + napi_schedule(&queue->napi); 1471 + 1472 + return true; 1473 + } 1474 + 1457 1475 static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id) 1458 1476 { 1459 - struct netfront_queue *queue = dev_id; 1460 - struct net_device *dev = queue->info->netdev; 1477 + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; 1461 1478 1462 - if (queue->info->broken) 1463 - return IRQ_HANDLED; 1464 - 1465 - if (likely(netif_carrier_ok(dev) && 1466 - RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))) 1467 - napi_schedule(&queue->napi); 1479 + if (likely(xennet_handle_rx(dev_id, &eoiflag))) 1480 + xen_irq_lateeoi(irq, eoiflag); 1468 1481 1469 1482 return IRQ_HANDLED; 1470 1483 } 1471 1484 1472 1485 static irqreturn_t xennet_interrupt(int irq, void *dev_id) 1473 1486 { 1474 - 
xennet_tx_interrupt(irq, dev_id); 1475 - xennet_rx_interrupt(irq, dev_id); 1487 + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; 1488 + 1489 + if (xennet_handle_tx(dev_id, &eoiflag) && 1490 + xennet_handle_rx(dev_id, &eoiflag)) 1491 + xen_irq_lateeoi(irq, eoiflag); 1492 + 1476 1493 return IRQ_HANDLED; 1477 1494 } 1478 1495 ··· 1829 1768 if (err < 0) 1830 1769 goto fail; 1831 1770 1832 - err = bind_evtchn_to_irqhandler(queue->tx_evtchn, 1833 - xennet_interrupt, 1834 - 0, queue->info->netdev->name, queue); 1771 + err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn, 1772 + xennet_interrupt, 0, 1773 + queue->info->netdev->name, 1774 + queue); 1835 1775 if (err < 0) 1836 1776 goto bind_fail; 1837 1777 queue->rx_evtchn = queue->tx_evtchn; ··· 1860 1798 1861 1799 snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name), 1862 1800 "%s-tx", queue->name); 1863 - err = bind_evtchn_to_irqhandler(queue->tx_evtchn, 1864 - xennet_tx_interrupt, 1865 - 0, queue->tx_irq_name, queue); 1801 + err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn, 1802 + xennet_tx_interrupt, 0, 1803 + queue->tx_irq_name, queue); 1866 1804 if (err < 0) 1867 1805 goto bind_tx_fail; 1868 1806 queue->tx_irq = err; 1869 1807 1870 1808 snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name), 1871 1809 "%s-rx", queue->name); 1872 - err = bind_evtchn_to_irqhandler(queue->rx_evtchn, 1873 - xennet_rx_interrupt, 1874 - 0, queue->rx_irq_name, queue); 1810 + err = bind_evtchn_to_irqhandler_lateeoi(queue->rx_evtchn, 1811 + xennet_rx_interrupt, 0, 1812 + queue->rx_irq_name, queue); 1875 1813 if (err < 0) 1876 1814 goto bind_rx_fail; 1877 1815 queue->rx_irq = err; ··· 1973 1911 1974 1912 spin_lock_init(&queue->tx_lock); 1975 1913 spin_lock_init(&queue->rx_lock); 1914 + spin_lock_init(&queue->rx_cons_lock); 1976 1915 1977 1916 timer_setup(&queue->rx_refill_timer, rx_refill_timeout, 0); 1978 1917