Merge tag 'xsa-5.19-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen security fixes from Juergen Gross:

- XSA-403 (4 patches for blkfront and netfront drivers):

Linux Block and Network PV device frontends don't zero memory regions
before sharing them with the backend (CVE-2022-26365, CVE-2022-33740).
Additionally, the granularity of the grant table doesn't allow sharing
less than a 4K page, so unrelated data residing in the same 4K page as
data shared with a backend is accessible to that backend
(CVE-2022-33741, CVE-2022-33742).
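
The mitigation the patches apply (the actual changes are in the blkfront
and netfront diffs below) is to bounce data into a freshly allocated,
zeroed page before granting it, so neither stale allocator contents nor
neighbouring data in the same 4K page ever reach the backend. A minimal
sketch of that pattern, with a hypothetical helper name rather than the
real driver code:

#include <linux/gfp.h>
#include <linux/string.h>

/*
 * Illustrative sketch only, not the driver code: bounce_for_grant() is a
 * made-up helper showing the idea behind the real bounce paths below.
 */
static void *bounce_for_grant(const void *data, size_t len)
{
	void *page;

	if (len > PAGE_SIZE)
		return NULL;

	/* Zeroed allocation: the unused tail of the 4K page leaks nothing. */
	page = (void *)get_zeroed_page(GFP_ATOMIC);
	if (!page)
		return NULL;

	/* Only the payload is copied into the page that will be granted. */
	memcpy(page, data, len);
	return page;
}

Both frontends also gain a "trusted" module parameter (see the diffs
below): bouncing is enabled when that parameter is set to false or when
the device's Xenstore node marks the backend as untrusted.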

- XSA-405 (1 patch for netfront driver, only 5.10 and newer):

While adding logic to support XDP (eXpress Data Path), a code label
was moved in a way that allows SKBs whose references (pointers) are
retained for further processing to nevertheless be freed.
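
As a generic sketch of this bug class (hypothetical function, not the
netfront code; the real fix is the one-line label move visible in the
xen-netfront diff below): once __skb_queue_tail() has stored a pointer
to the skb, the list retains a reference, so any path that may still
free the skb has to branch to a point before the queueing, never after
it.

#include <linux/skbuff.h>

/* Illustrative only: shows why the position of the resume label matters. */
static void rx_keep_or_drop(struct sk_buff_head *list, struct sk_buff *skb,
			    bool drop)
{
	if (drop) {
		kfree_skb(skb);		/* safe: no other reference exists yet */
		return;
	}

	/*
	 * A pointer to skb is now retained by 'list'; freeing skb after
	 * this point would leave a dangling entry behind.
	 */
	__skb_queue_tail(list, skb);
}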

- XSA-406 (1 patch for Arm-specific dom0 code):

When mapping pages of guests on Arm, dom0 uses an rbtree to keep
track of the foreign mappings.

Updating of that rbtree is not always done completely with the
related lock held, resulting in a small race window that unprivileged
guests can use via PV devices to cause inconsistencies in the rbtree.
These inconsistencies can lead to Denial of Service (DoS) of dom0,
e.g. by causing crashes or the inability to perform further mappings
of other guests' memory pages.
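
A minimal sketch of the locking pattern the fix enforces (hypothetical
tree, lock and entry names, not the p2m code shown below): the rbtree
root must only be sampled after the lock protecting the tree has been
taken, otherwise a concurrent update can hand the walker a stale root.

#include <linux/rbtree.h>
#include <linux/spinlock.h>

struct demo_entry {
	struct rb_node rbnode;
	unsigned long key;
	unsigned long val;
};

static struct rb_root demo_root = RB_ROOT;
static DEFINE_RWLOCK(demo_lock);

static unsigned long demo_lookup(unsigned long key)
{
	struct rb_node *n;
	struct demo_entry *entry;
	unsigned long flags, val = 0;

	read_lock_irqsave(&demo_lock, flags);
	n = demo_root.rb_node;		/* root read under the lock, not before */
	while (n) {
		entry = rb_entry(n, struct demo_entry, rbnode);
		if (entry->key == key) {
			val = entry->val;
			break;
		}
		n = key < entry->key ? n->rb_left : n->rb_right;
	}
	read_unlock_irqrestore(&demo_lock, flags);

	return val;
}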

* tag 'xsa-5.19-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
xen/arm: Fix race in RB-tree based P2M accounting
xen-netfront: restore __skb_queue_tail() positioning in xennet_get_responses()
xen/blkfront: force data bouncing when backend is untrusted
xen/netfront: force data bouncing when backend is untrusted
xen/netfront: fix leaking data in shared pages
xen/blkfront: fix leaking data in shared pages

 3 files changed, 93 insertions(+), 23 deletions(-)

arch/arm/xen/p2m.c (+4 -2):
···
 unsigned long __pfn_to_mfn(unsigned long pfn)
 {
-	struct rb_node *n = phys_to_mach.rb_node;
+	struct rb_node *n;
 	struct xen_p2m_entry *entry;
 	unsigned long irqflags;
 
 	read_lock_irqsave(&p2m_lock, irqflags);
+	n = phys_to_mach.rb_node;
 	while (n) {
 		entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys);
 		if (entry->pfn <= pfn &&
···
 	int rc;
 	unsigned long irqflags;
 	struct xen_p2m_entry *p2m_entry;
-	struct rb_node *n = phys_to_mach.rb_node;
+	struct rb_node *n;
 
 	if (mfn == INVALID_P2M_ENTRY) {
 		write_lock_irqsave(&p2m_lock, irqflags);
+		n = phys_to_mach.rb_node;
 		while (n) {
 			p2m_entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys);
 			if (p2m_entry->pfn <= pfn &&
···

drivers/block/xen-blkfront.c (+37 -17):
···
 module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
 MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
 
+static bool __read_mostly xen_blkif_trusted = true;
+module_param_named(trusted, xen_blkif_trusted, bool, 0644);
+MODULE_PARM_DESC(trusted, "Is the backend trusted");
+
 #define BLK_RING_SIZE(info) \
 	__CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages)
···
 	unsigned int feature_discard:1;
 	unsigned int feature_secdiscard:1;
 	unsigned int feature_persistent:1;
+	unsigned int bounce:1;
 	unsigned int discard_granularity;
 	unsigned int discard_alignment;
 	/* Number of 4KB segments handled */
···
 		if (!gnt_list_entry)
 			goto out_of_memory;
 
-		if (info->feature_persistent) {
-			granted_page = alloc_page(GFP_NOIO);
+		if (info->bounce) {
+			granted_page = alloc_page(GFP_NOIO | __GFP_ZERO);
 			if (!granted_page) {
 				kfree(gnt_list_entry);
 				goto out_of_memory;
···
 	list_for_each_entry_safe(gnt_list_entry, n,
 				 &rinfo->grants, node) {
 		list_del(&gnt_list_entry->node);
-		if (info->feature_persistent)
+		if (info->bounce)
 			__free_page(gnt_list_entry->page);
 		kfree(gnt_list_entry);
 		i--;
···
 	/* Assign a gref to this page */
 	gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
 	BUG_ON(gnt_list_entry->gref == -ENOSPC);
-	if (info->feature_persistent)
+	if (info->bounce)
 		grant_foreign_access(gnt_list_entry, info);
 	else {
 		/* Grant access to the GFN passed by the caller */
···
 	/* Assign a gref to this page */
 	gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
 	BUG_ON(gnt_list_entry->gref == -ENOSPC);
-	if (!info->feature_persistent) {
+	if (!info->bounce) {
 		struct page *indirect_page;
 
 		/* Fetch a pre-allocated page to use for indirect grefs */
···
 		.grant_idx = 0,
 		.segments = NULL,
 		.rinfo = rinfo,
-		.need_copy = rq_data_dir(req) && info->feature_persistent,
+		.need_copy = rq_data_dir(req) && info->bounce,
 	};
 
 	/*
···
 {
 	blk_queue_write_cache(info->rq, info->feature_flush ? true : false,
 			      info->feature_fua ? true : false);
-	pr_info("blkfront: %s: %s %s %s %s %s\n",
+	pr_info("blkfront: %s: %s %s %s %s %s %s %s\n",
 		info->gd->disk_name, flush_info(info),
 		"persistent grants:", info->feature_persistent ?
 		"enabled;" : "disabled;", "indirect descriptors:",
-		info->max_indirect_segments ? "enabled;" : "disabled;");
+		info->max_indirect_segments ? "enabled;" : "disabled;",
+		"bounce buffer:", info->bounce ? "enabled" : "disabled;");
 }
 
 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
···
 	if (!list_empty(&rinfo->indirect_pages)) {
 		struct page *indirect_page, *n;
 
-		BUG_ON(info->feature_persistent);
+		BUG_ON(info->bounce);
 		list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
 			list_del(&indirect_page->lru);
 			__free_page(indirect_page);
···
 						  NULL);
 			rinfo->persistent_gnts_c--;
 		}
-		if (info->feature_persistent)
+		if (info->bounce)
 			__free_page(persistent_gnt->page);
 		kfree(persistent_gnt);
 	}
···
 		for (j = 0; j < segs; j++) {
 			persistent_gnt = rinfo->shadow[i].grants_used[j];
 			gnttab_end_foreign_access(persistent_gnt->gref, NULL);
-			if (info->feature_persistent)
+			if (info->bounce)
 				__free_page(persistent_gnt->page);
 			kfree(persistent_gnt);
 		}
···
 	data.s = s;
 	num_sg = s->num_sg;
 
-	if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
+	if (bret->operation == BLKIF_OP_READ && info->bounce) {
 		for_each_sg(s->sg, sg, num_sg, i) {
 			BUG_ON(sg->offset + sg->length > PAGE_SIZE);
···
 			 * Add the used indirect page back to the list of
 			 * available pages for indirect grefs.
 			 */
-			if (!info->feature_persistent) {
+			if (!info->bounce) {
 				indirect_page = s->indirect_grants[i]->page;
 				list_add(&indirect_page->lru, &rinfo->indirect_pages);
 			}
···
 	if (!info)
 		return -ENODEV;
+
+	/* Check if backend is trusted. */
+	info->bounce = !xen_blkif_trusted ||
+		       !xenbus_read_unsigned(dev->nodename, "trusted", 1);
 
 	max_page_order = xenbus_read_unsigned(info->xbdev->otherend,
 					      "max-ring-page-order", 0);
···
 	if (err)
 		goto out_of_memory;
 
-	if (!info->feature_persistent && info->max_indirect_segments) {
+	if (!info->bounce && info->max_indirect_segments) {
 		/*
-		 * We are using indirect descriptors but not persistent
-		 * grants, we need to allocate a set of pages that can be
+		 * We are using indirect descriptors but don't have a bounce
+		 * buffer, we need to allocate a set of pages that can be
 		 * used for mapping indirect grefs
 		 */
 		int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info);
 
 		BUG_ON(!list_empty(&rinfo->indirect_pages));
 		for (i = 0; i < num; i++) {
-			struct page *indirect_page = alloc_page(GFP_KERNEL);
+			struct page *indirect_page = alloc_page(GFP_KERNEL |
+								__GFP_ZERO);
 			if (!indirect_page)
 				goto out_of_memory;
 			list_add(&indirect_page->lru, &rinfo->indirect_pages);
···
 	info->feature_persistent =
 		!!xenbus_read_unsigned(info->xbdev->otherend,
 				       "feature-persistent", 0);
+	if (info->feature_persistent)
+		info->bounce = true;
 
 	indirect_segments = xenbus_read_unsigned(info->xbdev->otherend,
 					"feature-max-indirect-segments", 0);
···
 {
 	struct blkfront_info *info;
 	bool need_schedule_work = false;
+
+	/*
+	 * Note that when using bounce buffers but not persistent grants
+	 * there's no need to run blkfront_delay_work because grants are
+	 * revoked in blkif_completion or else an error is reported and the
+	 * connection is closed.
+	 */
 
 	mutex_lock(&blkfront_mutex);
···

drivers/net/xen-netfront.c (+52 -4):
···
 MODULE_PARM_DESC(max_queues,
		 "Maximum number of queues per virtual interface");
 
+static bool __read_mostly xennet_trusted = true;
+module_param_named(trusted, xennet_trusted, bool, 0644);
+MODULE_PARM_DESC(trusted, "Is the backend trusted");
+
 #define XENNET_TIMEOUT  (5 * HZ)
 
 static const struct ethtool_ops xennet_ethtool_ops;
···
 	/* Is device behaving sane? */
 	bool broken;
 
+	/* Should skbs be bounced into a zeroed buffer? */
+	bool bounce;
+
 	atomic_t rx_gso_checksum_fixup;
 };
···
 	if (unlikely(!skb))
 		return NULL;
 
-	page = page_pool_dev_alloc_pages(queue->page_pool);
+	page = page_pool_alloc_pages(queue->page_pool,
+				     GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO);
 	if (unlikely(!page)) {
 		kfree_skb(skb);
 		return NULL;
···
 	return nxmit;
 }
 
+struct sk_buff *bounce_skb(const struct sk_buff *skb)
+{
+	unsigned int headerlen = skb_headroom(skb);
+	/* Align size to allocate full pages and avoid contiguous data leaks */
+	unsigned int size = ALIGN(skb_end_offset(skb) + skb->data_len,
+				  XEN_PAGE_SIZE);
+	struct sk_buff *n = alloc_skb(size, GFP_ATOMIC | __GFP_ZERO);
+
+	if (!n)
+		return NULL;
+
+	if (!IS_ALIGNED((uintptr_t)n->head, XEN_PAGE_SIZE)) {
+		WARN_ONCE(1, "misaligned skb allocated\n");
+		kfree_skb(n);
+		return NULL;
+	}
+
+	/* Set the data pointer */
+	skb_reserve(n, headerlen);
+	/* Set the tail pointer and length */
+	skb_put(n, skb->len);
+
+	BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
+
+	skb_copy_header(n, skb);
+	return n;
+}
 
 #define MAX_XEN_SKB_FRAGS	(65536 / XEN_PAGE_SIZE + 1)
···
 	/* The first req should be at least ETH_HLEN size or the packet will be
 	 * dropped by netback.
+	 *
+	 * If the backend is not trusted bounce all data to zeroed pages to
+	 * avoid exposing contiguous data on the granted page not belonging to
+	 * the skb.
 	 */
-	if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
-		nskb = skb_copy(skb, GFP_ATOMIC);
+	if (np->bounce || unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
+		nskb = bounce_skb(skb);
 		if (!nskb)
 			goto drop;
 		dev_consume_skb_any(skb);
···
 			}
 		}
 		rcu_read_unlock();
-next:
+
 		__skb_queue_tail(list, skb);
+
+next:
 		if (!(rx->flags & XEN_NETRXF_more_data))
 			break;
···
 	info->netdev->irq = 0;
 
+	/* Check if backend is trusted. */
+	info->bounce = !xennet_trusted ||
+		       !xenbus_read_unsigned(dev->nodename, "trusted", 1);
+
 	/* Check if backend supports multiple queues */
 	max_queues = xenbus_read_unsigned(info->xbdev->otherend,
 					  "multi-queue-max-queues", 1);
···
 		return err;
 	if (np->netback_has_xdp_headroom)
 		pr_info("backend supports XDP headroom\n");
+	if (np->bounce)
+		dev_info(&np->xbdev->dev,
+			 "bouncing transmitted data to zeroed pages\n");
 
 	/* talk_to_netback() sets the correct number of queues */
 	num_queues = dev->real_num_tx_queues;
···