Merge tag 'xsa-5.19-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen security fixes from Juergen Gross:

- XSA-403 (4 patches for blkfront and netfront drivers):

Linux Block and Network PV device frontends don't zero memory regions
before sharing them with the backend (CVE-2022-26365,
CVE-2022-33740). Additionally, the granularity of the grant table
doesn't allow sharing less than a 4K page, so unrelated data residing
in the same 4K page as data shared with a backend is accessible by
that backend (CVE-2022-33741, CVE-2022-33742). A short sketch of the
bounce-buffer mitigation follows this list.

- XSA-405 (1 patch for netfront driver, only 5.10 and newer):

While adding logic to support XDP (eXpress Data Path), a code label
was moved in a way that allowed SKBs whose references (pointers) were
retained for further processing to nevertheless be freed. A
simplified sketch of the corrected control flow follows this list.

- XSA-406 (1 patch for Arm specific dom0 code):

When mapping pages of guests on Arm, dom0 uses an rbtree to keep
track of the foreign mappings.

Updates of that rbtree are not always performed entirely with the
related lock held, leaving a small race window that unprivileged
guests can exploit via PV devices to make the rbtree inconsistent.
Such inconsistencies can lead to Denial of Service (DoS) of dom0,
e.g. crashes or the inability to perform further mappings of other
guests' memory pages. A condensed sketch of the locking pattern the
fix enforces follows this list.
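
The XSA-403 mitigation boils down to bouncing data into freshly
allocated, zero-filled pages before granting them. A minimal sketch
of that idea, using a hypothetical bounce_for_grant() helper rather
than the actual driver code:

  #include <linux/gfp.h>
  #include <linux/string.h>
  #include <xen/page.h>

  /* Hypothetical helper: copy @len bytes into a zeroed page suitable
   * for granting.  The caller grants the returned page instead of the
   * page the payload happens to live in, so the remainder of the 4K
   * granted page cannot expose unrelated data. */
  static void *bounce_for_grant(const void *data, size_t len)
  {
          void *bounce;

          if (len > XEN_PAGE_SIZE)
                  return NULL;

          bounce = (void *)get_zeroed_page(GFP_NOIO); /* zero-filled page */
          if (!bounce)
                  return NULL;

          memcpy(bounce, data, len);
          return bounce;          /* share this page with the backend */
  }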
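
The XSA-405 fix restores the queueing step to before the label that
error paths jump to. A simplified illustration of the corrected
control flow, with hypothetical fetch_response()/response_is_bad()
helpers standing in for the real xennet_get_responses() logic:

  #include <linux/skbuff.h>
  #include <xen/interface/io/netif.h>

  /* Hypothetical helpers standing in for the real RX-ring accessors. */
  void fetch_response(struct xen_netif_rx_response *rx, struct sk_buff **skb);
  bool response_is_bad(const struct xen_netif_rx_response *rx);

  static void collect_responses(struct sk_buff_head *list)
  {
          for (;;) {
                  struct xen_netif_rx_response rx;
                  struct sk_buff *skb;

                  fetch_response(&rx, &skb);

                  if (response_is_bad(&rx)) {
                          kfree_skb(skb); /* our reference is dropped ...   */
                          goto next;      /* ... so the skb must not be
                                           * queued below                   */
                  }

                  __skb_queue_tail(list, skb); /* list keeps the reference */

  next:
                  if (!(rx.flags & XEN_NETRXF_more_data))
                          break;
          }
  }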
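
The XSA-406 fix makes sure the rbtree root is only read once the lock
guarding the tree is held. A condensed, generic sketch of that
pattern (illustrative range_entry structures, not the kernel's p2m
bookkeeping):

  #include <linux/rbtree.h>
  #include <linux/spinlock.h>

  static DEFINE_RWLOCK(range_lock);
  static struct rb_root range_tree = RB_ROOT;

  struct range_entry {                  /* illustrative entry type */
          struct rb_node node;
          unsigned long start, len;
  };

  static struct range_entry *range_lookup(unsigned long key)
  {
          struct rb_node *n;            /* NOT initialised from the tree here */
          struct range_entry *found = NULL;
          unsigned long flags;

          read_lock_irqsave(&range_lock, flags);
          n = range_tree.rb_node;       /* root sampled under the lock */
          while (n) {
                  struct range_entry *e = rb_entry(n, struct range_entry, node);

                  if (key < e->start)
                          n = n->rb_left;
                  else if (key >= e->start + e->len)
                          n = n->rb_right;
                  else {
                          found = e;
                          break;
                  }
          }
          read_unlock_irqrestore(&range_lock, flags);

          return found;
  }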

* tag 'xsa-5.19-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
xen/arm: Fix race in RB-tree based P2M accounting
xen-netfront: restore __skb_queue_tail() positioning in xennet_get_responses()
xen/blkfront: force data bouncing when backend is untrusted
xen/netfront: force data bouncing when backend is untrusted
xen/netfront: fix leaking data in shared pages
xen/blkfront: fix leaking data in shared pages

3 files changed, 93 insertions(+), 23 deletions(-)

arch/arm/xen/p2m.c (+4 -2):

···
 unsigned long __pfn_to_mfn(unsigned long pfn)
 {
-        struct rb_node *n = phys_to_mach.rb_node;
+        struct rb_node *n;
         struct xen_p2m_entry *entry;
         unsigned long irqflags;
 
         read_lock_irqsave(&p2m_lock, irqflags);
+        n = phys_to_mach.rb_node;
         while (n) {
                 entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys);
                 if (entry->pfn <= pfn &&
···
         int rc;
         unsigned long irqflags;
         struct xen_p2m_entry *p2m_entry;
-        struct rb_node *n = phys_to_mach.rb_node;
+        struct rb_node *n;
 
         if (mfn == INVALID_P2M_ENTRY) {
                 write_lock_irqsave(&p2m_lock, irqflags);
+                n = phys_to_mach.rb_node;
                 while (n) {
                         p2m_entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys);
                         if (p2m_entry->pfn <= pfn &&
drivers/block/xen-blkfront.c (+37 -17):

···
 module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
 MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
 
+static bool __read_mostly xen_blkif_trusted = true;
+module_param_named(trusted, xen_blkif_trusted, bool, 0644);
+MODULE_PARM_DESC(trusted, "Is the backend trusted");
+
 #define BLK_RING_SIZE(info)     \
         __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages)
···
         unsigned int feature_discard:1;
         unsigned int feature_secdiscard:1;
         unsigned int feature_persistent:1;
+        unsigned int bounce:1;
         unsigned int discard_granularity;
         unsigned int discard_alignment;
         /* Number of 4KB segments handled */
···
                 if (!gnt_list_entry)
                         goto out_of_memory;
 
-                if (info->feature_persistent) {
-                        granted_page = alloc_page(GFP_NOIO);
+                if (info->bounce) {
+                        granted_page = alloc_page(GFP_NOIO | __GFP_ZERO);
                         if (!granted_page) {
                                 kfree(gnt_list_entry);
                                 goto out_of_memory;
···
         list_for_each_entry_safe(gnt_list_entry, n,
                                  &rinfo->grants, node) {
                 list_del(&gnt_list_entry->node);
-                if (info->feature_persistent)
+                if (info->bounce)
                         __free_page(gnt_list_entry->page);
                 kfree(gnt_list_entry);
                 i--;
···
         /* Assign a gref to this page */
         gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
         BUG_ON(gnt_list_entry->gref == -ENOSPC);
-        if (info->feature_persistent)
+        if (info->bounce)
                 grant_foreign_access(gnt_list_entry, info);
         else {
                 /* Grant access to the GFN passed by the caller */
···
         /* Assign a gref to this page */
         gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
         BUG_ON(gnt_list_entry->gref == -ENOSPC);
-        if (!info->feature_persistent) {
+        if (!info->bounce) {
                 struct page *indirect_page;
 
                 /* Fetch a pre-allocated page to use for indirect grefs */
···
                 .grant_idx = 0,
                 .segments = NULL,
                 .rinfo = rinfo,
-                .need_copy = rq_data_dir(req) && info->feature_persistent,
+                .need_copy = rq_data_dir(req) && info->bounce,
         };
 
         /*
···
 {
         blk_queue_write_cache(info->rq, info->feature_flush ? true : false,
                               info->feature_fua ? true : false);
-        pr_info("blkfront: %s: %s %s %s %s %s\n",
+        pr_info("blkfront: %s: %s %s %s %s %s %s %s\n",
                 info->gd->disk_name, flush_info(info),
                 "persistent grants:", info->feature_persistent ?
                 "enabled;" : "disabled;", "indirect descriptors:",
-                info->max_indirect_segments ? "enabled;" : "disabled;");
+                info->max_indirect_segments ? "enabled;" : "disabled;",
+                "bounce buffer:", info->bounce ? "enabled" : "disabled;");
 }
 
 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
···
         if (!list_empty(&rinfo->indirect_pages)) {
                 struct page *indirect_page, *n;
 
-                BUG_ON(info->feature_persistent);
+                BUG_ON(info->bounce);
                 list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
                         list_del(&indirect_page->lru);
                         __free_page(indirect_page);
···
                                           NULL);
                         rinfo->persistent_gnts_c--;
                 }
-                if (info->feature_persistent)
+                if (info->bounce)
                         __free_page(persistent_gnt->page);
                 kfree(persistent_gnt);
         }
···
                 for (j = 0; j < segs; j++) {
                         persistent_gnt = rinfo->shadow[i].grants_used[j];
                         gnttab_end_foreign_access(persistent_gnt->gref, NULL);
-                        if (info->feature_persistent)
+                        if (info->bounce)
                                 __free_page(persistent_gnt->page);
                         kfree(persistent_gnt);
                 }
···
         data.s = s;
         num_sg = s->num_sg;
 
-        if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
+        if (bret->operation == BLKIF_OP_READ && info->bounce) {
                 for_each_sg(s->sg, sg, num_sg, i) {
                         BUG_ON(sg->offset + sg->length > PAGE_SIZE);
 
···
                          * Add the used indirect page back to the list of
                          * available pages for indirect grefs.
                          */
-                        if (!info->feature_persistent) {
+                        if (!info->bounce) {
                                 indirect_page = s->indirect_grants[i]->page;
                                 list_add(&indirect_page->lru, &rinfo->indirect_pages);
                         }
···
 
         if (!info)
                 return -ENODEV;
+
+        /* Check if backend is trusted. */
+        info->bounce = !xen_blkif_trusted ||
+                       !xenbus_read_unsigned(dev->nodename, "trusted", 1);
 
         max_page_order = xenbus_read_unsigned(info->xbdev->otherend,
                                               "max-ring-page-order", 0);
···
         if (err)
                 goto out_of_memory;
 
-        if (!info->feature_persistent && info->max_indirect_segments) {
+        if (!info->bounce && info->max_indirect_segments) {
                 /*
-                 * We are using indirect descriptors but not persistent
-                 * grants, we need to allocate a set of pages that can be
+                 * We are using indirect descriptors but don't have a bounce
+                 * buffer, we need to allocate a set of pages that can be
                  * used for mapping indirect grefs
                  */
                 int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info);
 
                 BUG_ON(!list_empty(&rinfo->indirect_pages));
                 for (i = 0; i < num; i++) {
-                        struct page *indirect_page = alloc_page(GFP_KERNEL);
+                        struct page *indirect_page = alloc_page(GFP_KERNEL |
+                                                                __GFP_ZERO);
                         if (!indirect_page)
                                 goto out_of_memory;
                         list_add(&indirect_page->lru, &rinfo->indirect_pages);
···
         info->feature_persistent =
                 !!xenbus_read_unsigned(info->xbdev->otherend,
                                        "feature-persistent", 0);
+        if (info->feature_persistent)
+                info->bounce = true;
 
         indirect_segments = xenbus_read_unsigned(info->xbdev->otherend,
                                         "feature-max-indirect-segments", 0);
···
 {
         struct blkfront_info *info;
         bool need_schedule_work = false;
+
+        /*
+         * Note that when using bounce buffers but not persistent grants
+         * there's no need to run blkfront_delay_work because grants are
+         * revoked in blkif_completion or else an error is reported and the
+         * connection is closed.
+         */
 
         mutex_lock(&blkfront_mutex);
 
drivers/net/xen-netfront.c (+52 -4):

···
 MODULE_PARM_DESC(max_queues,
                  "Maximum number of queues per virtual interface");
 
+static bool __read_mostly xennet_trusted = true;
+module_param_named(trusted, xennet_trusted, bool, 0644);
+MODULE_PARM_DESC(trusted, "Is the backend trusted");
+
 #define XENNET_TIMEOUT  (5 * HZ)
 
 static const struct ethtool_ops xennet_ethtool_ops;
···
         /* Is device behaving sane? */
         bool broken;
 
+        /* Should skbs be bounced into a zeroed buffer? */
+        bool bounce;
+
         atomic_t rx_gso_checksum_fixup;
 };
···
         if (unlikely(!skb))
                 return NULL;
 
-        page = page_pool_dev_alloc_pages(queue->page_pool);
+        page = page_pool_alloc_pages(queue->page_pool,
+                                     GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO);
         if (unlikely(!page)) {
                 kfree_skb(skb);
                 return NULL;
···
         return nxmit;
 }
 
+struct sk_buff *bounce_skb(const struct sk_buff *skb)
+{
+        unsigned int headerlen = skb_headroom(skb);
+        /* Align size to allocate full pages and avoid contiguous data leaks */
+        unsigned int size = ALIGN(skb_end_offset(skb) + skb->data_len,
+                                  XEN_PAGE_SIZE);
+        struct sk_buff *n = alloc_skb(size, GFP_ATOMIC | __GFP_ZERO);
+
+        if (!n)
+                return NULL;
+
+        if (!IS_ALIGNED((uintptr_t)n->head, XEN_PAGE_SIZE)) {
+                WARN_ONCE(1, "misaligned skb allocated\n");
+                kfree_skb(n);
+                return NULL;
+        }
+
+        /* Set the data pointer */
+        skb_reserve(n, headerlen);
+        /* Set the tail pointer and length */
+        skb_put(n, skb->len);
+
+        BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
+
+        skb_copy_header(n, skb);
+        return n;
+}
 
 #define MAX_XEN_SKB_FRAGS       (65536 / XEN_PAGE_SIZE + 1)
 
···
 
         /* The first req should be at least ETH_HLEN size or the packet will be
          * dropped by netback.
+         *
+         * If the backend is not trusted bounce all data to zeroed pages to
+         * avoid exposing contiguous data on the granted page not belonging to
+         * the skb.
          */
-        if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
-                nskb = skb_copy(skb, GFP_ATOMIC);
+        if (np->bounce || unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
+                nskb = bounce_skb(skb);
                 if (!nskb)
                         goto drop;
                 dev_consume_skb_any(skb);
···
                         }
                 }
                 rcu_read_unlock();
-next:
+
                 __skb_queue_tail(list, skb);
+
+next:
                 if (!(rx->flags & XEN_NETRXF_more_data))
                         break;
···
 
         info->netdev->irq = 0;
 
+        /* Check if backend is trusted. */
+        info->bounce = !xennet_trusted ||
+                       !xenbus_read_unsigned(dev->nodename, "trusted", 1);
+
         /* Check if backend supports multiple queues */
         max_queues = xenbus_read_unsigned(info->xbdev->otherend,
                                           "multi-queue-max-queues", 1);
···
                 return err;
         if (np->netback_has_xdp_headroom)
                 pr_info("backend supports XDP headroom\n");
+        if (np->bounce)
+                dev_info(&np->xbdev->dev,
+                         "bouncing transmitted data to zeroed pages\n");
 
         /* talk_to_netback() sets the correct number of queues */
         num_queues = dev->real_num_tx_queues;