Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:
"A small number of improvements all over the place:

- vdpa/octeon support for multiple interrupts

- virtio-pci support for error recovery

- vp_vdpa support for notification with data

- vhost/net fix to set num_buffers for spec compliance

- virtio-mem now works with kdump on s390

And small cleanups all over the place"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (23 commits)
virtio_blk: Add support for transport error recovery
virtio_pci: Add support for PCIe Function Level Reset
vhost/net: Set num_buffers for virtio 1.0
vdpa/octeon_ep: read vendor-specific PCI capability
virtio-pci: define type and header for PCI vendor data
vdpa/octeon_ep: handle device config change events
vdpa/octeon_ep: enable support for multiple interrupts per device
vdpa: solidrun: Replace deprecated PCI functions
s390/kdump: virtio-mem kdump support (CONFIG_PROC_VMCORE_DEVICE_RAM)
virtio-mem: support CONFIG_PROC_VMCORE_DEVICE_RAM
virtio-mem: remember usable region size
virtio-mem: mark device ready before registering callbacks in kdump mode
fs/proc/vmcore: introduce PROC_VMCORE_DEVICE_RAM to detect device RAM ranges in 2nd kernel
fs/proc/vmcore: factor out freeing a list of vmcore ranges
fs/proc/vmcore: factor out allocating a vmcore range and adding it to a list
fs/proc/vmcore: move vmcore definitions out of kcore.h
fs/proc/vmcore: prefix all pr_* with "vmcore:"
fs/proc/vmcore: disallow vmcore modifications while the vmcore is open
fs/proc/vmcore: replace vmcoredd_mutex by vmcore_mutex
fs/proc/vmcore: convert vmcore_cb_lock into vmcore_mutex
...

+735 -193
+1
arch/s390/Kconfig
···
  select MODULES_USE_ELF_RELA
  select NEED_DMA_MAP_STATE if PCI
  select NEED_PER_CPU_EMBED_FIRST_CHUNK
+ select NEED_PROC_VMCORE_DEVICE_RAM if PROC_VMCORE
  select NEED_SG_DMA_LENGTH if PCI
  select OLD_SIGACTION
  select OLD_SIGSUSPEND3
+31 -8
arch/s390/kernel/crash_dump.c
···
      return cnt;
  }

+ static void fill_ptload(Elf64_Phdr *phdr, unsigned long paddr,
+                         unsigned long vaddr, unsigned long size)
+ {
+     phdr->p_type = PT_LOAD;
+     phdr->p_vaddr = vaddr;
+     phdr->p_offset = paddr;
+     phdr->p_paddr = paddr;
+     phdr->p_filesz = size;
+     phdr->p_memsz = size;
+     phdr->p_flags = PF_R | PF_W | PF_X;
+     phdr->p_align = PAGE_SIZE;
+ }
+
  /*
   * Initialize ELF loads (new kernel)
   */
···
      if (os_info_has_vm)
          old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
      for_each_physmem_range(idx, &oldmem_type, &start, &end) {
-         phdr->p_type = PT_LOAD;
-         phdr->p_vaddr = old_identity_base + start;
-         phdr->p_offset = start;
-         phdr->p_paddr = start;
-         phdr->p_filesz = end - start;
-         phdr->p_memsz = end - start;
-         phdr->p_flags = PF_R | PF_W | PF_X;
-         phdr->p_align = PAGE_SIZE;
+         fill_ptload(phdr, start, old_identity_base + start,
+                     end - start);
          phdr++;
      }
  }
···
  {
      return os_info_old_value(OS_INFO_KASLR_OFFSET);
  }
+
+ #ifdef CONFIG_PROC_VMCORE_DEVICE_RAM
+ /*
+  * Fill PT_LOAD for a physical memory range owned by a device and detected by
+  * its device driver.
+  */
+ void elfcorehdr_fill_device_ram_ptload_elf64(Elf64_Phdr *phdr,
+         unsigned long long paddr, unsigned long long size)
+ {
+     unsigned long old_identity_base = 0;
+
+     if (os_info_has_vm())
+         old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
+     fill_ptload(phdr, paddr, old_identity_base + paddr, size);
+ }
+ #endif

  /*
   * Prepare PT_LOAD type program header for kernel image region
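The new elfcorehdr_fill_device_ram_ptload_elf64() hook lets the dump ("2nd") kernel describe RAM that only a device driver can detect. A minimal sketch of how a consumer could fill one PT_LOAD entry per detected range (hypothetical helper name; the real consumer is vmcore_add_device_ram_elf64() in fs/proc/vmcore.c further down):

/* Sketch only: one PT_LOAD per device RAM range; p_offset is fixed up later. */
static void fill_device_ram_phdrs(struct list_head *ranges, Elf64_Phdr *phdr)
{
    struct vmcore_range *m;

    list_for_each_entry(m, ranges, list) {
        /* paddr/size come from the device driver and are page aligned */
        elfcorehdr_fill_device_ram_ptload_elf64(phdr, m->paddr, m->size);
        phdr++;
    }
}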
+25 -3
drivers/block/virtio_blk.c
···
      put_disk(vblk->disk);
  }

- #ifdef CONFIG_PM_SLEEP
- static int virtblk_freeze(struct virtio_device *vdev)
+ static int virtblk_freeze_priv(struct virtio_device *vdev)
  {
      struct virtio_blk *vblk = vdev->priv;
      struct request_queue *q = vblk->disk->queue;
···
      return 0;
  }

- static int virtblk_restore(struct virtio_device *vdev)
+ static int virtblk_restore_priv(struct virtio_device *vdev)
  {
      struct virtio_blk *vblk = vdev->priv;
      int ret;
···

      return 0;
  }
+
+ #ifdef CONFIG_PM_SLEEP
+ static int virtblk_freeze(struct virtio_device *vdev)
+ {
+     return virtblk_freeze_priv(vdev);
+ }
+
+ static int virtblk_restore(struct virtio_device *vdev)
+ {
+     return virtblk_restore_priv(vdev);
+ }
  #endif
+
+ static int virtblk_reset_prepare(struct virtio_device *vdev)
+ {
+     return virtblk_freeze_priv(vdev);
+ }
+
+ static int virtblk_reset_done(struct virtio_device *vdev)
+ {
+     return virtblk_restore_priv(vdev);
+ }

  static const struct virtio_device_id id_table[] = {
      { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
···
      .freeze = virtblk_freeze,
      .restore = virtblk_restore,
  #endif
+     .reset_prepare = virtblk_reset_prepare,
+     .reset_done = virtblk_reset_done,
  };

  static int __init virtio_blk_init(void)
+26 -6
drivers/vdpa/octeon_ep/octep_vdpa.h
···
  #include <linux/pci_regs.h>
  #include <linux/vdpa.h>
  #include <linux/virtio_pci_modern.h>
+ #include <uapi/linux/virtio_crypto.h>
  #include <uapi/linux/virtio_net.h>
  #include <uapi/linux/virtio_blk.h>
  #include <uapi/linux/virtio_config.h>
···
  #define OCTEP_EPF_RINFO(x)      (0x000209f0 | ((x) << 25))
  #define OCTEP_VF_MBOX_DATA(x)   (0x00010210 | ((x) << 17))
  #define OCTEP_PF_MBOX_DATA(x)   (0x00022000 | ((x) << 4))
-
- #define OCTEP_EPF_RINFO_RPVF(val) (((val) >> 32) & 0xF)
- #define OCTEP_EPF_RINFO_NVFS(val) (((val) >> 48) & 0x7F)
+ #define OCTEP_VF_IN_CTRL(x)       (0x00010000 | ((x) << 17))
+ #define OCTEP_VF_IN_CTRL_RPVF(val) (((val) >> 48) & 0xF)

  #define OCTEP_FW_READY_SIGNATURE0  0xFEEDFEED
  #define OCTEP_FW_READY_SIGNATURE1  0x3355ffaa
+ #define OCTEP_MAX_CB_INTR          8

  enum octep_vdpa_dev_status {
      OCTEP_VDPA_DEV_STATUS_INVALID,
···
  struct octep_vring_info {
      struct vdpa_callback cb;
      void __iomem *notify_addr;
-     u32 __iomem *cb_notify_addr;
+     void __iomem *cb_notify_addr;
      phys_addr_t notify_pa;
-     char msix_name[256];
  };
+
+ enum octep_pci_vndr_cfg_type {
+     OCTEP_PCI_VNDR_CFG_TYPE_VIRTIO_ID,
+     OCTEP_PCI_VNDR_CFG_TYPE_MAX,
+ };
+
+ struct octep_pci_vndr_data {
+     struct virtio_pci_vndr_data hdr;
+     u8 id;
+     u8 bar;
+     union {
+         u64 data;
+         struct {
+             u32 offset;
+             u32 length;
+         };
+     };
  };

  struct octep_hw {
···
      u64 features;
      u16 nr_vring;
      u32 config_size;
-     int irq;
+     int nb_irqs;
+     int *irqs;
+     u8 dev_id;
  };

  u8 octep_hw_get_status(struct octep_hw *oct_hw);
+35 -3
drivers/vdpa/octeon_ep/octep_vdpa_hw.c
···
  /* Copyright (C) 2024 Marvell. */

  #include <linux/iopoll.h>
+ #include <linux/build_bug.h>

  #include "octep_vdpa.h"

···
  static u32 octep_get_config_size(struct octep_hw *oct_hw)
  {
-     return sizeof(struct virtio_net_config);
+     switch (oct_hw->dev_id) {
+     case VIRTIO_ID_NET:
+         return sizeof(struct virtio_net_config);
+     case VIRTIO_ID_CRYPTO:
+         return sizeof(struct virtio_crypto_config);
+     default:
+         return 0;
+     }
  }

  static void __iomem *octep_get_cap_addr(struct octep_hw *oct_hw, struct virtio_pci_cap *cap)
···
      return 0;
  }

+ static void octep_vndr_data_process(struct octep_hw *oct_hw,
+                                     struct octep_pci_vndr_data *vndr_data)
+ {
+     BUILD_BUG_ON(sizeof(struct octep_pci_vndr_data) % 4 != 0);
+
+     switch (vndr_data->id) {
+     case OCTEP_PCI_VNDR_CFG_TYPE_VIRTIO_ID:
+         oct_hw->dev_id = (u8)vndr_data->data;
+         break;
+     default:
+         dev_err(&oct_hw->pdev->dev, "Invalid vendor data id %u\n",
+                 vndr_data->id);
+         break;
+     }
+ }
+
  int octep_hw_caps_read(struct octep_hw *oct_hw, struct pci_dev *pdev)
  {
+     struct octep_pci_vndr_data vndr_data;
      struct octep_mbox __iomem *mbox;
      struct device *dev = &pdev->dev;
      struct virtio_pci_cap cap;
···
          case VIRTIO_PCI_CAP_ISR_CFG:
              oct_hw->isr = octep_get_cap_addr(oct_hw, &cap);
              break;
+         case VIRTIO_PCI_CAP_VENDOR_CFG:
+             octep_pci_caps_read(oct_hw, &vndr_data, sizeof(vndr_data), pos);
+             if (vndr_data.hdr.vendor_id != PCI_VENDOR_ID_CAVIUM) {
+                 dev_err(dev, "Invalid vendor data\n");
+                 return -EINVAL;
+             }
+
+             octep_vndr_data_process(oct_hw, &vndr_data);
+             break;
          }

          pos = cap.cap_next;
···
      oct_hw->vqs = devm_kcalloc(&pdev->dev, oct_hw->nr_vring, sizeof(*oct_hw->vqs), GFP_KERNEL);
      if (!oct_hw->vqs)
          return -ENOMEM;
-
-     oct_hw->irq = -1;

      dev_info(&pdev->dev, "Device features : %llx\n", oct_hw->features);
      dev_info(&pdev->dev, "Maximum queues : %u\n", oct_hw->nr_vring);
+67 -32
drivers/vdpa/octeon_ep/octep_vdpa_main.c
···
      struct octep_hw *oct_hw = data;
      int i;

-     for (i = 0; i < oct_hw->nr_vring; i++) {
-         if (oct_hw->vqs[i].cb.callback && ioread32(oct_hw->vqs[i].cb_notify_addr)) {
-             /* Acknowledge the per queue notification to the device */
-             iowrite32(0, oct_hw->vqs[i].cb_notify_addr);
-             oct_hw->vqs[i].cb.callback(oct_hw->vqs[i].cb.private);
+     /* Each device has multiple interrupts (nb_irqs) shared among rings
+      * (nr_vring). Device interrupts are mapped to the rings in a
+      * round-robin fashion.
+      *
+      * For example, if nb_irqs = 8 and nr_vring = 64:
+      * 0 -> 0, 8, 16, 24, 32, 40, 48, 56;
+      * 1 -> 1, 9, 17, 25, 33, 41, 49, 57;
+      * ...
+      * 7 -> 7, 15, 23, 31, 39, 47, 55, 63;
+      */
+
+     for (i = irq - oct_hw->irqs[0]; i < oct_hw->nr_vring; i += oct_hw->nb_irqs) {
+         if (ioread8(oct_hw->vqs[i].cb_notify_addr)) {
+             /* Acknowledge the per ring notification to the device */
+             iowrite8(0, oct_hw->vqs[i].cb_notify_addr);
+
+             if (likely(oct_hw->vqs[i].cb.callback))
+                 oct_hw->vqs[i].cb.callback(oct_hw->vqs[i].cb.private);
+             break;
          }
+     }
+
+     /* Check for config interrupt. Config uses the first interrupt */
+     if (unlikely(irq == oct_hw->irqs[0] && ioread8(oct_hw->isr))) {
+         iowrite8(0, oct_hw->isr);
+
+         if (oct_hw->config_cb.callback)
+             oct_hw->config_cb.callback(oct_hw->config_cb.private);
      }

      return IRQ_HANDLED;
···
  static void octep_free_irqs(struct octep_hw *oct_hw)
  {
      struct pci_dev *pdev = oct_hw->pdev;
+     int irq;

-     if (oct_hw->irq != -1) {
-         devm_free_irq(&pdev->dev, oct_hw->irq, oct_hw);
-         oct_hw->irq = -1;
+     if (!oct_hw->irqs)
+         return;
+
+     for (irq = 0; irq < oct_hw->nb_irqs; irq++) {
+         if (!oct_hw->irqs[irq])
+             break;
+
+         devm_free_irq(&pdev->dev, oct_hw->irqs[irq], oct_hw);
      }
+
      pci_free_irq_vectors(pdev);
+     devm_kfree(&pdev->dev, oct_hw->irqs);
+     oct_hw->irqs = NULL;
  }

  static int octep_request_irqs(struct octep_hw *oct_hw)
  {
      struct pci_dev *pdev = oct_hw->pdev;
-     int ret, irq;
+     int ret, irq, idx;

-     /* Currently HW device provisions one IRQ per VF, hence
-      * allocate one IRQ for all virtqueues call interface.
-      */
-     ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX);
+     oct_hw->irqs = devm_kcalloc(&pdev->dev, oct_hw->nb_irqs, sizeof(int), GFP_KERNEL);
+     if (!oct_hw->irqs)
+         return -ENOMEM;
+
+     ret = pci_alloc_irq_vectors(pdev, 1, oct_hw->nb_irqs, PCI_IRQ_MSIX);
      if (ret < 0) {
          dev_err(&pdev->dev, "Failed to alloc msix vector");
          return ret;
      }

-     snprintf(oct_hw->vqs->msix_name, sizeof(oct_hw->vqs->msix_name),
-              OCTEP_VDPA_DRIVER_NAME "-vf-%d", pci_iov_vf_id(pdev));
-
-     irq = pci_irq_vector(pdev, 0);
-     ret = devm_request_irq(&pdev->dev, irq, octep_vdpa_intr_handler, 0,
-                            oct_hw->vqs->msix_name, oct_hw);
-     if (ret) {
-         dev_err(&pdev->dev, "Failed to register interrupt handler\n");
-         goto free_irq_vec;
+     for (idx = 0; idx < oct_hw->nb_irqs; idx++) {
+         irq = pci_irq_vector(pdev, idx);
+         ret = devm_request_irq(&pdev->dev, irq, octep_vdpa_intr_handler, 0,
+                                dev_name(&pdev->dev), oct_hw);
+         if (ret) {
+             dev_err(&pdev->dev, "Failed to register interrupt handler\n");
+             goto free_irqs;
+         }
+         oct_hw->irqs[idx] = irq;
      }
-     oct_hw->irq = irq;

      return 0;

- free_irq_vec:
-     pci_free_irq_vectors(pdev);
+ free_irqs:
+     octep_free_irqs(oct_hw);
      return ret;
  }
···

  static u32 octep_vdpa_get_device_id(struct vdpa_device *vdpa_dev)
  {
-     return VIRTIO_ID_NET;
+     struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev);
+
+     return oct_hw->dev_id;
  }

  static u32 octep_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev)
···
      struct device *dev = &pdev->dev;
      struct octep_hw *oct_hw;
      unsigned long timeout;
+     u64 val;
      int ret;

      oct_hw = &mgmt_dev->oct_hw;
···
      ret = octep_iomap_region(pdev, oct_hw->base, OCTEP_HW_CAPS_BAR);
      if (ret)
          return;
+
+     val = readq(oct_hw->base[OCTEP_HW_MBOX_BAR] + OCTEP_VF_IN_CTRL(0));
+     oct_hw->nb_irqs = OCTEP_VF_IN_CTRL_RPVF(val);
+     if (!oct_hw->nb_irqs || oct_hw->nb_irqs > OCTEP_MAX_CB_INTR) {
+         dev_err(dev, "Invalid number of interrupts %d\n", oct_hw->nb_irqs);
+         goto unmap_region;
+     }

      ret = octep_hw_caps_read(oct_hw, pdev);
      if (ret < 0)
···
      if (val == 0) {
          dev_err(&pdev->dev, "Invalid device configuration\n");
          return -EINVAL;
-     }
-
-     if (OCTEP_EPF_RINFO_RPVF(val) != BIT_ULL(0)) {
-         val &= ~GENMASK_ULL(35, 32);
-         val |= BIT_ULL(32);
-         writeq(val, addr + OCTEP_EPF_RINFO(0));
      }

      len = pci_resource_len(pdev, OCTEP_HW_CAPS_BAR);
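The interrupt handler above recovers the rings an MSI-X vector services from the vector's position in oct_hw->irqs[]. A standalone sketch of the same round-robin arithmetic (hypothetical helper, written as a userspace snippet purely for illustration):

#include <stdio.h>

/* Print the rings serviced by vector 'idx' when nb_irqs vectors cover nr_vring rings. */
static void rings_for_vector(int idx, int nb_irqs, int nr_vring)
{
    for (int ring = idx; ring < nr_vring; ring += nb_irqs)
        printf("vector %d -> ring %d\n", idx, ring);
}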
+28 -29
drivers/vdpa/solidrun/snet_main.c
···
  static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet)
  {
      char *name;
-     int ret, i, mask = 0;
+     unsigned short i;
+     bool bars_found = false;
+
+     name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "psnet[%s]-bars", pci_name(pdev));
+     if (!name)
+         return -ENOMEM;
+
      /* We don't know which BAR will be used to communicate..
       * We will map every bar with len > 0.
       *
       * Later, we will discover the BAR and unmap all other BARs.
       */
      for (i = 0; i < PCI_STD_NUM_BARS; i++) {
-         if (pci_resource_len(pdev, i))
-             mask |= (1 << i);
+         void __iomem *io;
+
+         if (pci_resource_len(pdev, i) == 0)
+             continue;
+
+         io = pcim_iomap_region(pdev, i, name);
+         if (IS_ERR(io)) {
+             SNET_ERR(pdev, "Failed to request and map PCI BARs\n");
+             return PTR_ERR(io);
+         }
+
+         psnet->bars[i] = io;
+         bars_found = true;
      }

      /* No BAR can be used.. */
-     if (!mask) {
+     if (!bars_found) {
          SNET_ERR(pdev, "Failed to find a PCI BAR\n");
          return -ENODEV;
-     }
-
-     name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "psnet[%s]-bars", pci_name(pdev));
-     if (!name)
-         return -ENOMEM;
-
-     ret = pcim_iomap_regions(pdev, mask, name);
-     if (ret) {
-         SNET_ERR(pdev, "Failed to request and map PCI BARs\n");
-         return ret;
-     }
-
-     for (i = 0; i < PCI_STD_NUM_BARS; i++) {
-         if (mask & (1 << i))
-             psnet->bars[i] = pcim_iomap_table(pdev)[i];
      }

      return 0;
···
  static int snet_open_vf_bar(struct pci_dev *pdev, struct snet *snet)
  {
      char *name;
-     int ret;
+     void __iomem *io;

      name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "snet[%s]-bars", pci_name(pdev));
      if (!name)
          return -ENOMEM;

      /* Request and map BAR */
-     ret = pcim_iomap_regions(pdev, BIT(snet->psnet->cfg.vf_bar), name);
-     if (ret) {
+     io = pcim_iomap_region(pdev, snet->psnet->cfg.vf_bar, name);
+     if (IS_ERR(io)) {
          SNET_ERR(pdev, "Failed to request and map PCI BAR for a VF\n");
-         return ret;
+         return PTR_ERR(io);
      }

-     snet->bar = pcim_iomap_table(pdev)[snet->psnet->cfg.vf_bar];
+     snet->bar = io;

      return 0;
  }
···

  static void psnet_unmap_unused_bars(struct pci_dev *pdev, struct psnet *psnet)
  {
-     int i, mask = 0;
+     unsigned short i;

      for (i = 0; i < PCI_STD_NUM_BARS; i++) {
          if (psnet->bars[i] && i != psnet->barno)
-             mask |= (1 << i);
+             pcim_iounmap_region(pdev, i);
      }
-
-     if (mask)
-         pcim_iounmap_regions(pdev, mask);
  }

  /* Read SNET config from PCI BAR */
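The replacement APIs used above hand back the mapping (or an ERR_PTR) directly instead of going through a BAR mask plus pcim_iomap_table(). A hedged sketch of the per-BAR pattern for some other driver's probe path (hypothetical helper name):

/* Map one BAR if present; returns NULL when the BAR is absent or mapping fails. */
static void __iomem *map_one_bar(struct pci_dev *pdev, int bar, const char *name)
{
    void __iomem *io;

    if (!pci_resource_len(pdev, bar))
        return NULL;                    /* BAR not implemented */

    io = pcim_iomap_region(pdev, bar, name);
    return IS_ERR(io) ? NULL : io;      /* request + ioremap are device-managed */
}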
+9
drivers/vdpa/virtio_pci/vp_vdpa.c
···
      vp_iowrite16(qid, vp_vdpa->vring[qid].notify);
  }

+ static void vp_vdpa_kick_vq_with_data(struct vdpa_device *vdpa, u32 data)
+ {
+     struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+     u16 qid = data & 0xFFFF;
+
+     vp_iowrite32(data, vp_vdpa->vring[qid].notify);
+ }
+
  static u32 vp_vdpa_get_generation(struct vdpa_device *vdpa)
  {
      struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
···
      .get_vq_size = vp_vdpa_get_vq_size,
      .set_vq_address = vp_vdpa_set_vq_address,
      .kick_vq = vp_vdpa_kick_vq,
+     .kick_vq_with_data = vp_vdpa_kick_vq_with_data,
      .get_generation = vp_vdpa_get_generation,
      .get_device_id = vp_vdpa_get_device_id,
      .get_vendor_id = vp_vdpa_get_vendor_id,
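kick_vq_with_data forwards the full 32-bit notification value rather than only the queue index, which is what VIRTIO_F_NOTIFICATION_DATA calls for. The handler above assumes the virtqueue index occupies the low 16 bits of that value; a small decode sketch mirroring the same assumption:

/* Illustrative only: extract the virtqueue number from a notification-data value. */
static inline u16 notify_data_to_vqn(u32 data)
{
    return data & 0xFFFF;   /* upper bits carry ring position information */
}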
+4 -1
drivers/vhost/net.c
···
      size_t vhost_hlen, sock_hlen;
      size_t vhost_len, sock_len;
      bool busyloop_intr = false;
+     bool set_num_buffers;
      struct socket *sock;
      struct iov_iter fixup;
      __virtio16 num_buffers;
···
      vq_log = unlikely(vhost_has_feature(vq, VHOST_F_LOG_ALL)) ?
          vq->log : NULL;
      mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
+     set_num_buffers = mergeable ||
+                       vhost_has_feature(vq, VIRTIO_F_VERSION_1);

      do {
          sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
···
          /* TODO: Should check and handle checksum. */

          num_buffers = cpu_to_vhost16(vq, headcount);
-         if (likely(mergeable) &&
+         if (likely(set_num_buffers) &&
              copy_to_iter(&num_buffers, sizeof num_buffers,
                           &fixup) != sizeof num_buffers) {
              vq_err(vq, "Failed num_buffers write");
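The fix follows the virtio spec rule that once VIRTIO_F_VERSION_1 is negotiated the num_buffers field of the rx header is always present and must be written (as 1 when VIRTIO_NET_F_MRG_RXBUF is off); only legacy devices without MRG_RXBUF omit it. The condition, as a tiny hedged helper that mirrors the diff:

/* Must the device write num_buffers in the rx header? */
static bool need_num_buffers(bool mergeable, bool version_1)
{
    return mergeable || version_1;
}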
+69 -25
drivers/virtio/virtio.c
···
  }
  EXPORT_SYMBOL_GPL(unregister_virtio_device);

- #ifdef CONFIG_PM_SLEEP
- int virtio_device_freeze(struct virtio_device *dev)
- {
-     struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
-     int ret;
-
-     virtio_config_core_disable(dev);
-
-     dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
-
-     if (drv && drv->freeze) {
-         ret = drv->freeze(dev);
-         if (ret) {
-             virtio_config_core_enable(dev);
-             return ret;
-         }
-     }
-
-     return 0;
- }
- EXPORT_SYMBOL_GPL(virtio_device_freeze);
-
- int virtio_device_restore(struct virtio_device *dev)
+ static int virtio_device_restore_priv(struct virtio_device *dev, bool restore)
  {
      struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
      int ret;
···
      if (ret)
          goto err;

-     if (drv->restore) {
-         ret = drv->restore(dev);
+     if (restore) {
+         if (drv->restore) {
+             ret = drv->restore(dev);
+             if (ret)
+                 goto err;
+         }
+     } else {
+         ret = drv->reset_done(dev);
          if (ret)
              goto err;
      }
···
      virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
      return ret;
  }
+
+ #ifdef CONFIG_PM_SLEEP
+ int virtio_device_freeze(struct virtio_device *dev)
+ {
+     struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
+     int ret;
+
+     virtio_config_core_disable(dev);
+
+     dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
+
+     if (drv && drv->freeze) {
+         ret = drv->freeze(dev);
+         if (ret) {
+             virtio_config_core_enable(dev);
+             return ret;
+         }
+     }
+
+     return 0;
+ }
+ EXPORT_SYMBOL_GPL(virtio_device_freeze);
+
+ int virtio_device_restore(struct virtio_device *dev)
+ {
+     return virtio_device_restore_priv(dev, true);
+ }
  EXPORT_SYMBOL_GPL(virtio_device_restore);
  #endif
+
+ int virtio_device_reset_prepare(struct virtio_device *dev)
+ {
+     struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
+     int ret;
+
+     if (!drv || !drv->reset_prepare)
+         return -EOPNOTSUPP;
+
+     virtio_config_core_disable(dev);
+
+     dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
+
+     ret = drv->reset_prepare(dev);
+     if (ret) {
+         virtio_config_core_enable(dev);
+         return ret;
+     }
+
+     return 0;
+ }
+ EXPORT_SYMBOL_GPL(virtio_device_reset_prepare);
+
+ int virtio_device_reset_done(struct virtio_device *dev)
+ {
+     struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
+
+     if (!drv || !drv->reset_done)
+         return -EOPNOTSUPP;
+
+     return virtio_device_restore_priv(dev, false);
+ }
+ EXPORT_SYMBOL_GPL(virtio_device_reset_done);

  static int virtio_init(void)
  {
+1 -1
drivers/virtio/virtio_balloon.c
···

      for (num_pfns = 0; num_pfns < num;
           num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
-         struct page *page = balloon_page_alloc();
+         page = balloon_page_alloc();

          if (!page) {
              dev_info_ratelimited(&vb->vdev->dev,
+98 -5
drivers/virtio/virtio_mem.c
··· 133 133 uint64_t addr; 134 134 /* Maximum region size in bytes. */ 135 135 uint64_t region_size; 136 + /* Usable region size in bytes. */ 137 + uint64_t usable_region_size; 136 138 137 139 /* The parent resource for all memory added via this device. */ 138 140 struct resource *parent_resource; ··· 2370 2368 static void virtio_mem_refresh_config(struct virtio_mem *vm) 2371 2369 { 2372 2370 const struct range pluggable_range = mhp_get_pluggable_range(true); 2373 - uint64_t new_plugged_size, usable_region_size, end_addr; 2371 + uint64_t new_plugged_size, end_addr; 2374 2372 2375 2373 /* the plugged_size is just a reflection of what _we_ did previously */ 2376 2374 virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, ··· 2380 2378 2381 2379 /* calculate the last usable memory block id */ 2382 2380 virtio_cread_le(vm->vdev, struct virtio_mem_config, 2383 - usable_region_size, &usable_region_size); 2384 - end_addr = min(vm->addr + usable_region_size - 1, 2381 + usable_region_size, &vm->usable_region_size); 2382 + end_addr = min(vm->addr + vm->usable_region_size - 1, 2385 2383 pluggable_range.end); 2386 2384 2387 2385 if (vm->in_sbm) { ··· 2650 2648 if (rc) 2651 2649 goto out_unreg_pm; 2652 2650 2651 + virtio_device_ready(vm->vdev); 2653 2652 return 0; 2654 2653 out_unreg_pm: 2655 2654 unregister_pm_notifier(&vm->pm_notifier); ··· 2728 2725 mutex_unlock(&vm->hotplug_mutex); 2729 2726 return is_ram; 2730 2727 } 2728 + 2729 + #ifdef CONFIG_PROC_VMCORE_DEVICE_RAM 2730 + static int virtio_mem_vmcore_add_device_ram(struct virtio_mem *vm, 2731 + struct list_head *list, uint64_t start, uint64_t end) 2732 + { 2733 + int rc; 2734 + 2735 + rc = vmcore_alloc_add_range(list, start, end - start); 2736 + if (rc) 2737 + dev_err(&vm->vdev->dev, 2738 + "Error adding device RAM range: %d\n", rc); 2739 + return rc; 2740 + } 2741 + 2742 + static int virtio_mem_vmcore_get_device_ram(struct vmcore_cb *cb, 2743 + struct list_head *list) 2744 + { 2745 + struct virtio_mem *vm = container_of(cb, struct virtio_mem, 2746 + vmcore_cb); 2747 + const uint64_t device_start = vm->addr; 2748 + const uint64_t device_end = vm->addr + vm->usable_region_size; 2749 + uint64_t chunk_size, cur_start, cur_end, plugged_range_start = 0; 2750 + LIST_HEAD(tmp_list); 2751 + int rc; 2752 + 2753 + if (!vm->plugged_size) 2754 + return 0; 2755 + 2756 + /* Process memory sections, unless the device block size is bigger. */ 2757 + chunk_size = max_t(uint64_t, PFN_PHYS(PAGES_PER_SECTION), 2758 + vm->device_block_size); 2759 + 2760 + mutex_lock(&vm->hotplug_mutex); 2761 + 2762 + /* 2763 + * We process larger chunks and indicate the complete chunk if any 2764 + * block in there is plugged. This reduces the number of pfn_is_ram() 2765 + * callbacks and mimic what is effectively being done when the old 2766 + * kernel would add complete memory sections/blocks to the elfcore hdr. 2767 + */ 2768 + cur_start = device_start; 2769 + for (cur_start = device_start; cur_start < device_end; cur_start = cur_end) { 2770 + cur_end = ALIGN_DOWN(cur_start + chunk_size, chunk_size); 2771 + cur_end = min_t(uint64_t, cur_end, device_end); 2772 + 2773 + rc = virtio_mem_send_state_request(vm, cur_start, 2774 + cur_end - cur_start); 2775 + 2776 + if (rc < 0) { 2777 + dev_err(&vm->vdev->dev, 2778 + "Error querying block states: %d\n", rc); 2779 + goto out; 2780 + } else if (rc != VIRTIO_MEM_STATE_UNPLUGGED) { 2781 + /* Merge ranges with plugged memory. 
*/ 2782 + if (!plugged_range_start) 2783 + plugged_range_start = cur_start; 2784 + continue; 2785 + } 2786 + 2787 + /* Flush any plugged range. */ 2788 + if (plugged_range_start) { 2789 + rc = virtio_mem_vmcore_add_device_ram(vm, &tmp_list, 2790 + plugged_range_start, 2791 + cur_start); 2792 + if (rc) 2793 + goto out; 2794 + plugged_range_start = 0; 2795 + } 2796 + } 2797 + 2798 + /* Flush any plugged range. */ 2799 + if (plugged_range_start) 2800 + rc = virtio_mem_vmcore_add_device_ram(vm, &tmp_list, 2801 + plugged_range_start, 2802 + cur_start); 2803 + out: 2804 + mutex_unlock(&vm->hotplug_mutex); 2805 + if (rc < 0) { 2806 + vmcore_free_ranges(&tmp_list); 2807 + return rc; 2808 + } 2809 + list_splice_tail(&tmp_list, list); 2810 + return 0; 2811 + } 2812 + #endif /* CONFIG_PROC_VMCORE_DEVICE_RAM */ 2731 2813 #endif /* CONFIG_PROC_VMCORE */ 2732 2814 2733 2815 static int virtio_mem_init_kdump(struct virtio_mem *vm) 2734 2816 { 2817 + /* We must be prepared to receive a callback immediately. */ 2818 + virtio_device_ready(vm->vdev); 2735 2819 #ifdef CONFIG_PROC_VMCORE 2736 2820 dev_info(&vm->vdev->dev, "memory hot(un)plug disabled in kdump kernel\n"); 2737 2821 vm->vmcore_cb.pfn_is_ram = virtio_mem_vmcore_pfn_is_ram; 2822 + #ifdef CONFIG_PROC_VMCORE_DEVICE_RAM 2823 + vm->vmcore_cb.get_device_ram = virtio_mem_vmcore_get_device_ram; 2824 + #endif /* CONFIG_PROC_VMCORE_DEVICE_RAM */ 2738 2825 register_vmcore_cb(&vm->vmcore_cb); 2739 2826 return 0; 2740 2827 #else /* CONFIG_PROC_VMCORE */ ··· 2853 2760 virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr); 2854 2761 virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size, 2855 2762 &vm->region_size); 2763 + virtio_cread_le(vm->vdev, struct virtio_mem_config, usable_region_size, 2764 + &vm->usable_region_size); 2856 2765 2857 2766 /* Determine the nid for the device based on the lowest address. */ 2858 2767 if (vm->nid == NUMA_NO_NODE) ··· 2964 2869 rc = virtio_mem_init(vm); 2965 2870 if (rc) 2966 2871 goto out_del_vq; 2967 - 2968 - virtio_device_ready(vdev); 2969 2872 2970 2873 /* trigger a config update to start processing the requested_size */ 2971 2874 if (!vm->in_kdump) {
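virtio-mem is the first provider of the new get_device_ram callback; any driver that owns RAM only it can detect could report it the same way. A hedged sketch of such a registration (hypothetical driver and example values):

static int mydev_get_device_ram(struct vmcore_cb *cb, struct list_head *list)
{
    /* Report one page-aligned range owned by this device (example values). */
    return vmcore_alloc_add_range(list, 0x100000000ULL, 0x10000000ULL);
}

static struct vmcore_cb mydev_vmcore_cb = {
    .pfn_is_ram     = mydev_pfn_is_ram,     /* assumed to exist elsewhere */
    .get_device_ram = mydev_get_device_ram,
};

static void mydev_init_kdump(void)
{
    /* Register only once the device can answer state queries. */
    register_vmcore_cb(&mydev_vmcore_cb);
}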
+41
drivers/virtio/virtio_pci_common.c
···
      return num_vfs;
  }

+ static void virtio_pci_reset_prepare(struct pci_dev *pci_dev)
+ {
+     struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+     int ret = 0;
+
+     ret = virtio_device_reset_prepare(&vp_dev->vdev);
+     if (ret) {
+         if (ret != -EOPNOTSUPP)
+             dev_warn(&pci_dev->dev, "Reset prepare failure: %d",
+                      ret);
+         return;
+     }
+
+     if (pci_is_enabled(pci_dev))
+         pci_disable_device(pci_dev);
+ }
+
+ static void virtio_pci_reset_done(struct pci_dev *pci_dev)
+ {
+     struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+     int ret;
+
+     if (pci_is_enabled(pci_dev))
+         return;
+
+     ret = pci_enable_device(pci_dev);
+     if (!ret) {
+         pci_set_master(pci_dev);
+         ret = virtio_device_reset_done(&vp_dev->vdev);
+     }
+
+     if (ret && ret != -EOPNOTSUPP)
+         dev_warn(&pci_dev->dev, "Reset done failure: %d", ret);
+ }
+
+ static const struct pci_error_handlers virtio_pci_err_handler = {
+     .reset_prepare = virtio_pci_reset_prepare,
+     .reset_done = virtio_pci_reset_done,
+ };
+
  static struct pci_driver virtio_pci_driver = {
      .name = "virtio-pci",
      .id_table = virtio_pci_id_table,
···
      .driver.pm = &virtio_pci_pm_ops,
  #endif
      .sriov_configure = virtio_pci_sriov_configure,
+     .err_handler = &virtio_pci_err_handler,
  };

  struct virtio_device *virtio_pci_vf_get_pf_dev(struct pci_dev *pdev)
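With err_handler wired up, a PCI-core initiated reset of the function now quiesces and revives the virtio device around the reset. A hedged sketch of a kernel-side trigger (pci_reset_function() is existing PCI core API that runs ->reset_prepare()/->reset_done() around the reset; the wrapper name is made up):

static int reset_virtio_function(struct pci_dev *pdev)
{
    /* FLR or another available reset method; the handlers above bracket it. */
    return pci_reset_function(pdev);
}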
+19
fs/proc/Kconfig
···
        as ELF notes to /proc/vmcore. You can still disable device
        dump using the kernel command line option 'novmcoredd'.

+ config NEED_PROC_VMCORE_DEVICE_RAM
+     bool
+
+ config PROC_VMCORE_DEVICE_RAM
+     def_bool y
+     depends on PROC_VMCORE && NEED_PROC_VMCORE_DEVICE_RAM
+     depends on VIRTIO_MEM
+     help
+       If the elfcore hdr is allocated and prepared by the dump kernel
+       ("2nd kernel") instead of the crashed kernel, RAM provided by memory
+       devices such as virtio-mem will not be included in the dump
+       image, because only the device driver can properly detect them.
+
+       With this config enabled, these RAM ranges will be queried from the
+       device drivers once the device gets probed, so they can be included
+       in the crash dump.
+
+       Relevant architectures should select NEED_PROC_VMCORE_DEVICE_RAM.
+
  config PROC_SYSCTL
      bool "Sysctl support (/proc/sys)" if EXPERT
      depends on PROC_FS
+217 -66
fs/proc/vmcore.c
··· 8 8 * 9 9 */ 10 10 11 + #define pr_fmt(fmt) "vmcore: " fmt 12 + 11 13 #include <linux/mm.h> 12 14 #include <linux/kcore.h> 13 15 #include <linux/user.h> ··· 53 51 static struct proc_dir_entry *proc_vmcore; 54 52 55 53 #ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP 54 + struct vmcoredd_node { 55 + struct list_head list; /* List of dumps */ 56 + void *buf; /* Buffer containing device's dump */ 57 + unsigned int size; /* Size of the buffer */ 58 + }; 59 + 56 60 /* Device Dump list and mutex to synchronize access to list */ 57 61 static LIST_HEAD(vmcoredd_list); 58 - static DEFINE_MUTEX(vmcoredd_mutex); 59 62 60 63 static bool vmcoredd_disabled; 61 64 core_param(novmcoredd, vmcoredd_disabled, bool, 0); ··· 69 62 /* Device Dump Size */ 70 63 static size_t vmcoredd_orig_sz; 71 64 72 - static DEFINE_SPINLOCK(vmcore_cb_lock); 65 + static DEFINE_MUTEX(vmcore_mutex); 66 + 73 67 DEFINE_STATIC_SRCU(vmcore_cb_srcu); 74 68 /* List of registered vmcore callbacks. */ 75 69 static LIST_HEAD(vmcore_cb_list); 76 70 /* Whether the vmcore has been opened once. */ 77 71 static bool vmcore_opened; 72 + /* Whether the vmcore is currently open. */ 73 + static unsigned int vmcore_open; 74 + 75 + static void vmcore_process_device_ram(struct vmcore_cb *cb); 78 76 79 77 void register_vmcore_cb(struct vmcore_cb *cb) 80 78 { 81 79 INIT_LIST_HEAD(&cb->next); 82 - spin_lock(&vmcore_cb_lock); 80 + mutex_lock(&vmcore_mutex); 83 81 list_add_tail(&cb->next, &vmcore_cb_list); 84 82 /* 85 83 * Registering a vmcore callback after the vmcore was opened is ··· 92 80 */ 93 81 if (vmcore_opened) 94 82 pr_warn_once("Unexpected vmcore callback registration\n"); 95 - spin_unlock(&vmcore_cb_lock); 83 + if (!vmcore_open && cb->get_device_ram) 84 + vmcore_process_device_ram(cb); 85 + mutex_unlock(&vmcore_mutex); 96 86 } 97 87 EXPORT_SYMBOL_GPL(register_vmcore_cb); 98 88 99 89 void unregister_vmcore_cb(struct vmcore_cb *cb) 100 90 { 101 - spin_lock(&vmcore_cb_lock); 91 + mutex_lock(&vmcore_mutex); 102 92 list_del_rcu(&cb->next); 103 93 /* 104 94 * Unregistering a vmcore callback after the vmcore was opened is ··· 109 95 */ 110 96 if (vmcore_opened) 111 97 pr_warn_once("Unexpected vmcore callback unregistration\n"); 112 - spin_unlock(&vmcore_cb_lock); 98 + mutex_unlock(&vmcore_mutex); 113 99 114 100 synchronize_srcu(&vmcore_cb_srcu); 115 101 } ··· 134 120 135 121 static int open_vmcore(struct inode *inode, struct file *file) 136 122 { 137 - spin_lock(&vmcore_cb_lock); 123 + mutex_lock(&vmcore_mutex); 138 124 vmcore_opened = true; 139 - spin_unlock(&vmcore_cb_lock); 125 + if (vmcore_open + 1 == 0) { 126 + mutex_unlock(&vmcore_mutex); 127 + return -EBUSY; 128 + } 129 + vmcore_open++; 130 + mutex_unlock(&vmcore_mutex); 131 + 132 + return 0; 133 + } 134 + 135 + static int release_vmcore(struct inode *inode, struct file *file) 136 + { 137 + mutex_lock(&vmcore_mutex); 138 + vmcore_open--; 139 + mutex_unlock(&vmcore_mutex); 140 140 141 141 return 0; 142 142 } ··· 271 243 { 272 244 struct vmcoredd_node *dump; 273 245 u64 offset = 0; 274 - int ret = 0; 275 246 size_t tsz; 276 247 char *buf; 277 248 278 - mutex_lock(&vmcoredd_mutex); 279 249 list_for_each_entry(dump, &vmcoredd_list, list) { 280 250 if (start < offset + dump->size) { 281 251 tsz = min(offset + (u64)dump->size - start, (u64)size); 282 252 buf = dump->buf + start - offset; 283 - if (copy_to_iter(buf, tsz, iter) < tsz) { 284 - ret = -EFAULT; 285 - goto out_unlock; 286 - } 253 + if (copy_to_iter(buf, tsz, iter) < tsz) 254 + return -EFAULT; 287 255 288 256 size -= tsz; 289 257 start += tsz; 290 
258 291 259 /* Leave now if buffer filled already */ 292 260 if (!size) 293 - goto out_unlock; 261 + return 0; 294 262 } 295 263 offset += dump->size; 296 264 } 297 265 298 - out_unlock: 299 - mutex_unlock(&vmcoredd_mutex); 300 - return ret; 266 + return 0; 301 267 } 302 268 303 269 #ifdef CONFIG_MMU ··· 300 278 { 301 279 struct vmcoredd_node *dump; 302 280 u64 offset = 0; 303 - int ret = 0; 304 281 size_t tsz; 305 282 char *buf; 306 283 307 - mutex_lock(&vmcoredd_mutex); 308 284 list_for_each_entry(dump, &vmcoredd_list, list) { 309 285 if (start < offset + dump->size) { 310 286 tsz = min(offset + (u64)dump->size - start, (u64)size); 311 287 buf = dump->buf + start - offset; 312 288 if (remap_vmalloc_range_partial(vma, dst, buf, 0, 313 - tsz)) { 314 - ret = -EFAULT; 315 - goto out_unlock; 316 - } 289 + tsz)) 290 + return -EFAULT; 317 291 318 292 size -= tsz; 319 293 start += tsz; ··· 317 299 318 300 /* Leave now if buffer filled already */ 319 301 if (!size) 320 - goto out_unlock; 302 + return 0; 321 303 } 322 304 offset += dump->size; 323 305 } 324 306 325 - out_unlock: 326 - mutex_unlock(&vmcoredd_mutex); 327 - return ret; 307 + return 0; 328 308 } 329 309 #endif /* CONFIG_MMU */ 330 310 #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */ ··· 332 316 */ 333 317 static ssize_t __read_vmcore(struct iov_iter *iter, loff_t *fpos) 334 318 { 319 + struct vmcore_range *m = NULL; 335 320 ssize_t acc = 0, tmp; 336 321 size_t tsz; 337 322 u64 start; 338 - struct vmcore *m = NULL; 339 323 340 324 if (!iov_iter_count(iter) || *fpos >= vmcore_size) 341 325 return 0; ··· 592 576 { 593 577 size_t size = vma->vm_end - vma->vm_start; 594 578 u64 start, end, len, tsz; 595 - struct vmcore *m; 579 + struct vmcore_range *m; 596 580 597 581 start = (u64)vma->vm_pgoff << PAGE_SHIFT; 598 582 end = start + size; ··· 709 693 710 694 static const struct proc_ops vmcore_proc_ops = { 711 695 .proc_open = open_vmcore, 696 + .proc_release = release_vmcore, 712 697 .proc_read_iter = read_vmcore, 713 698 .proc_lseek = default_llseek, 714 699 .proc_mmap = mmap_vmcore, 715 700 }; 716 701 717 - static struct vmcore* __init get_new_element(void) 718 - { 719 - return kzalloc(sizeof(struct vmcore), GFP_KERNEL); 720 - } 721 - 722 702 static u64 get_vmcore_size(size_t elfsz, size_t elfnotesegsz, 723 703 struct list_head *vc_list) 724 704 { 705 + struct vmcore_range *m; 725 706 u64 size; 726 - struct vmcore *m; 727 707 728 708 size = elfsz + elfnotesegsz; 729 709 list_for_each_entry(m, vc_list, list) { ··· 1121 1109 Elf64_Ehdr *ehdr_ptr; 1122 1110 Elf64_Phdr *phdr_ptr; 1123 1111 loff_t vmcore_off; 1124 - struct vmcore *new; 1125 1112 1126 1113 ehdr_ptr = (Elf64_Ehdr *)elfptr; 1127 1114 phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */ ··· 1139 1128 end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE); 1140 1129 size = end - start; 1141 1130 1142 - /* Add this contiguous chunk of memory to vmcore list.*/ 1143 - new = get_new_element(); 1144 - if (!new) 1131 + if (vmcore_alloc_add_range(vc_list, start, size)) 1145 1132 return -ENOMEM; 1146 - new->paddr = start; 1147 - new->size = size; 1148 - list_add_tail(&new->list, vc_list); 1149 1133 1150 1134 /* Update the program header offset. 
*/ 1151 1135 phdr_ptr->p_offset = vmcore_off + (paddr - start); ··· 1158 1152 Elf32_Ehdr *ehdr_ptr; 1159 1153 Elf32_Phdr *phdr_ptr; 1160 1154 loff_t vmcore_off; 1161 - struct vmcore *new; 1162 1155 1163 1156 ehdr_ptr = (Elf32_Ehdr *)elfptr; 1164 1157 phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */ ··· 1176 1171 end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE); 1177 1172 size = end - start; 1178 1173 1179 - /* Add this contiguous chunk of memory to vmcore list.*/ 1180 - new = get_new_element(); 1181 - if (!new) 1174 + if (vmcore_alloc_add_range(vc_list, start, size)) 1182 1175 return -ENOMEM; 1183 - new->paddr = start; 1184 - new->size = size; 1185 - list_add_tail(&new->list, vc_list); 1186 1176 1187 1177 /* Update the program header offset */ 1188 1178 phdr_ptr->p_offset = vmcore_off + (paddr - start); ··· 1190 1190 static void set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz, 1191 1191 struct list_head *vc_list) 1192 1192 { 1193 + struct vmcore_range *m; 1193 1194 loff_t vmcore_off; 1194 - struct vmcore *m; 1195 1195 1196 1196 /* Skip ELF header, program headers and ELF note segment. */ 1197 1197 vmcore_off = elfsz + elfnotes_sz; ··· 1518 1518 dump->buf = buf; 1519 1519 dump->size = data_size; 1520 1520 1521 - /* Add the dump to driver sysfs list */ 1522 - mutex_lock(&vmcoredd_mutex); 1523 - list_add_tail(&dump->list, &vmcoredd_list); 1524 - mutex_unlock(&vmcoredd_mutex); 1521 + /* Add the dump to driver sysfs list and update the elfcore hdr */ 1522 + mutex_lock(&vmcore_mutex); 1523 + if (vmcore_opened) 1524 + pr_warn_once("Unexpected adding of device dump\n"); 1525 + if (vmcore_open) { 1526 + ret = -EBUSY; 1527 + goto out_err; 1528 + } 1525 1529 1530 + list_add_tail(&dump->list, &vmcoredd_list); 1526 1531 vmcoredd_update_size(data_size); 1532 + mutex_unlock(&vmcore_mutex); 1527 1533 return 0; 1528 1534 1529 1535 out_err: ··· 1541 1535 EXPORT_SYMBOL(vmcore_add_device_dump); 1542 1536 #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */ 1543 1537 1538 + #ifdef CONFIG_PROC_VMCORE_DEVICE_RAM 1539 + static int vmcore_realloc_elfcore_buffer_elf64(size_t new_size) 1540 + { 1541 + char *elfcorebuf_new; 1542 + 1543 + if (WARN_ON_ONCE(new_size < elfcorebuf_sz)) 1544 + return -EINVAL; 1545 + if (get_order(elfcorebuf_sz_orig) == get_order(new_size)) { 1546 + elfcorebuf_sz_orig = new_size; 1547 + return 0; 1548 + } 1549 + 1550 + elfcorebuf_new = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1551 + get_order(new_size)); 1552 + if (!elfcorebuf_new) 1553 + return -ENOMEM; 1554 + memcpy(elfcorebuf_new, elfcorebuf, elfcorebuf_sz); 1555 + free_pages((unsigned long)elfcorebuf, get_order(elfcorebuf_sz_orig)); 1556 + elfcorebuf = elfcorebuf_new; 1557 + elfcorebuf_sz_orig = new_size; 1558 + return 0; 1559 + } 1560 + 1561 + static void vmcore_reset_offsets_elf64(void) 1562 + { 1563 + Elf64_Phdr *phdr_start = (Elf64_Phdr *)(elfcorebuf + sizeof(Elf64_Ehdr)); 1564 + loff_t vmcore_off = elfcorebuf_sz + elfnotes_sz; 1565 + Elf64_Ehdr *ehdr = (Elf64_Ehdr *)elfcorebuf; 1566 + Elf64_Phdr *phdr; 1567 + int i; 1568 + 1569 + for (i = 0, phdr = phdr_start; i < ehdr->e_phnum; i++, phdr++) { 1570 + u64 start, end; 1571 + 1572 + /* 1573 + * After merge_note_headers_elf64() we should only have a single 1574 + * PT_NOTE entry that starts immediately after elfcorebuf_sz. 
1575 + */ 1576 + if (phdr->p_type == PT_NOTE) { 1577 + phdr->p_offset = elfcorebuf_sz; 1578 + continue; 1579 + } 1580 + 1581 + start = rounddown(phdr->p_offset, PAGE_SIZE); 1582 + end = roundup(phdr->p_offset + phdr->p_memsz, PAGE_SIZE); 1583 + phdr->p_offset = vmcore_off + (phdr->p_offset - start); 1584 + vmcore_off = vmcore_off + end - start; 1585 + } 1586 + set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list); 1587 + } 1588 + 1589 + static int vmcore_add_device_ram_elf64(struct list_head *list, size_t count) 1590 + { 1591 + Elf64_Phdr *phdr_start = (Elf64_Phdr *)(elfcorebuf + sizeof(Elf64_Ehdr)); 1592 + Elf64_Ehdr *ehdr = (Elf64_Ehdr *)elfcorebuf; 1593 + struct vmcore_range *cur; 1594 + Elf64_Phdr *phdr; 1595 + size_t new_size; 1596 + int rc; 1597 + 1598 + if ((Elf32_Half)(ehdr->e_phnum + count) != ehdr->e_phnum + count) { 1599 + pr_err("too many device ram ranges\n"); 1600 + return -ENOSPC; 1601 + } 1602 + 1603 + /* elfcorebuf_sz must always cover full pages. */ 1604 + new_size = sizeof(Elf64_Ehdr) + 1605 + (ehdr->e_phnum + count) * sizeof(Elf64_Phdr); 1606 + new_size = roundup(new_size, PAGE_SIZE); 1607 + 1608 + /* 1609 + * Make sure we have sufficient space to include the new PT_LOAD 1610 + * entries. 1611 + */ 1612 + rc = vmcore_realloc_elfcore_buffer_elf64(new_size); 1613 + if (rc) { 1614 + pr_err("resizing elfcore failed\n"); 1615 + return rc; 1616 + } 1617 + 1618 + /* Modify our used elfcore buffer size to cover the new entries. */ 1619 + elfcorebuf_sz = new_size; 1620 + 1621 + /* Fill the added PT_LOAD entries. */ 1622 + phdr = phdr_start + ehdr->e_phnum; 1623 + list_for_each_entry(cur, list, list) { 1624 + WARN_ON_ONCE(!IS_ALIGNED(cur->paddr | cur->size, PAGE_SIZE)); 1625 + elfcorehdr_fill_device_ram_ptload_elf64(phdr, cur->paddr, cur->size); 1626 + 1627 + /* p_offset will be adjusted later. */ 1628 + phdr++; 1629 + ehdr->e_phnum++; 1630 + } 1631 + list_splice_tail(list, &vmcore_list); 1632 + 1633 + /* We changed elfcorebuf_sz and added new entries; reset all offsets. */ 1634 + vmcore_reset_offsets_elf64(); 1635 + 1636 + /* Finally, recalculate the total vmcore size. */ 1637 + vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz, 1638 + &vmcore_list); 1639 + proc_vmcore->size = vmcore_size; 1640 + return 0; 1641 + } 1642 + 1643 + static void vmcore_process_device_ram(struct vmcore_cb *cb) 1644 + { 1645 + unsigned char *e_ident = (unsigned char *)elfcorebuf; 1646 + struct vmcore_range *first, *m; 1647 + LIST_HEAD(list); 1648 + int count; 1649 + 1650 + /* We only support Elf64 dumps for now. */ 1651 + if (WARN_ON_ONCE(e_ident[EI_CLASS] != ELFCLASS64)) { 1652 + pr_err("device ram ranges only support Elf64\n"); 1653 + return; 1654 + } 1655 + 1656 + if (cb->get_device_ram(cb, &list)) { 1657 + pr_err("obtaining device ram ranges failed\n"); 1658 + return; 1659 + } 1660 + count = list_count_nodes(&list); 1661 + if (!count) 1662 + return; 1663 + 1664 + /* 1665 + * For some reason these ranges are already know? Might happen 1666 + * with unusual register->unregister->register sequences; we'll simply 1667 + * sanity check using the first range. 1668 + */ 1669 + first = list_first_entry(&list, struct vmcore_range, list); 1670 + list_for_each_entry(m, &vmcore_list, list) { 1671 + unsigned long long m_end = m->paddr + m->size; 1672 + unsigned long long first_end = first->paddr + first->size; 1673 + 1674 + if (first->paddr < m_end && m->paddr < first_end) 1675 + goto out_free; 1676 + } 1677 + 1678 + /* If adding the mem nodes succeeds, they must not be freed. 
*/ 1679 + if (!vmcore_add_device_ram_elf64(&list, count)) 1680 + return; 1681 + out_free: 1682 + vmcore_free_ranges(&list); 1683 + } 1684 + #else /* !CONFIG_PROC_VMCORE_DEVICE_RAM */ 1685 + static void vmcore_process_device_ram(struct vmcore_cb *cb) 1686 + { 1687 + } 1688 + #endif /* CONFIG_PROC_VMCORE_DEVICE_RAM */ 1689 + 1544 1690 /* Free all dumps in vmcore device dump list */ 1545 1691 static void vmcore_free_device_dumps(void) 1546 1692 { 1547 1693 #ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP 1548 - mutex_lock(&vmcoredd_mutex); 1694 + mutex_lock(&vmcore_mutex); 1549 1695 while (!list_empty(&vmcoredd_list)) { 1550 1696 struct vmcoredd_node *dump; 1551 1697 ··· 1707 1549 vfree(dump->buf); 1708 1550 vfree(dump); 1709 1551 } 1710 - mutex_unlock(&vmcoredd_mutex); 1552 + mutex_unlock(&vmcore_mutex); 1711 1553 #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */ 1712 1554 } 1713 1555 ··· 1729 1571 rc = parse_crash_elf_headers(); 1730 1572 if (rc) { 1731 1573 elfcorehdr_free(elfcorehdr_addr); 1732 - pr_warn("Kdump: vmcore not initialized\n"); 1574 + pr_warn("not initialized\n"); 1733 1575 return rc; 1734 1576 } 1735 1577 elfcorehdr_free(elfcorehdr_addr); ··· 1750 1592 proc_vmcore = NULL; 1751 1593 } 1752 1594 1753 - /* clear the vmcore list. */ 1754 - while (!list_empty(&vmcore_list)) { 1755 - struct vmcore *m; 1756 - 1757 - m = list_first_entry(&vmcore_list, struct vmcore, list); 1758 - list_del(&m->list); 1759 - kfree(m); 1760 - } 1595 + vmcore_free_ranges(&vmcore_list); 1761 1596 free_elfcorebuf(); 1762 1597 1763 1598 /* clear vmcore device dump list */
+41
include/linux/crash_dump.h
···
  extern void elfcorehdr_free(unsigned long long addr);
  extern ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos);
  extern ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos);
+ void elfcorehdr_fill_device_ram_ptload_elf64(Elf64_Phdr *phdr,
+         unsigned long long paddr, unsigned long long size);
  extern int remap_oldmem_pfn_range(struct vm_area_struct *vma,
                                    unsigned long from, unsigned long pfn,
                                    unsigned long size, pgprot_t prot);
···
   * indicated in the vmcore instead. For example, a ballooned page
   * contains no data and reading from such a page will cause high
   * load in the hypervisor.
+  * @get_device_ram: query RAM ranges that can only be detected by device
+  * drivers, such as the virtio-mem driver, so they can be included in
+  * the crash dump on architectures that allocate the elfcore hdr in the dump
+  * ("2nd") kernel. Indicated RAM ranges may contain holes to reduce the
+  * total number of ranges; such holes can be detected using the pfn_is_ram
+  * callback just like for other RAM.
   * @next: List head to manage registered callbacks internally; initialized by
   * register_vmcore_cb().
   *
···
   */
  struct vmcore_cb {
      bool (*pfn_is_ram)(struct vmcore_cb *cb, unsigned long pfn);
+     int (*get_device_ram)(struct vmcore_cb *cb, struct list_head *list);
      struct list_head next;
  };
  extern void register_vmcore_cb(struct vmcore_cb *cb);
  extern void unregister_vmcore_cb(struct vmcore_cb *cb);
+
+ struct vmcore_range {
+     struct list_head list;
+     unsigned long long paddr;
+     unsigned long long size;
+     loff_t offset;
+ };
+
+ /* Allocate a vmcore range and add it to the list. */
+ static inline int vmcore_alloc_add_range(struct list_head *list,
+         unsigned long long paddr, unsigned long long size)
+ {
+     struct vmcore_range *m = kzalloc(sizeof(*m), GFP_KERNEL);
+
+     if (!m)
+         return -ENOMEM;
+     m->paddr = paddr;
+     m->size = size;
+     list_add_tail(&m->list, list);
+     return 0;
+ }
+
+ /* Free a list of vmcore ranges. */
+ static inline void vmcore_free_ranges(struct list_head *list)
+ {
+     struct vmcore_range *m, *tmp;
+
+     list_for_each_entry_safe(m, tmp, list, list) {
+         list_del(&m->list);
+         kfree(m);
+     }
+ }

  #else /* !CONFIG_CRASH_DUMP */
  static inline bool is_kdump_kernel(void) { return false; }
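The two inline helpers centralize what vmcore.c and its callbacks previously open-coded. A short usage sketch (hypothetical function and example values):

static int collect_example_ranges(struct list_head *ranges)
{
    /* Queue two page-aligned ranges; free the partial list on failure. */
    if (vmcore_alloc_add_range(ranges, 0x1000000, 0x200000) ||
        vmcore_alloc_add_range(ranges, 0x2000000, 0x100000)) {
        vmcore_free_ranges(ranges);
        return -ENOMEM;
    }
    return 0;
}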
-13
include/linux/kcore.h
···
      int type;
  };

- struct vmcore {
-     struct list_head list;
-     unsigned long long paddr;
-     unsigned long long size;
-     loff_t offset;
- };
-
- struct vmcoredd_node {
-     struct list_head list;  /* List of dumps */
-     void *buf;              /* Buffer containing device's dump */
-     unsigned int size;      /* Size of the buffer */
- };
-
  #ifdef CONFIG_PROC_KCORE
  void __init kclist_add(struct kcore_list *, void *, size_t, int type);

+8
include/linux/virtio.h
···
  int virtio_device_restore(struct virtio_device *dev);
  #endif
  void virtio_reset_device(struct virtio_device *dev);
+ int virtio_device_reset_prepare(struct virtio_device *dev);
+ int virtio_device_reset_done(struct virtio_device *dev);

  size_t virtio_max_dma_size(const struct virtio_device *vdev);

···
   * changes; may be called in interrupt context.
   * @freeze: optional function to call during suspend/hibernation.
   * @restore: optional function to call on resume.
+  * @reset_prepare: optional function to call when a transport specific reset
+  * occurs.
+  * @reset_done: optional function to call after transport specific reset
+  * operation has finished.
   */
  struct virtio_driver {
      struct device_driver driver;
···
      void (*config_changed)(struct virtio_device *dev);
      int (*freeze)(struct virtio_device *dev);
      int (*restore)(struct virtio_device *dev);
+     int (*reset_prepare)(struct virtio_device *dev);
+     int (*reset_done)(struct virtio_device *dev);
  };

  #define drv_to_virtio(__drv) container_of_const(__drv, struct virtio_driver, driver)
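For many drivers the new hooks can simply reuse the freeze/restore paths, as virtio_blk does above. A hedged sketch for a hypothetical driver:

static int mydrv_reset_prepare(struct virtio_device *vdev)
{
    return mydrv_freeze(vdev);      /* quiesce queues exactly as for suspend */
}

static int mydrv_reset_done(struct virtio_device *vdev)
{
    return mydrv_restore(vdev);     /* re-init virtqueues as on resume */
}

static struct virtio_driver mydrv_driver = {
    /* ... driver, id_table, probe, remove ... */
    .reset_prepare  = mydrv_reset_prepare,
    .reset_done     = mydrv_reset_done,
};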
+1 -1
include/uapi/linux/vduse.h
···
- /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
  #ifndef _UAPI_VDUSE_H_
  #define _UAPI_VDUSE_H_

+14
include/uapi/linux/virtio_pci.h
···
  #define VIRTIO_PCI_CAP_PCI_CFG          5
  /* Additional shared memory capability */
  #define VIRTIO_PCI_CAP_SHARED_MEMORY_CFG 8
+ /* PCI vendor data configuration */
+ #define VIRTIO_PCI_CAP_VENDOR_CFG       9

  /* This is the PCI capability header: */
  struct virtio_pci_cap {
···
      __u8 padding[2];    /* Pad to full dword. */
      __le32 offset;      /* Offset within bar. */
      __le32 length;      /* Length of the structure, in bytes. */
+ };
+
+ /* This is the PCI vendor data capability header: */
+ struct virtio_pci_vndr_data {
+     __u8 cap_vndr;      /* Generic PCI field: PCI_CAP_ID_VNDR */
+     __u8 cap_next;      /* Generic PCI field: next ptr. */
+     __u8 cap_len;       /* Generic PCI field: capability length */
+     __u8 cfg_type;      /* Identifies the structure. */
+     __u16 vendor_id;    /* Identifies the vendor-specific format. */
+     /* For Vendor Definition */
+     /* Pads structure to a multiple of 4 bytes */
+     /* Reads must not have side effects */
  };

  struct virtio_pci_cap64 {
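A vendor data capability is discovered like any other vendor capability: walk the PCI capability list for PCI_CAP_ID_VNDR entries, check cfg_type for VIRTIO_PCI_CAP_VENDOR_CFG, and verify vendor_id before trusting the vendor-defined payload (the OCTEON driver above checks PCI_VENDOR_ID_CAVIUM). A hedged sketch using standard config-space accessors:

static int find_virtio_vendor_cfg(struct pci_dev *pdev, u16 my_vendor_id)
{
    int pos;

    for (pos = pci_find_capability(pdev, PCI_CAP_ID_VNDR); pos;
         pos = pci_find_next_capability(pdev, pos, PCI_CAP_ID_VNDR)) {
        u8 cfg_type;
        u16 vendor_id;

        pci_read_config_byte(pdev, pos + offsetof(struct virtio_pci_vndr_data, cfg_type), &cfg_type);
        pci_read_config_word(pdev, pos + offsetof(struct virtio_pci_vndr_data, vendor_id), &vendor_id);
        if (cfg_type == VIRTIO_PCI_CAP_VENDOR_CFG && vendor_id == my_vendor_id)
            return pos;     /* vendor-defined payload follows the header */
    }
    return -ENOENT;
}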