Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio fixes from Michael Tsirkin:
"Just a bunch of bugfixes all over the place"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (26 commits)
virtio-mem: check if the config changed before fake offlining memory
virtio-mem: keep retrying on offline_and_remove_memory() errors in Sub Block Mode (SBM)
virtio-mem: convert most offline_and_remove_memory() errors to -EBUSY
virtio-mem: remove unsafe unplug in Big Block Mode (BBM)
pds_vdpa: fix up debugfs feature bit printing
pds_vdpa: alloc irq vectors on DRIVER_OK
pds_vdpa: clean and reset vqs entries
pds_vdpa: always allow offering VIRTIO_NET_F_MAC
pds_vdpa: reset to vdpa specified mac
virtio-net: Zero max_tx_vq field for VIRTIO_NET_CTRL_MQ_HASH_CONFIG case
vdpa/mlx5: Fix crash on shutdown for when no ndev exists
vdpa/mlx5: Delete control vq iotlb in destroy_mr only when necessary
vdpa/mlx5: Fix mr->initialized semantics
vdpa/mlx5: Correct default number of queues when MQ is on
virtio-vdpa: Fix cpumask memory leak in virtio_vdpa_find_vqs()
vduse: Use proper spinlock for IRQ injection
vdpa: Enable strict validation for netlinks ops
vdpa: Add max vqp attr to vdpa_nl_policy for nlattr length check
vdpa: Add queue index attr to vdpa_nl_policy for nlattr length check
vdpa: Add features attr to vdpa_nl_policy for nlattr length check
...

+519 -208
+10 -1
MAINTAINERS
··· 22474 22474 S: Maintained 22475 22475 F: drivers/block/virtio_blk.c 22476 22476 F: drivers/scsi/virtio_scsi.c 22477 - F: drivers/vhost/scsi.c 22478 22477 F: include/uapi/linux/virtio_blk.h 22479 22478 F: include/uapi/linux/virtio_scsi.h 22480 22479 ··· 22571 22572 F: include/linux/vhost_iotlb.h 22572 22573 F: include/uapi/linux/vhost.h 22573 22574 F: kernel/vhost_task.c 22575 + 22576 + VIRTIO HOST (VHOST-SCSI) 22577 + M: "Michael S. Tsirkin" <mst@redhat.com> 22578 + M: Jason Wang <jasowang@redhat.com> 22579 + M: Mike Christie <michael.christie@oracle.com> 22580 + R: Paolo Bonzini <pbonzini@redhat.com> 22581 + R: Stefan Hajnoczi <stefanha@redhat.com> 22582 + L: virtualization@lists.linux-foundation.org 22583 + S: Maintained 22584 + F: drivers/vhost/scsi.c 22574 22585 22575 22586 VIRTIO I2C DRIVER 22576 22587 M: Conghui Chen <conghui.chen@intel.com>
+1 -1
drivers/net/virtio_net.c
··· 2761 2761 vi->ctrl->rss.indirection_table[i] = indir_val; 2762 2762 } 2763 2763 2764 - vi->ctrl->rss.max_tx_vq = vi->curr_queue_pairs; 2764 + vi->ctrl->rss.max_tx_vq = vi->has_rss ? vi->curr_queue_pairs : 0; 2765 2765 vi->ctrl->rss.hash_key_length = vi->rss_key_size; 2766 2766 2767 2767 netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size);
+2
drivers/vdpa/mlx5/core/mlx5_vdpa.h
··· 31 31 struct list_head head; 32 32 unsigned long num_directs; 33 33 unsigned long num_klms; 34 + /* state of dvq mr */ 34 35 bool initialized; 35 36 36 37 /* serialize mkey creation and destruction */ ··· 122 121 int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, 123 122 unsigned int asid); 124 123 void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev); 124 + void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid); 125 125 126 126 #define mlx5_vdpa_warn(__dev, format, ...) \ 127 127 dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__, \
+74 -31
drivers/vdpa/mlx5/core/mr.c
··· 489 489 } 490 490 } 491 491 492 - void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) 492 + static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) 493 + { 494 + if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid) 495 + return; 496 + 497 + prune_iotlb(mvdev); 498 + } 499 + 500 + static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) 493 501 { 494 502 struct mlx5_vdpa_mr *mr = &mvdev->mr; 495 503 496 - mutex_lock(&mr->mkey_mtx); 497 - if (!mr->initialized) 498 - goto out; 504 + if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid) 505 + return; 499 506 500 - prune_iotlb(mvdev); 507 + if (!mr->initialized) 508 + return; 509 + 501 510 if (mr->user_mr) 502 511 destroy_user_mr(mvdev, mr); 503 512 else 504 513 destroy_dma_mr(mvdev, mr); 505 514 506 515 mr->initialized = false; 507 - out: 516 + } 517 + 518 + void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid) 519 + { 520 + struct mlx5_vdpa_mr *mr = &mvdev->mr; 521 + 522 + mutex_lock(&mr->mkey_mtx); 523 + 524 + _mlx5_vdpa_destroy_dvq_mr(mvdev, asid); 525 + _mlx5_vdpa_destroy_cvq_mr(mvdev, asid); 526 + 508 527 mutex_unlock(&mr->mkey_mtx); 528 + } 529 + 530 + void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) 531 + { 532 + mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]); 533 + mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]); 534 + } 535 + 536 + static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev, 537 + struct vhost_iotlb *iotlb, 538 + unsigned int asid) 539 + { 540 + if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid) 541 + return 0; 542 + 543 + return dup_iotlb(mvdev, iotlb); 544 + } 545 + 546 + static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev, 547 + struct vhost_iotlb *iotlb, 548 + unsigned int asid) 549 + { 550 + struct mlx5_vdpa_mr *mr = &mvdev->mr; 551 + int err; 552 + 553 + if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid) 554 + return 0; 555 + 556 + if (mr->initialized) 557 + return 0; 558 + 559 + if (iotlb) 560 + err = create_user_mr(mvdev, iotlb); 561 + else 562 + err = create_dma_mr(mvdev, mr); 563 + 564 + if (err) 565 + return err; 566 + 567 + mr->initialized = true; 568 + 569 + return 0; 509 570 } 510 571 511 572 static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, 512 573 struct vhost_iotlb *iotlb, unsigned int asid) 513 574 { 514 - struct mlx5_vdpa_mr *mr = &mvdev->mr; 515 575 int err; 516 576 517 - if (mr->initialized) 518 - return 0; 577 + err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid); 578 + if (err) 579 + return err; 519 580 520 - if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) { 521 - if (iotlb) 522 - err = create_user_mr(mvdev, iotlb); 523 - else 524 - err = create_dma_mr(mvdev, mr); 581 + err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid); 582 + if (err) 583 + goto out_err; 525 584 526 - if (err) 527 - return err; 528 - } 529 - 530 - if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) { 531 - err = dup_iotlb(mvdev, iotlb); 532 - if (err) 533 - goto out_err; 534 - } 535 - 536 - mr->initialized = true; 537 585 return 0; 538 586 539 587 out_err: 540 - if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) { 541 - if (iotlb) 542 - destroy_user_mr(mvdev, mr); 543 - else 544 - destroy_dma_mr(mvdev, mr); 545 - } 588 + _mlx5_vdpa_destroy_dvq_mr(mvdev, asid); 546 589 547 590 return err; 548 591 }
+11 -15
drivers/vdpa/mlx5/net/mlx5_vnet.c
··· 2517 2517 else 2518 2518 ndev->rqt_size = 1; 2519 2519 2520 - ndev->cur_num_vqs = 2 * ndev->rqt_size; 2520 + /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section 2521 + * 5.1.6.5.5 "Device operation in multiqueue mode": 2522 + * 2523 + * Multiqueue is disabled by default. 2524 + * The driver enables multiqueue by sending a command using class 2525 + * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue 2526 + * operation, as follows: ... 2527 + */ 2528 + ndev->cur_num_vqs = 2; 2521 2529 2522 2530 update_cvq_info(mvdev); 2523 2531 return err; ··· 2644 2636 goto err_mr; 2645 2637 2646 2638 teardown_driver(ndev); 2647 - mlx5_vdpa_destroy_mr(mvdev); 2639 + mlx5_vdpa_destroy_mr_asid(mvdev, asid); 2648 2640 err = mlx5_vdpa_create_mr(mvdev, iotlb, asid); 2649 2641 if (err) 2650 2642 goto err_mr; ··· 2660 2652 return 0; 2661 2653 2662 2654 err_setup: 2663 - mlx5_vdpa_destroy_mr(mvdev); 2655 + mlx5_vdpa_destroy_mr_asid(mvdev, asid); 2664 2656 err_mr: 2665 2657 return err; 2666 2658 } ··· 3556 3548 kfree(mgtdev); 3557 3549 } 3558 3550 3559 - static void mlx5v_shutdown(struct auxiliary_device *auxdev) 3560 - { 3561 - struct mlx5_vdpa_mgmtdev *mgtdev; 3562 - struct mlx5_vdpa_net *ndev; 3563 - 3564 - mgtdev = auxiliary_get_drvdata(auxdev); 3565 - ndev = mgtdev->ndev; 3566 - 3567 - free_irqs(ndev); 3568 - } 3569 - 3570 3551 static const struct auxiliary_device_id mlx5v_id_table[] = { 3571 3552 { .name = MLX5_ADEV_NAME ".vnet", }, 3572 3553 {}, ··· 3567 3570 .name = "vnet", 3568 3571 .probe = mlx5v_probe, 3569 3572 .remove = mlx5v_remove, 3570 - .shutdown = mlx5v_shutdown, 3571 3573 .id_table = mlx5v_id_table, 3572 3574 }; 3573 3575
+1 -2
drivers/vdpa/pds/Makefile
··· 5 5 6 6 pds_vdpa-y := aux_drv.o \ 7 7 cmds.o \ 8 + debugfs.o \ 8 9 vdpa_dev.o 9 - 10 - pds_vdpa-$(CONFIG_DEBUG_FS) += debugfs.o
+6 -9
drivers/vdpa/pds/debugfs.c
··· 176 176 { 177 177 struct pds_vdpa_aux *vdpa_aux = seq->private; 178 178 struct vdpa_mgmt_dev *mgmt; 179 + u64 hw_features; 179 180 180 181 seq_printf(seq, "aux_dev: %s\n", 181 182 dev_name(&vdpa_aux->padev->aux_dev.dev)); ··· 184 183 mgmt = &vdpa_aux->vdpa_mdev; 185 184 seq_printf(seq, "max_vqs: %d\n", mgmt->max_supported_vqs); 186 185 seq_printf(seq, "config_attr_mask: %#llx\n", mgmt->config_attr_mask); 187 - seq_printf(seq, "supported_features: %#llx\n", mgmt->supported_features); 188 - print_feature_bits_all(seq, mgmt->supported_features); 186 + hw_features = le64_to_cpu(vdpa_aux->ident.hw_features); 187 + seq_printf(seq, "hw_features: %#llx\n", hw_features); 188 + print_feature_bits_all(seq, hw_features); 189 189 190 190 return 0; 191 191 } ··· 202 200 { 203 201 struct pds_vdpa_device *pdsv = seq->private; 204 202 struct virtio_net_config vc; 205 - u64 driver_features; 206 203 u8 status; 207 204 208 205 memcpy_fromio(&vc, pdsv->vdpa_aux->vd_mdev.device, ··· 224 223 status = vp_modern_get_status(&pdsv->vdpa_aux->vd_mdev); 225 224 seq_printf(seq, "dev_status: %#x\n", status); 226 225 print_status_bits(seq, status); 227 - 228 - seq_printf(seq, "req_features: %#llx\n", pdsv->req_features); 229 - print_feature_bits_all(seq, pdsv->req_features); 230 - driver_features = vp_modern_get_driver_features(&pdsv->vdpa_aux->vd_mdev); 231 - seq_printf(seq, "driver_features: %#llx\n", driver_features); 232 - print_feature_bits_all(seq, driver_features); 226 + seq_printf(seq, "negotiated_features: %#llx\n", pdsv->negotiated_features); 227 + print_feature_bits_all(seq, pdsv->negotiated_features); 233 228 seq_printf(seq, "vdpa_index: %d\n", pdsv->vdpa_index); 234 229 seq_printf(seq, "num_vqs: %d\n", pdsv->num_vqs); 235 230
+125 -51
drivers/vdpa/pds/vdpa_dev.c
··· 126 126 static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool ready) 127 127 { 128 128 struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); 129 - struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev; 130 129 struct device *dev = &pdsv->vdpa_dev.dev; 131 130 u64 driver_features; 132 131 u16 invert_idx = 0; 133 - int irq; 134 132 int err; 135 133 136 134 dev_dbg(dev, "%s: qid %d ready %d => %d\n", ··· 141 143 invert_idx = PDS_VDPA_PACKED_INVERT_IDX; 142 144 143 145 if (ready) { 144 - irq = pci_irq_vector(pdev, qid); 145 - snprintf(pdsv->vqs[qid].irq_name, sizeof(pdsv->vqs[qid].irq_name), 146 - "vdpa-%s-%d", dev_name(dev), qid); 147 - 148 - err = request_irq(irq, pds_vdpa_isr, 0, 149 - pdsv->vqs[qid].irq_name, &pdsv->vqs[qid]); 150 - if (err) { 151 - dev_err(dev, "%s: no irq for qid %d: %pe\n", 152 - __func__, qid, ERR_PTR(err)); 153 - return; 154 - } 155 - pdsv->vqs[qid].irq = irq; 156 - 157 146 /* Pass vq setup info to DSC using adminq to gather up and 158 147 * send all info at once so FW can do its full set up in 159 148 * one easy operation ··· 149 164 if (err) { 150 165 dev_err(dev, "Failed to init vq %d: %pe\n", 151 166 qid, ERR_PTR(err)); 152 - pds_vdpa_release_irq(pdsv, qid); 153 167 ready = false; 154 168 } 155 169 } else { ··· 156 172 if (err) 157 173 dev_err(dev, "%s: reset_vq failed qid %d: %pe\n", 158 174 __func__, qid, ERR_PTR(err)); 159 - pds_vdpa_release_irq(pdsv, qid); 160 175 } 161 176 162 177 pdsv->vqs[qid].ready = ready; ··· 301 318 struct device *dev = &pdsv->vdpa_dev.dev; 302 319 u64 driver_features; 303 320 u64 nego_features; 321 + u64 hw_features; 304 322 u64 missing; 305 323 306 324 if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)) && features) { ··· 309 325 return -EOPNOTSUPP; 310 326 } 311 327 312 - pdsv->req_features = features; 313 - 314 328 /* Check for valid feature bits */ 315 - nego_features = features & le64_to_cpu(pdsv->vdpa_aux->ident.hw_features); 316 - missing = pdsv->req_features & ~nego_features; 329 + nego_features = features & pdsv->supported_features; 330 + missing = features & ~nego_features; 317 331 if (missing) { 318 332 dev_err(dev, "Can't support all requested features in %#llx, missing %#llx features\n", 319 - pdsv->req_features, missing); 333 + features, missing); 320 334 return -EOPNOTSUPP; 321 335 } 336 + 337 + pdsv->negotiated_features = nego_features; 322 338 323 339 driver_features = pds_vdpa_get_driver_features(vdpa_dev); 324 340 dev_dbg(dev, "%s: %#llx => %#llx\n", 325 341 __func__, driver_features, nego_features); 342 + 343 + /* if we're faking the F_MAC, strip it before writing to device */ 344 + hw_features = le64_to_cpu(pdsv->vdpa_aux->ident.hw_features); 345 + if (!(hw_features & BIT_ULL(VIRTIO_NET_F_MAC))) 346 + nego_features &= ~BIT_ULL(VIRTIO_NET_F_MAC); 326 347 327 348 if (driver_features == nego_features) 328 349 return 0; ··· 341 352 { 342 353 struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); 343 354 344 - return vp_modern_get_driver_features(&pdsv->vdpa_aux->vd_mdev); 355 + return pdsv->negotiated_features; 345 356 } 346 357 347 358 static void pds_vdpa_set_config_cb(struct vdpa_device *vdpa_dev, ··· 378 389 return vp_modern_get_status(&pdsv->vdpa_aux->vd_mdev); 379 390 } 380 391 392 + static int pds_vdpa_request_irqs(struct pds_vdpa_device *pdsv) 393 + { 394 + struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev; 395 + struct pds_vdpa_aux *vdpa_aux = pdsv->vdpa_aux; 396 + struct device *dev = &pdsv->vdpa_dev.dev; 397 + int max_vq, nintrs, qid, err; 398 + 399 + max_vq = vdpa_aux->vdpa_mdev.max_supported_vqs; 400 + 401 + nintrs = pci_alloc_irq_vectors(pdev, max_vq, max_vq, PCI_IRQ_MSIX); 402 + if (nintrs < 0) { 403 + dev_err(dev, "Couldn't get %d msix vectors: %pe\n", 404 + max_vq, ERR_PTR(nintrs)); 405 + return nintrs; 406 + } 407 + 408 + for (qid = 0; qid < pdsv->num_vqs; ++qid) { 409 + int irq = pci_irq_vector(pdev, qid); 410 + 411 + snprintf(pdsv->vqs[qid].irq_name, sizeof(pdsv->vqs[qid].irq_name), 412 + "vdpa-%s-%d", dev_name(dev), qid); 413 + 414 + err = request_irq(irq, pds_vdpa_isr, 0, 415 + pdsv->vqs[qid].irq_name, 416 + &pdsv->vqs[qid]); 417 + if (err) { 418 + dev_err(dev, "%s: no irq for qid %d: %pe\n", 419 + __func__, qid, ERR_PTR(err)); 420 + goto err_release; 421 + } 422 + 423 + pdsv->vqs[qid].irq = irq; 424 + } 425 + 426 + vdpa_aux->nintrs = nintrs; 427 + 428 + return 0; 429 + 430 + err_release: 431 + while (qid--) 432 + pds_vdpa_release_irq(pdsv, qid); 433 + 434 + pci_free_irq_vectors(pdev); 435 + 436 + vdpa_aux->nintrs = 0; 437 + 438 + return err; 439 + } 440 + 441 + static void pds_vdpa_release_irqs(struct pds_vdpa_device *pdsv) 442 + { 443 + struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev; 444 + struct pds_vdpa_aux *vdpa_aux = pdsv->vdpa_aux; 445 + int qid; 446 + 447 + if (!vdpa_aux->nintrs) 448 + return; 449 + 450 + for (qid = 0; qid < pdsv->num_vqs; qid++) 451 + pds_vdpa_release_irq(pdsv, qid); 452 + 453 + pci_free_irq_vectors(pdev); 454 + 455 + vdpa_aux->nintrs = 0; 456 + } 457 + 381 458 static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) 382 459 { 383 460 struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); ··· 453 398 454 399 old_status = pds_vdpa_get_status(vdpa_dev); 455 400 dev_dbg(dev, "%s: old %#x new %#x\n", __func__, old_status, status); 401 + 402 + if (status & ~old_status & VIRTIO_CONFIG_S_DRIVER_OK) { 403 + if (pds_vdpa_request_irqs(pdsv)) 404 + status = old_status | VIRTIO_CONFIG_S_FAILED; 405 + } 456 406 457 407 pds_vdpa_cmd_set_status(pdsv, status); 458 408 ··· 469 409 pdsv->vqs[i].avail_idx = 0; 470 410 pdsv->vqs[i].used_idx = 0; 471 411 } 412 + 413 + pds_vdpa_cmd_set_mac(pdsv, pdsv->mac); 472 414 } 473 415 474 416 if (status & ~old_status & VIRTIO_CONFIG_S_FEATURES_OK) { ··· 480 418 i, &pdsv->vqs[i].notify_pa); 481 419 } 482 420 } 421 + 422 + if (old_status & ~status & VIRTIO_CONFIG_S_DRIVER_OK) 423 + pds_vdpa_release_irqs(pdsv); 424 + } 425 + 426 + static void pds_vdpa_init_vqs_entry(struct pds_vdpa_device *pdsv, int qid, 427 + void __iomem *notify) 428 + { 429 + memset(&pdsv->vqs[qid], 0, sizeof(pdsv->vqs[0])); 430 + pdsv->vqs[qid].qid = qid; 431 + pdsv->vqs[qid].pdsv = pdsv; 432 + pdsv->vqs[qid].ready = false; 433 + pdsv->vqs[qid].irq = VIRTIO_MSI_NO_VECTOR; 434 + pdsv->vqs[qid].notify = notify; 483 435 } 484 436 485 437 static int pds_vdpa_reset(struct vdpa_device *vdpa_dev) ··· 517 441 if (err) 518 442 dev_err(dev, "%s: reset_vq failed qid %d: %pe\n", 519 443 __func__, i, ERR_PTR(err)); 520 - pds_vdpa_release_irq(pdsv, i); 521 - memset(&pdsv->vqs[i], 0, sizeof(pdsv->vqs[0])); 522 - pdsv->vqs[i].ready = false; 523 444 } 524 445 } 525 446 526 447 pds_vdpa_set_status(vdpa_dev, 0); 448 + 449 + if (status & VIRTIO_CONFIG_S_DRIVER_OK) { 450 + /* Reset the vq info */ 451 + for (i = 0; i < pdsv->num_vqs && !err; i++) 452 + pds_vdpa_init_vqs_entry(pdsv, i, pdsv->vqs[i].notify); 453 + } 527 454 528 455 return 0; 529 456 } ··· 611 532 struct device *dma_dev; 612 533 struct pci_dev *pdev; 613 534 struct device *dev; 614 - u8 mac[ETH_ALEN]; 615 535 int err; 616 536 int i; 617 537 ··· 641 563 642 564 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) { 643 565 u64 unsupp_features = 644 - add_config->device_features & ~mgmt->supported_features; 566 + add_config->device_features & ~pdsv->supported_features; 645 567 646 568 if (unsupp_features) { 647 569 dev_err(dev, "Unsupported features: %#llx\n", unsupp_features); ··· 692 614 } 693 615 694 616 /* Set a mac, either from the user config if provided 695 - * or set a random mac if default is 00:..:00 617 + * or use the device's mac if not 00:..:00 618 + * or set a random mac 696 619 */ 697 620 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) { 698 - ether_addr_copy(mac, add_config->net.mac); 699 - pds_vdpa_cmd_set_mac(pdsv, mac); 621 + ether_addr_copy(pdsv->mac, add_config->net.mac); 700 622 } else { 701 623 struct virtio_net_config __iomem *vc; 702 624 703 625 vc = pdsv->vdpa_aux->vd_mdev.device; 704 - memcpy_fromio(mac, vc->mac, sizeof(mac)); 705 - if (is_zero_ether_addr(mac)) { 706 - eth_random_addr(mac); 707 - dev_info(dev, "setting random mac %pM\n", mac); 708 - pds_vdpa_cmd_set_mac(pdsv, mac); 626 + memcpy_fromio(pdsv->mac, vc->mac, sizeof(pdsv->mac)); 627 + if (is_zero_ether_addr(pdsv->mac) && 628 + (pdsv->supported_features & BIT_ULL(VIRTIO_NET_F_MAC))) { 629 + eth_random_addr(pdsv->mac); 630 + dev_info(dev, "setting random mac %pM\n", pdsv->mac); 709 631 } 710 632 } 633 + pds_vdpa_cmd_set_mac(pdsv, pdsv->mac); 711 634 712 635 for (i = 0; i < pdsv->num_vqs; i++) { 713 - pdsv->vqs[i].qid = i; 714 - pdsv->vqs[i].pdsv = pdsv; 715 - pdsv->vqs[i].irq = VIRTIO_MSI_NO_VECTOR; 716 - pdsv->vqs[i].notify = vp_modern_map_vq_notify(&pdsv->vdpa_aux->vd_mdev, 717 - i, &pdsv->vqs[i].notify_pa); 636 + void __iomem *notify; 637 + 638 + notify = vp_modern_map_vq_notify(&pdsv->vdpa_aux->vd_mdev, 639 + i, &pdsv->vqs[i].notify_pa); 640 + pds_vdpa_init_vqs_entry(pdsv, i, notify); 718 641 } 719 642 720 643 pdsv->vdpa_dev.mdev = &vdpa_aux->vdpa_mdev; ··· 825 746 826 747 max_vqs = min_t(u16, dev_intrs, max_vqs); 827 748 mgmt->max_supported_vqs = min_t(u16, PDS_VDPA_MAX_QUEUES, max_vqs); 828 - vdpa_aux->nintrs = mgmt->max_supported_vqs; 749 + vdpa_aux->nintrs = 0; 829 750 830 751 mgmt->ops = &pds_vdpa_mgmt_dev_ops; 831 752 mgmt->id_table = pds_vdpa_id_table; 832 753 mgmt->device = dev; 833 754 mgmt->supported_features = le64_to_cpu(vdpa_aux->ident.hw_features); 755 + 756 + /* advertise F_MAC even if the device doesn't */ 757 + mgmt->supported_features |= BIT_ULL(VIRTIO_NET_F_MAC); 758 + 834 759 mgmt->config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR); 835 760 mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP); 836 761 mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_FEATURES); 837 - 838 - err = pci_alloc_irq_vectors(pdev, vdpa_aux->nintrs, vdpa_aux->nintrs, 839 - PCI_IRQ_MSIX); 840 - if (err < 0) { 841 - dev_err(dev, "Couldn't get %d msix vectors: %pe\n", 842 - vdpa_aux->nintrs, ERR_PTR(err)); 843 - return err; 844 - } 845 - vdpa_aux->nintrs = err; 846 762 847 763 return 0; 848 764 }
+3 -2
drivers/vdpa/pds/vdpa_dev.h
··· 35 35 struct pds_vdpa_aux *vdpa_aux; 36 36 37 37 struct pds_vdpa_vq_info vqs[PDS_VDPA_MAX_QUEUES]; 38 - u64 supported_features; /* specified device features */ 39 - u64 req_features; /* features requested by vdpa */ 38 + u64 supported_features; /* supported device features */ 39 + u64 negotiated_features; /* negotiated features */ 40 40 u8 vdpa_index; /* rsvd for future subdevice use */ 41 41 u8 num_vqs; /* num vqs in use */ 42 + u8 mac[ETH_ALEN]; /* mac selected when the device was added */ 42 43 struct vdpa_callback config_cb; 43 44 struct notifier_block nb; 44 45 };
+3 -6
drivers/vdpa/vdpa.c
··· 1247 1247 [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING }, 1248 1248 [VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING }, 1249 1249 [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR, 1250 + [VDPA_ATTR_DEV_NET_CFG_MAX_VQP] = { .type = NLA_U16 }, 1250 1251 /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */ 1251 1252 [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68), 1253 + [VDPA_ATTR_DEV_QUEUE_INDEX] = { .type = NLA_U32 }, 1254 + [VDPA_ATTR_DEV_FEATURES] = { .type = NLA_U64 }, 1252 1255 }; 1253 1256 1254 1257 static const struct genl_ops vdpa_nl_ops[] = { 1255 1258 { 1256 1259 .cmd = VDPA_CMD_MGMTDEV_GET, 1257 - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 1258 1260 .doit = vdpa_nl_cmd_mgmtdev_get_doit, 1259 1261 .dumpit = vdpa_nl_cmd_mgmtdev_get_dumpit, 1260 1262 }, 1261 1263 { 1262 1264 .cmd = VDPA_CMD_DEV_NEW, 1263 - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 1264 1265 .doit = vdpa_nl_cmd_dev_add_set_doit, 1265 1266 .flags = GENL_ADMIN_PERM, 1266 1267 }, 1267 1268 { 1268 1269 .cmd = VDPA_CMD_DEV_DEL, 1269 - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 1270 1270 .doit = vdpa_nl_cmd_dev_del_set_doit, 1271 1271 .flags = GENL_ADMIN_PERM, 1272 1272 }, 1273 1273 { 1274 1274 .cmd = VDPA_CMD_DEV_GET, 1275 - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 1276 1275 .doit = vdpa_nl_cmd_dev_get_doit, 1277 1276 .dumpit = vdpa_nl_cmd_dev_get_dumpit, 1278 1277 }, 1279 1278 { 1280 1279 .cmd = VDPA_CMD_DEV_CONFIG_GET, 1281 - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 1282 1280 .doit = vdpa_nl_cmd_dev_config_get_doit, 1283 1281 .dumpit = vdpa_nl_cmd_dev_config_get_dumpit, 1284 1282 }, 1285 1283 { 1286 1284 .cmd = VDPA_CMD_DEV_VSTATS_GET, 1287 - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 1288 1285 .doit = vdpa_nl_cmd_dev_stats_get_doit, 1289 1286 .flags = GENL_ADMIN_PERM, 1290 1287 },
+4 -4
drivers/vdpa/vdpa_user/vduse_dev.c
··· 935 935 { 936 936 struct vduse_dev *dev = container_of(work, struct vduse_dev, inject); 937 937 938 - spin_lock_irq(&dev->irq_lock); 938 + spin_lock_bh(&dev->irq_lock); 939 939 if (dev->config_cb.callback) 940 940 dev->config_cb.callback(dev->config_cb.private); 941 - spin_unlock_irq(&dev->irq_lock); 941 + spin_unlock_bh(&dev->irq_lock); 942 942 } 943 943 944 944 static void vduse_vq_irq_inject(struct work_struct *work) ··· 946 946 struct vduse_virtqueue *vq = container_of(work, 947 947 struct vduse_virtqueue, inject); 948 948 949 - spin_lock_irq(&vq->irq_lock); 949 + spin_lock_bh(&vq->irq_lock); 950 950 if (vq->ready && vq->cb.callback) 951 951 vq->cb.callback(vq->cb.private); 952 - spin_unlock_irq(&vq->irq_lock); 952 + spin_unlock_bh(&vq->irq_lock); 953 953 } 954 954 955 955 static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
+162 -25
drivers/vhost/scsi.c
··· 25 25 #include <linux/fs.h> 26 26 #include <linux/vmalloc.h> 27 27 #include <linux/miscdevice.h> 28 + #include <linux/blk_types.h> 29 + #include <linux/bio.h> 28 30 #include <asm/unaligned.h> 29 31 #include <scsi/scsi_common.h> 30 32 #include <scsi/scsi_proto.h> ··· 77 75 u32 tvc_prot_sgl_count; 78 76 /* Saved unpacked SCSI LUN for vhost_scsi_target_queue_cmd() */ 79 77 u32 tvc_lun; 78 + u32 copied_iov:1; 79 + const void *saved_iter_addr; 80 + struct iov_iter saved_iter; 80 81 /* Pointer to the SGL formatted memory from virtio-scsi */ 81 82 struct scatterlist *tvc_sgl; 82 83 struct scatterlist *tvc_prot_sgl; ··· 333 328 int i; 334 329 335 330 if (tv_cmd->tvc_sgl_count) { 336 - for (i = 0; i < tv_cmd->tvc_sgl_count; i++) 337 - put_page(sg_page(&tv_cmd->tvc_sgl[i])); 331 + for (i = 0; i < tv_cmd->tvc_sgl_count; i++) { 332 + if (tv_cmd->copied_iov) 333 + __free_page(sg_page(&tv_cmd->tvc_sgl[i])); 334 + else 335 + put_page(sg_page(&tv_cmd->tvc_sgl[i])); 336 + } 337 + kfree(tv_cmd->saved_iter_addr); 338 338 } 339 339 if (tv_cmd->tvc_prot_sgl_count) { 340 340 for (i = 0; i < tv_cmd->tvc_prot_sgl_count; i++) ··· 514 504 mutex_unlock(&vq->mutex); 515 505 } 516 506 507 + static int vhost_scsi_copy_sgl_to_iov(struct vhost_scsi_cmd *cmd) 508 + { 509 + struct iov_iter *iter = &cmd->saved_iter; 510 + struct scatterlist *sg = cmd->tvc_sgl; 511 + struct page *page; 512 + size_t len; 513 + int i; 514 + 515 + for (i = 0; i < cmd->tvc_sgl_count; i++) { 516 + page = sg_page(&sg[i]); 517 + len = sg[i].length; 518 + 519 + if (copy_page_to_iter(page, 0, len, iter) != len) { 520 + pr_err("Could not copy data while handling misaligned cmd. Error %zu\n", 521 + len); 522 + return -1; 523 + } 524 + } 525 + 526 + return 0; 527 + } 528 + 517 529 /* Fill in status and signal that we are done processing this command 518 530 * 519 531 * This is scheduled in the vhost work queue so we are called with the owner ··· 559 527 560 528 pr_debug("%s tv_cmd %p resid %u status %#02x\n", __func__, 561 529 cmd, se_cmd->residual_count, se_cmd->scsi_status); 562 - 563 530 memset(&v_rsp, 0, sizeof(v_rsp)); 564 - v_rsp.resid = cpu_to_vhost32(cmd->tvc_vq, se_cmd->residual_count); 565 - /* TODO is status_qualifier field needed? */ 566 - v_rsp.status = se_cmd->scsi_status; 567 - v_rsp.sense_len = cpu_to_vhost32(cmd->tvc_vq, 568 - se_cmd->scsi_sense_length); 569 - memcpy(v_rsp.sense, cmd->tvc_sense_buf, 570 - se_cmd->scsi_sense_length); 531 + 532 + if (cmd->saved_iter_addr && vhost_scsi_copy_sgl_to_iov(cmd)) { 533 + v_rsp.response = VIRTIO_SCSI_S_BAD_TARGET; 534 + } else { 535 + v_rsp.resid = cpu_to_vhost32(cmd->tvc_vq, 536 + se_cmd->residual_count); 537 + /* TODO is status_qualifier field needed? */ 538 + v_rsp.status = se_cmd->scsi_status; 539 + v_rsp.sense_len = cpu_to_vhost32(cmd->tvc_vq, 540 + se_cmd->scsi_sense_length); 541 + memcpy(v_rsp.sense, cmd->tvc_sense_buf, 542 + se_cmd->scsi_sense_length); 543 + } 571 544 572 545 iov_iter_init(&iov_iter, ITER_DEST, cmd->tvc_resp_iov, 573 546 cmd->tvc_in_iovs, sizeof(v_rsp)); ··· 650 613 vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd, 651 614 struct iov_iter *iter, 652 615 struct scatterlist *sgl, 653 - bool write) 616 + bool is_prot) 654 617 { 655 618 struct page **pages = cmd->tvc_upages; 656 619 struct scatterlist *sg = sgl; 657 - ssize_t bytes; 658 - size_t offset; 620 + ssize_t bytes, mapped_bytes; 621 + size_t offset, mapped_offset; 659 622 unsigned int npages = 0; 660 623 661 624 bytes = iov_iter_get_pages2(iter, pages, LONG_MAX, ··· 664 627 if (bytes <= 0) 665 628 return bytes < 0 ? bytes : -EFAULT; 666 629 630 + mapped_bytes = bytes; 631 + mapped_offset = offset; 632 + 667 633 while (bytes) { 668 634 unsigned n = min_t(unsigned, PAGE_SIZE - offset, bytes); 635 + /* 636 + * The block layer requires bios/requests to be a multiple of 637 + * 512 bytes, but Windows can send us vecs that are misaligned. 638 + * This can result in bios and later requests with misaligned 639 + * sizes if we have to break up a cmd/scatterlist into multiple 640 + * bios. 641 + * 642 + * We currently only break up a command into multiple bios if 643 + * we hit the vec/seg limit, so check if our sgl_count is 644 + * greater than the max and if a vec in the cmd has a 645 + * misaligned offset/size. 646 + */ 647 + if (!is_prot && 648 + (offset & (SECTOR_SIZE - 1) || n & (SECTOR_SIZE - 1)) && 649 + cmd->tvc_sgl_count > BIO_MAX_VECS) { 650 + WARN_ONCE(true, 651 + "vhost-scsi detected misaligned IO. Performance may be degraded."); 652 + goto revert_iter_get_pages; 653 + } 654 + 669 655 sg_set_page(sg++, pages[npages++], n, offset); 670 656 bytes -= n; 671 657 offset = 0; 672 658 } 659 + 673 660 return npages; 661 + 662 + revert_iter_get_pages: 663 + iov_iter_revert(iter, mapped_bytes); 664 + 665 + npages = 0; 666 + while (mapped_bytes) { 667 + unsigned int n = min_t(unsigned int, PAGE_SIZE - mapped_offset, 668 + mapped_bytes); 669 + 670 + put_page(pages[npages++]); 671 + 672 + mapped_bytes -= n; 673 + mapped_offset = 0; 674 + } 675 + 676 + return -EINVAL; 674 677 } 675 678 676 679 static int ··· 734 657 } 735 658 736 659 static int 737 - vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, bool write, 738 - struct iov_iter *iter, 739 - struct scatterlist *sg, int sg_count) 660 + vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, 661 + struct scatterlist *sg, int sg_count) 662 + { 663 + size_t len = iov_iter_count(iter); 664 + unsigned int nbytes = 0; 665 + struct page *page; 666 + int i; 667 + 668 + if (cmd->tvc_data_direction == DMA_FROM_DEVICE) { 669 + cmd->saved_iter_addr = dup_iter(&cmd->saved_iter, iter, 670 + GFP_KERNEL); 671 + if (!cmd->saved_iter_addr) 672 + return -ENOMEM; 673 + } 674 + 675 + for (i = 0; i < sg_count; i++) { 676 + page = alloc_page(GFP_KERNEL); 677 + if (!page) { 678 + i--; 679 + goto err; 680 + } 681 + 682 + nbytes = min_t(unsigned int, PAGE_SIZE, len); 683 + sg_set_page(&sg[i], page, nbytes, 0); 684 + 685 + if (cmd->tvc_data_direction == DMA_TO_DEVICE && 686 + copy_page_from_iter(page, 0, nbytes, iter) != nbytes) 687 + goto err; 688 + 689 + len -= nbytes; 690 + } 691 + 692 + cmd->copied_iov = 1; 693 + return 0; 694 + 695 + err: 696 + pr_err("Could not read %u bytes while handling misaligned cmd\n", 697 + nbytes); 698 + 699 + for (; i >= 0; i--) 700 + __free_page(sg_page(&sg[i])); 701 + kfree(cmd->saved_iter_addr); 702 + return -ENOMEM; 703 + } 704 + 705 + static int 706 + vhost_scsi_map_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, 707 + struct scatterlist *sg, int sg_count, bool is_prot) 740 708 { 741 709 struct scatterlist *p = sg; 710 + size_t revert_bytes; 742 711 int ret; 743 712 744 713 while (iov_iter_count(iter)) { 745 - ret = vhost_scsi_map_to_sgl(cmd, iter, sg, write); 714 + ret = vhost_scsi_map_to_sgl(cmd, iter, sg, is_prot); 746 715 if (ret < 0) { 716 + revert_bytes = 0; 717 + 747 718 while (p < sg) { 748 - struct page *page = sg_page(p++); 749 - if (page) 719 + struct page *page = sg_page(p); 720 + 721 + if (page) { 750 722 put_page(page); 723 + revert_bytes += p->length; 724 + } 725 + p++; 751 726 } 727 + 728 + iov_iter_revert(iter, revert_bytes); 752 729 return ret; 753 730 } 754 731 sg += ret; 755 732 } 733 + 756 734 return 0; 757 735 } 758 736 ··· 817 685 size_t data_bytes, struct iov_iter *data_iter) 818 686 { 819 687 int sgl_count, ret; 820 - bool write = (cmd->tvc_data_direction == DMA_FROM_DEVICE); 821 688 822 689 if (prot_bytes) { 823 690 sgl_count = vhost_scsi_calc_sgls(prot_iter, prot_bytes, ··· 829 698 pr_debug("%s prot_sg %p prot_sgl_count %u\n", __func__, 830 699 cmd->tvc_prot_sgl, cmd->tvc_prot_sgl_count); 831 700 832 - ret = vhost_scsi_iov_to_sgl(cmd, write, prot_iter, 833 - cmd->tvc_prot_sgl, 834 - cmd->tvc_prot_sgl_count); 701 + ret = vhost_scsi_map_iov_to_sgl(cmd, prot_iter, 702 + cmd->tvc_prot_sgl, 703 + cmd->tvc_prot_sgl_count, true); 835 704 if (ret < 0) { 836 705 cmd->tvc_prot_sgl_count = 0; 837 706 return ret; ··· 847 716 pr_debug("%s data_sg %p data_sgl_count %u\n", __func__, 848 717 cmd->tvc_sgl, cmd->tvc_sgl_count); 849 718 850 - ret = vhost_scsi_iov_to_sgl(cmd, write, data_iter, 851 - cmd->tvc_sgl, cmd->tvc_sgl_count); 719 + ret = vhost_scsi_map_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl, 720 + cmd->tvc_sgl_count, false); 721 + if (ret == -EINVAL) { 722 + sg_init_table(cmd->tvc_sgl, cmd->tvc_sgl_count); 723 + ret = vhost_scsi_copy_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl, 724 + cmd->tvc_sgl_count); 725 + } 726 + 852 727 if (ret < 0) { 853 728 cmd->tvc_sgl_count = 0; 854 729 return ret;
+112 -56
drivers/virtio/virtio_mem.c
··· 38 38 MODULE_PARM_DESC(bbm_block_size, 39 39 "Big Block size in bytes. Default is 0 (auto-detection)."); 40 40 41 - static bool bbm_safe_unplug = true; 42 - module_param(bbm_safe_unplug, bool, 0444); 43 - MODULE_PARM_DESC(bbm_safe_unplug, 44 - "Use a safe unplug mechanism in BBM, avoiding long/endless loops"); 45 - 46 41 /* 47 42 * virtio-mem currently supports the following modes of operation: 48 43 * ··· 167 172 uint64_t sb_size; 168 173 /* The number of subblocks per Linux memory block. */ 169 174 uint32_t sbs_per_mb; 175 + 176 + /* 177 + * Some of the Linux memory blocks tracked as "partially 178 + * plugged" are completely unplugged and can be offlined 179 + * and removed -- which previously failed. 180 + */ 181 + bool have_unplugged_mb; 170 182 171 183 /* Summary of all memory block states. */ 172 184 unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT]; ··· 748 746 * immediately instead of waiting. 749 747 */ 750 748 virtio_mem_retry(vm); 751 - } else { 752 - dev_dbg(&vm->vdev->dev, 753 - "offlining and removing memory failed: %d\n", rc); 749 + return 0; 754 750 } 755 - return rc; 751 + dev_dbg(&vm->vdev->dev, "offlining and removing memory failed: %d\n", rc); 752 + /* 753 + * We don't really expect this to fail, because we fake-offlined all 754 + * memory already. But it could fail in corner cases. 755 + */ 756 + WARN_ON_ONCE(rc != -ENOMEM && rc != -EBUSY); 757 + return rc == -ENOMEM ? -ENOMEM : -EBUSY; 756 758 } 757 759 758 760 /* ··· 770 764 const uint64_t size = memory_block_size_bytes(); 771 765 772 766 return virtio_mem_offline_and_remove_memory(vm, addr, size); 767 + } 768 + 769 + /* 770 + * Try (offlining and) removing memory from Linux in case all subblocks are 771 + * unplugged. Can be called on online and offline memory blocks. 772 + * 773 + * May modify the state of memory blocks in virtio-mem. 774 + */ 775 + static int virtio_mem_sbm_try_remove_unplugged_mb(struct virtio_mem *vm, 776 + unsigned long mb_id) 777 + { 778 + int rc; 779 + 780 + /* 781 + * Once all subblocks of a memory block were unplugged, offline and 782 + * remove it. 783 + */ 784 + if (!virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) 785 + return 0; 786 + 787 + /* offline_and_remove_memory() works for online and offline memory. */ 788 + mutex_unlock(&vm->hotplug_mutex); 789 + rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id); 790 + mutex_lock(&vm->hotplug_mutex); 791 + if (!rc) 792 + virtio_mem_sbm_set_mb_state(vm, mb_id, 793 + VIRTIO_MEM_SBM_MB_UNUSED); 794 + return rc; 773 795 } 774 796 775 797 /* ··· 1189 1155 * Try to allocate a range, marking pages fake-offline, effectively 1190 1156 * fake-offlining them. 1191 1157 */ 1192 - static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages) 1158 + static int virtio_mem_fake_offline(struct virtio_mem *vm, unsigned long pfn, 1159 + unsigned long nr_pages) 1193 1160 { 1194 1161 const bool is_movable = is_zone_movable_page(pfn_to_page(pfn)); 1195 1162 int rc, retry_count; ··· 1203 1168 * some guarantees. 1204 1169 */ 1205 1170 for (retry_count = 0; retry_count < 5; retry_count++) { 1171 + /* 1172 + * If the config changed, stop immediately and go back to the 1173 + * main loop: avoid trying to keep unplugging if the device 1174 + * might have decided to not remove any more memory. 1175 + */ 1176 + if (atomic_read(&vm->config_changed)) 1177 + return -EAGAIN; 1178 + 1206 1179 rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE, 1207 1180 GFP_KERNEL); 1208 1181 if (rc == -ENOMEM) ··· 1960 1917 start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + 1961 1918 sb_id * vm->sbm.sb_size); 1962 1919 1963 - rc = virtio_mem_fake_offline(start_pfn, nr_pages); 1920 + rc = virtio_mem_fake_offline(vm, start_pfn, nr_pages); 1964 1921 if (rc) 1965 1922 return rc; 1966 1923 ··· 2032 1989 } 2033 1990 2034 1991 unplugged: 2035 - /* 2036 - * Once all subblocks of a memory block were unplugged, offline and 2037 - * remove it. This will usually not fail, as no memory is in use 2038 - * anymore - however some other notifiers might NACK the request. 2039 - */ 2040 - if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { 2041 - mutex_unlock(&vm->hotplug_mutex); 2042 - rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id); 2043 - mutex_lock(&vm->hotplug_mutex); 2044 - if (!rc) 2045 - virtio_mem_sbm_set_mb_state(vm, mb_id, 2046 - VIRTIO_MEM_SBM_MB_UNUSED); 2047 - } 2048 - 1992 + rc = virtio_mem_sbm_try_remove_unplugged_mb(vm, mb_id); 1993 + if (rc) 1994 + vm->sbm.have_unplugged_mb = 1; 1995 + /* Ignore errors, this is not critical. We'll retry later. */ 2049 1996 return 0; 2050 1997 } 2051 1998 ··· 2144 2111 VIRTIO_MEM_BBM_BB_ADDED)) 2145 2112 return -EINVAL; 2146 2113 2147 - if (bbm_safe_unplug) { 2148 - /* 2149 - * Start by fake-offlining all memory. Once we marked the device 2150 - * block as fake-offline, all newly onlined memory will 2151 - * automatically be kept fake-offline. Protect from concurrent 2152 - * onlining/offlining until we have a consistent state. 2153 - */ 2154 - mutex_lock(&vm->hotplug_mutex); 2155 - virtio_mem_bbm_set_bb_state(vm, bb_id, 2156 - VIRTIO_MEM_BBM_BB_FAKE_OFFLINE); 2114 + /* 2115 + * Start by fake-offlining all memory. Once we marked the device 2116 + * block as fake-offline, all newly onlined memory will 2117 + * automatically be kept fake-offline. Protect from concurrent 2118 + * onlining/offlining until we have a consistent state. 2119 + */ 2120 + mutex_lock(&vm->hotplug_mutex); 2121 + virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_FAKE_OFFLINE); 2157 2122 2158 - for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { 2159 - page = pfn_to_online_page(pfn); 2160 - if (!page) 2161 - continue; 2123 + for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { 2124 + page = pfn_to_online_page(pfn); 2125 + if (!page) 2126 + continue; 2162 2127 2163 - rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION); 2164 - if (rc) { 2165 - end_pfn = pfn; 2166 - goto rollback_safe_unplug; 2167 - } 2128 + rc = virtio_mem_fake_offline(vm, pfn, PAGES_PER_SECTION); 2129 + if (rc) { 2130 + end_pfn = pfn; 2131 + goto rollback; 2168 2132 } 2169 - mutex_unlock(&vm->hotplug_mutex); 2170 2133 } 2134 + mutex_unlock(&vm->hotplug_mutex); 2171 2135 2172 2136 rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id); 2173 2137 if (rc) { 2174 - if (bbm_safe_unplug) { 2175 - mutex_lock(&vm->hotplug_mutex); 2176 - goto rollback_safe_unplug; 2177 - } 2178 - return rc; 2138 + mutex_lock(&vm->hotplug_mutex); 2139 + goto rollback; 2179 2140 } 2180 2141 2181 2142 rc = virtio_mem_bbm_unplug_bb(vm, bb_id); ··· 2181 2154 VIRTIO_MEM_BBM_BB_UNUSED); 2182 2155 return rc; 2183 2156 2184 - rollback_safe_unplug: 2157 + rollback: 2185 2158 for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { 2186 2159 page = pfn_to_online_page(pfn); 2187 2160 if (!page) ··· 2287 2260 2288 2261 /* 2289 2262 * Try to unplug all blocks that couldn't be unplugged before, for example, 2290 - * because the hypervisor was busy. 2263 + * because the hypervisor was busy. Further, offline and remove any memory 2264 + * blocks where we previously failed. 2291 2265 */ 2292 - static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm) 2266 + static int virtio_mem_cleanup_pending_mb(struct virtio_mem *vm) 2293 2267 { 2294 2268 unsigned long id; 2295 - int rc; 2269 + int rc = 0; 2296 2270 2297 2271 if (!vm->in_sbm) { 2298 2272 virtio_mem_bbm_for_each_bb(vm, id, ··· 2315 2287 VIRTIO_MEM_SBM_MB_UNUSED); 2316 2288 } 2317 2289 2290 + if (!vm->sbm.have_unplugged_mb) 2291 + return 0; 2292 + 2293 + /* 2294 + * Let's retry (offlining and) removing completely unplugged Linux 2295 + * memory blocks. 2296 + */ 2297 + vm->sbm.have_unplugged_mb = false; 2298 + 2299 + mutex_lock(&vm->hotplug_mutex); 2300 + virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL) 2301 + rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id); 2302 + virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL) 2303 + rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id); 2304 + virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) 2305 + rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id); 2306 + mutex_unlock(&vm->hotplug_mutex); 2307 + 2308 + if (rc) 2309 + vm->sbm.have_unplugged_mb = true; 2310 + /* Ignore errors, this is not critical. We'll retry later. */ 2318 2311 return 0; 2319 2312 } 2320 2313 ··· 2417 2368 virtio_mem_refresh_config(vm); 2418 2369 } 2419 2370 2420 - /* Unplug any leftovers from previous runs */ 2371 + /* Cleanup any leftovers from previous runs */ 2421 2372 if (!rc) 2422 - rc = virtio_mem_unplug_pending_mb(vm); 2373 + rc = virtio_mem_cleanup_pending_mb(vm); 2423 2374 2424 2375 if (!rc && vm->requested_size != vm->plugged_size) { 2425 2376 if (vm->requested_size > vm->plugged_size) { ··· 2430 2381 rc = virtio_mem_unplug_request(vm, diff); 2431 2382 } 2432 2383 } 2384 + 2385 + /* 2386 + * Keep retrying to offline and remove completely unplugged Linux 2387 + * memory blocks. 2388 + */ 2389 + if (!rc && vm->in_sbm && vm->sbm.have_unplugged_mb) 2390 + rc = -EBUSY; 2433 2391 2434 2392 switch (rc) { 2435 2393 case 0:
+2 -3
drivers/virtio/virtio_mmio.c
··· 607 607 struct virtio_device *vdev = 608 608 container_of(_d, struct virtio_device, dev); 609 609 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); 610 - struct platform_device *pdev = vm_dev->pdev; 611 610 612 - devm_kfree(&pdev->dev, vm_dev); 611 + kfree(vm_dev); 613 612 } 614 613 615 614 /* Platform device */ ··· 619 620 unsigned long magic; 620 621 int rc; 621 622 622 - vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL); 623 + vm_dev = kzalloc(sizeof(*vm_dev), GFP_KERNEL); 623 624 if (!vm_dev) 624 625 return -ENOMEM; 625 626
-2
drivers/virtio/virtio_pci_common.c
··· 557 557 558 558 pci_set_master(pci_dev); 559 559 560 - vp_dev->is_legacy = vp_dev->ldev.ioaddr ? true : false; 561 - 562 560 rc = register_virtio_device(&vp_dev->vdev); 563 561 reg_dev = vp_dev; 564 562 if (rc)
+1
drivers/virtio/virtio_pci_legacy.c
··· 223 223 vp_dev->config_vector = vp_config_vector; 224 224 vp_dev->setup_vq = setup_vq; 225 225 vp_dev->del_vq = del_vq; 226 + vp_dev->is_legacy = true; 226 227 227 228 return 0; 228 229 }
+2
drivers/virtio/virtio_vdpa.c
··· 393 393 cb.callback = virtio_vdpa_config_cb; 394 394 cb.private = vd_dev; 395 395 ops->set_config_cb(vdpa, &cb); 396 + kfree(masks); 396 397 397 398 return 0; 398 399 399 400 err_setup_vq: 400 401 virtio_vdpa_del_vqs(vdev); 402 + kfree(masks); 401 403 return err; 402 404 } 403 405