Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

PCI/P2PDMA: track pgmap references per resource, not globally

In preparation for fixing a race between devm_memremap_pages_release()
and the final put of a page from the device-page-map, allocate a
percpu-ref per p2pdma resource mapping.

Link: http://lkml.kernel.org/r/155727338646.292046.9922678317501435597.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Dan Williams and committed by Linus Torvalds.
1570175a 795ee306

+82 -44
drivers/pci/p2pdma.c
··· 20 20 #include <linux/seq_buf.h> 21 21 22 22 struct pci_p2pdma { 23 - struct percpu_ref devmap_ref; 24 - struct completion devmap_ref_done; 25 23 struct gen_pool *pool; 26 24 bool p2pmem_published; 25 + }; 26 + 27 + struct p2pdma_pagemap { 28 + struct dev_pagemap pgmap; 29 + struct percpu_ref ref; 30 + struct completion ref_done; 27 31 }; 28 32 29 33 static ssize_t size_show(struct device *dev, struct device_attribute *attr, ··· 78 74 .name = "p2pmem", 79 75 }; 80 76 77 + static struct p2pdma_pagemap *to_p2p_pgmap(struct percpu_ref *ref) 78 + { 79 + return container_of(ref, struct p2pdma_pagemap, ref); 80 + } 81 + 81 82 static void pci_p2pdma_percpu_release(struct percpu_ref *ref) 82 83 { 83 - struct pci_p2pdma *p2p = 84 - container_of(ref, struct pci_p2pdma, devmap_ref); 84 + struct p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(ref); 85 85 86 - complete_all(&p2p->devmap_ref_done); 86 + complete(&p2p_pgmap->ref_done); 87 87 } 88 88 89 89 static void pci_p2pdma_percpu_kill(struct percpu_ref *ref) 90 90 { 91 - /* 92 - * pci_p2pdma_add_resource() may be called multiple times 93 - * by a driver and may register the percpu_kill devm action multiple 94 - * times. We only want the first action to actually kill the 95 - * percpu_ref. 
96 - */ 97 - if (percpu_ref_is_dying(ref)) 98 - return; 99 - 100 91 percpu_ref_kill(ref); 92 + } 93 + 94 + static void pci_p2pdma_percpu_cleanup(void *ref) 95 + { 96 + struct p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(ref); 97 + 98 + wait_for_completion(&p2p_pgmap->ref_done); 99 + percpu_ref_exit(&p2p_pgmap->ref); 101 100 } 102 101 103 102 static void pci_p2pdma_release(void *data) 104 103 { 105 104 struct pci_dev *pdev = data; 105 + struct pci_p2pdma *p2pdma = pdev->p2pdma; 106 106 107 - if (!pdev->p2pdma) 107 + if (!p2pdma) 108 108 return; 109 109 110 - wait_for_completion(&pdev->p2pdma->devmap_ref_done); 111 - percpu_ref_exit(&pdev->p2pdma->devmap_ref); 112 - 113 - gen_pool_destroy(pdev->p2pdma->pool); 114 - sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group); 110 + /* Flush and disable pci_alloc_p2p_mem() */ 115 111 pdev->p2pdma = NULL; 112 + synchronize_rcu(); 113 + 114 + gen_pool_destroy(p2pdma->pool); 115 + sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group); 116 116 } 117 117 118 118 static int pci_p2pdma_setup(struct pci_dev *pdev) ··· 131 123 p2p->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev)); 132 124 if (!p2p->pool) 133 125 goto out; 134 - 135 - init_completion(&p2p->devmap_ref_done); 136 - error = percpu_ref_init(&p2p->devmap_ref, 137 - pci_p2pdma_percpu_release, 0, GFP_KERNEL); 138 - if (error) 139 - goto out_pool_destroy; 140 126 141 127 error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev); 142 128 if (error) ··· 165 163 int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, 166 164 u64 offset) 167 165 { 166 + struct p2pdma_pagemap *p2p_pgmap; 168 167 struct dev_pagemap *pgmap; 169 168 void *addr; 170 169 int error; ··· 188 185 return error; 189 186 } 190 187 191 - pgmap = devm_kzalloc(&pdev->dev, sizeof(*pgmap), GFP_KERNEL); 192 - if (!pgmap) 188 + p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL); 189 + if (!p2p_pgmap) 193 190 return -ENOMEM; 191 + 192 + 
init_completion(&p2p_pgmap->ref_done); 193 + error = percpu_ref_init(&p2p_pgmap->ref, 194 + pci_p2pdma_percpu_release, 0, GFP_KERNEL); 195 + if (error) 196 + goto pgmap_free; 197 + 198 + /* 199 + * FIXME: the percpu_ref_exit needs to be coordinated internal 200 + * to devm_memremap_pages_release(). Duplicate the same ordering 201 + * as other devm_memremap_pages() users for now. 202 + */ 203 + error = devm_add_action(&pdev->dev, pci_p2pdma_percpu_cleanup, 204 + &p2p_pgmap->ref); 205 + if (error) 206 + goto ref_cleanup; 207 + 208 + pgmap = &p2p_pgmap->pgmap; 194 209 195 210 pgmap->res.start = pci_resource_start(pdev, bar) + offset; 196 211 pgmap->res.end = pgmap->res.start + size - 1; 197 212 pgmap->res.flags = pci_resource_flags(pdev, bar); 198 - pgmap->ref = &pdev->p2pdma->devmap_ref; 213 + pgmap->ref = &p2p_pgmap->ref; 199 214 pgmap->type = MEMORY_DEVICE_PCI_P2PDMA; 200 215 pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) - 201 216 pci_resource_start(pdev, bar); ··· 222 201 addr = devm_memremap_pages(&pdev->dev, pgmap); 223 202 if (IS_ERR(addr)) { 224 203 error = PTR_ERR(addr); 225 - goto pgmap_free; 204 + goto ref_exit; 226 205 } 227 206 228 - error = gen_pool_add_virt(pdev->p2pdma->pool, (unsigned long)addr, 207 + error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr, 229 208 pci_bus_address(pdev, bar) + offset, 230 - resource_size(&pgmap->res), dev_to_node(&pdev->dev)); 209 + resource_size(&pgmap->res), dev_to_node(&pdev->dev), 210 + &p2p_pgmap->ref); 231 211 if (error) 232 212 goto pages_free; 233 213 ··· 239 217 240 218 pages_free: 241 219 devm_memunmap_pages(&pdev->dev, pgmap); 220 + ref_cleanup: 221 + percpu_ref_exit(&p2p_pgmap->ref); 242 222 pgmap_free: 243 - devm_kfree(&pdev->dev, pgmap); 223 + devm_kfree(&pdev->dev, p2p_pgmap); 244 224 return error; 245 225 } 246 226 EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource); ··· 611 587 */ 612 588 void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size) 613 589 { 614 - void *ret; 590 + void *ret 
= NULL; 591 + struct percpu_ref *ref; 615 592 593 + /* 594 + * Pairs with synchronize_rcu() in pci_p2pdma_release() to 595 + * ensure pdev->p2pdma is non-NULL for the duration of the 596 + * read-lock. 597 + */ 598 + rcu_read_lock(); 616 599 if (unlikely(!pdev->p2pdma)) 617 - return NULL; 600 + goto out; 618 601 619 - if (unlikely(!percpu_ref_tryget_live(&pdev->p2pdma->devmap_ref))) 620 - return NULL; 602 + ret = (void *)gen_pool_alloc_owner(pdev->p2pdma->pool, size, 603 + (void **) &ref); 604 + if (!ret) 605 + goto out; 621 606 622 - ret = (void *)gen_pool_alloc(pdev->p2pdma->pool, size); 623 - 624 - if (unlikely(!ret)) 625 - percpu_ref_put(&pdev->p2pdma->devmap_ref); 626 - 607 + if (unlikely(!percpu_ref_tryget_live(ref))) { 608 + gen_pool_free(pdev->p2pdma->pool, (unsigned long) ret, size); 609 + ret = NULL; 610 + goto out; 611 + } 612 + out: 613 + rcu_read_unlock(); 627 614 return ret; 628 615 } 629 616 EXPORT_SYMBOL_GPL(pci_alloc_p2pmem); ··· 647 612 */ 648 613 void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size) 649 614 { 650 - gen_pool_free(pdev->p2pdma->pool, (uintptr_t)addr, size); 651 - percpu_ref_put(&pdev->p2pdma->devmap_ref); 615 + struct percpu_ref *ref; 616 + 617 + gen_pool_free_owner(pdev->p2pdma->pool, (uintptr_t)addr, size, 618 + (void **) &ref); 619 + percpu_ref_put(ref); 652 620 } 653 621 EXPORT_SYMBOL_GPL(pci_free_p2pmem); 654 622