
mm: selftests for exclusive device memory

Add selftests for exclusive device memory: basic exclusive faulting with revocation on CPU access, interaction with mprotect(), and copy-on-write behaviour across fork().
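For reference, below is a minimal userspace sketch of the flow the new tests exercise, driving the two new ioctls by hand. It is illustrative only: it assumes a kernel with CONFIG_TEST_HMM and the test_hmm module loaded so that /dev/hmm_dmirror0 exists, copies struct hmm_dmirror_cmd and the ioctl numbers from lib/test_hmm_uapi.h, and trims most error handling.

/*
 * Sketch only: mark a range device-exclusive, fault it back from the
 * CPU, then verify exclusivity was revoked. Assumes the test_hmm
 * module is loaded; struct hmm_dmirror_cmd mirrors lib/test_hmm_uapi.h.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/types.h>

struct hmm_dmirror_cmd {
	__u64 addr;	/* start of the virtual address range */
	__u64 ptr;	/* user buffer the driver copies data to */
	__u64 npages;	/* range length in pages */
	__u64 cpages;	/* out: pages the driver handled */
	__u64 faults;	/* out: device faults taken */
};

#define HMM_DMIRROR_EXCLUSIVE		_IOWR('H', 0x04, struct hmm_dmirror_cmd)
#define HMM_DMIRROR_CHECK_EXCLUSIVE	_IOWR('H', 0x05, struct hmm_dmirror_cmd)

int main(void)
{
	size_t psize = sysconf(_SC_PAGE_SIZE), npages = 4, size = npages * psize;
	struct hmm_dmirror_cmd cmd = { 0 };
	int fd = open("/dev/hmm_dmirror0", O_RDWR);
	char *buf = mmap(NULL, size, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	size_t i;

	if (fd < 0 || buf == MAP_FAILED)
		return 1;
	memset(buf, 0xa5, size);

	/* Mark the range for exclusive device access. */
	cmd.addr = (uintptr_t)buf;
	cmd.ptr = (uintptr_t)malloc(size);
	cmd.npages = npages;
	if (ioctl(fd, HMM_DMIRROR_EXCLUSIVE, &cmd))
		return 1;
	printf("exclusive entries installed for %llu pages\n",
	       (unsigned long long)cmd.cpages);

	/* Any CPU touch faults the page back and revokes exclusivity. */
	for (i = 0; i < npages; i++)
		buf[i * psize] = 0;

	/* Returns 0 only if no page is still marked device-exclusive. */
	return ioctl(fd, HMM_DMIRROR_CHECK_EXCLUSIVE, &cmd) ? 1 : 0;
}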

Link: https://lkml.kernel.org/r/20210616105937.23201-9-apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Acked-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Cc: Ben Skeggs <bskeggs@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Alistair Popple, committed by Linus Torvalds
(commit b659baea, parent b756a3b5)

3 files changed, 285 insertions(+)

lib/test_hmm.c (+125)
···
  #include <linux/swapops.h>
  #include <linux/sched/mm.h>
  #include <linux/platform_device.h>
+ #include <linux/rmap.h>

  #include "test_hmm_uapi.h"
···
  	unsigned long cpages;
  };

+ #define DPT_XA_TAG_ATOMIC 1UL
  #define DPT_XA_TAG_WRITE 3UL

  /*
···
  	}
  }

+ static int dmirror_check_atomic(struct dmirror *dmirror, unsigned long start,
+ 				unsigned long end)
+ {
+ 	unsigned long pfn;
+
+ 	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
+ 		void *entry;
+ 		struct page *page;
+
+ 		entry = xa_load(&dmirror->pt, pfn);
+ 		page = xa_untag_pointer(entry);
+ 		if (xa_pointer_tag(entry) == DPT_XA_TAG_ATOMIC)
+ 			return -EPERM;
+ 	}
+
+ 	return 0;
+ }
+
+ static int dmirror_atomic_map(unsigned long start, unsigned long end,
+ 			      struct page **pages, struct dmirror *dmirror)
+ {
+ 	unsigned long pfn, mapped = 0;
+ 	int i;
+
+ 	/* Map the migrated pages into the device's page tables. */
+ 	mutex_lock(&dmirror->mutex);
+
+ 	for (i = 0, pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++, i++) {
+ 		void *entry;
+
+ 		if (!pages[i])
+ 			continue;
+
+ 		entry = pages[i];
+ 		entry = xa_tag_pointer(entry, DPT_XA_TAG_ATOMIC);
+ 		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
+ 		if (xa_is_err(entry)) {
+ 			mutex_unlock(&dmirror->mutex);
+ 			return xa_err(entry);
+ 		}
+
+ 		mapped++;
+ 	}
+
+ 	mutex_unlock(&dmirror->mutex);
+ 	return mapped;
+ }
+
  static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
  					    struct dmirror *dmirror)
  {
···
  	mutex_unlock(&dmirror->mutex);
  	return 0;
+ }
+
+ static int dmirror_exclusive(struct dmirror *dmirror,
+ 			     struct hmm_dmirror_cmd *cmd)
+ {
+ 	unsigned long start, end, addr;
+ 	unsigned long size = cmd->npages << PAGE_SHIFT;
+ 	struct mm_struct *mm = dmirror->notifier.mm;
+ 	struct page *pages[64];
+ 	struct dmirror_bounce bounce;
+ 	unsigned long next;
+ 	int ret;
+
+ 	start = cmd->addr;
+ 	end = start + size;
+ 	if (end < start)
+ 		return -EINVAL;
+
+ 	/* Since the mm is for the mirrored process, get a reference first. */
+ 	if (!mmget_not_zero(mm))
+ 		return -EINVAL;
+
+ 	mmap_read_lock(mm);
+ 	for (addr = start; addr < end; addr = next) {
+ 		unsigned long mapped;
+ 		int i;
+
+ 		if (end < addr + (ARRAY_SIZE(pages) << PAGE_SHIFT))
+ 			next = end;
+ 		else
+ 			next = addr + (ARRAY_SIZE(pages) << PAGE_SHIFT);
+
+ 		ret = make_device_exclusive_range(mm, addr, next, pages, NULL);
+ 		mapped = dmirror_atomic_map(addr, next, pages, dmirror);
+ 		for (i = 0; i < ret; i++) {
+ 			if (pages[i]) {
+ 				unlock_page(pages[i]);
+ 				put_page(pages[i]);
+ 			}
+ 		}
+
+ 		if (addr + (mapped << PAGE_SHIFT) < next) {
+ 			mmap_read_unlock(mm);
+ 			mmput(mm);
+ 			return -EBUSY;
+ 		}
+ 	}
+ 	mmap_read_unlock(mm);
+ 	mmput(mm);
+
+ 	/* Return the migrated data for verification. */
+ 	ret = dmirror_bounce_init(&bounce, start, size);
+ 	if (ret)
+ 		return ret;
+ 	mutex_lock(&dmirror->mutex);
+ 	ret = dmirror_do_read(dmirror, start, end, &bounce);
+ 	mutex_unlock(&dmirror->mutex);
+ 	if (ret == 0) {
+ 		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
+ 				 bounce.size))
+ 			ret = -EFAULT;
+ 	}
+
+ 	cmd->cpages = bounce.cpages;
+ 	dmirror_bounce_fini(&bounce);
+ 	return ret;
  }

  static int dmirror_migrate(struct dmirror *dmirror,
···
  	case HMM_DMIRROR_MIGRATE:
  		ret = dmirror_migrate(dmirror, &cmd);
+ 		break;
+
+ 	case HMM_DMIRROR_EXCLUSIVE:
+ 		ret = dmirror_exclusive(dmirror, &cmd);
+ 		break;
+
+ 	case HMM_DMIRROR_CHECK_EXCLUSIVE:
+ 		ret = dmirror_check_atomic(dmirror, cmd.addr,
+ 					   cmd.addr + (cmd.npages << PAGE_SHIFT));
  		break;

  	case HMM_DMIRROR_SNAPSHOT:
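The driver tells exclusive ("atomic") entries apart from ordinary write entries by tagging the page pointer it stores in its XArray (DPT_XA_TAG_ATOMIC vs. the existing DPT_XA_TAG_WRITE) via xa_tag_pointer()/xa_pointer_tag(). As an illustration only, here is a userspace analogue of that scheme: XArray pointer tags ride in the low two bits of a 4-byte-aligned pointer, so one word encodes both the pointer and how it is mapped. The helper names below are local to this sketch, not kernel API.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define TAG_ATOMIC 1UL	/* stands in for DPT_XA_TAG_ATOMIC */
#define TAG_WRITE  3UL	/* stands in for DPT_XA_TAG_WRITE */

/* Pack a 2-bit tag into the low bits of an aligned pointer. */
static void *tag_pointer(void *p, unsigned long tag)
{
	assert(((uintptr_t)p & 3) == 0 && tag <= 3);
	return (void *)((uintptr_t)p | tag);
}

static unsigned long pointer_tag(void *entry)
{
	return (uintptr_t)entry & 3;
}

static void *untag_pointer(void *entry)
{
	return (void *)((uintptr_t)entry & ~(uintptr_t)3);
}

int main(void)
{
	int *page = malloc(sizeof(*page));	/* stands in for struct page * */
	void *entry = tag_pointer(page, TAG_ATOMIC);

	/* The tag survives the round trip; the pointer is unchanged. */
	printf("tag=%lu same_pointer=%d\n", pointer_tag(entry),
	       untag_pointer(entry) == (void *)page);
	free(page);
	return 0;
}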
lib/test_hmm_uapi.h (+2)
···
  #define HMM_DMIRROR_WRITE		_IOWR('H', 0x01, struct hmm_dmirror_cmd)
  #define HMM_DMIRROR_MIGRATE		_IOWR('H', 0x02, struct hmm_dmirror_cmd)
  #define HMM_DMIRROR_SNAPSHOT		_IOWR('H', 0x03, struct hmm_dmirror_cmd)
+ #define HMM_DMIRROR_EXCLUSIVE		_IOWR('H', 0x04, struct hmm_dmirror_cmd)
+ #define HMM_DMIRROR_CHECK_EXCLUSIVE	_IOWR('H', 0x05, struct hmm_dmirror_cmd)

  /*
   * Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT.
tools/testing/selftests/vm/hmm-tests.c (+158)
···
  	hmm_buffer_free(buffer);
  }

+ /*
+  * Basic check of exclusive faulting.
+  */
+ TEST_F(hmm, exclusive)
+ {
+ 	struct hmm_buffer *buffer;
+ 	unsigned long npages;
+ 	unsigned long size;
+ 	unsigned long i;
+ 	int *ptr;
+ 	int ret;
+
+ 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ 	ASSERT_NE(npages, 0);
+ 	size = npages << self->page_shift;
+
+ 	buffer = malloc(sizeof(*buffer));
+ 	ASSERT_NE(buffer, NULL);
+
+ 	buffer->fd = -1;
+ 	buffer->size = size;
+ 	buffer->mirror = malloc(size);
+ 	ASSERT_NE(buffer->mirror, NULL);
+
+ 	buffer->ptr = mmap(NULL, size,
+ 			   PROT_READ | PROT_WRITE,
+ 			   MAP_PRIVATE | MAP_ANONYMOUS,
+ 			   buffer->fd, 0);
+ 	ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ 	/* Initialize buffer in system memory. */
+ 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ 		ptr[i] = i;
+
+ 	/* Map memory exclusively for device access. */
+ 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
+ 	ASSERT_EQ(ret, 0);
+ 	ASSERT_EQ(buffer->cpages, npages);
+
+ 	/* Check what the device read. */
+ 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ 		ASSERT_EQ(ptr[i], i);
+
+ 	/* Fault pages back to system memory and check them. */
+ 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ 		ASSERT_EQ(ptr[i]++, i);
+
+ 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ 		ASSERT_EQ(ptr[i], i+1);
+
+ 	/* Check atomic access revoked */
+ 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_CHECK_EXCLUSIVE, buffer, npages);
+ 	ASSERT_EQ(ret, 0);
+
+ 	hmm_buffer_free(buffer);
+ }
+
+ TEST_F(hmm, exclusive_mprotect)
+ {
+ 	struct hmm_buffer *buffer;
+ 	unsigned long npages;
+ 	unsigned long size;
+ 	unsigned long i;
+ 	int *ptr;
+ 	int ret;
+
+ 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ 	ASSERT_NE(npages, 0);
+ 	size = npages << self->page_shift;
+
+ 	buffer = malloc(sizeof(*buffer));
+ 	ASSERT_NE(buffer, NULL);
+
+ 	buffer->fd = -1;
+ 	buffer->size = size;
+ 	buffer->mirror = malloc(size);
+ 	ASSERT_NE(buffer->mirror, NULL);
+
+ 	buffer->ptr = mmap(NULL, size,
+ 			   PROT_READ | PROT_WRITE,
+ 			   MAP_PRIVATE | MAP_ANONYMOUS,
+ 			   buffer->fd, 0);
+ 	ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ 	/* Initialize buffer in system memory. */
+ 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ 		ptr[i] = i;
+
+ 	/* Map memory exclusively for device access. */
+ 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
+ 	ASSERT_EQ(ret, 0);
+ 	ASSERT_EQ(buffer->cpages, npages);
+
+ 	/* Check what the device read. */
+ 	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ 		ASSERT_EQ(ptr[i], i);
+
+ 	ret = mprotect(buffer->ptr, size, PROT_READ);
+ 	ASSERT_EQ(ret, 0);
+
+ 	/* Simulate a device writing system memory. */
+ 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ 	ASSERT_EQ(ret, -EPERM);
+
+ 	hmm_buffer_free(buffer);
+ }
+
+ /*
+  * Check copy-on-write works.
+  */
+ TEST_F(hmm, exclusive_cow)
+ {
+ 	struct hmm_buffer *buffer;
+ 	unsigned long npages;
+ 	unsigned long size;
+ 	unsigned long i;
+ 	int *ptr;
+ 	int ret;
+
+ 	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ 	ASSERT_NE(npages, 0);
+ 	size = npages << self->page_shift;
+
+ 	buffer = malloc(sizeof(*buffer));
+ 	ASSERT_NE(buffer, NULL);
+
+ 	buffer->fd = -1;
+ 	buffer->size = size;
+ 	buffer->mirror = malloc(size);
+ 	ASSERT_NE(buffer->mirror, NULL);
+
+ 	buffer->ptr = mmap(NULL, size,
+ 			   PROT_READ | PROT_WRITE,
+ 			   MAP_PRIVATE | MAP_ANONYMOUS,
+ 			   buffer->fd, 0);
+ 	ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ 	/* Initialize buffer in system memory. */
+ 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ 		ptr[i] = i;
+
+ 	/* Map memory exclusively for device access. */
+ 	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
+ 	ASSERT_EQ(ret, 0);
+ 	ASSERT_EQ(buffer->cpages, npages);
+
+ 	fork();
+
+ 	/* Fault pages back to system memory and check them. */
+ 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ 		ASSERT_EQ(ptr[i]++, i);
+
+ 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ 		ASSERT_EQ(ptr[i], i+1);
+
+ 	hmm_buffer_free(buffer);
+ }
+
  TEST_HARNESS_MAIN
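A usage note: hmm-tests expects the test_hmm driver to be bound to /dev/hmm_dmirror0 and /dev/hmm_dmirror1 before it runs, so the new tests need a kernel built with CONFIG_TEST_HMM=m; the existing tools/testing/selftests/vm/test_hmm.sh wrapper loads the module and invokes the test binary.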