Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

uaccess: Provide scoped user access regions

User space access regions are tedious and require similar code patterns all
over the place:

if (!user_read_access_begin(from, sizeof(*from)))
return -EFAULT;
unsafe_get_user(val, from, Efault);
user_read_access_end();
return 0;
Efault:
user_read_access_end();
return -EFAULT;

This got worse with the recent addition of masked user access, which
optimizes the speculation prevention:

if (can_do_masked_user_access())
from = masked_user_read_access_begin((from));
else if (!user_read_access_begin(from, sizeof(*from)))
return -EFAULT;
unsafe_get_user(val, from, Efault);
user_read_access_end();
return 0;
Efault:
user_read_access_end();
return -EFAULT;

There have been issues with using the wrong user_*_access_end() variant in
the error path and other typical Copy&Pasta problems, e.g. using the wrong
fault label in the user accessor which ends up using the wrong access end
variant.

These patterns beg for scopes with automatic cleanup. The resulting outcome
is:
scoped_user_read_access(from, Efault)
unsafe_get_user(val, from, Efault);
return 0;
Efault:
return -EFAULT;

The scope guarantees the proper cleanup for the access mode is invoked both
in the success and the failure (fault) path.

The scoped_user_$MODE_access() macros are implemented as self terminating
nested for() loops. Thanks to Andrew Cooper for pointing me at them. The
scope can therefore be left with 'break', 'goto' and 'return'. Even
'continue' "works" due to the self termination mechanism. Both GCC and
clang optimize all the convoluted macro maze out and the above results with
clang in:

b80: f3 0f 1e fa endbr64
b84: 48 b8 ef cd ab 89 67 45 23 01 movabs $0x123456789abcdef,%rax
b8e: 48 39 c7 cmp %rax,%rdi
b91: 48 0f 47 f8 cmova %rax,%rdi
b95: 90 nop
b96: 90 nop
b97: 90 nop
b98: 31 c9 xor %ecx,%ecx
b9a: 8b 07 mov (%rdi),%eax
b9c: 89 06 mov %eax,(%rsi)
b9e: 85 c9 test %ecx,%ecx
ba0: 0f 94 c0 sete %al
ba3: 90 nop
ba4: 90 nop
ba5: 90 nop
ba6: c3 ret

Which looks as compact as it gets. The NOPs are placeholder for STAC/CLAC.
GCC emits the fault path separately:

bf0: f3 0f 1e fa endbr64
bf4: 48 b8 ef cd ab 89 67 45 23 01 movabs $0x123456789abcdef,%rax
bfe: 48 39 c7 cmp %rax,%rdi
c01: 48 0f 47 f8 cmova %rax,%rdi
c05: 90 nop
c06: 90 nop
c07: 90 nop
c08: 31 d2 xor %edx,%edx
c0a: 8b 07 mov (%rdi),%eax
c0c: 89 06 mov %eax,(%rsi)
c0e: 85 d2 test %edx,%edx
c10: 75 09 jne c1b <afoo+0x2b>
c12: 90 nop
c13: 90 nop
c14: 90 nop
c15: b8 01 00 00 00 mov $0x1,%eax
c1a: c3 ret
c1b: 90 nop
c1c: 90 nop
c1d: 90 nop
c1e: 31 c0 xor %eax,%eax
c20: c3 ret

The fault labels for the scoped*() macros and the fault labels for the
actual user space accessors can be shared and must be placed outside of the
scope.

If masked user access is enabled on an architecture, then the pointer
handed in to scoped_user_$MODE_access() can be modified to point to a
guaranteed faulting user address. This modification is only scope local as
the pointer is aliased inside the scope. When the scope is left the alias
is no longer in effect. IOW the original pointer value is preserved so it
can be used e.g. for fixup or diagnostic purposes in the fault path.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://patch.msgid.link/20251027083745.546420421@linutronix.de

authored by

Thomas Gleixner and committed by
Ingo Molnar
e497310b 2db48d8b

+192
+192
include/linux/uaccess.h
··· 2 2 #ifndef __LINUX_UACCESS_H__ 3 3 #define __LINUX_UACCESS_H__ 4 4 5 + #include <linux/cleanup.h> 5 6 #include <linux/fault-inject-usercopy.h> 6 7 #include <linux/instrumented.h> 7 8 #include <linux/minmax.h> ··· 36 35 37 36 #ifdef masked_user_access_begin 38 37 #define can_do_masked_user_access() 1 38 + # ifndef masked_user_write_access_begin 39 + # define masked_user_write_access_begin masked_user_access_begin 40 + # endif 41 + # ifndef masked_user_read_access_begin 42 + # define masked_user_read_access_begin masked_user_access_begin 43 + #endif 39 44 #else 40 45 #define can_do_masked_user_access() 0 41 46 #define masked_user_access_begin(src) NULL 47 + #define masked_user_read_access_begin(src) NULL 48 + #define masked_user_write_access_begin(src) NULL 42 49 #define mask_user_address(src) (src) 43 50 #endif 44 51 ··· 641 632 #define user_read_access_begin user_access_begin 642 633 #define user_read_access_end user_access_end 643 634 #endif 635 + 636 + /* Define RW variant so the below _mode macro expansion works */ 637 + #define masked_user_rw_access_begin(u) masked_user_access_begin(u) 638 + #define user_rw_access_begin(u, s) user_access_begin(u, s) 639 + #define user_rw_access_end() user_access_end() 640 + 641 + /* Scoped user access */ 642 + #define USER_ACCESS_GUARD(_mode) \ 643 + static __always_inline void __user * \ 644 + class_user_##_mode##_begin(void __user *ptr) \ 645 + { \ 646 + return ptr; \ 647 + } \ 648 + \ 649 + static __always_inline void \ 650 + class_user_##_mode##_end(void __user *ptr) \ 651 + { \ 652 + user_##_mode##_access_end(); \ 653 + } \ 654 + \ 655 + DEFINE_CLASS(user_ ##_mode## _access, void __user *, \ 656 + class_user_##_mode##_end(_T), \ 657 + class_user_##_mode##_begin(ptr), void __user *ptr) \ 658 + \ 659 + static __always_inline class_user_##_mode##_access_t \ 660 + class_user_##_mode##_access_ptr(void __user *scope) \ 661 + { \ 662 + return scope; \ 663 + } 664 + 665 + USER_ACCESS_GUARD(read) 666 + USER_ACCESS_GUARD(write) 
667 + USER_ACCESS_GUARD(rw) 668 + #undef USER_ACCESS_GUARD 669 + 670 + /** 671 + * __scoped_user_access_begin - Start a scoped user access 672 + * @mode: The mode of the access class (read, write, rw) 673 + * @uptr: The pointer to access user space memory 674 + * @size: Size of the access 675 + * @elbl: Error label to goto when the access region is rejected 676 + * 677 + * Internal helper for __scoped_user_access(). Don't use directly. 678 + */ 679 + #define __scoped_user_access_begin(mode, uptr, size, elbl) \ 680 + ({ \ 681 + typeof(uptr) __retptr; \ 682 + \ 683 + if (can_do_masked_user_access()) { \ 684 + __retptr = masked_user_##mode##_access_begin(uptr); \ 685 + } else { \ 686 + __retptr = uptr; \ 687 + if (!user_##mode##_access_begin(uptr, size)) \ 688 + goto elbl; \ 689 + } \ 690 + __retptr; \ 691 + }) 692 + 693 + /** 694 + * __scoped_user_access - Open a scope for user access 695 + * @mode: The mode of the access class (read, write, rw) 696 + * @uptr: The pointer to access user space memory 697 + * @size: Size of the access 698 + * @elbl: Error label to goto when the access region is rejected. It 699 + * must be placed outside the scope 700 + * 701 + * If the user access function inside the scope requires a fault label, it 702 + * can use @elbl or a different label outside the scope, which requires 703 + * that user access which is implemented with ASM GOTO has been properly 704 + * wrapped. See unsafe_get_user() for reference. 705 + * 706 + * scoped_user_rw_access(ptr, efault) { 707 + * unsafe_get_user(rval, &ptr->rval, efault); 708 + * unsafe_put_user(wval, &ptr->wval, efault); 709 + * } 710 + * return 0; 711 + * efault: 712 + * return -EFAULT; 713 + * 714 + * The scope is internally implemented as a autoterminating nested for() 715 + * loop, which can be left with 'return', 'break' and 'goto' at any 716 + * point. 717 + * 718 + * When the scope is left user_##@_mode##_access_end() is automatically 719 + * invoked. 
720 + * 721 + * When the architecture supports masked user access and the access region 722 + * which is determined by @uptr and @size is not a valid user space 723 + * address, i.e. < TASK_SIZE, the scope sets the pointer to a faulting user 724 + * space address and does not terminate early. This optimizes for the good 725 + * case and lets the performance uncritical bad case go through the fault. 726 + * 727 + * The eventual modification of the pointer is limited to the scope. 728 + * Outside of the scope the original pointer value is unmodified, so that 729 + * the original pointer value is available for diagnostic purposes in an 730 + * out of scope fault path. 731 + * 732 + * Nesting scoped user access into a user access scope is invalid and fails 733 + * the build. Nesting into other guards, e.g. pagefault is safe. 734 + * 735 + * The masked variant does not check the size of the access and relies on a 736 + * mapping hole (e.g. guard page) to catch an out of range pointer, the 737 + * first access to user memory inside the scope has to be within 738 + * @uptr ... @uptr + PAGE_SIZE - 1 739 + * 740 + * Don't use directly. Use scoped_masked_user_$MODE_access() instead. 741 + */ 742 + #define __scoped_user_access(mode, uptr, size, elbl) \ 743 + for (bool done = false; !done; done = true) \ 744 + for (void __user *_tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl); \ 745 + !done; done = true) \ 746 + for (CLASS(user_##mode##_access, scope)(_tmpptr); !done; done = true) \ 747 + /* Force modified pointer usage within the scope */ \ 748 + for (const typeof(uptr) uptr = _tmpptr; !done; done = true) 749 + 750 + /** 751 + * scoped_user_read_access_size - Start a scoped user read access with given size 752 + * @usrc: Pointer to the user space address to read from 753 + * @size: Size of the access starting from @usrc 754 + * @elbl: Error label to goto when the access region is rejected 755 + * 756 + * For further information see __scoped_user_access() above. 
757 + */ 758 + #define scoped_user_read_access_size(usrc, size, elbl) \ 759 + __scoped_user_access(read, usrc, size, elbl) 760 + 761 + /** 762 + * scoped_user_read_access - Start a scoped user read access 763 + * @usrc: Pointer to the user space address to read from 764 + * @elbl: Error label to goto when the access region is rejected 765 + * 766 + * The size of the access starting from @usrc is determined via sizeof(*@usrc)). 767 + * 768 + * For further information see __scoped_user_access() above. 769 + */ 770 + #define scoped_user_read_access(usrc, elbl) \ 771 + scoped_user_read_access_size(usrc, sizeof(*(usrc)), elbl) 772 + 773 + /** 774 + * scoped_user_write_access_size - Start a scoped user write access with given size 775 + * @udst: Pointer to the user space address to write to 776 + * @size: Size of the access starting from @udst 777 + * @elbl: Error label to goto when the access region is rejected 778 + * 779 + * For further information see __scoped_user_access() above. 780 + */ 781 + #define scoped_user_write_access_size(udst, size, elbl) \ 782 + __scoped_user_access(write, udst, size, elbl) 783 + 784 + /** 785 + * scoped_user_write_access - Start a scoped user write access 786 + * @udst: Pointer to the user space address to write to 787 + * @elbl: Error label to goto when the access region is rejected 788 + * 789 + * The size of the access starting from @udst is determined via sizeof(*@udst)). 790 + * 791 + * For further information see __scoped_user_access() above. 
792 + */ 793 + #define scoped_user_write_access(udst, elbl) \ 794 + scoped_user_write_access_size(udst, sizeof(*(udst)), elbl) 795 + 796 + /** 797 + * scoped_user_rw_access_size - Start a scoped user read/write access with given size 798 + * @uptr Pointer to the user space address to read from and write to 799 + * @size: Size of the access starting from @uptr 800 + * @elbl: Error label to goto when the access region is rejected 801 + * 802 + * For further information see __scoped_user_access() above. 803 + */ 804 + #define scoped_user_rw_access_size(uptr, size, elbl) \ 805 + __scoped_user_access(rw, uptr, size, elbl) 806 + 807 + /** 808 + * scoped_user_rw_access - Start a scoped user read/write access 809 + * @uptr Pointer to the user space address to read from and write to 810 + * @elbl: Error label to goto when the access region is rejected 811 + * 812 + * The size of the access starting from @uptr is determined via sizeof(*@uptr)). 813 + * 814 + * For further information see __scoped_user_access() above. 815 + */ 816 + #define scoped_user_rw_access(uptr, elbl) \ 817 + scoped_user_rw_access_size(uptr, sizeof(*(uptr)), elbl) 644 818 645 819 #ifdef CONFIG_HARDENED_USERCOPY 646 820 void __noreturn usercopy_abort(const char *name, const char *detail,