Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/mpx: Support 32-bit binaries on 64-bit kernels

Right now, the kernel can only switch between 64-bit and 32-bit
binaries at compile time. This patch adds support for 32-bit
binaries on 64-bit kernels when we support ia32 emulation.

We essentially choose which set of table sizes to use when doing
arithmetic for the bounds table calculations.

This also uses a different approach for calculating the table
indexes than before. I think the new one makes it much more
clear what is going on, and allows us to share more code between
the 32-bit and 64-bit cases.

Based-on-patch-by: Qiaowei Ren <qiaowei.ren@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Dave Hansen <dave@sr71.net>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150607183705.E01F21E2@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Authored by Dave Hansen, committed by Ingo Molnar
613fcb7d 6ac52bb4

+179 -53
+30 -32
arch/x86/include/asm/mpx.h
··· 13 13 #define MPX_BNDCFG_ENABLE_FLAG 0x1 14 14 #define MPX_BD_ENTRY_VALID_FLAG 0x1 15 15 16 - #ifdef CONFIG_X86_64 17 - 18 - /* upper 28 bits [47:20] of the virtual address in 64-bit used to 19 - * index into bounds directory (BD). 16 + /* 17 + * The upper 28 bits [47:20] of the virtual address in 64-bit 18 + * are used to index into bounds directory (BD). 19 + * 20 + * The directory is 2G (2^31) in size, and with 8-byte entries 21 + * it has 2^28 entries. 20 22 */ 21 - #define MPX_BD_ENTRY_OFFSET 28 22 - #define MPX_BD_ENTRY_SHIFT 3 23 - /* bits [19:3] of the virtual address in 64-bit used to index into 24 - * bounds table (BT). 23 + #define MPX_BD_SIZE_BYTES_64 (1UL<<31) 24 + #define MPX_BD_ENTRY_BYTES_64 8 25 + #define MPX_BD_NR_ENTRIES_64 (MPX_BD_SIZE_BYTES_64/MPX_BD_ENTRY_BYTES_64) 26 + 27 + /* 28 + * The 32-bit directory is 4MB (2^22) in size, and with 4-byte 29 + * entries it has 2^20 entries. 25 30 */ 26 - #define MPX_BT_ENTRY_OFFSET 17 27 - #define MPX_BT_ENTRY_SHIFT 5 28 - #define MPX_IGN_BITS 3 29 - #define MPX_BD_ENTRY_TAIL 3 31 + #define MPX_BD_SIZE_BYTES_32 (1UL<<22) 32 + #define MPX_BD_ENTRY_BYTES_32 4 33 + #define MPX_BD_NR_ENTRIES_32 (MPX_BD_SIZE_BYTES_32/MPX_BD_ENTRY_BYTES_32) 30 34 31 - #else 35 + /* 36 + * A 64-bit table is 4MB total in size, and an entry is 37 + * 4 64-bit pointers in size. 38 + */ 39 + #define MPX_BT_SIZE_BYTES_64 (1UL<<22) 40 + #define MPX_BT_ENTRY_BYTES_64 32 41 + #define MPX_BT_NR_ENTRIES_64 (MPX_BT_SIZE_BYTES_64/MPX_BT_ENTRY_BYTES_64) 32 42 33 - #define MPX_BD_ENTRY_OFFSET 20 34 - #define MPX_BD_ENTRY_SHIFT 2 35 - #define MPX_BT_ENTRY_OFFSET 10 36 - #define MPX_BT_ENTRY_SHIFT 4 37 - #define MPX_IGN_BITS 2 38 - #define MPX_BD_ENTRY_TAIL 2 39 - 40 - #endif 41 - 42 - #define MPX_BD_SIZE_BYTES (1UL<<(MPX_BD_ENTRY_OFFSET+MPX_BD_ENTRY_SHIFT)) 43 - #define MPX_BT_SIZE_BYTES (1UL<<(MPX_BT_ENTRY_OFFSET+MPX_BT_ENTRY_SHIFT)) 43 + /* 44 + * A 32-bit table is 16kB total in size, and an entry is 45 + * 4 32-bit pointers in size. 
46 + */ 47 + #define MPX_BT_SIZE_BYTES_32 (1UL<<14) 48 + #define MPX_BT_ENTRY_BYTES_32 16 49 + #define MPX_BT_NR_ENTRIES_32 (MPX_BT_SIZE_BYTES_32/MPX_BT_ENTRY_BYTES_32) 44 50 45 51 #define MPX_BNDSTA_TAIL 2 46 52 #define MPX_BNDCFG_TAIL 12 47 53 #define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1)) 48 - 49 54 #define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) 50 55 #define MPX_BNDSTA_ERROR_CODE 0x3 51 - 52 - #define MPX_BD_ENTRY_MASK ((1<<MPX_BD_ENTRY_OFFSET)-1) 53 - #define MPX_BT_ENTRY_MASK ((1<<MPX_BT_ENTRY_OFFSET)-1) 54 - #define MPX_GET_BD_ENTRY_OFFSET(addr) ((((addr)>>(MPX_BT_ENTRY_OFFSET+ \ 55 - MPX_IGN_BITS)) & MPX_BD_ENTRY_MASK) << MPX_BD_ENTRY_SHIFT) 56 - #define MPX_GET_BT_ENTRY_OFFSET(addr) ((((addr)>>MPX_IGN_BITS) & \ 57 - MPX_BT_ENTRY_MASK) << MPX_BT_ENTRY_SHIFT) 58 56 59 57 #ifdef CONFIG_X86_INTEL_MPX 60 58 siginfo_t *mpx_generate_siginfo(struct pt_regs *regs);
+149 -21
arch/x86/mm/mpx.c
··· 34 34 return (vma->vm_ops == &mpx_vma_ops); 35 35 } 36 36 37 + static inline unsigned long mpx_bd_size_bytes(struct mm_struct *mm) 38 + { 39 + if (is_64bit_mm(mm)) 40 + return MPX_BD_SIZE_BYTES_64; 41 + else 42 + return MPX_BD_SIZE_BYTES_32; 43 + } 44 + 45 + static inline unsigned long mpx_bt_size_bytes(struct mm_struct *mm) 46 + { 47 + if (is_64bit_mm(mm)) 48 + return MPX_BT_SIZE_BYTES_64; 49 + else 50 + return MPX_BT_SIZE_BYTES_32; 51 + } 52 + 37 53 /* 38 54 * This is really a simplified "vm_mmap". it only handles MPX 39 55 * bounds tables (the bounds directory is user-allocated). ··· 66 50 struct vm_area_struct *vma; 67 51 68 52 /* Only bounds table can be allocated here */ 69 - if (len != MPX_BT_SIZE_BYTES) 53 + if (len != mpx_bt_size_bytes(mm)) 70 54 return -EINVAL; 71 55 72 56 down_write(&mm->mmap_sem); ··· 465 449 } 466 450 467 451 /* 468 - * With 32-bit mode, MPX_BT_SIZE_BYTES is 4MB, and the size of each 469 - * bounds table is 16KB. With 64-bit mode, MPX_BT_SIZE_BYTES is 2GB, 452 + * With 32-bit mode, a bounds directory is 4MB, and the size of each 453 + * bounds table is 16KB. With 64-bit mode, a bounds directory is 2GB, 470 454 * and the size of each bounds table is 4MB. 
471 455 */ 472 - static int allocate_bt(long __user *bd_entry) 456 + static int allocate_bt(struct mm_struct *mm, long __user *bd_entry) 473 457 { 474 - struct mm_struct *mm = current->mm; 475 458 unsigned long expected_old_val = 0; 476 459 unsigned long actual_old_val = 0; 477 460 unsigned long bt_addr; ··· 481 466 * Carve the virtual space out of userspace for the new 482 467 * bounds table: 483 468 */ 484 - bt_addr = mpx_mmap(MPX_BT_SIZE_BYTES); 469 + bt_addr = mpx_mmap(mpx_bt_size_bytes(mm)); 485 470 if (IS_ERR((void *)bt_addr)) 486 471 return PTR_ERR((void *)bt_addr); 487 472 /* ··· 532 517 trace_mpx_new_bounds_table(bt_addr); 533 518 return 0; 534 519 out_unmap: 535 - vm_munmap(bt_addr, MPX_BT_SIZE_BYTES); 520 + vm_munmap(bt_addr, mpx_bt_size_bytes(mm)); 536 521 return ret; 537 522 } 538 523 ··· 551 536 { 552 537 unsigned long bd_entry, bd_base; 553 538 const struct bndcsr *bndcsr; 539 + struct mm_struct *mm = current->mm; 554 540 555 541 bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); 556 542 if (!bndcsr) ··· 570 554 * the directory is. 571 555 */ 572 556 if ((bd_entry < bd_base) || 573 - (bd_entry >= bd_base + MPX_BD_SIZE_BYTES)) 557 + (bd_entry >= bd_base + mpx_bd_size_bytes(mm))) 574 558 return -EINVAL; 575 559 576 - return allocate_bt((long __user *)bd_entry); 560 + return allocate_bt(mm, (long __user *)bd_entry); 577 561 } 578 562 579 563 int mpx_handle_bd_fault(void) ··· 805 789 * avoid recursion, do_munmap() will check whether it comes 806 790 * from one bounds table through VM_MPX flag. 
807 791 */ 808 - return do_munmap(mm, bt_addr, MPX_BT_SIZE_BYTES); 792 + return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm)); 793 + } 794 + 795 + static inline int bt_entry_size_bytes(struct mm_struct *mm) 796 + { 797 + if (is_64bit_mm(mm)) 798 + return MPX_BT_ENTRY_BYTES_64; 799 + else 800 + return MPX_BT_ENTRY_BYTES_32; 801 + } 802 + 803 + /* 804 + * Take a virtual address and turns it in to the offset in bytes 805 + * inside of the bounds table where the bounds table entry 806 + * controlling 'addr' can be found. 807 + */ 808 + static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm, 809 + unsigned long addr) 810 + { 811 + unsigned long bt_table_nr_entries; 812 + unsigned long offset = addr; 813 + 814 + if (is_64bit_mm(mm)) { 815 + /* Bottom 3 bits are ignored on 64-bit */ 816 + offset >>= 3; 817 + bt_table_nr_entries = MPX_BT_NR_ENTRIES_64; 818 + } else { 819 + /* Bottom 2 bits are ignored on 32-bit */ 820 + offset >>= 2; 821 + bt_table_nr_entries = MPX_BT_NR_ENTRIES_32; 822 + } 823 + /* 824 + * We know the size of the table in to which we are 825 + * indexing, and we have eliminated all the low bits 826 + * which are ignored for indexing. 827 + * 828 + * Mask out all the high bits which we do not need 829 + * to index in to the table. Note that the tables 830 + * are always powers of two so this gives us a proper 831 + * mask. 832 + */ 833 + offset &= (bt_table_nr_entries-1); 834 + /* 835 + * We now have an entry offset in terms of *entries* in 836 + * the table. We need to scale it back up to bytes. 837 + */ 838 + offset *= bt_entry_size_bytes(mm); 839 + return offset; 840 + } 841 + 842 + /* 843 + * How much virtual address space does a single bounds 844 + * directory entry cover? 845 + * 846 + * Note, we need a long long because 4GB doesn't fit in 847 + * to a long on 32-bit. 
848 + */ 849 + static inline unsigned long bd_entry_virt_space(struct mm_struct *mm) 850 + { 851 + unsigned long long virt_space = (1ULL << boot_cpu_data.x86_virt_bits); 852 + if (is_64bit_mm(mm)) 853 + return virt_space / MPX_BD_NR_ENTRIES_64; 854 + else 855 + return virt_space / MPX_BD_NR_ENTRIES_32; 856 + } 857 + 858 + /* 859 + * Return an offset in terms of bytes in to the bounds 860 + * directory where the bounds directory entry for a given 861 + * virtual address resides. 862 + * 863 + * This has to be in bytes because the directory entries 864 + * are different sizes on 64/32 bit. 865 + */ 866 + static unsigned long mpx_get_bd_entry_offset(struct mm_struct *mm, 867 + unsigned long addr) 868 + { 869 + /* 870 + * There are several ways to derive the bd offsets. We 871 + * use the following approach here: 872 + * 1. We know the size of the virtual address space 873 + * 2. We know the number of entries in a bounds table 874 + * 3. We know that each entry covers a fixed amount of 875 + * virtual address space. 876 + * So, we can just divide the virtual address by the 877 + * virtual space used by one entry to determine which 878 + * entry "controls" the given virtual address. 879 + */ 880 + if (is_64bit_mm(mm)) { 881 + int bd_entry_size = 8; /* 64-bit pointer */ 882 + /* 883 + * Take the 64-bit addressing hole in to account. 884 + */ 885 + addr &= ((1UL << boot_cpu_data.x86_virt_bits) - 1); 886 + return (addr / bd_entry_virt_space(mm)) * bd_entry_size; 887 + } else { 888 + int bd_entry_size = 4; /* 32-bit pointer */ 889 + /* 890 + * 32-bit has no hole so this case needs no mask 891 + */ 892 + return (addr / bd_entry_virt_space(mm)) * bd_entry_size; 893 + } 894 + /* 895 + * The two return calls above are exact copies. If we 896 + * pull out a single copy and put it in here, gcc won't 897 + * realize that we're doing a power-of-2 divide and use 898 + * shifts. It uses a real divide. If we put them up 899 + * there, it manages to figure it out (gcc 4.8.3). 
900 + */ 809 901 } 810 902 811 903 /* ··· 927 803 unsigned long end, bool prev_shared, bool next_shared) 928 804 { 929 805 unsigned long bt_addr; 806 + unsigned long start_off, end_off; 930 807 int ret; 931 808 932 809 ret = get_bt_addr(mm, bd_entry, &bt_addr); ··· 939 814 if (ret) 940 815 return ret; 941 816 817 + start_off = mpx_get_bt_entry_offset_bytes(mm, start); 818 + end_off = mpx_get_bt_entry_offset_bytes(mm, end); 819 + 942 820 if (prev_shared && next_shared) 943 821 ret = zap_bt_entries(mm, bt_addr, 944 - bt_addr+MPX_GET_BT_ENTRY_OFFSET(start), 945 - bt_addr+MPX_GET_BT_ENTRY_OFFSET(end)); 822 + bt_addr + start_off, 823 + bt_addr + end_off); 946 824 else if (prev_shared) 947 825 ret = zap_bt_entries(mm, bt_addr, 948 - bt_addr+MPX_GET_BT_ENTRY_OFFSET(start), 949 - bt_addr+MPX_BT_SIZE_BYTES); 826 + bt_addr + start_off, 827 + bt_addr + mpx_bt_size_bytes(mm)); 950 828 else if (next_shared) 951 829 ret = zap_bt_entries(mm, bt_addr, bt_addr, 952 - bt_addr+MPX_GET_BT_ENTRY_OFFSET(end)); 830 + bt_addr + end_off); 953 831 else 954 832 ret = unmap_single_bt(mm, bd_entry, bt_addr); 955 833 ··· 973 845 struct vm_area_struct *prev, *next; 974 846 bool prev_shared = false, next_shared = false; 975 847 976 - bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start); 977 - bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1); 848 + bde_start = mm->bd_addr + mpx_get_bd_entry_offset(mm, start); 849 + bde_end = mm->bd_addr + mpx_get_bd_entry_offset(mm, end-1); 978 850 979 851 /* 980 852 * Check whether bde_start and bde_end are shared with adjacent ··· 986 858 * in to 'next'. 
987 859 */ 988 860 next = find_vma_prev(mm, start, &prev); 989 - if (prev && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(prev->vm_end-1)) 861 + if (prev && (mm->bd_addr + mpx_get_bd_entry_offset(mm, prev->vm_end-1)) 990 862 == bde_start) 991 863 prev_shared = true; 992 - if (next && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(next->vm_start)) 864 + if (next && (mm->bd_addr + mpx_get_bd_entry_offset(mm, next->vm_start)) 993 865 == bde_end) 994 866 next_shared = true; 995 867 ··· 1055 927 * 1. fully covered 1056 928 * 2. not at the edges of the mapping, even if full aligned 1057 929 */ 1058 - bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start); 1059 - bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1); 930 + bde_start = mm->bd_addr + mpx_get_bd_entry_offset(mm, start); 931 + bde_end = mm->bd_addr + mpx_get_bd_entry_offset(mm, end-1); 1060 932 for (bd_entry = bde_start + 1; bd_entry < bde_end; bd_entry++) { 1061 933 ret = get_bt_addr(mm, bd_entry, &bt_addr); 1062 934 switch (ret) {