Merge tag 'powerpc-5.1-6' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:
"A one-liner to make our Radix MMU support depend on HUGETLB_PAGE. We
use some of the hugetlb inlines (e.g. pud_huge()) when operating on the
linear mapping, and if they're compiled into empty wrappers we can
corrupt memory.

Then two fixes to our VFIO IOMMU code. The first is not a regression
but fixes the locking to avoid a user-triggerable deadlock.

The second does fix a regression since rc1, and depends on the first
fix. It makes it possible to run guests with large amounts of memory
again (~256GB).

Thanks to Alexey Kardashevskiy"

* tag 'powerpc-5.1-6' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
powerpc/mm_iommu: Allow pinning large regions
powerpc/mm_iommu: Fix potential deadlock
powerpc/mm/radix: Make Radix require HUGETLB_PAGE
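The deadlock fix is visible in the mmu_context_iommu.c diff below: previously mem_list_mutex was taken at the top of mm_iommu_do_alloc() and held across the locked_vm accounting and the page pinning, both of which need mmap_sem; after the change the mutex is only taken for the final overlap check and list insertion, so the two locks are never held at the same time. The following is a standalone, single-threaded sketch of that ordering (plain user-space C; region, pin_pages and the other names are invented for illustration and this is not kernel code):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct region {
	unsigned long ua, len;
	struct region *next;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER; /* plays the role of mem_list_mutex */
static pthread_mutex_t addr_lock = PTHREAD_MUTEX_INITIALIZER; /* plays the role of mmap_sem */
static struct region *regions;

/* Stand-in for the locked_vm accounting + get_user_pages step: only needs addr_lock. */
static int pin_pages(unsigned long ua, unsigned long len)
{
	pthread_mutex_lock(&addr_lock);
	printf("pinned %lu units at 0x%lx\n", len, ua);
	pthread_mutex_unlock(&addr_lock);
	return 0;
}

/* Fixed ordering: all addr_lock work is finished before list_lock is taken. */
static int register_region(unsigned long ua, unsigned long len)
{
	struct region *r = calloc(1, sizeof(*r)), *cur;

	if (!r || pin_pages(ua, len)) {
		free(r);
		return -1;
	}
	r->ua = ua;
	r->len = len;

	pthread_mutex_lock(&list_lock);	/* short critical section, addr_lock never taken inside */
	for (cur = regions; cur; cur = cur->next) {
		if (ua < cur->ua + cur->len && cur->ua < ua + len) {
			pthread_mutex_unlock(&list_lock);
			free(r);	/* the real code also unpins the pages here */
			return -1;	/* overlap */
		}
	}
	r->next = regions;
	regions = r;
	pthread_mutex_unlock(&list_lock);
	return 0;
}

int main(void)
{
	register_region(0x10000, 16);
	register_region(0x10008, 16);	/* overlaps the first region, rejected */
	return 0;
}

The trade-off, which the real patch also accepts, is that a racing duplicate registration is only detected after the pages have been pinned, at which point they are released again.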

Changed files
+61 -41
+1 -0
arch/powerpc/configs/skiroot_defconfig
@@ -266,6 +266,7 @@
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_HUGETLBFS=y
 # CONFIG_MISC_FILESYSTEMS is not set
 # CONFIG_NETWORK_FILESYSTEMS is not set
 CONFIG_NLS=y
+59 -40
arch/powerpc/mm/mmu_context_iommu.c
@@ -95,28 +95,15 @@
 		unsigned long entries, unsigned long dev_hpa,
 		struct mm_iommu_table_group_mem_t **pmem)
 {
-	struct mm_iommu_table_group_mem_t *mem;
-	long i, ret, locked_entries = 0;
+	struct mm_iommu_table_group_mem_t *mem, *mem2;
+	long i, ret, locked_entries = 0, pinned = 0;
 	unsigned int pageshift;
-
-	mutex_lock(&mem_list_mutex);
-
-	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list,
-			next) {
-		/* Overlap? */
-		if ((mem->ua < (ua + (entries << PAGE_SHIFT))) &&
-				(ua < (mem->ua +
-				       (mem->entries << PAGE_SHIFT)))) {
-			ret = -EINVAL;
-			goto unlock_exit;
-		}
-
-	}
+	unsigned long entry, chunk;
 
 	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
 		ret = mm_iommu_adjust_locked_vm(mm, entries, true);
 		if (ret)
-			goto unlock_exit;
+			return ret;
 
 		locked_entries = entries;
 	}
@@ -135,17 +148,27 @@
 	}
 
 	down_read(&mm->mmap_sem);
-	ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL);
-	up_read(&mm->mmap_sem);
-	if (ret != entries) {
-		/* free the reference taken */
-		for (i = 0; i < ret; i++)
-			put_page(mem->hpages[i]);
+	chunk = (1UL << (PAGE_SHIFT + MAX_ORDER - 1)) /
+			sizeof(struct vm_area_struct *);
+	chunk = min(chunk, entries);
+	for (entry = 0; entry < entries; entry += chunk) {
+		unsigned long n = min(entries - entry, chunk);
 
-		vfree(mem->hpas);
-		kfree(mem);
-		ret = -EFAULT;
-		goto unlock_exit;
+		ret = get_user_pages_longterm(ua + (entry << PAGE_SHIFT), n,
+				FOLL_WRITE, mem->hpages + entry, NULL);
+		if (ret == n) {
+			pinned += n;
+			continue;
+		}
+		if (ret > 0)
+			pinned += ret;
+		break;
+	}
+	up_read(&mm->mmap_sem);
+	if (pinned != entries) {
+		if (!ret)
+			ret = -EFAULT;
+		goto free_exit;
 	}
 
 	pageshift = PAGE_SHIFT;
@@ -180,20 +183,42 @@
 	}
 
 good_exit:
-	ret = 0;
 	atomic64_set(&mem->mapped, 1);
 	mem->used = 1;
 	mem->ua = ua;
 	mem->entries = entries;
-	*pmem = mem;
+
+	mutex_lock(&mem_list_mutex);
+
+	list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) {
+		/* Overlap? */
+		if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
+				(ua < (mem2->ua +
+				       (mem2->entries << PAGE_SHIFT)))) {
+			ret = -EINVAL;
+			mutex_unlock(&mem_list_mutex);
+			goto free_exit;
+		}
+	}
 
 	list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);
 
-unlock_exit:
-	if (locked_entries && ret)
-		mm_iommu_adjust_locked_vm(mm, locked_entries, false);
-
 	mutex_unlock(&mem_list_mutex);
+
+	*pmem = mem;
+
+	return 0;
+
+free_exit:
+	/* free the reference taken */
+	for (i = 0; i < pinned; i++)
+		put_page(mem->hpages[i]);
+
+	vfree(mem->hpas);
+	kfree(mem);
+
+unlock_exit:
+	mm_iommu_adjust_locked_vm(mm, locked_entries, false);
 
 	return ret;
 }
@@ -285,7 +266,7 @@
 long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
 {
 	long ret = 0;
-	unsigned long entries, dev_hpa;
+	unsigned long unlock_entries = 0;
 
 	mutex_lock(&mem_list_mutex);
 
@@ -306,16 +287,16 @@
 		goto unlock_exit;
 	}
 
-	/* @mapped became 0 so now mappings are disabled, release the region */
-	entries = mem->entries;
-	dev_hpa = mem->dev_hpa;
-	mm_iommu_release(mem);
+	if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+		unlock_entries = mem->entries;
 
-	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
-		mm_iommu_adjust_locked_vm(mm, entries, false);
+	/* @mapped became 0 so now mappings are disabled, release the region */
+	mm_iommu_release(mem);
 
 unlock_exit:
 	mutex_unlock(&mem_list_mutex);
+
+	mm_iommu_adjust_locked_vm(mm, unlock_entries, false);
 
 	return ret;
 }
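The chunking added above keeps each get_user_pages_longterm() call small enough that the temporary array of vm_area_struct pointers it may allocate internally fits in a single maximum-order page allocation, which is what used to fail for very large registrations. Below is a standalone user-space sketch of the arithmetic; PAGE_SHIFT, MAX_ORDER and the guest size are assumed values for illustration, not taken from any particular config:

#include <stdio.h>

/* Assumed values for illustration only: the real numbers come from the
 * kernel configuration (these roughly match a 64K-page powerpc64 build). */
#define PAGE_SHIFT	16
#define PAGE_SIZE	(1ULL << PAGE_SHIFT)
#define MAX_ORDER	9
#define PTR_SIZE	sizeof(void *)	/* stands in for sizeof(struct vm_area_struct *) */

int main(void)
{
	unsigned long long guest_ram = 256ULL << 30;		/* the ~256GB guest from the pull message */
	unsigned long long entries = guest_ram / PAGE_SIZE;	/* pages to pin */

	/* Same bound the fix uses: the largest number of page pointers whose
	 * array still fits in a single maximum-order allocation. */
	unsigned long long chunk = (1ULL << (PAGE_SHIFT + MAX_ORDER - 1)) / PTR_SIZE;

	if (chunk > entries)
		chunk = entries;

	printf("entries=%llu, chunk=%llu, get_user_pages calls=%llu\n",
	       entries, chunk, (entries + chunk - 1) / chunk);
	return 0;
}

With these assumed numbers a ~256GB registration is pinned in two calls of about two million pages each rather than one enormous request, and each call only needs a modest temporary allocation.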
+1 -1
arch/powerpc/platforms/Kconfig.cputype
@@ -324,7 +324,7 @@
 
 config PPC_RADIX_MMU
 	bool "Radix MMU Support"
-	depends on PPC_BOOK3S_64
+	depends on PPC_BOOK3S_64 && HUGETLB_PAGE
 	select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
 	default y
 	help
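The Kconfig change works because, with HUGETLB_PAGE disabled, helpers such as pud_huge() and pmd_huge() are compiled into stubs that always return 0, so code operating on the linear mapping can mistake a huge leaf entry for a pointer to a lower-level table. The toy program below (plain user-space C, every name invented) models that failure mode; it is not the kernel's page-table walker:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define LEAF_FLAG ((uint64_t)1)	/* pretend bit 0 marks a huge/leaf mapping */

/* Real helper: knows a leaf entry encodes a physical block, not a table. */
static bool entry_is_leaf(uint64_t e) { return e & LEAF_FLAG; }

/* What an "empty wrapper" looks like when the config option is off. */
static bool entry_is_leaf_stub(uint64_t e) { (void)e; return false; }

static void walk_entry(uint64_t e, bool (*is_leaf)(uint64_t))
{
	if (is_leaf(e)) {
		printf("leaf entry: maps a large physical block, stop here\n");
		return;
	}
	/* Wrong path for a leaf entry: this "pointer" is really part of a
	 * physical address, so a real walker would read or scribble on
	 * memory it does not own. */
	printf("table entry: would dereference 0x%llx as a next-level table\n",
	       (unsigned long long)(e & ~LEAF_FLAG));
}

int main(void)
{
	uint64_t huge_leaf = ((uint64_t)1 << 30) | LEAF_FLAG;	/* a 1GB-style leaf entry */

	walk_entry(huge_leaf, entry_is_leaf);		/* correct: recognised as a leaf */
	walk_entry(huge_leaf, entry_is_leaf_stub);	/* stubbed: misread as a table */
	return 0;
}

Requiring HUGETLB_PAGE for PPC_RADIX_MMU ensures the real inlines are always built alongside the Radix linear-mapping code.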