Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (c) 2014, The Linux Foundation. All rights reserved.
4 */
5#include <linux/kernel.h>
6#include <linux/mm.h>
7#include <linux/module.h>
8#include <linux/mem_encrypt.h>
9#include <linux/sched.h>
10#include <linux/vmalloc.h>
11#include <linux/pagewalk.h>
12
13#include <asm/cacheflush.h>
14#include <asm/pgtable-prot.h>
15#include <asm/set_memory.h>
16#include <asm/tlbflush.h>
17#include <asm/kfence.h>
18
19struct page_change_data {
20 pgprot_t set_mask;
21 pgprot_t clear_mask;
22};
23
24static ptdesc_t set_pageattr_masks(ptdesc_t val, struct mm_walk *walk)
25{
26 struct page_change_data *masks = walk->private;
27
28 val &= ~(pgprot_val(masks->clear_mask));
29 val |= (pgprot_val(masks->set_mask));
30
31 return val;
32}
33
34static int pageattr_pud_entry(pud_t *pud, unsigned long addr,
35 unsigned long next, struct mm_walk *walk)
36{
37 pud_t val = pudp_get(pud);
38
39 if (pud_sect(val)) {
40 if (WARN_ON_ONCE((next - addr) != PUD_SIZE))
41 return -EINVAL;
42 val = __pud(set_pageattr_masks(pud_val(val), walk));
43 set_pud(pud, val);
44 walk->action = ACTION_CONTINUE;
45 }
46
47 return 0;
48}
49
50static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr,
51 unsigned long next, struct mm_walk *walk)
52{
53 pmd_t val = pmdp_get(pmd);
54
55 if (pmd_sect(val)) {
56 if (WARN_ON_ONCE((next - addr) != PMD_SIZE))
57 return -EINVAL;
58 val = __pmd(set_pageattr_masks(pmd_val(val), walk));
59 set_pmd(pmd, val);
60 walk->action = ACTION_CONTINUE;
61 }
62
63 return 0;
64}
65
66static int pageattr_pte_entry(pte_t *pte, unsigned long addr,
67 unsigned long next, struct mm_walk *walk)
68{
69 pte_t val = __ptep_get(pte);
70
71 val = __pte(set_pageattr_masks(pte_val(val), walk));
72 __set_pte(pte, val);
73
74 return 0;
75}
76
77static const struct mm_walk_ops pageattr_ops = {
78 .pud_entry = pageattr_pud_entry,
79 .pmd_entry = pageattr_pmd_entry,
80 .pte_entry = pageattr_pte_entry,
81};
82
83bool rodata_full __ro_after_init = true;
84
85bool can_set_direct_map(void)
86{
87 /*
88 * rodata_full, DEBUG_PAGEALLOC and a Realm guest all require linear
89 * map to be mapped at page granularity, so that it is possible to
90 * protect/unprotect single pages.
91 *
92 * KFENCE pool requires page-granular mapping if initialized late.
93 *
94 * Realms need to make pages shared/protected at page granularity.
95 */
96 return rodata_full || debug_pagealloc_enabled() ||
97 arm64_kfence_can_set_direct_map() || is_realm_world();
98}
99
100static int update_range_prot(unsigned long start, unsigned long size,
101 pgprot_t set_mask, pgprot_t clear_mask)
102{
103 struct page_change_data data;
104 int ret;
105
106 data.set_mask = set_mask;
107 data.clear_mask = clear_mask;
108
109 ret = split_kernel_leaf_mapping(start, start + size);
110 if (WARN_ON_ONCE(ret))
111 return ret;
112
113 arch_enter_lazy_mmu_mode();
114
115 /*
116 * The caller must ensure that the range we are operating on does not
117 * partially overlap a block mapping, or a cont mapping. Any such case
118 * must be eliminated by splitting the mapping.
119 */
120 ret = walk_kernel_page_table_range_lockless(start, start + size,
121 &pageattr_ops, NULL, &data);
122 arch_leave_lazy_mmu_mode();
123
124 return ret;
125}
126
127static int __change_memory_common(unsigned long start, unsigned long size,
128 pgprot_t set_mask, pgprot_t clear_mask)
129{
130 int ret;
131
132 ret = update_range_prot(start, size, set_mask, clear_mask);
133
134 /*
135 * If the memory is being made valid without changing any other bits
136 * then a TLBI isn't required as a non-valid entry cannot be cached in
137 * the TLB.
138 */
139 if (pgprot_val(set_mask) != PTE_VALID || pgprot_val(clear_mask))
140 flush_tlb_kernel_range(start, start + size);
141 return ret;
142}
143
144static int change_memory_common(unsigned long addr, int numpages,
145 pgprot_t set_mask, pgprot_t clear_mask)
146{
147 unsigned long start = addr;
148 unsigned long size = PAGE_SIZE * numpages;
149 unsigned long end = start + size;
150 struct vm_struct *area;
151 int ret;
152
153 if (!PAGE_ALIGNED(addr)) {
154 start &= PAGE_MASK;
155 end = start + size;
156 WARN_ON_ONCE(1);
157 }
158
159 /*
160 * Kernel VA mappings are always live, and splitting live section
161 * mappings into page mappings may cause TLB conflicts. This means
162 * we have to ensure that changing the permission bits of the range
163 * we are operating on does not result in such splitting.
164 *
165 * Let's restrict ourselves to mappings created by vmalloc (or vmap).
166 * Disallow VM_ALLOW_HUGE_VMAP mappings to guarantee that only page
167 * mappings are updated and splitting is never needed.
168 *
169 * So check whether the [addr, addr + size) interval is entirely
170 * covered by precisely one VM area that has the VM_ALLOC flag set.
171 */
172 area = find_vm_area((void *)addr);
173 if (!area ||
174 ((unsigned long)kasan_reset_tag((void *)end) >
175 (unsigned long)kasan_reset_tag(area->addr) + area->size) ||
176 ((area->flags & (VM_ALLOC | VM_ALLOW_HUGE_VMAP)) != VM_ALLOC))
177 return -EINVAL;
178
179 if (!numpages)
180 return 0;
181
182 /*
183 * If we are manipulating read-only permissions, apply the same
184 * change to the linear mapping of the pages that back this VM area.
185 */
186 if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
187 pgprot_val(clear_mask) == PTE_RDONLY)) {
188 unsigned long idx = ((unsigned long)kasan_reset_tag((void *)start) -
189 (unsigned long)kasan_reset_tag(area->addr))
190 >> PAGE_SHIFT;
191 for (; numpages; idx++, numpages--) {
192 ret = __change_memory_common((u64)page_address(area->pages[idx]),
193 PAGE_SIZE, set_mask, clear_mask);
194 if (ret)
195 return ret;
196 }
197 }
198
199 /*
200 * Get rid of potentially aliasing lazily unmapped vm areas that may
201 * have permissions set that deviate from the ones we are setting here.
202 */
203 vm_unmap_aliases();
204
205 return __change_memory_common(start, size, set_mask, clear_mask);
206}
207
208int set_memory_ro(unsigned long addr, int numpages)
209{
210 return change_memory_common(addr, numpages,
211 __pgprot(PTE_RDONLY),
212 __pgprot(PTE_WRITE));
213}
214
215int set_memory_rw(unsigned long addr, int numpages)
216{
217 return change_memory_common(addr, numpages,
218 __pgprot(PTE_WRITE),
219 __pgprot(PTE_RDONLY));
220}
221
222int set_memory_nx(unsigned long addr, int numpages)
223{
224 return change_memory_common(addr, numpages,
225 __pgprot(PTE_PXN),
226 __pgprot(PTE_MAYBE_GP));
227}
228
229int set_memory_x(unsigned long addr, int numpages)
230{
231 return change_memory_common(addr, numpages,
232 __pgprot(PTE_MAYBE_GP),
233 __pgprot(PTE_PXN));
234}
235
236int set_memory_valid(unsigned long addr, int numpages, int enable)
237{
238 if (enable)
239 return __change_memory_common(addr, PAGE_SIZE * numpages,
240 __pgprot(PTE_VALID),
241 __pgprot(0));
242 else
243 return __change_memory_common(addr, PAGE_SIZE * numpages,
244 __pgprot(0),
245 __pgprot(PTE_VALID));
246}
247
248int set_direct_map_invalid_noflush(struct page *page)
249{
250 pgprot_t clear_mask = __pgprot(PTE_VALID);
251 pgprot_t set_mask = __pgprot(0);
252
253 if (!can_set_direct_map())
254 return 0;
255
256 return update_range_prot((unsigned long)page_address(page),
257 PAGE_SIZE, set_mask, clear_mask);
258}
259
260int set_direct_map_default_noflush(struct page *page)
261{
262 pgprot_t set_mask = __pgprot(PTE_VALID | PTE_WRITE);
263 pgprot_t clear_mask = __pgprot(PTE_RDONLY);
264
265 if (!can_set_direct_map())
266 return 0;
267
268 return update_range_prot((unsigned long)page_address(page),
269 PAGE_SIZE, set_mask, clear_mask);
270}
271
272static int __set_memory_enc_dec(unsigned long addr,
273 int numpages,
274 bool encrypt)
275{
276 unsigned long set_prot = 0, clear_prot = 0;
277 phys_addr_t start, end;
278 int ret;
279
280 if (!is_realm_world())
281 return 0;
282
283 if (!__is_lm_address(addr))
284 return -EINVAL;
285
286 start = __virt_to_phys(addr);
287 end = start + numpages * PAGE_SIZE;
288
289 if (encrypt)
290 clear_prot = PROT_NS_SHARED;
291 else
292 set_prot = PROT_NS_SHARED;
293
294 /*
295 * Break the mapping before we make any changes to avoid stale TLB
296 * entries or Synchronous External Aborts caused by RIPAS_EMPTY
297 */
298 ret = __change_memory_common(addr, PAGE_SIZE * numpages,
299 __pgprot(set_prot),
300 __pgprot(clear_prot | PTE_VALID));
301
302 if (ret)
303 return ret;
304
305 if (encrypt)
306 ret = rsi_set_memory_range_protected(start, end);
307 else
308 ret = rsi_set_memory_range_shared(start, end);
309
310 if (ret)
311 return ret;
312
313 return __change_memory_common(addr, PAGE_SIZE * numpages,
314 __pgprot(PTE_VALID),
315 __pgprot(0));
316}
317
318static int realm_set_memory_encrypted(unsigned long addr, int numpages)
319{
320 int ret = __set_memory_enc_dec(addr, numpages, true);
321
322 /*
323 * If the request to change state fails, then the only sensible cause
324 * of action for the caller is to leak the memory
325 */
326 WARN(ret, "Failed to encrypt memory, %d pages will be leaked",
327 numpages);
328
329 return ret;
330}
331
332static int realm_set_memory_decrypted(unsigned long addr, int numpages)
333{
334 int ret = __set_memory_enc_dec(addr, numpages, false);
335
336 WARN(ret, "Failed to decrypt memory, %d pages will be leaked",
337 numpages);
338
339 return ret;
340}
341
342static const struct arm64_mem_crypt_ops realm_crypt_ops = {
343 .encrypt = realm_set_memory_encrypted,
344 .decrypt = realm_set_memory_decrypted,
345};
346
347int realm_register_memory_enc_ops(void)
348{
349 return arm64_mem_crypt_ops_register(&realm_crypt_ops);
350}
351
/*
 * Mark @nr linear map pages starting at @page valid or not-valid.
 * Returns 0 without doing anything when can_set_direct_map() is false,
 * otherwise the result of set_memory_valid().
 *
 * NOTE(review): despite the _noflush suffix, set_memory_valid() goes
 * through __change_memory_common(), which flushes the TLB whenever the
 * change is anything other than purely setting PTE_VALID.
 */
int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid)
{
	unsigned long addr = (unsigned long)page_address(page);

	if (can_set_direct_map())
		return set_memory_valid(addr, nr, valid);

	return 0;
}
361
#ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * Mark @numpages linear map pages starting at @page valid (@enable != 0)
 * or not-valid (@enable == 0). Nothing is done when can_set_direct_map()
 * is false. The result of set_memory_valid() is discarded.
 *
 * Apart from the missing return value this does the same thing as
 * set_direct_map_valid_noflush().
 *
 * Unify? Explain the conceptual differences?
 */
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
	if (can_set_direct_map())
		set_memory_valid((unsigned long)page_address(page), numpages,
				 enable);
}
#endif /* CONFIG_DEBUG_PAGEALLOC */
377
378/*
379 * This function is used to determine if a linear map page has been marked as
380 * not-valid. Walk the page table and check the PTE_VALID bit.
381 *
382 * Because this is only called on the kernel linear map, p?d_sect() implies
383 * p?d_present(). When debug_pagealloc is enabled, sections mappings are
384 * disabled.
385 */
386bool kernel_page_present(struct page *page)
387{
388 pgd_t *pgdp;
389 p4d_t *p4dp;
390 pud_t *pudp, pud;
391 pmd_t *pmdp, pmd;
392 pte_t *ptep;
393 unsigned long addr = (unsigned long)page_address(page);
394
395 pgdp = pgd_offset_k(addr);
396 if (pgd_none(READ_ONCE(*pgdp)))
397 return false;
398
399 p4dp = p4d_offset(pgdp, addr);
400 if (p4d_none(READ_ONCE(*p4dp)))
401 return false;
402
403 pudp = pud_offset(p4dp, addr);
404 pud = READ_ONCE(*pudp);
405 if (pud_none(pud))
406 return false;
407 if (pud_sect(pud))
408 return true;
409
410 pmdp = pmd_offset(pudp, addr);
411 pmd = READ_ONCE(*pmdp);
412 if (pmd_none(pmd))
413 return false;
414 if (pmd_sect(pmd))
415 return true;
416
417 ptep = pte_offset_kernel(pmdp, addr);
418 return pte_valid(__ptep_get(ptep));
419}