Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * linux/drivers/char/mem.c
4 *
5 * Copyright (C) 1991, 1992 Linus Torvalds
6 *
7 * Added devfs support.
8 * Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
9 * Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
10 */
11
12#include <linux/mm.h>
13#include <linux/miscdevice.h>
14#include <linux/slab.h>
15#include <linux/vmalloc.h>
16#include <linux/mman.h>
17#include <linux/random.h>
18#include <linux/init.h>
19#include <linux/tty.h>
20#include <linux/capability.h>
21#include <linux/ptrace.h>
22#include <linux/device.h>
23#include <linux/highmem.h>
24#include <linux/backing-dev.h>
25#include <linux/shmem_fs.h>
26#include <linux/splice.h>
27#include <linux/pfn.h>
28#include <linux/export.h>
29#include <linux/io.h>
30#include <linux/uio.h>
31#include <linux/uaccess.h>
32#include <linux/security.h>
33
/* Minor numbers of the devlist[] entries that this file refers to by name. */
#define DEVMEM_MINOR	1	/* the "mem" entry */
#define DEVPORT_MINOR	4	/* the "port" entry */
36
37static inline unsigned long size_inside_page(unsigned long start,
38 unsigned long size)
39{
40 unsigned long sz;
41
42 sz = PAGE_SIZE - (start & (PAGE_SIZE - 1));
43
44 return min(sz, size);
45}
46
47#ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE
48static inline int valid_phys_addr_range(phys_addr_t addr, size_t count)
49{
50 return addr + count <= __pa(high_memory);
51}
52
53static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
54{
55 return 1;
56}
57#endif
58
/*
 * Report whether the page frame @pfn may be accessed through this driver.
 *
 * With CONFIG_STRICT_DEVMEM the decision is delegated to
 * devmem_is_allowed(); otherwise every page is allowed (1). The callers in
 * this file treat 0 as "denied" and 2 as "restricted" (reads see zeros,
 * writes are skipped).
 */
static inline int page_is_allowed(unsigned long pfn)
{
#ifdef CONFIG_STRICT_DEVMEM
	return devmem_is_allowed(pfn);
#else
	return 1;
#endif
}
70
71static inline bool should_stop_iteration(void)
72{
73 if (need_resched())
74 cond_resched();
75 return signal_pending(current);
76}
77
78/*
79 * This funcion reads the *physical* memory. The f_pos points directly to the
80 * memory location.
81 */
82static ssize_t read_mem(struct file *file, char __user *buf,
83 size_t count, loff_t *ppos)
84{
85 phys_addr_t p = *ppos;
86 ssize_t read, sz;
87 void *ptr;
88 char *bounce;
89 int err;
90
91 if (p != *ppos)
92 return 0;
93
94 if (!valid_phys_addr_range(p, count))
95 return -EFAULT;
96 read = 0;
97#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
98 /* we don't have page 0 mapped on sparc and m68k.. */
99 if (p < PAGE_SIZE) {
100 sz = size_inside_page(p, count);
101 if (sz > 0) {
102 if (clear_user(buf, sz))
103 return -EFAULT;
104 buf += sz;
105 p += sz;
106 count -= sz;
107 read += sz;
108 }
109 }
110#endif
111
112 bounce = kmalloc(PAGE_SIZE, GFP_KERNEL);
113 if (!bounce)
114 return -ENOMEM;
115
116 while (count > 0) {
117 unsigned long remaining;
118 int allowed, probe;
119
120 sz = size_inside_page(p, count);
121
122 err = -EPERM;
123 allowed = page_is_allowed(p >> PAGE_SHIFT);
124 if (!allowed)
125 goto failed;
126
127 err = -EFAULT;
128 if (allowed == 2) {
129 /* Show zeros for restricted memory. */
130 remaining = clear_user(buf, sz);
131 } else {
132 /*
133 * On ia64 if a page has been mapped somewhere as
134 * uncached, then it must also be accessed uncached
135 * by the kernel or data corruption may occur.
136 */
137 ptr = xlate_dev_mem_ptr(p);
138 if (!ptr)
139 goto failed;
140
141 probe = copy_from_kernel_nofault(bounce, ptr, sz);
142 unxlate_dev_mem_ptr(p, ptr);
143 if (probe)
144 goto failed;
145
146 remaining = copy_to_user(buf, bounce, sz);
147 }
148
149 if (remaining)
150 goto failed;
151
152 buf += sz;
153 p += sz;
154 count -= sz;
155 read += sz;
156 if (should_stop_iteration())
157 break;
158 }
159 kfree(bounce);
160
161 *ppos += read;
162 return read;
163
164failed:
165 kfree(bounce);
166 return err;
167}
168
169static ssize_t write_mem(struct file *file, const char __user *buf,
170 size_t count, loff_t *ppos)
171{
172 phys_addr_t p = *ppos;
173 ssize_t written, sz;
174 unsigned long copied;
175 void *ptr;
176
177 if (p != *ppos)
178 return -EFBIG;
179
180 if (!valid_phys_addr_range(p, count))
181 return -EFAULT;
182
183 written = 0;
184
185#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
186 /* we don't have page 0 mapped on sparc and m68k.. */
187 if (p < PAGE_SIZE) {
188 sz = size_inside_page(p, count);
189 /* Hmm. Do something? */
190 buf += sz;
191 p += sz;
192 count -= sz;
193 written += sz;
194 }
195#endif
196
197 while (count > 0) {
198 int allowed;
199
200 sz = size_inside_page(p, count);
201
202 allowed = page_is_allowed(p >> PAGE_SHIFT);
203 if (!allowed)
204 return -EPERM;
205
206 /* Skip actual writing when a page is marked as restricted. */
207 if (allowed == 1) {
208 /*
209 * On ia64 if a page has been mapped somewhere as
210 * uncached, then it must also be accessed uncached
211 * by the kernel or data corruption may occur.
212 */
213 ptr = xlate_dev_mem_ptr(p);
214 if (!ptr) {
215 if (written)
216 break;
217 return -EFAULT;
218 }
219
220 copied = copy_from_user(ptr, buf, sz);
221 unxlate_dev_mem_ptr(p, ptr);
222 if (copied) {
223 written += sz - copied;
224 if (written)
225 break;
226 return -EFAULT;
227 }
228 }
229
230 buf += sz;
231 p += sz;
232 count -= sz;
233 written += sz;
234 if (should_stop_iteration())
235 break;
236 }
237
238 *ppos += written;
239 return written;
240}
241
242int __weak phys_mem_access_prot_allowed(struct file *file,
243 unsigned long pfn, unsigned long size, pgprot_t *vma_prot)
244{
245 return 1;
246}
247
248#ifndef __HAVE_PHYS_MEM_ACCESS_PROT
249
250/*
251 * Architectures vary in how they handle caching for addresses
252 * outside of main memory.
253 *
254 */
255#ifdef pgprot_noncached
256static int uncached_access(struct file *file, phys_addr_t addr)
257{
258 /*
259 * Accessing memory above the top the kernel knows about or through a
260 * file pointer
261 * that was marked O_DSYNC will be done non-cached.
262 */
263 if (file->f_flags & O_DSYNC)
264 return 1;
265 return addr >= __pa(high_memory);
266}
267#endif
268
269static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
270 unsigned long size, pgprot_t vma_prot)
271{
272#ifdef pgprot_noncached
273 phys_addr_t offset = pfn << PAGE_SHIFT;
274
275 if (uncached_access(file, offset))
276 return pgprot_noncached(vma_prot);
277#endif
278 return vma_prot;
279}
280#endif
281
282#ifndef CONFIG_MMU
283static unsigned long get_unmapped_area_mem(struct file *file,
284 unsigned long addr,
285 unsigned long len,
286 unsigned long pgoff,
287 unsigned long flags)
288{
289 if (!valid_mmap_phys_addr_range(pgoff, len))
290 return (unsigned long) -EINVAL;
291 return pgoff << PAGE_SHIFT;
292}
293
294/* permit direct mmap, for read, write or exec */
295static unsigned memory_mmap_capabilities(struct file *file)
296{
297 return NOMMU_MAP_DIRECT |
298 NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC;
299}
300
301static unsigned zero_mmap_capabilities(struct file *file)
302{
303 return NOMMU_MAP_COPY;
304}
305
306/* can't do an in-place private mapping if there's no MMU */
307static inline int private_mapping_ok(struct vm_area_desc *desc)
308{
309 return is_nommu_shared_mapping(desc->vm_flags);
310}
311#else
312
313static inline int private_mapping_ok(struct vm_area_desc *desc)
314{
315 return 1;
316}
317#endif
318
319static const struct vm_operations_struct mmap_mem_ops = {
320#ifdef CONFIG_HAVE_IOREMAP_PROT
321 .access = generic_access_phys
322#endif
323};
324
/*
 * Error hook for the remap action set up by mmap_mem_prepare(): whatever
 * error @err the remap produced, report -EAGAIN instead.
 */
static int mmap_filter_error(int err)
{
	const int filtered = -EAGAIN;

	return filtered;
}
329
330static int mmap_mem_prepare(struct vm_area_desc *desc)
331{
332 struct file *file = desc->file;
333 const size_t size = vma_desc_size(desc);
334 const phys_addr_t offset = (phys_addr_t)desc->pgoff << PAGE_SHIFT;
335
336 /* Does it even fit in phys_addr_t? */
337 if (offset >> PAGE_SHIFT != desc->pgoff)
338 return -EINVAL;
339
340 /* It's illegal to wrap around the end of the physical address space. */
341 if (offset + (phys_addr_t)size - 1 < offset)
342 return -EINVAL;
343
344 if (!valid_mmap_phys_addr_range(desc->pgoff, size))
345 return -EINVAL;
346
347 if (!private_mapping_ok(desc))
348 return -ENOSYS;
349
350 if (!range_is_allowed(desc->pgoff, size))
351 return -EPERM;
352
353 if (!phys_mem_access_prot_allowed(file, desc->pgoff, size,
354 &desc->page_prot))
355 return -EINVAL;
356
357 desc->page_prot = phys_mem_access_prot(file, desc->pgoff,
358 size,
359 desc->page_prot);
360
361 desc->vm_ops = &mmap_mem_ops;
362
363 /* Remap-pfn-range will mark the range VM_IO. */
364 mmap_action_remap_full(desc, desc->pgoff);
365 /* We filter remap errors to -EAGAIN. */
366 desc->action.error_hook = mmap_filter_error;
367
368 return 0;
369}
370
#ifdef CONFIG_DEVPORT
/*
 * Read bytes from consecutive I/O ports, starting at port *ppos, into @buf.
 * Stops after @count bytes or on reaching port 65536, whichever is first.
 *
 * Returns the number of bytes stored, or -EFAULT if @buf is not accessible.
 */
static ssize_t read_port(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	unsigned long port = *ppos;
	char __user *out = buf;

	if (!access_ok(buf, count))
		return -EFAULT;

	for (; count > 0 && port < 65536; count--, port++, out++) {
		if (__put_user(inb(port), out) < 0)
			return -EFAULT;
	}

	*ppos = port;
	return out - buf;
}

/*
 * Write bytes from @buf to consecutive I/O ports, starting at port *ppos.
 * Stops after @count bytes or on reaching port 65536, whichever is first.
 *
 * Returns the number of bytes written; -EFAULT only if nothing could be
 * fetched from @buf at all.
 */
static ssize_t write_port(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	unsigned long port = *ppos;
	const char __user *in = buf;

	if (!access_ok(buf, count))
		return -EFAULT;

	for (; count > 0 && port < 65536; count--, port++, in++) {
		char c;

		if (__get_user(c, in)) {
			/* A fault after some progress is a short write. */
			if (in <= buf)
				return -EFAULT;
			break;
		}
		outb(c, port);
	}

	*ppos = port;
	return in - buf;
}
#endif
414
415static ssize_t read_null(struct file *file, char __user *buf,
416 size_t count, loff_t *ppos)
417{
418 return 0;
419}
420
421static ssize_t write_null(struct file *file, const char __user *buf,
422 size_t count, loff_t *ppos)
423{
424 return count;
425}
426
427static ssize_t read_iter_null(struct kiocb *iocb, struct iov_iter *to)
428{
429 return 0;
430}
431
432static ssize_t write_iter_null(struct kiocb *iocb, struct iov_iter *from)
433{
434 size_t count = iov_iter_count(from);
435 iov_iter_advance(from, count);
436 return count;
437}
438
439static int pipe_to_null(struct pipe_inode_info *info, struct pipe_buffer *buf,
440 struct splice_desc *sd)
441{
442 return sd->len;
443}
444
445static ssize_t splice_write_null(struct pipe_inode_info *pipe, struct file *out,
446 loff_t *ppos, size_t len, unsigned int flags)
447{
448 return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
449}
450
/* uring_cmd for the "null" device: return 0 for every command. */
static int uring_cmd_null(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
	(void)ioucmd;
	(void)issue_flags;

	return 0;
}
455
456static ssize_t read_iter_zero(struct kiocb *iocb, struct iov_iter *iter)
457{
458 size_t written = 0;
459
460 while (iov_iter_count(iter)) {
461 size_t chunk = iov_iter_count(iter), n;
462
463 if (chunk > PAGE_SIZE)
464 chunk = PAGE_SIZE; /* Just for latency reasons */
465 n = iov_iter_zero(chunk, iter);
466 if (!n && iov_iter_count(iter))
467 return written ? written : -EFAULT;
468 written += n;
469 if (signal_pending(current))
470 return written ? written : -ERESTARTSYS;
471 if (!need_resched())
472 continue;
473 if (iocb->ki_flags & IOCB_NOWAIT)
474 return written ? written : -EAGAIN;
475 cond_resched();
476 }
477 return written;
478}
479
480static ssize_t read_zero(struct file *file, char __user *buf,
481 size_t count, loff_t *ppos)
482{
483 size_t cleared = 0;
484
485 while (count) {
486 size_t chunk = min_t(size_t, count, PAGE_SIZE);
487 size_t left;
488
489 left = clear_user(buf + cleared, chunk);
490 if (unlikely(left)) {
491 cleared += (chunk - left);
492 if (!cleared)
493 return -EFAULT;
494 break;
495 }
496 cleared += chunk;
497 count -= chunk;
498
499 if (signal_pending(current))
500 break;
501 cond_resched();
502 }
503
504 return cleared;
505}
506
/*
 * Success hook installed by mmap_zero_prepare() for non-shared mappings.
 * Always returns 0.
 */
static int mmap_zero_private_success(const struct vm_area_struct *vma)
{
	struct vm_area_struct *anon_vma = (struct vm_area_struct *)vma;

	/*
	 * This is a highly unique situation where we mark a MAP_PRIVATE mapping
	 * of /dev/zero anonymous, despite it not being.
	 */
	vma_set_anonymous(anon_vma);

	return 0;
}
517
518static int mmap_zero_prepare(struct vm_area_desc *desc)
519{
520#ifndef CONFIG_MMU
521 return -ENOSYS;
522#endif
523 if (desc->vm_flags & VM_SHARED)
524 return shmem_zero_setup_desc(desc);
525
526 desc->action.success_hook = mmap_zero_private_success;
527 return 0;
528}
529
/*
 * get_unmapped_area hook for the "zero" device.
 *
 * Without an MMU this returns -ENOSYS. With one, the search is delegated to
 * shmem for MAP_SHARED requests and to the THP-aware (or plain) mm helper
 * for everything else.
 */
static unsigned long get_unmapped_area_zero(struct file *file,
				unsigned long addr, unsigned long len,
				unsigned long pgoff, unsigned long flags)
{
#ifndef CONFIG_MMU
	return -ENOSYS;
#else
	if (!(flags & MAP_SHARED)) {
		/*
		 * flags & MAP_PRIVATE: with no shmem object beneath it,
		 * attempt to map aligned to huge page size if possible,
		 * otherwise we fall back to system page size mappings.
		 */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
		return thp_get_unmapped_area(file, addr, len, pgoff, flags);
#else
		return mm_get_unmapped_area(file, addr, len, pgoff, flags);
#endif
	}

	/*
	 * mmap_zero_prepare() will call shmem_zero_setup_desc() to create a
	 * file, so use shmem's get_unmapped_area in case it can be huge; and
	 * pass NULL for file as in mmap.c's get_unmapped_area(), so as not to
	 * confuse shmem with our handle on "/dev/zero".
	 */
	return shmem_get_unmapped_area(NULL, addr, len, pgoff, flags);
#endif /* CONFIG_MMU */
}
565
566static ssize_t write_full(struct file *file, const char __user *buf,
567 size_t count, loff_t *ppos)
568{
569 return -ENOSPC;
570}
571
572/*
573 * Special lseek() function for /dev/null and /dev/zero. Most notably, you
574 * can fopen() both devices with "a" now. This was previously impossible.
575 * -- SRB.
576 */
577static loff_t null_lseek(struct file *file, loff_t offset, int orig)
578{
579 return file->f_pos = 0;
580}
581
582/*
583 * The memory devices use the full 32/64 bits of the offset, and so we cannot
584 * check against negative addresses: they are ok. The return value is weird,
585 * though, in that case (0).
586 *
587 * also note that seeking relative to the "end of file" isn't supported:
588 * it has no meaning, so it returns -EINVAL.
589 */
590static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
591{
592 loff_t ret;
593
594 inode_lock(file_inode(file));
595 switch (orig) {
596 case SEEK_CUR:
597 offset += file->f_pos;
598 fallthrough;
599 case SEEK_SET:
600 /* to avoid userland mistaking f_pos=-9 as -EBADF=-9 */
601 if ((unsigned long long)offset >= -MAX_ERRNO) {
602 ret = -EOVERFLOW;
603 break;
604 }
605 file->f_pos = offset;
606 ret = file->f_pos;
607 force_successful_syscall_return();
608 break;
609 default:
610 ret = -EINVAL;
611 }
612 inode_unlock(file_inode(file));
613 return ret;
614}
615
616static int open_port(struct inode *inode, struct file *filp)
617{
618 int rc;
619
620 if (!capable(CAP_SYS_RAWIO))
621 return -EPERM;
622
623 rc = security_locked_down(LOCKDOWN_DEV_MEM);
624 if (rc)
625 return rc;
626
627 if (iminor(inode) != DEVMEM_MINOR)
628 return 0;
629
630 /*
631 * Use a unified address space to have a single point to manage
632 * revocations when drivers want to take over a /dev/mem mapped
633 * range.
634 */
635 filp->f_mapping = iomem_get_mapping();
636
637 return 0;
638}
639
/*
 * Handlers shared between devices: the "zero" and "full" devices reuse the
 * "null" seek/write/splice paths, and "mem" reuses the "port" open hook.
 */
#define open_mem		open_port
#define zero_lseek		null_lseek
#define full_lseek		null_lseek
#define write_zero		write_null
#define write_iter_zero		write_iter_null
#define splice_write_zero	splice_write_null
646
647static const struct file_operations __maybe_unused mem_fops = {
648 .llseek = memory_lseek,
649 .read = read_mem,
650 .write = write_mem,
651 .mmap_prepare = mmap_mem_prepare,
652 .open = open_mem,
653#ifndef CONFIG_MMU
654 .get_unmapped_area = get_unmapped_area_mem,
655 .mmap_capabilities = memory_mmap_capabilities,
656#endif
657 .fop_flags = FOP_UNSIGNED_OFFSET,
658};
659
660static const struct file_operations null_fops = {
661 .llseek = null_lseek,
662 .read = read_null,
663 .write = write_null,
664 .read_iter = read_iter_null,
665 .write_iter = write_iter_null,
666 .splice_write = splice_write_null,
667 .uring_cmd = uring_cmd_null,
668};
669
#ifdef CONFIG_DEVPORT
/* File operations of the "port" device (see devlist[]). */
static const struct file_operations port_fops = {
	.open		= open_port,
	.llseek		= memory_lseek,
	.read		= read_port,
	.write		= write_port,
};
#endif
678
679static const struct file_operations zero_fops = {
680 .llseek = zero_lseek,
681 .write = write_zero,
682 .read_iter = read_iter_zero,
683 .read = read_zero,
684 .write_iter = write_iter_zero,
685 .splice_read = copy_splice_read,
686 .splice_write = splice_write_zero,
687 .mmap_prepare = mmap_zero_prepare,
688 .get_unmapped_area = get_unmapped_area_zero,
689#ifndef CONFIG_MMU
690 .mmap_capabilities = zero_mmap_capabilities,
691#endif
692};
693
694static const struct file_operations full_fops = {
695 .llseek = full_lseek,
696 .read_iter = read_iter_zero,
697 .write = write_full,
698 .splice_read = copy_splice_read,
699};
700
701static const struct memdev {
702 const char *name;
703 const struct file_operations *fops;
704 fmode_t fmode;
705 umode_t mode;
706} devlist[] = {
707#ifdef CONFIG_DEVMEM
708 [DEVMEM_MINOR] = { "mem", &mem_fops, 0, 0 },
709#endif
710 [3] = { "null", &null_fops, FMODE_NOWAIT, 0666 },
711#ifdef CONFIG_DEVPORT
712 [4] = { "port", &port_fops, 0, 0 },
713#endif
714 [5] = { "zero", &zero_fops, FMODE_NOWAIT, 0666 },
715 [7] = { "full", &full_fops, 0, 0666 },
716 [8] = { "random", &random_fops, FMODE_NOWAIT, 0666 },
717 [9] = { "urandom", &urandom_fops, FMODE_NOWAIT, 0666 },
718#ifdef CONFIG_PRINTK
719 [11] = { "kmsg", &kmsg_fops, 0, 0644 },
720#endif
721};
722
723static int memory_open(struct inode *inode, struct file *filp)
724{
725 int minor;
726 const struct memdev *dev;
727
728 minor = iminor(inode);
729 if (minor >= ARRAY_SIZE(devlist))
730 return -ENXIO;
731
732 dev = &devlist[minor];
733 if (!dev->fops)
734 return -ENXIO;
735
736 filp->f_op = dev->fops;
737 filp->f_mode |= dev->fmode;
738
739 if (dev->fops->open)
740 return dev->fops->open(inode, filp);
741
742 return 0;
743}
744
745static const struct file_operations memory_fops = {
746 .open = memory_open,
747 .llseek = noop_llseek,
748};
749
750static char *mem_devnode(const struct device *dev, umode_t *mode)
751{
752 if (mode && devlist[MINOR(dev->devt)].mode)
753 *mode = devlist[MINOR(dev->devt)].mode;
754 return NULL;
755}
756
757static const struct class mem_class = {
758 .name = "mem",
759 .devnode = mem_devnode,
760};
761
762static int __init chr_dev_init(void)
763{
764 int retval;
765 int minor;
766
767 if (register_chrdev(MEM_MAJOR, "mem", &memory_fops))
768 printk("unable to get major %d for memory devs\n", MEM_MAJOR);
769
770 retval = class_register(&mem_class);
771 if (retval)
772 return retval;
773
774 for (minor = 1; minor < ARRAY_SIZE(devlist); minor++) {
775 if (!devlist[minor].name)
776 continue;
777
778 /*
779 * Create /dev/port?
780 */
781 if ((minor == DEVPORT_MINOR) && !arch_has_dev_port())
782 continue;
783
784 device_create(&mem_class, NULL, MKDEV(MEM_MAJOR, minor),
785 NULL, devlist[minor].name);
786 }
787
788 return tty_init();
789}
790
791fs_initcall(chr_dev_init);