Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'trace-ringbuffer-v6.15-3' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull ring-buffer updates from Steven Rostedt:
"Persistent buffer cleanups and simplifications.

It was mistakenly assumed that the physical memory returned from "reserve_mem"
had to be vmap()'d to get to it from a virtual address. But
reserve_mem already maps the memory to the virtual address of the
kernel so a simple phys_to_virt() can be used to get to the virtual
address from the physical memory returned by "reserve_mem". With this
newfound knowledge, the code can be cleaned up and simplified.

- Enforce that the persistent memory is page aligned

As the buffers using the persistent memory are all going to be
mapped via pages, make sure that the memory given to the tracing
infrastructure is page aligned. If it is not, it will print a
warning and fail to map the buffer.

- Use phys_to_virt() to get the virtual address from reserve_mem

Instead of calling vmap() on the physical memory returned from
"reserve_mem", use phys_to_virt() instead.

As the memory returned by "memmap" or any other means where a
physical address is given to the tracing infrastructure, it still
needs to be vmap()'d. Since this memory can never be returned to
the buddy allocator nor should it ever be memory mapped to user
space, flag this buffer and up the ref count. The ref count will
keep it from ever being freed, and the flag will prevent it from
ever being memory mapped to user space.

- Use vmap_page_range() for memmap virtual address mapping

For the memmap buffer, instead of allocating an array of struct
pages, assigning them to the contiguous physical memory and then
passing that to vmap(), use vmap_page_range() instead.

- Replace flush_dcache_folio() with flush_kernel_vmap_range()

Instead of calling virt_to_folio() and passing that to
flush_dcache_folio(), just call flush_kernel_vmap_range() directly.
This also fixes a bug where if a subbuffer was bigger than
PAGE_SIZE only the PAGE_SIZE portion would be flushed"

* tag 'trace-ringbuffer-v6.15-3' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
ring-buffer: Use flush_kernel_vmap_range() over flush_dcache_folio()
tracing: Use vmap_page_range() to map memmap ring buffer
tracing: Have reserve_mem use phys_to_virt() and separate from memmap buffer
tracing: Enforce the persistent ring buffer to be page aligned

+51 -27
+2
Documentation/admin-guide/kernel-parameters.txt
··· 7288 7288 This is just one of many ways that can clear memory. Make sure your system 7289 7289 keeps the content of memory across reboots before relying on this option. 7290 7290 7291 + NB: Both the mapped address and size must be page aligned for the architecture. 7292 + 7291 7293 See also Documentation/trace/debugging.rst 7292 7294 7293 7295
+2
Documentation/trace/debugging.rst
··· 136 136 preserved. Switching to a different kernel version may find a different 137 137 layout and mark the buffer as invalid. 138 138 139 + NB: Both the mapped address and size must be page aligned for the architecture. 140 + 139 141 Using trace_printk() in the boot instance 140 142 ----------------------------------------- 141 143 By default, the content of trace_printk() goes into the top level tracing
+3 -2
kernel/trace/ring_buffer.c
··· 6016 6016 meta->read = cpu_buffer->read; 6017 6017 6018 6018 /* Some archs do not have data cache coherency between kernel and user-space */ 6019 - flush_dcache_folio(virt_to_folio(cpu_buffer->meta_page)); 6019 + flush_kernel_vmap_range(cpu_buffer->meta_page, PAGE_SIZE); 6020 6020 } 6021 6021 6022 6022 static void ··· 7319 7319 7320 7320 out: 7321 7321 /* Some archs do not have data cache coherency between kernel and user-space */ 7322 - flush_dcache_folio(virt_to_folio(cpu_buffer->reader_page->page)); 7322 + flush_kernel_vmap_range(cpu_buffer->reader_page->page, 7323 + buffer->subbuf_size + BUF_PAGE_HDR_SIZE); 7323 7324 7324 7325 rb_update_meta_page(cpu_buffer); 7325 7326
+43 -25
kernel/trace/trace.c
··· 50 50 #include <linux/irq_work.h> 51 51 #include <linux/workqueue.h> 52 52 #include <linux/sort.h> 53 + #include <linux/io.h> /* vmap_page_range() */ 53 54 54 55 #include <asm/setup.h> /* COMMAND_LINE_SIZE */ 55 56 ··· 8501 8500 struct trace_iterator *iter = &info->iter; 8502 8501 int ret = 0; 8503 8502 8503 + /* A memmap'ed buffer is not supported for user space mmap */ 8504 + if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP) 8505 + return -ENODEV; 8506 + 8504 8507 /* Currently the boot mapped buffer is not supported for mmap */ 8505 8508 if (iter->tr->flags & TRACE_ARRAY_FL_BOOT) 8506 8509 return -ENODEV; ··· 9614 9609 #ifdef CONFIG_TRACER_MAX_TRACE 9615 9610 free_trace_buffer(&tr->max_buffer); 9616 9611 #endif 9617 - 9618 - if (tr->range_addr_start) 9619 - vunmap((void *)tr->range_addr_start); 9620 9612 } 9621 9613 9622 9614 static void init_trace_flags_index(struct trace_array *tr) ··· 9806 9804 return ret; 9807 9805 } 9808 9806 9809 - static u64 map_pages(u64 start, u64 size) 9807 + static u64 map_pages(unsigned long start, unsigned long size) 9810 9808 { 9811 - struct page **pages; 9812 - phys_addr_t page_start; 9813 - unsigned int page_count; 9814 - unsigned int i; 9815 - void *vaddr; 9809 + unsigned long vmap_start, vmap_end; 9810 + struct vm_struct *area; 9811 + int ret; 9816 9812 9817 - page_count = DIV_ROUND_UP(size, PAGE_SIZE); 9818 - 9819 - page_start = start; 9820 - pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); 9821 - if (!pages) 9813 + area = get_vm_area(size, VM_IOREMAP); 9814 + if (!area) 9822 9815 return 0; 9823 9816 9824 - for (i = 0; i < page_count; i++) { 9825 - phys_addr_t addr = page_start + i * PAGE_SIZE; 9826 - pages[i] = pfn_to_page(addr >> PAGE_SHIFT); 9827 - } 9828 - vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL); 9829 - kfree(pages); 9817 + vmap_start = (unsigned long) area->addr; 9818 + vmap_end = vmap_start + size; 9830 9819 9831 - return (u64)(unsigned long)vaddr; 9820 + ret = vmap_page_range(vmap_start, 
vmap_end, 9821 + start, pgprot_nx(PAGE_KERNEL)); 9822 + if (ret < 0) { 9823 + free_vm_area(area); 9824 + return 0; 9825 + } 9826 + 9827 + return (u64)vmap_start; 9832 9828 } 9833 9829 9834 9830 /** ··· 10705 10705 __init static void enable_instances(void) 10706 10706 { 10707 10707 struct trace_array *tr; 10708 + bool memmap_area = false; 10708 10709 char *curr_str; 10709 10710 char *name; 10710 10711 char *str; ··· 10774 10773 name); 10775 10774 continue; 10776 10775 } 10776 + memmap_area = true; 10777 10777 } else if (tok) { 10778 10778 if (!reserve_mem_find_by_name(tok, &start, &size)) { 10779 10779 start = 0; ··· 10785 10783 } 10786 10784 10787 10785 if (start) { 10788 - addr = map_pages(start, size); 10786 + /* Start and size must be page aligned */ 10787 + if (start & ~PAGE_MASK) { 10788 + pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start); 10789 + continue; 10790 + } 10791 + if (size & ~PAGE_MASK) { 10792 + pr_warn("Tracing: mapping size %pa is not page aligned\n", &size); 10793 + continue; 10794 + } 10795 + 10796 + if (memmap_area) 10797 + addr = map_pages(start, size); 10798 + else 10799 + addr = (unsigned long)phys_to_virt(start); 10789 10800 if (addr) { 10790 10801 pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n", 10791 10802 name, &start, (unsigned long)size); ··· 10825 10810 update_printk_trace(tr); 10826 10811 10827 10812 /* 10828 - * If start is set, then this is a mapped buffer, and 10829 - * cannot be deleted by user space, so keep the reference 10830 - * to it. 10813 + * memmap'd buffers can not be freed. 10831 10814 */ 10815 + if (memmap_area) { 10816 + tr->flags |= TRACE_ARRAY_FL_MEMMAP; 10817 + tr->ref++; 10818 + } 10819 + 10832 10820 if (start) { 10833 10821 tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT; 10834 10822 tr->range_name = no_free_ptr(rname);
+1
kernel/trace/trace.h
··· 447 447 TRACE_ARRAY_FL_BOOT = BIT(1), 448 448 TRACE_ARRAY_FL_LAST_BOOT = BIT(2), 449 449 TRACE_ARRAY_FL_MOD_INIT = BIT(3), 450 + TRACE_ARRAY_FL_MEMMAP = BIT(4), 450 451 }; 451 452 452 453 #ifdef CONFIG_MODULES