Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] ia64 uncached alloc

This patch contains the ia64 uncached page allocator and the generic
allocator (genalloc). The uncached allocator was formerly part of the SN2
mspec driver but there are several other users of it so it has been split
off from the driver.

The generic allocator can be used by device drivers to manage special memory
etc. The generic allocator is based on the allocator from the sym53c8xx_2
driver.

Various users on ia64 need uncached memory. The SGI SN architecture requires
it for inter-partition communication between partitions within a large NUMA
cluster. The specific user for this is the XPC code. Another application is
large MPI style applications which use it for synchronization; on SN this can
be done using special 'fetchop' operations, but it also benefits non-SN
hardware which may use regular uncached memory for this purpose. The
performance difference between doing this through uncached vs cached memory
is pretty substantial. This is handled by the mspec driver which I will push
out in a separate patch.

Rather than creating a specific allocator for just uncached memory I came up
with genalloc which is a general purpose allocator that can be used by device
drivers and other subsystems as they please, for instance to handle onboard
device memory. It was derived from the sym53c8xx_2 driver's allocator which
is also an example of a potential user (I am refraining from modifying sym2
right now as it seems to have been under fairly heavy development recently).

On ia64 memory has various properties within a granule, i.e. it isn't safe to
access memory as uncached within a granule that currently has memory
accessed in cached mode. The regular system therefore doesn't utilize memory
in the lower granules which is mixed in with device PAL code etc. The
uncached driver walks the EFI memmap and pulls out the spill uncached pages
and sticks them into the uncached pool. Only after these chunks have been
utilized will it start converting regular cached memory into uncached memory.
Hence the reason for the EFI related code additions.

Signed-off-by: Jes Sorensen <jes@wildopensource.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

Jes Sorensen and committed by
Linus Torvalds
f14f75b8 2caaad41

+530
+4
arch/ia64/Kconfig
··· 50 50 bool 51 51 default y 52 52 53 + config IA64_UNCACHED_ALLOCATOR 54 + bool 55 + select GENERIC_ALLOCATOR 56 + 53 57 choice 54 58 prompt "System type" 55 59 default IA64_GENERIC
+1
arch/ia64/kernel/Makefile
··· 20 20 obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o 21 21 obj-$(CONFIG_IA64_CYCLONE) += cyclone.o 22 22 obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o 23 + obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o 23 24 mca_recovery-y += mca_drv.o mca_drv_asm.o 24 25 25 26 # The gate DSO image is built using a special linker script.
+32
arch/ia64/kernel/efi.c
··· 410 410 } 411 411 412 412 /* 413 + * Walk the EFI memory map to pull out leftover pages in the lower 414 + * memory regions which do not end up in the regular memory map and 415 + * stick them into the uncached allocator 416 + * 417 + * The regular walk function is significantly more complex than the 418 + * uncached walk which means it really doesn't make sense to try and 419 + * marge the two. 420 + */ 421 + void __init 422 + efi_memmap_walk_uc (efi_freemem_callback_t callback) 423 + { 424 + void *efi_map_start, *efi_map_end, *p; 425 + efi_memory_desc_t *md; 426 + u64 efi_desc_size, start, end; 427 + 428 + efi_map_start = __va(ia64_boot_param->efi_memmap); 429 + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; 430 + efi_desc_size = ia64_boot_param->efi_memdesc_size; 431 + 432 + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { 433 + md = p; 434 + if (md->attribute == EFI_MEMORY_UC) { 435 + start = PAGE_ALIGN(md->phys_addr); 436 + end = PAGE_ALIGN((md->phys_addr+(md->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK); 437 + if ((*callback)(start, end, NULL) < 0) 438 + return; 439 + } 440 + } 441 + } 442 + 443 + 444 + /* 413 445 * Look for the PAL_CODE region reported by EFI and maps it using an 414 446 * ITR to enable safe PAL calls in virtual mode. See IA-64 Processor 415 447 * Abstraction Layer chapter 11 in ADAG
+246
arch/ia64/kernel/uncached.c
··· 1 + /* 2 + * Copyright (C) 2001-2005 Silicon Graphics, Inc. All rights reserved. 3 + * 4 + * This program is free software; you can redistribute it and/or modify it 5 + * under the terms of version 2 of the GNU General Public License 6 + * as published by the Free Software Foundation. 7 + * 8 + * A simple uncached page allocator using the generic allocator. This 9 + * allocator first utilizes the spare (spill) pages found in the EFI 10 + * memmap and will then start converting cached pages to uncached ones 11 + * at a granule at a time. Node awareness is implemented by having a 12 + * pool of pages per node. 13 + */ 14 + 15 + #include <linux/types.h> 16 + #include <linux/kernel.h> 17 + #include <linux/module.h> 18 + #include <linux/init.h> 19 + #include <linux/errno.h> 20 + #include <linux/string.h> 21 + #include <linux/slab.h> 22 + #include <linux/efi.h> 23 + #include <linux/genalloc.h> 24 + #include <asm/page.h> 25 + #include <asm/pal.h> 26 + #include <asm/system.h> 27 + #include <asm/pgtable.h> 28 + #include <asm/atomic.h> 29 + #include <asm/tlbflush.h> 30 + #include <asm/sn/arch.h> 31 + 32 + #define DEBUG 0 33 + 34 + #if DEBUG 35 + #define dprintk printk 36 + #else 37 + #define dprintk(x...) 
do { } while (0) 38 + #endif 39 + 40 + void __init efi_memmap_walk_uc (efi_freemem_callback_t callback); 41 + 42 + #define MAX_UNCACHED_GRANULES 5 43 + static int allocated_granules; 44 + 45 + struct gen_pool *uncached_pool[MAX_NUMNODES]; 46 + 47 + 48 + static void uncached_ipi_visibility(void *data) 49 + { 50 + int status; 51 + 52 + status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); 53 + if ((status != PAL_VISIBILITY_OK) && 54 + (status != PAL_VISIBILITY_OK_REMOTE_NEEDED)) 55 + printk(KERN_DEBUG "pal_prefetch_visibility() returns %i on " 56 + "CPU %i\n", status, get_cpu()); 57 + } 58 + 59 + 60 + static void uncached_ipi_mc_drain(void *data) 61 + { 62 + int status; 63 + status = ia64_pal_mc_drain(); 64 + if (status) 65 + printk(KERN_WARNING "ia64_pal_mc_drain() failed with %i on " 66 + "CPU %i\n", status, get_cpu()); 67 + } 68 + 69 + 70 + static unsigned long 71 + uncached_get_new_chunk(struct gen_pool *poolp) 72 + { 73 + struct page *page; 74 + void *tmp; 75 + int status, i; 76 + unsigned long addr, node; 77 + 78 + if (allocated_granules >= MAX_UNCACHED_GRANULES) 79 + return 0; 80 + 81 + node = poolp->private; 82 + page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 83 + IA64_GRANULE_SHIFT-PAGE_SHIFT); 84 + 85 + dprintk(KERN_INFO "get_new_chunk page %p, addr %lx\n", 86 + page, (unsigned long)(page-vmem_map) << PAGE_SHIFT); 87 + 88 + /* 89 + * Do magic if no mem on local node! 
XXX 90 + */ 91 + if (!page) 92 + return 0; 93 + tmp = page_address(page); 94 + 95 + /* 96 + * There's a small race here where it's possible for someone to 97 + * access the page through /dev/mem halfway through the conversion 98 + * to uncached - not sure it's really worth bothering about 99 + */ 100 + for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++) 101 + SetPageUncached(&page[i]); 102 + 103 + flush_tlb_kernel_range(tmp, tmp + IA64_GRANULE_SIZE); 104 + 105 + status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); 106 + 107 + dprintk(KERN_INFO "pal_prefetch_visibility() returns %i on cpu %i\n", 108 + status, get_cpu()); 109 + 110 + if (!status) { 111 + status = smp_call_function(uncached_ipi_visibility, NULL, 0, 1); 112 + if (status) 113 + printk(KERN_WARNING "smp_call_function failed for " 114 + "uncached_ipi_visibility! (%i)\n", status); 115 + } 116 + 117 + if (ia64_platform_is("sn2")) 118 + sn_flush_all_caches((unsigned long)tmp, IA64_GRANULE_SIZE); 119 + else 120 + flush_icache_range((unsigned long)tmp, 121 + (unsigned long)tmp+IA64_GRANULE_SIZE); 122 + 123 + ia64_pal_mc_drain(); 124 + status = smp_call_function(uncached_ipi_mc_drain, NULL, 0, 1); 125 + if (status) 126 + printk(KERN_WARNING "smp_call_function failed for " 127 + "uncached_ipi_mc_drain! (%i)\n", status); 128 + 129 + addr = (unsigned long)tmp - PAGE_OFFSET + __IA64_UNCACHED_OFFSET; 130 + 131 + allocated_granules++; 132 + return addr; 133 + } 134 + 135 + 136 + /* 137 + * uncached_alloc_page 138 + * 139 + * Allocate 1 uncached page. Allocates on the requested node. If no 140 + * uncached pages are available on the requested node, roundrobin starting 141 + * with higher nodes. 
142 + */ 143 + unsigned long 144 + uncached_alloc_page(int nid) 145 + { 146 + unsigned long maddr; 147 + 148 + maddr = gen_pool_alloc(uncached_pool[nid], PAGE_SIZE); 149 + 150 + dprintk(KERN_DEBUG "uncached_alloc_page returns %lx on node %i\n", 151 + maddr, nid); 152 + 153 + /* 154 + * If no memory is availble on our local node, try the 155 + * remaining nodes in the system. 156 + */ 157 + if (!maddr) { 158 + int i; 159 + 160 + for (i = MAX_NUMNODES - 1; i >= 0; i--) { 161 + if (i == nid || !node_online(i)) 162 + continue; 163 + maddr = gen_pool_alloc(uncached_pool[i], PAGE_SIZE); 164 + dprintk(KERN_DEBUG "uncached_alloc_page alternate search " 165 + "returns %lx on node %i\n", maddr, i); 166 + if (maddr) { 167 + break; 168 + } 169 + } 170 + } 171 + 172 + return maddr; 173 + } 174 + EXPORT_SYMBOL(uncached_alloc_page); 175 + 176 + 177 + /* 178 + * uncached_free_page 179 + * 180 + * Free a single uncached page. 181 + */ 182 + void 183 + uncached_free_page(unsigned long maddr) 184 + { 185 + int node; 186 + 187 + node = nasid_to_cnodeid(NASID_GET(maddr)); 188 + 189 + dprintk(KERN_DEBUG "uncached_free_page(%lx) on node %i\n", maddr, node); 190 + 191 + if ((maddr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET) 192 + panic("uncached_free_page invalid address %lx\n", maddr); 193 + 194 + gen_pool_free(uncached_pool[node], maddr, PAGE_SIZE); 195 + } 196 + EXPORT_SYMBOL(uncached_free_page); 197 + 198 + 199 + /* 200 + * uncached_build_memmap, 201 + * 202 + * Called at boot time to build a map of pages that can be used for 203 + * memory special operations. 
204 + */ 205 + static int __init 206 + uncached_build_memmap(unsigned long start, unsigned long end, void *arg) 207 + { 208 + long length; 209 + unsigned long vstart, vend; 210 + int node; 211 + 212 + length = end - start; 213 + vstart = start + __IA64_UNCACHED_OFFSET; 214 + vend = end + __IA64_UNCACHED_OFFSET; 215 + 216 + dprintk(KERN_ERR "uncached_build_memmap(%lx %lx)\n", start, end); 217 + 218 + memset((char *)vstart, 0, length); 219 + 220 + node = nasid_to_cnodeid(NASID_GET(start)); 221 + 222 + for (; vstart < vend ; vstart += PAGE_SIZE) { 223 + dprintk(KERN_INFO "sticking %lx into the pool!\n", vstart); 224 + gen_pool_free(uncached_pool[node], vstart, PAGE_SIZE); 225 + } 226 + 227 + return 0; 228 + } 229 + 230 + 231 + static int __init uncached_init(void) { 232 + int i; 233 + 234 + for (i = 0; i < MAX_NUMNODES; i++) { 235 + if (!node_online(i)) 236 + continue; 237 + uncached_pool[i] = gen_pool_create(0, IA64_GRANULE_SHIFT, 238 + &uncached_get_new_chunk, i); 239 + } 240 + 241 + efi_memmap_walk_uc(uncached_build_memmap); 242 + 243 + return 0; 244 + } 245 + 246 + __initcall(uncached_init);
+12
include/asm-ia64/uncached.h
/*
 * Copyright (C) 2001-2005 Silicon Graphics, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * Prototypes for the uncached page allocator
 */

#ifndef _ASM_IA64_UNCACHED_H
#define _ASM_IA64_UNCACHED_H

/* Allocate one uncached page, preferring node @nid; returns 0 on failure. */
extern unsigned long uncached_alloc_page(int nid);

/* Free a page previously returned by uncached_alloc_page(). */
extern void uncached_free_page(unsigned long maddr);

#endif /* _ASM_IA64_UNCACHED_H */
+40
include/linux/genalloc.h
··· 1 + /* 2 + * Basic general purpose allocator for managing special purpose memory 3 + * not managed by the regular kmalloc/kfree interface. 4 + * Uses for this includes on-device special memory, uncached memory 5 + * etc. 6 + * 7 + * This code is based on the buddy allocator found in the sym53c8xx_2 8 + * driver, adapted for general purpose use. 9 + * 10 + * This source code is licensed under the GNU General Public License, 11 + * Version 2. See the file COPYING for more details. 12 + */ 13 + 14 + #include <linux/spinlock.h> 15 + 16 + #define ALLOC_MIN_SHIFT 5 /* 32 bytes minimum */ 17 + /* 18 + * Link between free memory chunks of a given size. 19 + */ 20 + struct gen_pool_link { 21 + struct gen_pool_link *next; 22 + }; 23 + 24 + /* 25 + * Memory pool descriptor. 26 + */ 27 + struct gen_pool { 28 + spinlock_t lock; 29 + unsigned long (*get_new_chunk)(struct gen_pool *); 30 + struct gen_pool *next; 31 + struct gen_pool_link *h; 32 + unsigned long private; 33 + int max_chunk_shift; 34 + }; 35 + 36 + unsigned long gen_pool_alloc(struct gen_pool *poolp, int size); 37 + void gen_pool_free(struct gen_pool *mp, unsigned long ptr, int size); 38 + struct gen_pool *gen_pool_create(int nr_chunks, int max_chunk_shift, 39 + unsigned long (*fp)(struct gen_pool *), 40 + unsigned long data);
+6
lib/Kconfig
··· 40 40 tristate 41 41 42 42 # 43 + # Generic allocator support is selected if needed 44 + # 45 + config GENERIC_ALLOCATOR 46 + boolean 47 + 48 + # 43 49 # reed solomon support is select'ed if needed 44 50 # 45 51 config REED_SOLOMON
+1
lib/Makefile
··· 30 30 obj-$(CONFIG_CRC32) += crc32.o 31 31 obj-$(CONFIG_LIBCRC32C) += libcrc32c.o 32 32 obj-$(CONFIG_GENERIC_IOMAP) += iomap.o 33 + obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o 33 34 34 35 obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ 35 36 obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
+188
lib/genalloc.c
··· 1 + /* 2 + * Basic general purpose allocator for managing special purpose memory 3 + * not managed by the regular kmalloc/kfree interface. 4 + * Uses for this includes on-device special memory, uncached memory 5 + * etc. 6 + * 7 + * This code is based on the buddy allocator found in the sym53c8xx_2 8 + * driver Copyright (C) 1999-2001 Gerard Roudier <groudier@free.fr>, 9 + * and adapted for general purpose use. 10 + * 11 + * Copyright 2005 (C) Jes Sorensen <jes@trained-monkey.org> 12 + * 13 + * This source code is licensed under the GNU General Public License, 14 + * Version 2. See the file COPYING for more details. 15 + */ 16 + 17 + #include <linux/module.h> 18 + #include <linux/stddef.h> 19 + #include <linux/kernel.h> 20 + #include <linux/string.h> 21 + #include <linux/slab.h> 22 + #include <linux/init.h> 23 + #include <linux/mm.h> 24 + #include <linux/spinlock.h> 25 + #include <linux/genalloc.h> 26 + 27 + #include <asm/page.h> 28 + 29 + 30 + struct gen_pool *gen_pool_create(int nr_chunks, int max_chunk_shift, 31 + unsigned long (*fp)(struct gen_pool *), 32 + unsigned long data) 33 + { 34 + struct gen_pool *poolp; 35 + unsigned long tmp; 36 + int i; 37 + 38 + /* 39 + * This is really an arbitrary limit, +10 is enough for 40 + * IA64_GRANULE_SHIFT, aka 16MB. If anyone needs a large limit 41 + * this can be increased without problems. 
42 + */ 43 + if ((max_chunk_shift > (PAGE_SHIFT + 10)) || 44 + ((max_chunk_shift < ALLOC_MIN_SHIFT) && max_chunk_shift)) 45 + return NULL; 46 + 47 + if (!max_chunk_shift) 48 + max_chunk_shift = PAGE_SHIFT; 49 + 50 + poolp = kmalloc(sizeof(struct gen_pool), GFP_KERNEL); 51 + if (!poolp) 52 + return NULL; 53 + memset(poolp, 0, sizeof(struct gen_pool)); 54 + poolp->h = kmalloc(sizeof(struct gen_pool_link) * 55 + (max_chunk_shift - ALLOC_MIN_SHIFT + 1), 56 + GFP_KERNEL); 57 + if (!poolp->h) { 58 + printk(KERN_WARNING "gen_pool_alloc() failed to allocate\n"); 59 + kfree(poolp); 60 + return NULL; 61 + } 62 + memset(poolp->h, 0, sizeof(struct gen_pool_link) * 63 + (max_chunk_shift - ALLOC_MIN_SHIFT + 1)); 64 + 65 + spin_lock_init(&poolp->lock); 66 + poolp->get_new_chunk = fp; 67 + poolp->max_chunk_shift = max_chunk_shift; 68 + poolp->private = data; 69 + 70 + for (i = 0; i < nr_chunks; i++) { 71 + tmp = poolp->get_new_chunk(poolp); 72 + printk(KERN_INFO "allocated %lx\n", tmp); 73 + if (!tmp) 74 + break; 75 + gen_pool_free(poolp, tmp, (1 << poolp->max_chunk_shift)); 76 + } 77 + 78 + return poolp; 79 + } 80 + EXPORT_SYMBOL(gen_pool_create); 81 + 82 + 83 + /* 84 + * Simple power of two buddy-like generic allocator. 85 + * Provides naturally aligned memory chunks. 
86 + */ 87 + unsigned long gen_pool_alloc(struct gen_pool *poolp, int size) 88 + { 89 + int j, i, s, max_chunk_size; 90 + unsigned long a, flags; 91 + struct gen_pool_link *h = poolp->h; 92 + 93 + max_chunk_size = 1 << poolp->max_chunk_shift; 94 + 95 + if (size > max_chunk_size) 96 + return 0; 97 + 98 + i = 0; 99 + 100 + size = max(size, 1 << ALLOC_MIN_SHIFT); 101 + s = roundup_pow_of_two(size); 102 + 103 + j = i; 104 + 105 + spin_lock_irqsave(&poolp->lock, flags); 106 + while (!h[j].next) { 107 + if (s == max_chunk_size) { 108 + struct gen_pool_link *ptr; 109 + spin_unlock_irqrestore(&poolp->lock, flags); 110 + ptr = (struct gen_pool_link *)poolp->get_new_chunk(poolp); 111 + spin_lock_irqsave(&poolp->lock, flags); 112 + h[j].next = ptr; 113 + if (h[j].next) 114 + h[j].next->next = NULL; 115 + break; 116 + } 117 + j++; 118 + s <<= 1; 119 + } 120 + a = (unsigned long) h[j].next; 121 + if (a) { 122 + h[j].next = h[j].next->next; 123 + /* 124 + * This should be split into a seperate function doing 125 + * the chunk split in order to support custom 126 + * handling memory not physically accessible by host 127 + */ 128 + while (j > i) { 129 + j -= 1; 130 + s >>= 1; 131 + h[j].next = (struct gen_pool_link *) (a + s); 132 + h[j].next->next = NULL; 133 + } 134 + } 135 + spin_unlock_irqrestore(&poolp->lock, flags); 136 + return a; 137 + } 138 + EXPORT_SYMBOL(gen_pool_alloc); 139 + 140 + 141 + /* 142 + * Counter-part of the generic allocator. 
143 + */ 144 + void gen_pool_free(struct gen_pool *poolp, unsigned long ptr, int size) 145 + { 146 + struct gen_pool_link *q; 147 + struct gen_pool_link *h = poolp->h; 148 + unsigned long a, b, flags; 149 + int i, s, max_chunk_size; 150 + 151 + max_chunk_size = 1 << poolp->max_chunk_shift; 152 + 153 + if (size > max_chunk_size) 154 + return; 155 + 156 + i = 0; 157 + 158 + size = max(size, 1 << ALLOC_MIN_SHIFT); 159 + s = roundup_pow_of_two(size); 160 + 161 + a = ptr; 162 + 163 + spin_lock_irqsave(&poolp->lock, flags); 164 + while (1) { 165 + if (s == max_chunk_size) { 166 + ((struct gen_pool_link *)a)->next = h[i].next; 167 + h[i].next = (struct gen_pool_link *)a; 168 + break; 169 + } 170 + b = a ^ s; 171 + q = &h[i]; 172 + 173 + while (q->next && q->next != (struct gen_pool_link *)b) 174 + q = q->next; 175 + 176 + if (!q->next) { 177 + ((struct gen_pool_link *)a)->next = h[i].next; 178 + h[i].next = (struct gen_pool_link *)a; 179 + break; 180 + } 181 + q->next = q->next->next; 182 + a = a & b; 183 + s <<= 1; 184 + i++; 185 + } 186 + spin_unlock_irqrestore(&poolp->lock, flags); 187 + } 188 + EXPORT_SYMBOL(gen_pool_free);