at v2.6.15-rc4 452 lines 11 kB view raw
1/* 2 * drivers/base/memory.c - basic Memory class support 3 * 4 * Written by Matt Tolentino <matthew.e.tolentino@intel.com> 5 * Dave Hansen <haveblue@us.ibm.com> 6 * 7 * This file provides the necessary infrastructure to represent 8 * a SPARSEMEM-memory-model system's physical memory in /sysfs. 9 * All arch-independent code that assumes MEMORY_HOTPLUG requires 10 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c. 11 */ 12 13#include <linux/sysdev.h> 14#include <linux/module.h> 15#include <linux/init.h> 16#include <linux/sched.h> /* capable() */ 17#include <linux/topology.h> 18#include <linux/device.h> 19#include <linux/memory.h> 20#include <linux/kobject.h> 21#include <linux/memory_hotplug.h> 22#include <linux/mm.h> 23#include <asm/atomic.h> 24#include <asm/uaccess.h> 25 26#define MEMORY_CLASS_NAME "memory" 27 28static struct sysdev_class memory_sysdev_class = { 29 set_kset_name(MEMORY_CLASS_NAME), 30}; 31EXPORT_SYMBOL(memory_sysdev_class); 32 33static char *memory_hotplug_name(struct kset *kset, struct kobject *kobj) 34{ 35 return MEMORY_CLASS_NAME; 36} 37 38static int memory_hotplug(struct kset *kset, struct kobject *kobj, char **envp, 39 int num_envp, char *buffer, int buffer_size) 40{ 41 int retval = 0; 42 43 return retval; 44} 45 46static struct kset_hotplug_ops memory_hotplug_ops = { 47 .name = memory_hotplug_name, 48 .hotplug = memory_hotplug, 49}; 50 51static struct notifier_block *memory_chain; 52 53static int register_memory_notifier(struct notifier_block *nb) 54{ 55 return notifier_chain_register(&memory_chain, nb); 56} 57 58static void unregister_memory_notifier(struct notifier_block *nb) 59{ 60 notifier_chain_unregister(&memory_chain, nb); 61} 62 63/* 64 * register_memory - Setup a sysfs device for a memory block 65 */ 66static int 67register_memory(struct memory_block *memory, struct mem_section *section, 68 struct node *root) 69{ 70 int error; 71 72 memory->sysdev.cls = &memory_sysdev_class; 73 memory->sysdev.id = __section_nr(section); 74 75 error = sysdev_register(&memory->sysdev); 76 77 if (root && !error) 78 error = sysfs_create_link(&root->sysdev.kobj, 79 &memory->sysdev.kobj, 80 kobject_name(&memory->sysdev.kobj)); 81 82 return error; 83} 84 85static void 86unregister_memory(struct memory_block *memory, struct mem_section *section, 87 struct node *root) 88{ 89 BUG_ON(memory->sysdev.cls != &memory_sysdev_class); 90 BUG_ON(memory->sysdev.id != __section_nr(section)); 91 92 sysdev_unregister(&memory->sysdev); 93 if (root) 94 sysfs_remove_link(&root->sysdev.kobj, 95 kobject_name(&memory->sysdev.kobj)); 96} 97 98/* 99 * use this as the physical section index that this memsection 100 * uses. 101 */ 102 103static ssize_t show_mem_phys_index(struct sys_device *dev, char *buf) 104{ 105 struct memory_block *mem = 106 container_of(dev, struct memory_block, sysdev); 107 return sprintf(buf, "%08lx\n", mem->phys_index); 108} 109 110/* 111 * online, offline, going offline, etc. 112 */ 113static ssize_t show_mem_state(struct sys_device *dev, char *buf) 114{ 115 struct memory_block *mem = 116 container_of(dev, struct memory_block, sysdev); 117 ssize_t len = 0; 118 119 /* 120 * We can probably put these states in a nice little array 121 * so that they're not open-coded 122 */ 123 switch (mem->state) { 124 case MEM_ONLINE: 125 len = sprintf(buf, "online\n"); 126 break; 127 case MEM_OFFLINE: 128 len = sprintf(buf, "offline\n"); 129 break; 130 case MEM_GOING_OFFLINE: 131 len = sprintf(buf, "going-offline\n"); 132 break; 133 default: 134 len = sprintf(buf, "ERROR-UNKNOWN-%ld\n", 135 mem->state); 136 WARN_ON(1); 137 break; 138 } 139 140 return len; 141} 142 143static inline int memory_notify(unsigned long val, void *v) 144{ 145 return notifier_call_chain(&memory_chain, val, v); 146} 147 148/* 149 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is 150 * OK to have direct references to sparsemem variables in here. 151 */ 152static int 153memory_block_action(struct memory_block *mem, unsigned long action) 154{ 155 int i; 156 unsigned long psection; 157 unsigned long start_pfn, start_paddr; 158 struct page *first_page; 159 int ret; 160 int old_state = mem->state; 161 162 psection = mem->phys_index; 163 first_page = pfn_to_page(psection << PFN_SECTION_SHIFT); 164 165 /* 166 * The probe routines leave the pages reserved, just 167 * as the bootmem code does. Make sure they're still 168 * that way. 169 */ 170 if (action == MEM_ONLINE) { 171 for (i = 0; i < PAGES_PER_SECTION; i++) { 172 if (PageReserved(first_page+i)) 173 continue; 174 175 printk(KERN_WARNING "section number %ld page number %d " 176 "not reserved, was it already online? \n", 177 psection, i); 178 return -EBUSY; 179 } 180 } 181 182 switch (action) { 183 case MEM_ONLINE: 184 start_pfn = page_to_pfn(first_page); 185 ret = online_pages(start_pfn, PAGES_PER_SECTION); 186 break; 187 case MEM_OFFLINE: 188 mem->state = MEM_GOING_OFFLINE; 189 memory_notify(MEM_GOING_OFFLINE, NULL); 190 start_paddr = page_to_pfn(first_page) << PAGE_SHIFT; 191 ret = remove_memory(start_paddr, 192 PAGES_PER_SECTION << PAGE_SHIFT); 193 if (ret) { 194 mem->state = old_state; 195 break; 196 } 197 memory_notify(MEM_MAPPING_INVALID, NULL); 198 break; 199 default: 200 printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n", 201 __FUNCTION__, mem, action, action); 202 WARN_ON(1); 203 ret = -EINVAL; 204 } 205 /* 206 * For now, only notify on successful memory operations 207 */ 208 if (!ret) 209 memory_notify(action, NULL); 210 211 return ret; 212} 213 214static int memory_block_change_state(struct memory_block *mem, 215 unsigned long to_state, unsigned long from_state_req) 216{ 217 int ret = 0; 218 down(&mem->state_sem); 219 220 if (mem->state != from_state_req) { 221 ret = -EINVAL; 222 goto out; 223 } 224 225 ret = memory_block_action(mem, to_state); 226 if (!ret) 227 mem->state = to_state; 228 229out: 230 up(&mem->state_sem); 231 return ret; 232} 233 234static ssize_t 235store_mem_state(struct sys_device *dev, const char *buf, size_t count) 236{ 237 struct memory_block *mem; 238 unsigned int phys_section_nr; 239 int ret = -EINVAL; 240 241 mem = container_of(dev, struct memory_block, sysdev); 242 phys_section_nr = mem->phys_index; 243 244 if (!valid_section_nr(phys_section_nr)) 245 goto out; 246 247 if (!strncmp(buf, "online", min((int)count, 6))) 248 ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); 249 else if(!strncmp(buf, "offline", min((int)count, 7))) 250 ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE); 251out: 252 if (ret) 253 return ret; 254 return count; 255} 256 257/* 258 * phys_device is a bad name for this. What I really want 259 * is a way to differentiate between memory ranges that 260 * are part of physical devices that constitute 261 * a complete removable unit or fru. 262 * i.e. do these ranges belong to the same physical device, 263 * s.t. if I offline all of these sections I can then 264 * remove the physical device? 265 */ 266static ssize_t show_phys_device(struct sys_device *dev, char *buf) 267{ 268 struct memory_block *mem = 269 container_of(dev, struct memory_block, sysdev); 270 return sprintf(buf, "%d\n", mem->phys_device); 271} 272 273static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL); 274static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state); 275static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL); 276 277#define mem_create_simple_file(mem, attr_name) \ 278 sysdev_create_file(&mem->sysdev, &attr_##attr_name) 279#define mem_remove_simple_file(mem, attr_name) \ 280 sysdev_remove_file(&mem->sysdev, &attr_##attr_name) 281 282/* 283 * Block size attribute stuff 284 */ 285static ssize_t 286print_block_size(struct class *class, char *buf) 287{ 288 return sprintf(buf, "%lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE); 289} 290 291static CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL); 292 293static int block_size_init(void) 294{ 295 sysfs_create_file(&memory_sysdev_class.kset.kobj, 296 &class_attr_block_size_bytes.attr); 297 return 0; 298} 299 300/* 301 * Some architectures will have custom drivers to do this, and 302 * will not need to do it from userspace. The fake hot-add code 303 * as well as ppc64 will do all of their discovery in userspace 304 * and will require this interface. 305 */ 306#ifdef CONFIG_ARCH_MEMORY_PROBE 307static ssize_t 308memory_probe_store(struct class *class, const char __user *buf, size_t count) 309{ 310 u64 phys_addr; 311 int ret; 312 313 phys_addr = simple_strtoull(buf, NULL, 0); 314 315 ret = add_memory(phys_addr, PAGES_PER_SECTION << PAGE_SHIFT); 316 317 if (ret) 318 count = ret; 319 320 return count; 321} 322static CLASS_ATTR(probe, 0700, NULL, memory_probe_store); 323 324static int memory_probe_init(void) 325{ 326 sysfs_create_file(&memory_sysdev_class.kset.kobj, 327 &class_attr_probe.attr); 328 return 0; 329} 330#else 331#define memory_probe_init(...) do {} while (0) 332#endif 333 334/* 335 * Note that phys_device is optional. It is here to allow for 336 * differentiation between which *physical* devices each 337 * section belongs to... 338 */ 339 340static int add_memory_block(unsigned long node_id, struct mem_section *section, 341 unsigned long state, int phys_device) 342{ 343 struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL); 344 int ret = 0; 345 346 if (!mem) 347 return -ENOMEM; 348 349 mem->phys_index = __section_nr(section); 350 mem->state = state; 351 init_MUTEX(&mem->state_sem); 352 mem->phys_device = phys_device; 353 354 ret = register_memory(mem, section, NULL); 355 if (!ret) 356 ret = mem_create_simple_file(mem, phys_index); 357 if (!ret) 358 ret = mem_create_simple_file(mem, state); 359 if (!ret) 360 ret = mem_create_simple_file(mem, phys_device); 361 362 return ret; 363} 364 365/* 366 * For now, we have a linear search to go find the appropriate 367 * memory_block corresponding to a particular phys_index. If 368 * this gets to be a real problem, we can always use a radix 369 * tree or something here. 370 * 371 * This could be made generic for all sysdev classes. 372 */ 373static struct memory_block *find_memory_block(struct mem_section *section) 374{ 375 struct kobject *kobj; 376 struct sys_device *sysdev; 377 struct memory_block *mem; 378 char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1]; 379 380 /* 381 * This only works because we know that section == sysdev->id 382 * slightly redundant with sysdev_register() 383 */ 384 sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section)); 385 386 kobj = kset_find_obj(&memory_sysdev_class.kset, name); 387 if (!kobj) 388 return NULL; 389 390 sysdev = container_of(kobj, struct sys_device, kobj); 391 mem = container_of(sysdev, struct memory_block, sysdev); 392 393 return mem; 394} 395 396int remove_memory_block(unsigned long node_id, struct mem_section *section, 397 int phys_device) 398{ 399 struct memory_block *mem; 400 401 mem = find_memory_block(section); 402 mem_remove_simple_file(mem, phys_index); 403 mem_remove_simple_file(mem, state); 404 mem_remove_simple_file(mem, phys_device); 405 unregister_memory(mem, section, NULL); 406 407 return 0; 408} 409 410/* 411 * need an interface for the VM to add new memory regions, 412 * but without onlining it. 413 */ 414int register_new_memory(struct mem_section *section) 415{ 416 return add_memory_block(0, section, MEM_OFFLINE, 0); 417} 418 419int unregister_memory_section(struct mem_section *section) 420{ 421 if (!valid_section(section)) 422 return -EINVAL; 423 424 return remove_memory_block(0, section, 0); 425} 426 427/* 428 * Initialize the sysfs support for memory devices... 429 */ 430int __init memory_dev_init(void) 431{ 432 unsigned int i; 433 int ret; 434 435 memory_sysdev_class.kset.hotplug_ops = &memory_hotplug_ops; 436 ret = sysdev_class_register(&memory_sysdev_class); 437 438 /* 439 * Create entries for memory sections that were found 440 * during boot and have been initialized 441 */ 442 for (i = 0; i < NR_MEM_SECTIONS; i++) { 443 if (!valid_section_nr(i)) 444 continue; 445 add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0); 446 } 447 448 memory_probe_init(); 449 block_size_init(); 450 451 return ret; 452}