/* Source snapshot: linux/init/main.c at v2.6.18 */
1/* 2 * linux/init/main.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 * 6 * GK 2/5/95 - Changed to support mounting root fs via NFS 7 * Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96 8 * Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96 9 * Simplified starting of init: Michael A. Griffith <grif@acm.org> 10 */ 11 12#define __KERNEL_SYSCALLS__ 13 14#include <linux/types.h> 15#include <linux/module.h> 16#include <linux/proc_fs.h> 17#include <linux/kernel.h> 18#include <linux/syscalls.h> 19#include <linux/string.h> 20#include <linux/ctype.h> 21#include <linux/delay.h> 22#include <linux/utsname.h> 23#include <linux/ioport.h> 24#include <linux/init.h> 25#include <linux/smp_lock.h> 26#include <linux/initrd.h> 27#include <linux/hdreg.h> 28#include <linux/bootmem.h> 29#include <linux/tty.h> 30#include <linux/gfp.h> 31#include <linux/percpu.h> 32#include <linux/kmod.h> 33#include <linux/kernel_stat.h> 34#include <linux/security.h> 35#include <linux/workqueue.h> 36#include <linux/profile.h> 37#include <linux/rcupdate.h> 38#include <linux/moduleparam.h> 39#include <linux/kallsyms.h> 40#include <linux/writeback.h> 41#include <linux/cpu.h> 42#include <linux/cpuset.h> 43#include <linux/efi.h> 44#include <linux/taskstats_kern.h> 45#include <linux/delayacct.h> 46#include <linux/unistd.h> 47#include <linux/rmap.h> 48#include <linux/mempolicy.h> 49#include <linux/key.h> 50#include <linux/unwind.h> 51#include <linux/buffer_head.h> 52#include <linux/debug_locks.h> 53#include <linux/lockdep.h> 54 55#include <asm/io.h> 56#include <asm/bugs.h> 57#include <asm/setup.h> 58#include <asm/sections.h> 59#include <asm/cacheflush.h> 60 61#ifdef CONFIG_X86_LOCAL_APIC 62#include <asm/smp.h> 63#endif 64 65/* 66 * This is one of the first .c files built. Error out early if we have compiler 67 * trouble. 68 * 69 * Versions of gcc older than that listed below may actually compile and link 70 * okay, but the end product can have subtle run time bugs. 
To avoid associated 71 * bogus bug reports, we flatly refuse to compile with a gcc that is known to be 72 * too old from the very beginning. 73 */ 74#if (__GNUC__ < 3) || (__GNUC__ == 3 && __GNUC_MINOR__ < 2) 75#error Sorry, your GCC is too old. It builds incorrect kernels. 76#endif 77 78static int init(void *); 79 80extern void init_IRQ(void); 81extern void fork_init(unsigned long); 82extern void mca_init(void); 83extern void sbus_init(void); 84extern void sysctl_init(void); 85extern void signals_init(void); 86extern void pidhash_init(void); 87extern void pidmap_init(void); 88extern void prio_tree_init(void); 89extern void radix_tree_init(void); 90extern void free_initmem(void); 91extern void populate_rootfs(void); 92extern void driver_init(void); 93extern void prepare_namespace(void); 94#ifdef CONFIG_ACPI 95extern void acpi_early_init(void); 96#else 97static inline void acpi_early_init(void) { } 98#endif 99#ifndef CONFIG_DEBUG_RODATA 100static inline void mark_rodata_ro(void) { } 101#endif 102 103#ifdef CONFIG_TC 104extern void tc_init(void); 105#endif 106 107enum system_states system_state; 108EXPORT_SYMBOL(system_state); 109 110/* 111 * Boot command-line arguments 112 */ 113#define MAX_INIT_ARGS CONFIG_INIT_ENV_ARG_LIMIT 114#define MAX_INIT_ENVS CONFIG_INIT_ENV_ARG_LIMIT 115 116extern void time_init(void); 117/* Default late time init is NULL. archs can override this later. */ 118void (*late_time_init)(void); 119extern void softirq_init(void); 120 121/* Untouched command line (eg. for /proc) saved by arch-specific code. */ 122char saved_command_line[COMMAND_LINE_SIZE]; 123 124static char *execute_command; 125static char *ramdisk_execute_command; 126 127/* Setup configured maximum number of CPUs to activate */ 128static unsigned int max_cpus = NR_CPUS; 129 130/* 131 * Setup routine for controlling SMP activation 132 * 133 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP 134 * activation entirely (the MPS table probe still happens, though). 
135 * 136 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer 137 * greater than 0, limits the maximum number of CPUs activated in 138 * SMP mode to <NUM>. 139 */ 140static int __init nosmp(char *str) 141{ 142 max_cpus = 0; 143 return 1; 144} 145 146__setup("nosmp", nosmp); 147 148static int __init maxcpus(char *str) 149{ 150 get_option(&str, &max_cpus); 151 return 1; 152} 153 154__setup("maxcpus=", maxcpus); 155 156static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; 157char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; 158static const char *panic_later, *panic_param; 159 160extern struct obs_kernel_param __setup_start[], __setup_end[]; 161 162static int __init obsolete_checksetup(char *line) 163{ 164 struct obs_kernel_param *p; 165 166 p = __setup_start; 167 do { 168 int n = strlen(p->str); 169 if (!strncmp(line, p->str, n)) { 170 if (p->early) { 171 /* Already done in parse_early_param? (Needs 172 * exact match on param part) */ 173 if (line[n] == '\0' || line[n] == '=') 174 return 1; 175 } else if (!p->setup_func) { 176 printk(KERN_WARNING "Parameter %s is obsolete," 177 " ignored\n", p->str); 178 return 1; 179 } else if (p->setup_func(line + n)) 180 return 1; 181 } 182 p++; 183 } while (p < __setup_end); 184 return 0; 185} 186 187/* 188 * This should be approx 2 Bo*oMips to start (note initial shift), and will 189 * still work even if initially too large, it will just take slightly longer 190 */ 191unsigned long loops_per_jiffy = (1<<12); 192 193EXPORT_SYMBOL(loops_per_jiffy); 194 195static int __init debug_kernel(char *str) 196{ 197 if (*str) 198 return 0; 199 console_loglevel = 10; 200 return 1; 201} 202 203static int __init quiet_kernel(char *str) 204{ 205 if (*str) 206 return 0; 207 console_loglevel = 4; 208 return 1; 209} 210 211__setup("debug", debug_kernel); 212__setup("quiet", quiet_kernel); 213 214static int __init loglevel(char *str) 215{ 216 get_option(&str, &console_loglevel); 217 return 1; 218} 219 
220__setup("loglevel=", loglevel); 221 222/* 223 * Unknown boot options get handed to init, unless they look like 224 * failed parameters 225 */ 226static int __init unknown_bootoption(char *param, char *val) 227{ 228 /* Change NUL term back to "=", to make "param" the whole string. */ 229 if (val) { 230 /* param=val or param="val"? */ 231 if (val == param+strlen(param)+1) 232 val[-1] = '='; 233 else if (val == param+strlen(param)+2) { 234 val[-2] = '='; 235 memmove(val-1, val, strlen(val)+1); 236 val--; 237 } else 238 BUG(); 239 } 240 241 /* Handle obsolete-style parameters */ 242 if (obsolete_checksetup(param)) 243 return 0; 244 245 /* 246 * Preemptive maintenance for "why didn't my mispelled command 247 * line work?" 248 */ 249 if (strchr(param, '.') && (!val || strchr(param, '.') < val)) { 250 printk(KERN_ERR "Unknown boot option `%s': ignoring\n", param); 251 return 0; 252 } 253 254 if (panic_later) 255 return 0; 256 257 if (val) { 258 /* Environment option */ 259 unsigned int i; 260 for (i = 0; envp_init[i]; i++) { 261 if (i == MAX_INIT_ENVS) { 262 panic_later = "Too many boot env vars at `%s'"; 263 panic_param = param; 264 } 265 if (!strncmp(param, envp_init[i], val - param)) 266 break; 267 } 268 envp_init[i] = param; 269 } else { 270 /* Command line option */ 271 unsigned int i; 272 for (i = 0; argv_init[i]; i++) { 273 if (i == MAX_INIT_ARGS) { 274 panic_later = "Too many boot init vars at `%s'"; 275 panic_param = param; 276 } 277 } 278 argv_init[i] = param; 279 } 280 return 0; 281} 282 283static int __init init_setup(char *str) 284{ 285 unsigned int i; 286 287 execute_command = str; 288 /* 289 * In case LILO is going to boot us with default command line, 290 * it prepends "auto" before the whole cmdline which makes 291 * the shell think it should execute a script with such name. 292 * So we ignore all arguments entered _before_ init=... 
[MJ] 293 */ 294 for (i = 1; i < MAX_INIT_ARGS; i++) 295 argv_init[i] = NULL; 296 return 1; 297} 298__setup("init=", init_setup); 299 300static int __init rdinit_setup(char *str) 301{ 302 unsigned int i; 303 304 ramdisk_execute_command = str; 305 /* See "auto" comment in init_setup */ 306 for (i = 1; i < MAX_INIT_ARGS; i++) 307 argv_init[i] = NULL; 308 return 1; 309} 310__setup("rdinit=", rdinit_setup); 311 312#ifndef CONFIG_SMP 313 314#ifdef CONFIG_X86_LOCAL_APIC 315static void __init smp_init(void) 316{ 317 APIC_init_uniprocessor(); 318} 319#else 320#define smp_init() do { } while (0) 321#endif 322 323static inline void setup_per_cpu_areas(void) { } 324static inline void smp_prepare_cpus(unsigned int maxcpus) { } 325 326#else 327 328#ifdef __GENERIC_PER_CPU 329unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; 330 331EXPORT_SYMBOL(__per_cpu_offset); 332 333static void __init setup_per_cpu_areas(void) 334{ 335 unsigned long size, i; 336 char *ptr; 337 unsigned long nr_possible_cpus = num_possible_cpus(); 338 339 /* Copy section for each CPU (we discard the original) */ 340 size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); 341#ifdef CONFIG_MODULES 342 if (size < PERCPU_ENOUGH_ROOM) 343 size = PERCPU_ENOUGH_ROOM; 344#endif 345 ptr = alloc_bootmem(size * nr_possible_cpus); 346 347 for_each_possible_cpu(i) { 348 __per_cpu_offset[i] = ptr - __per_cpu_start; 349 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 350 ptr += size; 351 } 352} 353#endif /* !__GENERIC_PER_CPU */ 354 355/* Called by boot processor to activate the rest. 
*/ 356static void __init smp_init(void) 357{ 358 unsigned int i; 359 360 /* FIXME: This should be done in userspace --RR */ 361 for_each_present_cpu(i) { 362 if (num_online_cpus() >= max_cpus) 363 break; 364 if (!cpu_online(i)) 365 cpu_up(i); 366 } 367 368 /* Any cleanup work */ 369 printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus()); 370 smp_cpus_done(max_cpus); 371#if 0 372 /* Get other processors into their bootup holding patterns. */ 373 374 smp_commence(); 375#endif 376} 377 378#endif 379 380/* 381 * We need to finalize in a non-__init function or else race conditions 382 * between the root thread and the init thread may cause start_kernel to 383 * be reaped by free_initmem before the root thread has proceeded to 384 * cpu_idle. 385 * 386 * gcc-3.4 accidentally inlines this function, so use noinline. 387 */ 388 389static void noinline rest_init(void) 390 __releases(kernel_lock) 391{ 392 kernel_thread(init, NULL, CLONE_FS | CLONE_SIGHAND); 393 numa_default_policy(); 394 unlock_kernel(); 395 396 /* 397 * The boot idle thread must execute schedule() 398 * at least one to get things moving: 399 */ 400 preempt_enable_no_resched(); 401 schedule(); 402 preempt_disable(); 403 404 /* Call into cpu_idle with preempt disabled */ 405 cpu_idle(); 406} 407 408/* Check for early params. */ 409static int __init do_early_param(char *param, char *val) 410{ 411 struct obs_kernel_param *p; 412 413 for (p = __setup_start; p < __setup_end; p++) { 414 if (p->early && strcmp(param, p->str) == 0) { 415 if (p->setup_func(val) != 0) 416 printk(KERN_WARNING 417 "Malformed early option '%s'\n", param); 418 } 419 } 420 /* We accept everything at this stage. */ 421 return 0; 422} 423 424/* Arch code calls this early on, or if not, just before other parsing. */ 425void __init parse_early_param(void) 426{ 427 static __initdata int done = 0; 428 static __initdata char tmp_cmdline[COMMAND_LINE_SIZE]; 429 430 if (done) 431 return; 432 433 /* All fall through to do_early_param. 
*/ 434 strlcpy(tmp_cmdline, saved_command_line, COMMAND_LINE_SIZE); 435 parse_args("early options", tmp_cmdline, NULL, 0, do_early_param); 436 done = 1; 437} 438 439/* 440 * Activate the first processor. 441 */ 442 443static void __init boot_cpu_init(void) 444{ 445 int cpu = smp_processor_id(); 446 /* Mark the boot cpu "present", "online" etc for SMP and UP case */ 447 cpu_set(cpu, cpu_online_map); 448 cpu_set(cpu, cpu_present_map); 449 cpu_set(cpu, cpu_possible_map); 450} 451 452void __init __attribute__((weak)) smp_setup_processor_id(void) 453{ 454} 455 456asmlinkage void __init start_kernel(void) 457{ 458 char * command_line; 459 extern struct kernel_param __start___param[], __stop___param[]; 460 461 smp_setup_processor_id(); 462 463 /* 464 * Need to run as early as possible, to initialize the 465 * lockdep hash: 466 */ 467 lockdep_init(); 468 469 local_irq_disable(); 470 early_boot_irqs_off(); 471 early_init_irq_lock_class(); 472 473/* 474 * Interrupts are still disabled. Do necessary setups, then 475 * enable them 476 */ 477 lock_kernel(); 478 boot_cpu_init(); 479 page_address_init(); 480 printk(KERN_NOTICE); 481 printk(linux_banner); 482 setup_arch(&command_line); 483 setup_per_cpu_areas(); 484 smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ 485 486 /* 487 * Set up the scheduler prior starting any interrupts (such as the 488 * timer interrupt). Full topology setup happens at smp_init() 489 * time - but meanwhile we still have a functioning scheduler. 490 */ 491 sched_init(); 492 /* 493 * Disable preemption - early bootup scheduling is extremely 494 * fragile until we cpu_idle() for the first time. 
495 */ 496 preempt_disable(); 497 build_all_zonelists(); 498 page_alloc_init(); 499 printk(KERN_NOTICE "Kernel command line: %s\n", saved_command_line); 500 parse_early_param(); 501 parse_args("Booting kernel", command_line, __start___param, 502 __stop___param - __start___param, 503 &unknown_bootoption); 504 sort_main_extable(); 505 unwind_init(); 506 trap_init(); 507 rcu_init(); 508 init_IRQ(); 509 pidhash_init(); 510 init_timers(); 511 hrtimers_init(); 512 softirq_init(); 513 timekeeping_init(); 514 time_init(); 515 profile_init(); 516 if (!irqs_disabled()) 517 printk("start_kernel(): bug: interrupts were enabled early\n"); 518 early_boot_irqs_on(); 519 local_irq_enable(); 520 521 /* 522 * HACK ALERT! This is early. We're enabling the console before 523 * we've done PCI setups etc, and console_init() must be aware of 524 * this. But we do want output early, in case something goes wrong. 525 */ 526 console_init(); 527 if (panic_later) 528 panic(panic_later, panic_param); 529 530 lockdep_info(); 531 532 /* 533 * Need to run this when irqs are enabled, because it wants 534 * to self-test [hard/soft]-irqs on/off lock inversion bugs 535 * too: 536 */ 537 locking_selftest(); 538 539#ifdef CONFIG_BLK_DEV_INITRD 540 if (initrd_start && !initrd_below_start_ok && 541 initrd_start < min_low_pfn << PAGE_SHIFT) { 542 printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - " 543 "disabling it.\n",initrd_start,min_low_pfn << PAGE_SHIFT); 544 initrd_start = 0; 545 } 546#endif 547 vfs_caches_init_early(); 548 cpuset_init_early(); 549 mem_init(); 550 kmem_cache_init(); 551 setup_per_cpu_pageset(); 552 numa_policy_init(); 553 if (late_time_init) 554 late_time_init(); 555 calibrate_delay(); 556 pidmap_init(); 557 pgtable_cache_init(); 558 prio_tree_init(); 559 anon_vma_init(); 560#ifdef CONFIG_X86 561 if (efi_enabled) 562 efi_enter_virtual_mode(); 563#endif 564 fork_init(num_physpages); 565 proc_caches_init(); 566 buffer_init(); 567 unnamed_dev_init(); 568 key_init(); 569 
security_init(); 570 vfs_caches_init(num_physpages); 571 radix_tree_init(); 572 signals_init(); 573 /* rootfs populating might need page-writeback */ 574 page_writeback_init(); 575#ifdef CONFIG_PROC_FS 576 proc_root_init(); 577#endif 578 cpuset_init(); 579 taskstats_init_early(); 580 delayacct_init(); 581 582 check_bugs(); 583 584 acpi_early_init(); /* before LAPIC and SMP init */ 585 586 /* Do the rest non-__init'ed, we're now alive */ 587 rest_init(); 588} 589 590static int __initdata initcall_debug; 591 592static int __init initcall_debug_setup(char *str) 593{ 594 initcall_debug = 1; 595 return 1; 596} 597__setup("initcall_debug", initcall_debug_setup); 598 599struct task_struct *child_reaper = &init_task; 600 601extern initcall_t __initcall_start[], __initcall_end[]; 602 603static void __init do_initcalls(void) 604{ 605 initcall_t *call; 606 int count = preempt_count(); 607 608 for (call = __initcall_start; call < __initcall_end; call++) { 609 char *msg = NULL; 610 char msgbuf[40]; 611 int result; 612 613 if (initcall_debug) { 614 printk("Calling initcall 0x%p", *call); 615 print_fn_descriptor_symbol(": %s()", 616 (unsigned long) *call); 617 printk("\n"); 618 } 619 620 result = (*call)(); 621 622 if (result && result != -ENODEV && initcall_debug) { 623 sprintf(msgbuf, "error code %d", result); 624 msg = msgbuf; 625 } 626 if (preempt_count() != count) { 627 msg = "preemption imbalance"; 628 preempt_count() = count; 629 } 630 if (irqs_disabled()) { 631 msg = "disabled interrupts"; 632 local_irq_enable(); 633 } 634 if (msg) { 635 printk(KERN_WARNING "initcall at 0x%p", *call); 636 print_fn_descriptor_symbol(": %s()", 637 (unsigned long) *call); 638 printk(": returned with %s\n", msg); 639 } 640 } 641 642 /* Make sure there is no pending stuff from the initcall sequence */ 643 flush_scheduled_work(); 644} 645 646/* 647 * Ok, the machine is now initialized. 
None of the devices 648 * have been touched yet, but the CPU subsystem is up and 649 * running, and memory and process management works. 650 * 651 * Now we can finally start doing some real work.. 652 */ 653static void __init do_basic_setup(void) 654{ 655 /* drivers will send hotplug events */ 656 init_workqueues(); 657 usermodehelper_init(); 658 driver_init(); 659 660#ifdef CONFIG_SYSCTL 661 sysctl_init(); 662#endif 663 664 do_initcalls(); 665} 666 667static void do_pre_smp_initcalls(void) 668{ 669 extern int spawn_ksoftirqd(void); 670#ifdef CONFIG_SMP 671 extern int migration_init(void); 672 673 migration_init(); 674#endif 675 spawn_ksoftirqd(); 676 spawn_softlockup_task(); 677} 678 679static void run_init_process(char *init_filename) 680{ 681 argv_init[0] = init_filename; 682 execve(init_filename, argv_init, envp_init); 683} 684 685static int init(void * unused) 686{ 687 lock_kernel(); 688 /* 689 * init can run on any cpu. 690 */ 691 set_cpus_allowed(current, CPU_MASK_ALL); 692 /* 693 * Tell the world that we're going to be the grim 694 * reaper of innocent orphaned children. 695 * 696 * We don't want people to have to make incorrect 697 * assumptions about where in the task array this 698 * can be found. 699 */ 700 child_reaper = current; 701 702 smp_prepare_cpus(max_cpus); 703 704 do_pre_smp_initcalls(); 705 706 smp_init(); 707 sched_init_smp(); 708 709 cpuset_init_smp(); 710 711 /* 712 * Do this before initcalls, because some drivers want to access 713 * firmware files. 714 */ 715 populate_rootfs(); 716 717 do_basic_setup(); 718 719 /* 720 * check if there is an early userspace init. If yes, let it do all 721 * the work 722 */ 723 724 if (!ramdisk_execute_command) 725 ramdisk_execute_command = "/init"; 726 727 if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) { 728 ramdisk_execute_command = NULL; 729 prepare_namespace(); 730 } 731 732 /* 733 * Ok, we have completed the initial bootup, and 734 * we're essentially up and running. 
Get rid of the 735 * initmem segments and start the user-mode stuff.. 736 */ 737 free_initmem(); 738 unlock_kernel(); 739 mark_rodata_ro(); 740 system_state = SYSTEM_RUNNING; 741 numa_default_policy(); 742 743 if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) 744 printk(KERN_WARNING "Warning: unable to open an initial console.\n"); 745 746 (void) sys_dup(0); 747 (void) sys_dup(0); 748 749 if (ramdisk_execute_command) { 750 run_init_process(ramdisk_execute_command); 751 printk(KERN_WARNING "Failed to execute %s\n", 752 ramdisk_execute_command); 753 } 754 755 /* 756 * We try each of these until one succeeds. 757 * 758 * The Bourne shell can be used instead of init if we are 759 * trying to recover a really broken machine. 760 */ 761 if (execute_command) { 762 run_init_process(execute_command); 763 printk(KERN_WARNING "Failed to execute %s. Attempting " 764 "defaults...\n", execute_command); 765 } 766 run_init_process("/sbin/init"); 767 run_init_process("/etc/init"); 768 run_init_process("/bin/init"); 769 run_init_process("/bin/sh"); 770 771 panic("No init found. Try passing init= option to kernel."); 772}