/*
 *  linux/kernel/timer.c
 *
 *  Kernel internal timers, basic process system calls
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
 *
 *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
 *              "A Kernel Model for Precision Timekeeping" by Dave Mills
 *  1998-12-24  Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
 *              serialize accesses to xtime/lost_ticks).
 *                              Copyright (C) 1998  Andrea Arcangeli
 *  1999-03-10  Improved NTP compatibility by Ulrich Windl
 *  2002-05-31	Move sys_sysinfo here and make its locking sane, Robert Love
 *  2000-10-05  Implemented scalable SMP per-CPU timer handling.
 *                              Copyright (C) 2000, 2001, 2002  Ingo Molnar
 *              Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
 */

#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/pid_namespace.h>
#include <linux/notifier.h>
#include <linux/thread_info.h>
#include <linux/time.h>
#include <linux/jiffies.h>
#include <linux/posix-timers.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
#include <linux/perf_event.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/div64.h>
#include <asm/timex.h>
#include <asm/io.h>

#define CREATE_TRACE_POINTS
#include <trace/events/timer.h>

u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;

EXPORT_SYMBOL(jiffies_64);

/*
 * per-CPU timer vector definitions:
 */
#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

struct tvec {
	struct list_head vec[TVN_SIZE];
};

struct tvec_root {
	struct list_head vec[TVR_SIZE];
};

struct tvec_base {
	spinlock_t lock;
	struct timer_list *running_timer;
	unsigned long timer_jiffies;
	unsigned long next_timer;
	struct tvec_root tv1;
	struct tvec tv2;
	struct tvec tv3;
	struct tvec tv4;
	struct tvec tv5;
} ____cacheline_aligned;

struct tvec_base boot_tvec_bases;
EXPORT_SYMBOL(boot_tvec_bases);
static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;

/*
 * Note that all tvec_bases are 2 byte aligned and the lower bit of
 * base in timer_list is guaranteed to be zero. Use the LSB as a flag
 * to indicate whether the timer is deferrable.
 */
#define TBASE_DEFERRABLE_FLAG		(0x1)

/* Functions below help us manage the 'deferrable' flag */
static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
{
	return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
}

static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
{
	return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
}

static inline void timer_set_deferrable(struct timer_list *timer)
{
	timer->base = ((struct tvec_base *)((unsigned long)(timer->base) |
				TBASE_DEFERRABLE_FLAG));
}

static inline void
timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
{
	timer->base = (struct tvec_base *)((unsigned long)(new_base) |
				      tbase_get_deferrable(timer->base));
}

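/*
 * Illustrative sketch (not part of this file): because every tvec_base is
 * at least 2-byte aligned, bit 0 of timer->base is free to carry the
 * deferrable flag. For a hypothetical timer whose real base lives at
 * address B (bit 0 clear):
 *
 *	timer_set_deferrable(timer);
 *	// timer->base == (struct tvec_base *)(B | TBASE_DEFERRABLE_FLAG)
 *	tbase_get_deferrable(timer->base);	// -> 1
 *	tbase_get_base(timer->base);		// -> B, the usable pointer
 *
 * timer_set_base() above ORs the old flag bit back into the new pointer,
 * so a deferrable timer stays deferrable when it moves between bases.
 */
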
static unsigned long round_jiffies_common(unsigned long j, int cpu,
		bool force_up)
{
	int rem;
	unsigned long original = j;

	/*
	 * We don't want all cpus firing their timers at once hitting the
	 * same lock or cachelines, so we skew each extra cpu with an extra
	 * 3 jiffies. This 3 jiffies came originally from the mm/ code which
	 * already did this.
	 * The skew is done by adding 3*cpunr, then rounding, then subtracting
	 * this extra offset again.
	 */
	j += cpu * 3;

	rem = j % HZ;

	/*
	 * If the target jiffy is just after a whole second (which can happen
	 * due to delays of the timer irq, long irq off times etc etc) then
	 * we should round down to the whole second, not up. Use 1/4th second
	 * as cutoff for this rounding as an extreme upper bound for this.
	 * But never round down if @force_up is set.
	 */
	if (rem < HZ/4 && !force_up) /* round down */
		j = j - rem;
	else /* round up */
		j = j - rem + HZ;

	/* now that we have rounded, subtract the extra skew again */
	j -= cpu * 3;

	if (j <= jiffies) /* rounding ate our timeout entirely; */
		return original;
	return j;
}

/**
 * __round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * __round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The exact rounding is skewed for each processor to avoid all
 * processors firing at the exact same time, which could lead
 * to lock contention or spurious cache line bouncing.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long __round_jiffies(unsigned long j, int cpu)
{
	return round_jiffies_common(j, cpu, false);
}
EXPORT_SYMBOL_GPL(__round_jiffies);

/**
 * __round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * __round_jiffies_relative() rounds a time delta in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The exact rounding is skewed for each processor to avoid all
 * processors firing at the exact same time, which could lead
 * to lock contention or spurious cache line bouncing.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long __round_jiffies_relative(unsigned long j, int cpu)
{
	unsigned long j0 = jiffies;

	/* Use j0 because jiffies might change while we run */
	return round_jiffies_common(j + j0, cpu, false) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_relative);

/**
 * round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies(unsigned long j)
{
	return round_jiffies_common(j, raw_smp_processor_id(), false);
}
EXPORT_SYMBOL_GPL(round_jiffies);

/**
 * round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * round_jiffies_relative() rounds a time delta in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies_relative(unsigned long j)
{
	return __round_jiffies_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_relative);

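/*
 * Illustrative usage sketch (not part of this file), with hypothetical
 * foo_* names: a once-per-second housekeeping timer that does not care
 * about sub-second accuracy can batch its wakeups with other such timers
 * by rounding the relative timeout each time it re-arms itself:
 *
 *	static struct timer_list foo_poll_timer;
 *
 *	static void foo_poll(unsigned long data)
 *	{
 *		// ... light periodic housekeeping ...
 *		mod_timer(&foo_poll_timer,
 *			  jiffies + round_jiffies_relative(HZ));
 *	}
 *
 * The per-CPU skew in round_jiffies_common() keeps such timers from all
 * landing on the exact same jiffy on every CPU at once.
 */
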
/**
 * __round_jiffies_up - function to round jiffies up to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * This is the same as __round_jiffies() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long __round_jiffies_up(unsigned long j, int cpu)
{
	return round_jiffies_common(j, cpu, true);
}
EXPORT_SYMBOL_GPL(__round_jiffies_up);

/**
 * __round_jiffies_up_relative - function to round jiffies up to a full second
 * @j: the time in (relative) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * This is the same as __round_jiffies_relative() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
{
	unsigned long j0 = jiffies;

	/* Use j0 because jiffies might change while we run */
	return round_jiffies_common(j + j0, cpu, true) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);

/**
 * round_jiffies_up - function to round jiffies up to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * This is the same as round_jiffies() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long round_jiffies_up(unsigned long j)
{
	return round_jiffies_common(j, raw_smp_processor_id(), true);
}
EXPORT_SYMBOL_GPL(round_jiffies_up);

/**
 * round_jiffies_up_relative - function to round jiffies up to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * This is the same as round_jiffies_relative() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long round_jiffies_up_relative(unsigned long j)
{
	return __round_jiffies_up_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_up_relative);


static inline void set_running_timer(struct tvec_base *base,
					struct timer_list *timer)
{
#ifdef CONFIG_SMP
	base->running_timer = timer;
#endif
}

static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
	unsigned long expires = timer->expires;
	unsigned long idx = expires - base->timer_jiffies;
	struct list_head *vec;

	if (idx < TVR_SIZE) {
		int i = expires & TVR_MASK;
		vec = base->tv1.vec + i;
	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
		int i = (expires >> TVR_BITS) & TVN_MASK;
		vec = base->tv2.vec + i;
	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
		vec = base->tv3.vec + i;
	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
		vec = base->tv4.vec + i;
	} else if ((signed long) idx < 0) {
		/*
		 * Can happen if you add a timer with expires == jiffies,
		 * or you set a timer to go off in the past
		 */
		vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
	} else {
		int i;
		/* If the timeout is larger than 0xffffffff on 64-bit
		 * architectures then we use the maximum timeout:
		 */
		if (idx > 0xffffffffUL) {
			idx = 0xffffffffUL;
			expires = idx + base->timer_jiffies;
		}
		i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
		vec = base->tv5.vec + i;
	}
	/*
	 * Timers are FIFO:
	 */
	list_add_tail(&timer->entry, vec);
}

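/*
 * Worked example (illustration only) of the bucket selection in
 * internal_add_timer() for the !CONFIG_BASE_SMALL case
 * (TVR_BITS = 8, TVN_BITS = 6), with idx = expires - base->timer_jiffies:
 *
 *	idx < 2^8	-> tv1, slot  expires        & 255
 *	idx < 2^14	-> tv2, slot (expires >>  8) &  63
 *	idx < 2^20	-> tv3, slot (expires >> 14) &  63
 *	idx < 2^26	-> tv4, slot (expires >> 20) &  63
 *	otherwise	-> tv5, slot (expires >> 26) &  63
 *
 * E.g. with timer_jiffies = 1000 and expires = 1300, idx = 300 >= 256, so
 * the timer lands in tv2 slot (1300 >> 8) & 63 = 5; it is cascaded back
 * into tv1 once timer_jiffies reaches 1280 and finally runs at 1300.
 */
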
#ifdef CONFIG_TIMER_STATS
void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
{
	if (timer->start_site)
		return;

	timer->start_site = addr;
	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
	timer->start_pid = current->pid;
}

static void timer_stats_account_timer(struct timer_list *timer)
{
	unsigned int flag = 0;

	if (likely(!timer->start_site))
		return;
	if (unlikely(tbase_get_deferrable(timer->base)))
		flag |= TIMER_STATS_FLAG_DEFERRABLE;

	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
				 timer->function, timer->start_comm, flag);
}

#else
static void timer_stats_account_timer(struct timer_list *timer) {}
#endif

#ifdef CONFIG_DEBUG_OBJECTS_TIMERS

static struct debug_obj_descr timer_debug_descr;

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static int timer_fixup_init(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		del_timer_sync(timer);
		debug_object_init(timer, &timer_debug_descr);
		return 1;
	default:
		return 0;
	}
}

/*
 * fixup_activate is called when:
 * - an active object is activated
 * - an unknown object is activated (might be a statically initialized object)
 */
static int timer_fixup_activate(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {

	case ODEBUG_STATE_NOTAVAILABLE:
		/*
		 * This is not really a fixup. The timer was
		 * statically initialized. We just make sure that it
		 * is tracked in the object tracker.
		 */
		if (timer->entry.next == NULL &&
		    timer->entry.prev == TIMER_ENTRY_STATIC) {
			debug_object_init(timer, &timer_debug_descr);
			debug_object_activate(timer, &timer_debug_descr);
			return 0;
		} else {
			WARN_ON_ONCE(1);
		}
		return 0;

	case ODEBUG_STATE_ACTIVE:
		WARN_ON(1);

	default:
		return 0;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static int timer_fixup_free(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		del_timer_sync(timer);
		debug_object_free(timer, &timer_debug_descr);
		return 1;
	default:
		return 0;
	}
}

static struct debug_obj_descr timer_debug_descr = {
	.name		= "timer_list",
	.fixup_init	= timer_fixup_init,
	.fixup_activate	= timer_fixup_activate,
	.fixup_free	= timer_fixup_free,
};

static inline void debug_timer_init(struct timer_list *timer)
{
	debug_object_init(timer, &timer_debug_descr);
}

static inline void debug_timer_activate(struct timer_list *timer)
{
	debug_object_activate(timer, &timer_debug_descr);
}

static inline void debug_timer_deactivate(struct timer_list *timer)
{
	debug_object_deactivate(timer, &timer_debug_descr);
}

static inline void debug_timer_free(struct timer_list *timer)
{
	debug_object_free(timer, &timer_debug_descr);
}

static void __init_timer(struct timer_list *timer,
			 const char *name,
			 struct lock_class_key *key);

void init_timer_on_stack_key(struct timer_list *timer,
			     const char *name,
			     struct lock_class_key *key)
{
	debug_object_init_on_stack(timer, &timer_debug_descr);
	__init_timer(timer, name, key);
}
EXPORT_SYMBOL_GPL(init_timer_on_stack_key);

void destroy_timer_on_stack(struct timer_list *timer)
{
	debug_object_free(timer, &timer_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_timer_on_stack);

#else
static inline void debug_timer_init(struct timer_list *timer) { }
static inline void debug_timer_activate(struct timer_list *timer) { }
static inline void debug_timer_deactivate(struct timer_list *timer) { }
#endif

static inline void debug_init(struct timer_list *timer)
{
	debug_timer_init(timer);
	trace_timer_init(timer);
}

static inline void
debug_activate(struct timer_list *timer, unsigned long expires)
{
	debug_timer_activate(timer);
	trace_timer_start(timer, expires);
}

static inline void debug_deactivate(struct timer_list *timer)
{
	debug_timer_deactivate(timer);
	trace_timer_cancel(timer);
}

static void __init_timer(struct timer_list *timer,
			 const char *name,
			 struct lock_class_key *key)
{
	timer->entry.next = NULL;
	timer->base = __raw_get_cpu_var(tvec_bases);
#ifdef CONFIG_TIMER_STATS
	timer->start_site = NULL;
	timer->start_pid = -1;
	memset(timer->start_comm, 0, TASK_COMM_LEN);
#endif
	lockdep_init_map(&timer->lockdep_map, name, key, 0);
}

/**
 * init_timer_key - initialize a timer
 * @timer: the timer to be initialized
 * @name: name of the timer
 * @key: lockdep class key of the fake lock used for tracking timer
 *       sync lock dependencies
 *
 * init_timer_key() must be done to a timer prior to calling *any* of the
 * other timer functions.
 */
void init_timer_key(struct timer_list *timer,
		    const char *name,
		    struct lock_class_key *key)
{
	debug_init(timer);
	__init_timer(timer, name, key);
}
EXPORT_SYMBOL(init_timer_key);

void init_timer_deferrable_key(struct timer_list *timer,
			       const char *name,
			       struct lock_class_key *key)
{
	init_timer_key(timer, name, key);
	timer_set_deferrable(timer);
}
EXPORT_SYMBOL(init_timer_deferrable_key);

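/*
 * Illustrative usage sketch (not part of this file), with hypothetical
 * bar_* names: drivers normally reach init_timer_key() through the
 * init_timer()/setup_timer() wrappers in <linux/timer.h>, and use
 * init_timer_deferrable() when the callback should not wake an idle CPU:
 *
 *	static void bar_timeout(unsigned long data)
 *	{
 *		struct bar_dev *bar = (struct bar_dev *)data;
 *		// ... handle the expiry ...
 *	}
 *
 *	static void bar_start(struct bar_dev *bar)
 *	{
 *		setup_timer(&bar->timer, bar_timeout, (unsigned long)bar);
 *		mod_timer(&bar->timer, jiffies + 5 * HZ);
 *	}
 *
 * On-stack timers go through init_timer_on_stack()/setup_timer_on_stack()
 * and must be paired with destroy_timer_on_stack(), as schedule_timeout()
 * below demonstrates.
 */
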
static inline void detach_timer(struct timer_list *timer,
				int clear_pending)
{
	struct list_head *entry = &timer->entry;

	debug_deactivate(timer);

	__list_del(entry->prev, entry->next);
	if (clear_pending)
		entry->next = NULL;
	entry->prev = LIST_POISON2;
}

/*
 * We are using hashed locking: holding per_cpu(tvec_bases).lock
 * means that all timers which are tied to this base via timer->base are
 * locked, and the base itself is locked too.
 *
 * So __run_timers/migrate_timers can safely modify all timers which could
 * be found on ->tvX lists.
 *
 * When the timer's base is locked, and the timer removed from list, it is
 * possible to set timer->base = NULL and drop the lock: the timer remains
 * locked.
 */
static struct tvec_base *lock_timer_base(struct timer_list *timer,
					 unsigned long *flags)
	__acquires(timer->base->lock)
{
	struct tvec_base *base;

	for (;;) {
		struct tvec_base *prelock_base = timer->base;
		base = tbase_get_base(prelock_base);
		if (likely(base != NULL)) {
			spin_lock_irqsave(&base->lock, *flags);
			if (likely(prelock_base == timer->base))
				return base;
			/* The timer has migrated to another CPU */
			spin_unlock_irqrestore(&base->lock, *flags);
		}
		cpu_relax();
	}
}

static inline int
__mod_timer(struct timer_list *timer, unsigned long expires,
	    bool pending_only, int pinned)
{
	struct tvec_base *base, *new_base;
	unsigned long flags;
	int ret = 0, cpu;

	timer_stats_timer_set_start_info(timer);
	BUG_ON(!timer->function);

	base = lock_timer_base(timer, &flags);

	if (timer_pending(timer)) {
		detach_timer(timer, 0);
		if (timer->expires == base->next_timer &&
		    !tbase_get_deferrable(timer->base))
			base->next_timer = base->timer_jiffies;
		ret = 1;
	} else {
		if (pending_only)
			goto out_unlock;
	}

	debug_activate(timer, expires);

	cpu = smp_processor_id();

#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
		int preferred_cpu = get_nohz_load_balancer();

		if (preferred_cpu >= 0)
			cpu = preferred_cpu;
	}
#endif
	new_base = per_cpu(tvec_bases, cpu);

	if (base != new_base) {
		/*
		 * We are trying to schedule the timer on the local CPU.
		 * However we can't change timer's base while it is running,
		 * otherwise del_timer_sync() can't detect that the timer's
		 * handler has not finished yet. This also guarantees that
		 * the timer is serialized wrt itself.
		 */
		if (likely(base->running_timer != timer)) {
			/* See the comment in lock_timer_base() */
			timer_set_base(timer, NULL);
			spin_unlock(&base->lock);
			base = new_base;
			spin_lock(&base->lock);
			timer_set_base(timer, base);
		}
	}

	timer->expires = expires;
	if (time_before(timer->expires, base->next_timer) &&
	    !tbase_get_deferrable(timer->base))
		base->next_timer = timer->expires;
	internal_add_timer(base, timer);

out_unlock:
	spin_unlock_irqrestore(&base->lock, flags);

	return ret;
}

/**
 * mod_timer_pending - modify a pending timer's timeout
 * @timer: the pending timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer_pending() is the same for pending timers as mod_timer(),
 * but will not re-activate and modify already deleted timers.
 *
 * It is useful for unserialized use of timers.
 */
int mod_timer_pending(struct timer_list *timer, unsigned long expires)
{
	return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
}
EXPORT_SYMBOL(mod_timer_pending);

/**
 * mod_timer - modify a timer's timeout
 * @timer: the timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer() is a more efficient way to update the expires field of an
 * active timer (if the timer is inactive it will be activated)
 *
 * mod_timer(timer, expires) is equivalent to:
 *
 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 *
 * Note that if there are multiple unserialized concurrent users of the
 * same timer, then mod_timer() is the only safe way to modify the timeout,
 * since add_timer() cannot modify an already running timer.
 *
 * The function returns whether it has modified a pending timer or not.
 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
 * active timer returns 1.)
 */
int mod_timer(struct timer_list *timer, unsigned long expires)
{
	/*
	 * This is a common optimization triggered by the
	 * networking code - if the timer is re-modified
	 * to be the same thing then just return:
	 */
	if (timer_pending(timer) && timer->expires == expires)
		return 1;

	return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
}
EXPORT_SYMBOL(mod_timer);

/**
 * mod_timer_pinned - modify a timer's timeout
 * @timer: the timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer_pinned() is a way to update the expires field of an
 * active timer (if the timer is inactive it will be activated)
 * while not allowing the timer to be migrated to a different CPU.
 *
 * mod_timer_pinned(timer, expires) is equivalent to:
 *
 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 */
int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
{
	if (timer->expires == expires && timer_pending(timer))
		return 1;

	return __mod_timer(timer, expires, false, TIMER_PINNED);
}
EXPORT_SYMBOL(mod_timer_pinned);

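/*
 * Illustrative usage sketch (not part of this file), with hypothetical
 * baz_* names: mod_timer() is the usual way to "kick" an inactivity
 * timeout from a hot path, since it works whether or not the timer is
 * currently pending:
 *
 *	// called on every completion; pushes the deadline out again
 *	static void baz_activity(struct baz_dev *baz)
 *	{
 *		mod_timer(&baz->idle_timer,
 *			  jiffies + msecs_to_jiffies(BAZ_IDLE_MS));
 *	}
 *
 * The early return in mod_timer() for an unchanged expiry (the networking
 * optimization noted above) keeps repeated kicks within the same jiffy
 * cheap.
 */
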
/**
 * add_timer - start a timer
 * @timer: the timer to be added
 *
 * The kernel will do a ->function(->data) callback from the
 * timer interrupt at the ->expires point in the future. The
 * current time is 'jiffies'.
 *
 * The timer's ->expires, ->function (and if the handler uses it, ->data)
 * fields must be set prior to calling this function.
 *
 * Timers with an ->expires field in the past will be executed in the next
 * timer tick.
 */
void add_timer(struct timer_list *timer)
{
	BUG_ON(timer_pending(timer));
	mod_timer(timer, timer->expires);
}
EXPORT_SYMBOL(add_timer);

/**
 * add_timer_on - start a timer on a particular CPU
 * @timer: the timer to be added
 * @cpu: the CPU to start it on
 *
 * This is not very scalable on SMP. Double adds are not possible.
 */
void add_timer_on(struct timer_list *timer, int cpu)
{
	struct tvec_base *base = per_cpu(tvec_bases, cpu);
	unsigned long flags;

	timer_stats_timer_set_start_info(timer);
	BUG_ON(timer_pending(timer) || !timer->function);
	spin_lock_irqsave(&base->lock, flags);
	timer_set_base(timer, base);
	debug_activate(timer, timer->expires);
	if (time_before(timer->expires, base->next_timer) &&
	    !tbase_get_deferrable(timer->base))
		base->next_timer = timer->expires;
	internal_add_timer(base, timer);
	/*
	 * Check whether the other CPU is idle and needs to be
	 * triggered to reevaluate the timer wheel when nohz is
	 * active. We are protected against the other CPU fiddling
	 * with the timer by holding the timer base lock. This also
	 * makes sure that a CPU on the way to idle can not evaluate
	 * the timer wheel.
	 */
	wake_up_idle_cpu(cpu);
	spin_unlock_irqrestore(&base->lock, flags);
}
EXPORT_SYMBOL_GPL(add_timer_on);

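/*
 * Illustrative usage sketch (not part of this file), with hypothetical
 * qux_* names: add_timer_on() is for the rare case where the callback must
 * run on one particular CPU, e.g. a per-CPU sampling timer:
 *
 *	static DEFINE_PER_CPU(struct timer_list, qux_timer);
 *
 *	static void qux_arm_cpu(int cpu)
 *	{
 *		struct timer_list *t = &per_cpu(qux_timer, cpu);
 *
 *		setup_timer(t, qux_sample, (unsigned long)cpu);
 *		t->expires = round_jiffies(jiffies + HZ);
 *		add_timer_on(t, cpu);
 *	}
 *
 * Note the BUG_ON() above: unlike mod_timer(), add_timer_on() must never
 * be called on a timer that is already pending.
 */
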
/**
 * del_timer - deactivate a timer.
 * @timer: the timer to be deactivated
 *
 * del_timer() deactivates a timer - this works on both active and inactive
 * timers.
 *
 * The function returns whether it has deactivated a pending timer or not.
 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
 * active timer returns 1.)
 */
int del_timer(struct timer_list *timer)
{
	struct tvec_base *base;
	unsigned long flags;
	int ret = 0;

	timer_stats_timer_clear_start_info(timer);
	if (timer_pending(timer)) {
		base = lock_timer_base(timer, &flags);
		if (timer_pending(timer)) {
			detach_timer(timer, 1);
			if (timer->expires == base->next_timer &&
			    !tbase_get_deferrable(timer->base))
				base->next_timer = base->timer_jiffies;
			ret = 1;
		}
		spin_unlock_irqrestore(&base->lock, flags);
	}

	return ret;
}
EXPORT_SYMBOL(del_timer);

#ifdef CONFIG_SMP
/**
 * try_to_del_timer_sync - Try to deactivate a timer
 * @timer: the timer to be deactivated
 *
 * This function tries to deactivate a timer. Upon successful (ret >= 0)
 * exit the timer is not queued and the handler is not running on any CPU.
 *
 * It must not be called from interrupt contexts.
 */
int try_to_del_timer_sync(struct timer_list *timer)
{
	struct tvec_base *base;
	unsigned long flags;
	int ret = -1;

	base = lock_timer_base(timer, &flags);

	if (base->running_timer == timer)
		goto out;

	timer_stats_timer_clear_start_info(timer);
	ret = 0;
	if (timer_pending(timer)) {
		detach_timer(timer, 1);
		if (timer->expires == base->next_timer &&
		    !tbase_get_deferrable(timer->base))
			base->next_timer = base->timer_jiffies;
		ret = 1;
	}
out:
	spin_unlock_irqrestore(&base->lock, flags);

	return ret;
}
EXPORT_SYMBOL(try_to_del_timer_sync);

/**
 * del_timer_sync - deactivate a timer and wait for the handler to finish.
 * @timer: the timer to be deactivated
 *
 * This function only differs from del_timer() on SMP: besides deactivating
 * the timer it also makes sure the handler has finished executing on other
 * CPUs.
 *
 * Synchronization rules: Callers must prevent restarting of the timer,
 * otherwise this function is meaningless. It must not be called from
 * interrupt contexts. The caller must not hold locks which would prevent
 * completion of the timer's handler. The timer's handler must not call
 * add_timer_on(). Upon exit the timer is not queued and the handler is
 * not running on any CPU.
 *
 * The function returns whether it has deactivated a pending timer or not.
 */
int del_timer_sync(struct timer_list *timer)
{
#ifdef CONFIG_LOCKDEP
	unsigned long flags;

	local_irq_save(flags);
	lock_map_acquire(&timer->lockdep_map);
	lock_map_release(&timer->lockdep_map);
	local_irq_restore(flags);
#endif

	for (;;) {
		int ret = try_to_del_timer_sync(timer);
		if (ret >= 0)
			return ret;
		cpu_relax();
	}
}
EXPORT_SYMBOL(del_timer_sync);
#endif

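/*
 * Illustrative teardown sketch (not part of this file), with hypothetical
 * foo_* names: when destroying an object that owns a timer, first make
 * sure nothing can re-arm it, then call del_timer_sync() so the handler
 * cannot still be running when the memory is freed:
 *
 *	static void foo_shutdown(struct foo_dev *foo)
 *	{
 *		foo->closing = 1;		// handler checks this, won't re-arm
 *		del_timer_sync(&foo->timer);	// not queued, handler finished
 *		kfree(foo);
 *	}
 *
 * del_timer_sync() busy-waits (cpu_relax() in the loop above) for a running
 * handler, so it must not be called from interrupt context, and the caller
 * must not hold any lock the handler might need.
 */
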
static int cascade(struct tvec_base *base, struct tvec *tv, int index)
{
	/* cascade all the timers from tv up one level */
	struct timer_list *timer, *tmp;
	struct list_head tv_list;

	list_replace_init(tv->vec + index, &tv_list);

	/*
	 * We are removing _all_ timers from the list, so we
	 * don't have to detach them individually.
	 */
	list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
		BUG_ON(tbase_get_base(timer->base) != base);
		internal_add_timer(base, timer);
	}

	return index;
}

#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)

/**
 * __run_timers - run all expired timers (if any) on this CPU.
 * @base: the timer vector to be processed.
 *
 * This function cascades all vectors and executes all expired timer
 * vectors.
 */
static inline void __run_timers(struct tvec_base *base)
{
	struct timer_list *timer;

	spin_lock_irq(&base->lock);
	while (time_after_eq(jiffies, base->timer_jiffies)) {
		struct list_head work_list;
		struct list_head *head = &work_list;
		int index = base->timer_jiffies & TVR_MASK;

		/*
		 * Cascade timers:
		 */
		if (!index &&
			(!cascade(base, &base->tv2, INDEX(0))) &&
				(!cascade(base, &base->tv3, INDEX(1))) &&
					!cascade(base, &base->tv4, INDEX(2)))
			cascade(base, &base->tv5, INDEX(3));
		++base->timer_jiffies;
		list_replace_init(base->tv1.vec + index, &work_list);
		while (!list_empty(head)) {
			void (*fn)(unsigned long);
			unsigned long data;

			timer = list_first_entry(head, struct timer_list, entry);
			fn = timer->function;
			data = timer->data;

			timer_stats_account_timer(timer);

			set_running_timer(base, timer);
			detach_timer(timer, 1);

			spin_unlock_irq(&base->lock);
			{
				int preempt_count = preempt_count();

#ifdef CONFIG_LOCKDEP
				/*
				 * It is permissible to free the timer from
				 * inside the function that is called from
				 * it; we need to take that into account for
				 * lockdep too. To avoid bogus "held lock
				 * freed" warnings as well as problems when
				 * looking into timer->lockdep_map, make a
				 * copy and use that here.
				 */
				struct lockdep_map lockdep_map =
					timer->lockdep_map;
#endif
				/*
				 * Couple the lock chain with the lock chain at
				 * del_timer_sync() by acquiring the lock_map
				 * around the fn() call here and in
				 * del_timer_sync().
				 */
				lock_map_acquire(&lockdep_map);

				trace_timer_expire_entry(timer);
				fn(data);
				trace_timer_expire_exit(timer);

				lock_map_release(&lockdep_map);

				if (preempt_count != preempt_count()) {
					printk(KERN_ERR "huh, entered %p "
					       "with preempt_count %08x, exited"
					       " with %08x?\n",
					       fn, preempt_count,
					       preempt_count());
					BUG();
				}
			}
			spin_lock_irq(&base->lock);
		}
	}
	set_running_timer(base, NULL);
	spin_unlock_irq(&base->lock);
}

#ifdef CONFIG_NO_HZ
/*
 * Find out when the next timer event is due to happen. This
 * is used on S/390 to stop all activity when a CPU is idle.
 * This function needs to be called with interrupts disabled.
 */
static unsigned long __next_timer_interrupt(struct tvec_base *base)
{
	unsigned long timer_jiffies = base->timer_jiffies;
	unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
	int index, slot, array, found = 0;
	struct timer_list *nte;
	struct tvec *varray[4];

	/* Look for timer events in tv1. */
	index = slot = timer_jiffies & TVR_MASK;
	do {
		list_for_each_entry(nte, base->tv1.vec + slot, entry) {
			if (tbase_get_deferrable(nte->base))
				continue;

			found = 1;
			expires = nte->expires;
			/* Look at the cascade bucket(s)? */
			if (!index || slot < index)
				goto cascade;
			return expires;
		}
		slot = (slot + 1) & TVR_MASK;
	} while (slot != index);

cascade:
	/* Calculate the next cascade event */
	if (index)
		timer_jiffies += TVR_SIZE - index;
	timer_jiffies >>= TVR_BITS;

	/* Check tv2-tv5. */
	varray[0] = &base->tv2;
	varray[1] = &base->tv3;
	varray[2] = &base->tv4;
	varray[3] = &base->tv5;

	for (array = 0; array < 4; array++) {
		struct tvec *varp = varray[array];

		index = slot = timer_jiffies & TVN_MASK;
		do {
			list_for_each_entry(nte, varp->vec + slot, entry) {
				if (tbase_get_deferrable(nte->base))
					continue;

				found = 1;
				if (time_before(nte->expires, expires))
					expires = nte->expires;
			}
			/*
			 * Do we still search for the first timer or are
			 * we looking up the cascade buckets?
			 */
			if (found) {
				/* Look at the cascade bucket(s)? */
				if (!index || slot < index)
					break;
				return expires;
			}
			slot = (slot + 1) & TVN_MASK;
		} while (slot != index);

		if (index)
			timer_jiffies += TVN_SIZE - index;
		timer_jiffies >>= TVN_BITS;
	}
	return expires;
}

/*
 * Check whether the next hrtimer event is before the next timer wheel
 * event:
 */
static unsigned long cmp_next_hrtimer_event(unsigned long now,
					    unsigned long expires)
{
	ktime_t hr_delta = hrtimer_get_next_event();
	struct timespec tsdelta;
	unsigned long delta;

	if (hr_delta.tv64 == KTIME_MAX)
		return expires;

	/*
	 * Expired timer available, let it expire in the next tick
	 */
	if (hr_delta.tv64 <= 0)
		return now + 1;

	tsdelta = ktime_to_timespec(hr_delta);
	delta = timespec_to_jiffies(&tsdelta);

	/*
	 * Limit the delta to the max value, which is checked in
	 * tick_nohz_stop_sched_tick():
	 */
	if (delta > NEXT_TIMER_MAX_DELTA)
		delta = NEXT_TIMER_MAX_DELTA;

	/*
	 * Take rounding errors into account and make sure that it
	 * expires in the next tick. Otherwise we go into an endless
	 * ping pong due to tick_nohz_stop_sched_tick() retriggering
	 * the timer softirq.
	 */
	if (delta < 1)
		delta = 1;
	now += delta;
	if (time_before(now, expires))
		return now;
	return expires;
}

/**
 * get_next_timer_interrupt - return the jiffy of the next pending timer
 * @now: current time (in jiffies)
 */
unsigned long get_next_timer_interrupt(unsigned long now)
{
	struct tvec_base *base = __get_cpu_var(tvec_bases);
	unsigned long expires;

	spin_lock(&base->lock);
	if (time_before_eq(base->next_timer, base->timer_jiffies))
		base->next_timer = __next_timer_interrupt(base);
	expires = base->next_timer;
	spin_unlock(&base->lock);

	if (time_before_eq(expires, now))
		return now;

	return cmp_next_hrtimer_event(now, expires);
}
#endif

/*
 * Called from the timer interrupt handler to charge one tick to the current
 * process.  user_tick is 1 if the tick is user time, 0 for system.
 */
void update_process_times(int user_tick)
{
	struct task_struct *p = current;
	int cpu = smp_processor_id();

	/* Note: this timer irq context must be accounted for as well. */
	account_process_tick(p, user_tick);
	run_local_timers();
	rcu_check_callbacks(cpu, user_tick);
	printk_tick();
	perf_event_do_pending();
	scheduler_tick();
	run_posix_cpu_timers(p);
}

/*
 * This function runs timers and the timer-tq in bottom half context.
 */
static void run_timer_softirq(struct softirq_action *h)
{
	struct tvec_base *base = __get_cpu_var(tvec_bases);

	hrtimer_run_pending();

	if (time_after_eq(jiffies, base->timer_jiffies))
		__run_timers(base);
}

/*
 * Called by the local, per-CPU timer interrupt on SMP.
 */
void run_local_timers(void)
{
	hrtimer_run_queues();
	raise_softirq(TIMER_SOFTIRQ);
	softlockup_tick();
}

/*
 * The 64-bit jiffies value is not atomic - you MUST NOT read it
 * without sampling the sequence number in xtime_lock.
 * jiffies is defined in the linker script...
 */

void do_timer(unsigned long ticks)
{
	jiffies_64 += ticks;
	update_wall_time();
	calc_global_load();
}

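/*
 * Illustrative sketch (not part of this file): 32-bit readers should use
 * the get_jiffies_64() helper declared in <linux/jiffies.h>, which retries
 * under the xtime_lock seqlock instead of reading jiffies_64 directly:
 *
 *	u64 now = get_jiffies_64();	// safe against torn reads on 32-bit
 *
 * Reading the plain 'jiffies' word is always safe; only the full 64-bit
 * value needs the seqlock.
 */
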
#ifdef __ARCH_WANT_SYS_ALARM

/*
 * For backwards compatibility?  This can be done in libc so Alpha
 * and all newer ports shouldn't need it.
 */
SYSCALL_DEFINE1(alarm, unsigned int, seconds)
{
	return alarm_setitimer(seconds);
}

#endif

#ifndef __alpha__

/*
 * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
 * should be moved into arch/i386 instead?
 */

/**
 * sys_getpid - return the thread group id of the current process
 *
 * Note, despite the name, this returns the tgid not the pid.  The tgid and
 * the pid are identical unless CLONE_THREAD was specified on clone() in
 * which case the tgid is the same in all threads of the same group.
 *
 * This is SMP safe as current->tgid does not change.
 */
SYSCALL_DEFINE0(getpid)
{
	return task_tgid_vnr(current);
}

/*
 * Accessing ->real_parent is not SMP-safe, it could
 * change from under us. However, we can use a stale
 * value of ->real_parent under rcu_read_lock(), see
 * release_task()->call_rcu(delayed_put_task_struct).
 */
SYSCALL_DEFINE0(getppid)
{
	int pid;

	rcu_read_lock();
	pid = task_tgid_vnr(current->real_parent);
	rcu_read_unlock();

	return pid;
}

SYSCALL_DEFINE0(getuid)
{
	/* Only we change this so SMP safe */
	return current_uid();
}

SYSCALL_DEFINE0(geteuid)
{
	/* Only we change this so SMP safe */
	return current_euid();
}

SYSCALL_DEFINE0(getgid)
{
	/* Only we change this so SMP safe */
	return current_gid();
}

SYSCALL_DEFINE0(getegid)
{
	/* Only we change this so SMP safe */
	return current_egid();
}

#endif

static void process_timeout(unsigned long __data)
{
	wake_up_process((struct task_struct *)__data);
}

/**
 * schedule_timeout - sleep until timeout
 * @timeout: timeout value in jiffies
 *
 * Make the current task sleep until @timeout jiffies have
 * elapsed. The routine will return immediately unless
 * the current task state has been set (see set_current_state()).
 *
 * You can set the task state as follows -
 *
 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
 * pass before the routine returns. The routine will return 0
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task. In this case the remaining time
 * in jiffies will be returned, or 0 if the timer expired in time
 *
 * The current task state is guaranteed to be TASK_RUNNING when this
 * routine returns.
 *
 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
 * the CPU away without a bound on the timeout. In this case the return
 * value will be %MAX_SCHEDULE_TIMEOUT.
 *
 * In all cases the return value is guaranteed to be non-negative.
 */
signed long __sched schedule_timeout(signed long timeout)
{
	struct timer_list timer;
	unsigned long expire;

	switch (timeout)
	{
	case MAX_SCHEDULE_TIMEOUT:
		/*
		 * These two special cases are useful to be comfortable
		 * in the caller. Nothing more. We could take
		 * MAX_SCHEDULE_TIMEOUT from one of the negative values
		 * but I'd like to return a valid offset (>=0) to allow
		 * the caller to do everything it wants with the retval.
		 */
		schedule();
		goto out;
	default:
		/*
		 * Another bit of paranoia. Note that the retval will be
		 * 0 since no piece of kernel is supposed to do a check
		 * for a negative retval of schedule_timeout() (since it
		 * should never happen anyway). You just have the printk()
		 * that will tell you if something has gone wrong and where.
		 */
		if (timeout < 0) {
			printk(KERN_ERR "schedule_timeout: wrong timeout "
				"value %lx\n", timeout);
			dump_stack();
			current->state = TASK_RUNNING;
			goto out;
		}
	}

	expire = timeout + jiffies;

	setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
	__mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
	schedule();
	del_singleshot_timer_sync(&timer);

	/* Remove the timer from the object tracker */
	destroy_timer_on_stack(&timer);

	timeout = expire - jiffies;

 out:
	return timeout < 0 ? 0 : timeout;
}
EXPORT_SYMBOL(schedule_timeout);

/*
 * We can use __set_current_state() here because schedule_timeout() calls
 * schedule() unconditionally.
 */
signed long __sched schedule_timeout_interruptible(signed long timeout)
{
	__set_current_state(TASK_INTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_interruptible);

signed long __sched schedule_timeout_killable(signed long timeout)
{
	__set_current_state(TASK_KILLABLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_killable);

signed long __sched schedule_timeout_uninterruptible(signed long timeout)
{
	__set_current_state(TASK_UNINTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_uninterruptible);

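/*
 * Illustrative usage sketch (not part of this file): the task state must
 * be set before calling schedule_timeout(), otherwise it returns right
 * away. A typical interruptible wait of up to one second:
 *
 *	signed long remaining;
 *
 *	set_current_state(TASK_INTERRUPTIBLE);
 *	remaining = schedule_timeout(HZ);
 *	if (remaining)
 *		;	// woken early: a signal arrived or someone woke us
 *
 * The schedule_timeout_interruptible()/_killable()/_uninterruptible()
 * wrappers above simply fold the set_current_state() step in.
 */
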
/* Thread ID - the internal kernel "pid" */
SYSCALL_DEFINE0(gettid)
{
	return task_pid_vnr(current);
}

/**
 * do_sysinfo - fill in sysinfo struct
 * @info: pointer to buffer to fill
 */
int do_sysinfo(struct sysinfo *info)
{
	unsigned long mem_total, sav_total;
	unsigned int mem_unit, bitcount;
	struct timespec tp;

	memset(info, 0, sizeof(struct sysinfo));

	ktime_get_ts(&tp);
	monotonic_to_bootbased(&tp);
	info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);

	get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);

	info->procs = nr_threads;

	si_meminfo(info);
	si_swapinfo(info);

	/*
	 * If the sum of all the available memory (i.e. ram + swap)
	 * is less than can be stored in a 32 bit unsigned long then
	 * we can be binary compatible with 2.2.x kernels.  If not,
	 * well, in that case 2.2.x was broken anyways...
	 *
	 *  -Erik Andersen <andersee@debian.org>
	 */

	mem_total = info->totalram + info->totalswap;
	if (mem_total < info->totalram || mem_total < info->totalswap)
		goto out;
	bitcount = 0;
	mem_unit = info->mem_unit;
	while (mem_unit > 1) {
		bitcount++;
		mem_unit >>= 1;
		sav_total = mem_total;
		mem_total <<= 1;
		if (mem_total < sav_total)
			goto out;
	}

	/*
	 * If mem_total did not overflow, multiply all memory values by
	 * info->mem_unit and set it to 1.  This leaves things compatible
	 * with 2.2.x, and also retains compatibility with earlier 2.4.x
	 * kernels...
	 */

	info->mem_unit = 1;
	info->totalram <<= bitcount;
	info->freeram <<= bitcount;
	info->sharedram <<= bitcount;
	info->bufferram <<= bitcount;
	info->totalswap <<= bitcount;
	info->freeswap <<= bitcount;
	info->totalhigh <<= bitcount;
	info->freehigh <<= bitcount;

out:
	return 0;
}

SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
{
	struct sysinfo val;

	do_sysinfo(&val);

	if (copy_to_user(info, &val, sizeof(struct sysinfo)))
		return -EFAULT;

	return 0;
}

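/*
 * Illustrative userspace sketch (not part of this file) of the syscall
 * implemented above, via the glibc wrapper:
 *
 *	#include <stdio.h>
 *	#include <sys/sysinfo.h>
 *
 *	struct sysinfo si;
 *
 *	if (sysinfo(&si) == 0)
 *		printf("up %lds, %lu of %lu RAM units (%u bytes each) free\n",
 *		       si.uptime, si.freeram, si.totalram, si.mem_unit);
 *
 * freeram/totalram are counted in units of mem_unit bytes, which after the
 * scaling in do_sysinfo() is 1 unless the totals would overflow 32 bits.
 */
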
static int __cpuinit init_timers_cpu(int cpu)
{
	int j;
	struct tvec_base *base;
	static char __cpuinitdata tvec_base_done[NR_CPUS];

	if (!tvec_base_done[cpu]) {
		static char boot_done;

		if (boot_done) {
			/*
			 * The APs use this path later in boot
			 */
			base = kmalloc_node(sizeof(*base),
						GFP_KERNEL | __GFP_ZERO,
						cpu_to_node(cpu));
			if (!base)
				return -ENOMEM;

			/* Make sure that tvec_base is 2 byte aligned */
			if (tbase_get_deferrable(base)) {
				WARN_ON(1);
				kfree(base);
				return -ENOMEM;
			}
			per_cpu(tvec_bases, cpu) = base;
		} else {
			/*
			 * This is for the boot CPU - we use compile-time
			 * static initialisation because per-cpu memory isn't
			 * ready yet and because the memory allocators are not
			 * initialised either.
			 */
			boot_done = 1;
			base = &boot_tvec_bases;
		}
		tvec_base_done[cpu] = 1;
	} else {
		base = per_cpu(tvec_bases, cpu);
	}

	spin_lock_init(&base->lock);

	for (j = 0; j < TVN_SIZE; j++) {
		INIT_LIST_HEAD(base->tv5.vec + j);
		INIT_LIST_HEAD(base->tv4.vec + j);
		INIT_LIST_HEAD(base->tv3.vec + j);
		INIT_LIST_HEAD(base->tv2.vec + j);
	}
	for (j = 0; j < TVR_SIZE; j++)
		INIT_LIST_HEAD(base->tv1.vec + j);

	base->timer_jiffies = jiffies;
	base->next_timer = base->timer_jiffies;
	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
{
	struct timer_list *timer;

	while (!list_empty(head)) {
		timer = list_first_entry(head, struct timer_list, entry);
		detach_timer(timer, 0);
		timer_set_base(timer, new_base);
		if (time_before(timer->expires, new_base->next_timer) &&
		    !tbase_get_deferrable(timer->base))
			new_base->next_timer = timer->expires;
		internal_add_timer(new_base, timer);
	}
}

static void __cpuinit migrate_timers(int cpu)
{
	struct tvec_base *old_base;
	struct tvec_base *new_base;
	int i;

	BUG_ON(cpu_online(cpu));
	old_base = per_cpu(tvec_bases, cpu);
	new_base = get_cpu_var(tvec_bases);
	/*
	 * The caller is globally serialized and nobody else
	 * takes two locks at once, so deadlock is not possible.
	 */
	spin_lock_irq(&new_base->lock);
	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);

	BUG_ON(old_base->running_timer);

	for (i = 0; i < TVR_SIZE; i++)
		migrate_timer_list(new_base, old_base->tv1.vec + i);
	for (i = 0; i < TVN_SIZE; i++) {
		migrate_timer_list(new_base, old_base->tv2.vec + i);
		migrate_timer_list(new_base, old_base->tv3.vec + i);
		migrate_timer_list(new_base, old_base->tv4.vec + i);
		migrate_timer_list(new_base, old_base->tv5.vec + i);
	}

	spin_unlock(&old_base->lock);
	spin_unlock_irq(&new_base->lock);
	put_cpu_var(tvec_bases);
}
#endif /* CONFIG_HOTPLUG_CPU */

static int __cpuinit timer_cpu_notify(struct notifier_block *self,
				unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;
	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		if (init_timers_cpu(cpu) < 0)
			return NOTIFY_BAD;
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		migrate_timers(cpu);
		break;
#endif
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata timers_nb = {
	.notifier_call	= timer_cpu_notify,
};


void __init init_timers(void)
{
	int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
				(void *)(long)smp_processor_id());

	init_timer_stats();

	BUG_ON(err == NOTIFY_BAD);
	register_cpu_notifier(&timers_nb);
	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}

/**
 * msleep - sleep safely even with waitqueue interruptions
 * @msecs: Time in milliseconds to sleep for
 */
void msleep(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	while (timeout)
		timeout = schedule_timeout_uninterruptible(timeout);
}

EXPORT_SYMBOL(msleep);

/**
 * msleep_interruptible - sleep waiting for signals
 * @msecs: Time in milliseconds to sleep for
 */
unsigned long msleep_interruptible(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	while (timeout && !signal_pending(current))
		timeout = schedule_timeout_interruptible(timeout);
	return jiffies_to_msecs(timeout);
}

EXPORT_SYMBOL(msleep_interruptible);

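/*
 * Illustrative usage sketch (not part of this file): msleep() is the plain
 * "sleep at least this long" primitive, while msleep_interruptible() also
 * bails out on signals and reports how much time was left:
 *
 *	msleep(20);			// always sleeps >= 20 ms
 *
 *	left = msleep_interruptible(2000);
 *	if (left)
 *		;	// a signal arrived with roughly 'left' ms to go
 *
 * For delays much shorter than a jiffy the busy-wait helpers udelay() and
 * mdelay() from <linux/delay.h> are used instead; msleep() always rounds
 * up to whole jiffies.
 */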