/*
 *  linux/kernel/timer.c
 *
 *  Kernel internal timers, basic process system calls
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
 *
 *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
 *              "A Kernel Model for Precision Timekeeping" by Dave Mills
 *  1998-12-24  Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
 *              serialize accesses to xtime/lost_ticks).
 *                              Copyright (C) 1998  Andrea Arcangeli
 *  1999-03-10  Improved NTP compatibility by Ulrich Windl
 *  2002-05-31  Move sys_sysinfo here and make its locking sane, Robert Love
 *  2000-10-05  Implemented scalable SMP per-CPU timer handling.
 *                              Copyright (C) 2000, 2001, 2002  Ingo Molnar
 *              Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
 */

#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/pid_namespace.h>
#include <linux/notifier.h>
#include <linux/thread_info.h>
#include <linux/time.h>
#include <linux/jiffies.h>
#include <linux/posix-timers.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
#include <linux/perf_event.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/div64.h>
#include <asm/timex.h>
#include <asm/io.h>

#define CREATE_TRACE_POINTS
#include <trace/events/timer.h>

u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;

EXPORT_SYMBOL(jiffies_64);

/*
 * per-CPU timer vector definitions:
 */
#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

struct tvec {
	struct list_head vec[TVN_SIZE];
};

struct tvec_root {
	struct list_head vec[TVR_SIZE];
};

struct tvec_base {
	spinlock_t lock;
	struct timer_list *running_timer;
	unsigned long timer_jiffies;
	unsigned long next_timer;
	struct tvec_root tv1;
	struct tvec tv2;
	struct tvec tv3;
	struct tvec tv4;
	struct tvec tv5;
} ____cacheline_aligned;

struct tvec_base boot_tvec_bases;
EXPORT_SYMBOL(boot_tvec_bases);
static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;

/*
 * Note that all tvec_bases are at least 2 byte aligned, so the lower bit of
 * base in timer_list is guaranteed to be zero. Use that LSB as a flag to
 * indicate whether the timer is deferrable.
 */
#define TBASE_DEFERRABLE_FLAG		(0x1)

/* Functions below help us manage 'deferrable' flag */
static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
{
	return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
}

static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
{
	return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
}

static inline void timer_set_deferrable(struct timer_list *timer)
{
	timer->base = ((struct tvec_base *)((unsigned long)(timer->base) |
				       TBASE_DEFERRABLE_FLAG));
}

static inline void
timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
{
	timer->base = (struct tvec_base *)((unsigned long)(new_base) |
				      tbase_get_deferrable(timer->base));
}

static unsigned long round_jiffies_common(unsigned long j, int cpu,
		bool force_up)
{
	int rem;
	unsigned long original = j;

	/*
	 * We don't want all cpus firing their timers at once hitting the
	 * same lock or cachelines, so we skew each extra cpu with an extra
	 * 3 jiffies. This 3 jiffies came originally from the mm/ code which
	 * already did this.
	 * The skew is done by adding 3*cpunr, then rounding, then subtracting
	 * this extra offset again.
	 */
	j += cpu * 3;

	rem = j % HZ;

	/*
	 * If the target jiffy is just after a whole second (which can happen
	 * due to delays of the timer irq, long irq off times etc etc) then
	 * we should round down to the whole second, not up. Use 1/4th second
	 * as cutoff for this rounding as an extreme upper bound for this.
	 * But never round down if @force_up is set.
	 */
	if (rem < HZ/4 && !force_up) /* round down */
		j = j - rem;
	else /* round up */
		j = j - rem + HZ;

	/* now that we have rounded, subtract the extra skew again */
	j -= cpu * 3;

	if (j <= jiffies) /* rounding ate our timeout entirely; */
		return original;
	return j;
}

/**
 * __round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * __round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The exact rounding is skewed for each processor to avoid all
 * processors firing at the exact same time, which could lead
 * to lock contention or spurious cache line bouncing.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long __round_jiffies(unsigned long j, int cpu)
{
	return round_jiffies_common(j, cpu, false);
}
EXPORT_SYMBOL_GPL(__round_jiffies);

/**
 * __round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * __round_jiffies_relative() rounds a time delta in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The exact rounding is skewed for each processor to avoid all
 * processors firing at the exact same time, which could lead
 * to lock contention or spurious cache line bouncing.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long __round_jiffies_relative(unsigned long j, int cpu)
{
	unsigned long j0 = jiffies;

	/* Use j0 because jiffies might change while we run */
	return round_jiffies_common(j + j0, cpu, false) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_relative);

/**
 * round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies(unsigned long j)
{
	return round_jiffies_common(j, raw_smp_processor_id(), false);
}
EXPORT_SYMBOL_GPL(round_jiffies);

/**
 * round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * round_jiffies_relative() rounds a time delta in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies_relative(unsigned long j)
{
	return __round_jiffies_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_relative);
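
/*
 * Illustrative usage (added for this edit, not part of the original file):
 * a periodic timer that only needs roughly one-second granularity can have
 * its expiry coarsened so that it fires together with other such timers.
 * The names my_timer and my_delay below are hypothetical:
 *
 *	mod_timer(&my_timer, round_jiffies(jiffies + 5 * HZ));
 *
 *	unsigned long my_delay = round_jiffies_relative(5 * HZ);
 *	mod_timer(&my_timer, jiffies + my_delay);
 */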

/**
 * __round_jiffies_up - function to round jiffies up to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * This is the same as __round_jiffies() except that it will never
 * round down.  This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long __round_jiffies_up(unsigned long j, int cpu)
{
	return round_jiffies_common(j, cpu, true);
}
EXPORT_SYMBOL_GPL(__round_jiffies_up);

/**
 * __round_jiffies_up_relative - function to round jiffies up to a full second
 * @j: the time in (relative) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * This is the same as __round_jiffies_relative() except that it will never
 * round down.  This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
{
	unsigned long j0 = jiffies;

	/* Use j0 because jiffies might change while we run */
	return round_jiffies_common(j + j0, cpu, true) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);

/**
 * round_jiffies_up - function to round jiffies up to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * This is the same as round_jiffies() except that it will never
 * round down.  This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long round_jiffies_up(unsigned long j)
{
	return round_jiffies_common(j, raw_smp_processor_id(), true);
}
EXPORT_SYMBOL_GPL(round_jiffies_up);

/**
 * round_jiffies_up_relative - function to round jiffies up to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * This is the same as round_jiffies_relative() except that it will never
 * round down.  This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long round_jiffies_up_relative(unsigned long j)
{
	return __round_jiffies_up_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_up_relative);

/**
 * set_timer_slack - set the allowed slack for a timer
 * @timer: the timer to be modified
 * @slack_hz: the amount of time (in jiffies) allowed for rounding
 *
 * Set the amount of time, in jiffies, that a certain timer has
 * in terms of slack. By setting this value, the timer subsystem
 * will schedule the actual timer somewhere between
 * the time mod_timer() asks for, and that time plus the slack.
 *
 * By setting the slack to -1, a percentage of the delay is used
 * instead.
 */
void set_timer_slack(struct timer_list *timer, int slack_hz)
{
	timer->slack = slack_hz;
}
EXPORT_SYMBOL_GPL(set_timer_slack);
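
/*
 * Illustrative usage (added for this edit, not part of the original file):
 * a timer whose expiry may be rounded by up to a quarter of a second.
 * my_timer is a hypothetical name used only for the example:
 *
 *	set_timer_slack(&my_timer, HZ / 4);
 *	mod_timer(&my_timer, jiffies + 2 * HZ);
 */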

static inline void set_running_timer(struct tvec_base *base,
					struct timer_list *timer)
{
#ifdef CONFIG_SMP
	base->running_timer = timer;
#endif
}

static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
	unsigned long expires = timer->expires;
	unsigned long idx = expires - base->timer_jiffies;
	struct list_head *vec;

	if (idx < TVR_SIZE) {
		int i = expires & TVR_MASK;
		vec = base->tv1.vec + i;
	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
		int i = (expires >> TVR_BITS) & TVN_MASK;
		vec = base->tv2.vec + i;
	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
		vec = base->tv3.vec + i;
	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
		vec = base->tv4.vec + i;
	} else if ((signed long) idx < 0) {
		/*
		 * Can happen if you add a timer with expires == jiffies,
		 * or you set a timer to go off in the past
		 */
		vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
	} else {
		int i;
		/* If the timeout is larger than 0xffffffff on 64-bit
		 * architectures then we use the maximum timeout:
		 */
		if (idx > 0xffffffffUL) {
			idx = 0xffffffffUL;
			expires = idx + base->timer_jiffies;
		}
		i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
		vec = base->tv5.vec + i;
	}
	/*
	 * Timers are FIFO:
	 */
	list_add_tail(&timer->entry, vec);
}
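
/*
 * Worked example (added for this edit, not part of the original file): with
 * CONFIG_BASE_SMALL == 0 we have TVR_BITS == 8 and TVN_BITS == 6, so tv1
 * covers the next 256 jiffies and tv2 the next 1 << (8 + 6) == 16384.
 * A timer whose expiry lies 100 ticks ahead therefore goes into
 * tv1.vec[expires & 255], while one 10000 ticks ahead goes into
 * tv2.vec[(expires >> 8) & 63] and is cascaded down into tv1 later.
 */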

#ifdef CONFIG_TIMER_STATS
void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
{
	if (timer->start_site)
		return;

	timer->start_site = addr;
	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
	timer->start_pid = current->pid;
}

static void timer_stats_account_timer(struct timer_list *timer)
{
	unsigned int flag = 0;

	if (likely(!timer->start_site))
		return;
	if (unlikely(tbase_get_deferrable(timer->base)))
		flag |= TIMER_STATS_FLAG_DEFERRABLE;

	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
				 timer->function, timer->start_comm, flag);
}

#else
static void timer_stats_account_timer(struct timer_list *timer) {}
#endif

#ifdef CONFIG_DEBUG_OBJECTS_TIMERS

static struct debug_obj_descr timer_debug_descr;

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static int timer_fixup_init(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		del_timer_sync(timer);
		debug_object_init(timer, &timer_debug_descr);
		return 1;
	default:
		return 0;
	}
}

/*
 * fixup_activate is called when:
 * - an active object is activated
 * - an unknown object is activated (might be a statically initialized object)
 */
static int timer_fixup_activate(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {

	case ODEBUG_STATE_NOTAVAILABLE:
		/*
		 * This is not really a fixup. The timer was
		 * statically initialized. We just make sure that it
		 * is tracked in the object tracker.
		 */
		if (timer->entry.next == NULL &&
		    timer->entry.prev == TIMER_ENTRY_STATIC) {
			debug_object_init(timer, &timer_debug_descr);
			debug_object_activate(timer, &timer_debug_descr);
			return 0;
		} else {
			WARN_ON_ONCE(1);
		}
		return 0;

	case ODEBUG_STATE_ACTIVE:
		WARN_ON(1);

	default:
		return 0;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static int timer_fixup_free(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		del_timer_sync(timer);
		debug_object_free(timer, &timer_debug_descr);
		return 1;
	default:
		return 0;
	}
}

static struct debug_obj_descr timer_debug_descr = {
	.name		= "timer_list",
	.fixup_init	= timer_fixup_init,
	.fixup_activate	= timer_fixup_activate,
	.fixup_free	= timer_fixup_free,
};

static inline void debug_timer_init(struct timer_list *timer)
{
	debug_object_init(timer, &timer_debug_descr);
}

static inline void debug_timer_activate(struct timer_list *timer)
{
	debug_object_activate(timer, &timer_debug_descr);
}

static inline void debug_timer_deactivate(struct timer_list *timer)
{
	debug_object_deactivate(timer, &timer_debug_descr);
}

static inline void debug_timer_free(struct timer_list *timer)
{
	debug_object_free(timer, &timer_debug_descr);
}

static void __init_timer(struct timer_list *timer,
			 const char *name,
			 struct lock_class_key *key);

void init_timer_on_stack_key(struct timer_list *timer,
			     const char *name,
			     struct lock_class_key *key)
{
	debug_object_init_on_stack(timer, &timer_debug_descr);
	__init_timer(timer, name, key);
}
EXPORT_SYMBOL_GPL(init_timer_on_stack_key);

void destroy_timer_on_stack(struct timer_list *timer)
{
	debug_object_free(timer, &timer_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_timer_on_stack);

#else
static inline void debug_timer_init(struct timer_list *timer) { }
static inline void debug_timer_activate(struct timer_list *timer) { }
static inline void debug_timer_deactivate(struct timer_list *timer) { }
#endif

static inline void debug_init(struct timer_list *timer)
{
	debug_timer_init(timer);
	trace_timer_init(timer);
}

static inline void
debug_activate(struct timer_list *timer, unsigned long expires)
{
	debug_timer_activate(timer);
	trace_timer_start(timer, expires);
}

static inline void debug_deactivate(struct timer_list *timer)
{
	debug_timer_deactivate(timer);
	trace_timer_cancel(timer);
}

static void __init_timer(struct timer_list *timer,
			 const char *name,
			 struct lock_class_key *key)
{
	timer->entry.next = NULL;
	timer->base = __raw_get_cpu_var(tvec_bases);
	timer->slack = -1;
#ifdef CONFIG_TIMER_STATS
	timer->start_site = NULL;
	timer->start_pid = -1;
	memset(timer->start_comm, 0, TASK_COMM_LEN);
#endif
	lockdep_init_map(&timer->lockdep_map, name, key, 0);
}

/**
 * init_timer_key - initialize a timer
 * @timer: the timer to be initialized
 * @name: name of the timer
 * @key: lockdep class key of the fake lock used for tracking timer
 *       sync lock dependencies
 *
 * init_timer_key() must be done to a timer prior to calling *any* of the
 * other timer functions.
 */
void init_timer_key(struct timer_list *timer,
		    const char *name,
		    struct lock_class_key *key)
{
	debug_init(timer);
	__init_timer(timer, name, key);
}
EXPORT_SYMBOL(init_timer_key);

void init_timer_deferrable_key(struct timer_list *timer,
			       const char *name,
			       struct lock_class_key *key)
{
	init_timer_key(timer, name, key);
	timer_set_deferrable(timer);
}
EXPORT_SYMBOL(init_timer_deferrable_key);

static inline void detach_timer(struct timer_list *timer,
				int clear_pending)
{
	struct list_head *entry = &timer->entry;

	debug_deactivate(timer);

	__list_del(entry->prev, entry->next);
	if (clear_pending)
		entry->next = NULL;
	entry->prev = LIST_POISON2;
}

/*
 * We are using hashed locking: holding per_cpu(tvec_bases).lock
 * means that all timers which are tied to this base via timer->base are
 * locked, and the base itself is locked too.
 *
 * So __run_timers/migrate_timers can safely modify all timers which could
 * be found on ->tvX lists.
 *
 * When the timer's base is locked, and the timer removed from list, it is
 * possible to set timer->base = NULL and drop the lock: the timer remains
 * locked.
 */
static struct tvec_base *lock_timer_base(struct timer_list *timer,
					 unsigned long *flags)
	__acquires(timer->base->lock)
{
	struct tvec_base *base;

	for (;;) {
		struct tvec_base *prelock_base = timer->base;
		base = tbase_get_base(prelock_base);
		if (likely(base != NULL)) {
			spin_lock_irqsave(&base->lock, *flags);
			if (likely(prelock_base == timer->base))
				return base;
			/* The timer has migrated to another CPU */
			spin_unlock_irqrestore(&base->lock, *flags);
		}
		cpu_relax();
	}
}

static inline int
__mod_timer(struct timer_list *timer, unsigned long expires,
	    bool pending_only, int pinned)
{
	struct tvec_base *base, *new_base;
	unsigned long flags;
	int ret = 0, cpu;

	timer_stats_timer_set_start_info(timer);
	BUG_ON(!timer->function);

	base = lock_timer_base(timer, &flags);

	if (timer_pending(timer)) {
		detach_timer(timer, 0);
		if (timer->expires == base->next_timer &&
		    !tbase_get_deferrable(timer->base))
			base->next_timer = base->timer_jiffies;
		ret = 1;
	} else {
		if (pending_only)
			goto out_unlock;
	}

	debug_activate(timer, expires);

	cpu = smp_processor_id();

#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
		int preferred_cpu = get_nohz_load_balancer();

		if (preferred_cpu >= 0)
			cpu = preferred_cpu;
	}
#endif
	new_base = per_cpu(tvec_bases, cpu);

	if (base != new_base) {
		/*
		 * We are trying to schedule the timer on the local CPU.
		 * However we can't change timer's base while it is running,
		 * otherwise del_timer_sync() can't detect that the timer's
		 * handler has not finished yet. This also guarantees that
		 * the timer is serialized wrt itself.
		 */
		if (likely(base->running_timer != timer)) {
			/* See the comment in lock_timer_base() */
			timer_set_base(timer, NULL);
			spin_unlock(&base->lock);
			base = new_base;
			spin_lock(&base->lock);
			timer_set_base(timer, base);
		}
	}

	timer->expires = expires;
	if (time_before(timer->expires, base->next_timer) &&
	    !tbase_get_deferrable(timer->base))
		base->next_timer = timer->expires;
	internal_add_timer(base, timer);

out_unlock:
	spin_unlock_irqrestore(&base->lock, flags);

	return ret;
}

/**
 * mod_timer_pending - modify a pending timer's timeout
 * @timer: the pending timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer_pending() is the same for pending timers as mod_timer(),
 * but will not re-activate and modify already deleted timers.
 *
 * It is useful for unserialized use of timers.
 */
int mod_timer_pending(struct timer_list *timer, unsigned long expires)
{
	return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
}
EXPORT_SYMBOL(mod_timer_pending);

/*
 * Decide where to put the timer while taking the slack into account
 *
 * Algorithm:
 *   1) calculate the maximum (absolute) time
 *   2) calculate the highest bit where the expires and new max are different
 *   3) use this bit to make a mask
 *   4) use the bitmask to round down the maximum time, so that all last
 *      bits are zeros
 */
static inline
unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
{
	unsigned long expires_limit, mask;
	int bit;

	expires_limit = expires;

	if (timer->slack >= 0) {
		expires_limit = expires + timer->slack;
	} else {
		unsigned long now = jiffies;

		/* No slack if already expired, else automatic slack of 0.4% */
		if (time_after(expires, now))
			expires_limit = expires + (expires - now)/256;
	}
	mask = expires ^ expires_limit;
	if (mask == 0)
		return expires;

	bit = find_last_bit(&mask, BITS_PER_LONG);

	mask = (1 << bit) - 1;

	expires_limit = expires_limit & ~(mask);

	return expires_limit;
}
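
/*
 * Worked example (added for this edit, not part of the original file): with
 * jiffies == 0, expires == 1000 and the default slack of -1, the automatic
 * slack is (1000 - 0) / 256 == 3, so expires_limit == 1003. Then
 * mask == 1000 ^ 1003 == 3, the highest differing bit is bit 1, and rounding
 * 1003 down over those low bits yields 1002, which lies inside the allowed
 * window [1000, 1003].
 */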

/**
 * mod_timer - modify a timer's timeout
 * @timer: the timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer() is a more efficient way to update the expire field of an
 * active timer (if the timer is inactive it will be activated)
 *
 * mod_timer(timer, expires) is equivalent to:
 *
 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 *
 * Note that if there are multiple unserialized concurrent users of the
 * same timer, then mod_timer() is the only safe way to modify the timeout,
 * since add_timer() cannot modify an already running timer.
 *
 * The function returns whether it has modified a pending timer or not.
 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
 * active timer returns 1.)
 */
int mod_timer(struct timer_list *timer, unsigned long expires)
{
	/*
	 * This is a common optimization triggered by the
	 * networking code - if the timer is re-modified
	 * to be the same thing then just return:
	 */
	if (timer_pending(timer) && timer->expires == expires)
		return 1;

	expires = apply_slack(timer, expires);

	return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
}
EXPORT_SYMBOL(mod_timer);

/**
 * mod_timer_pinned - modify a timer's timeout
 * @timer: the timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer_pinned() is a way to update the expire field of an
 * active timer (if the timer is inactive it will be activated)
 * without allowing the timer to be migrated to a different CPU.
 *
 * mod_timer_pinned(timer, expires) is equivalent to:
 *
 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 */
int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
{
	if (timer->expires == expires && timer_pending(timer))
		return 1;

	return __mod_timer(timer, expires, false, TIMER_PINNED);
}
EXPORT_SYMBOL(mod_timer_pinned);

/**
 * add_timer - start a timer
 * @timer: the timer to be added
 *
 * The kernel will do a ->function(->data) callback from the
 * timer interrupt at the ->expires point in the future. The
 * current time is 'jiffies'.
 *
 * The timer's ->expires, ->function (and if the handler uses it, ->data)
 * fields must be set prior to calling this function.
 *
 * Timers with an ->expires field in the past will be executed in the next
 * timer tick.
 */
void add_timer(struct timer_list *timer)
{
	BUG_ON(timer_pending(timer));
	mod_timer(timer, timer->expires);
}
EXPORT_SYMBOL(add_timer);
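
/*
 * Illustrative lifecycle (added for this edit, not part of the original
 * file): a hypothetical driver sets up a one-shot timer, arms it and tears
 * it down again. my_timer, my_timer_fn and my_cookie are made-up names:
 *
 *	static void my_timer_fn(unsigned long data)
 *	{
 *		pr_info("timer fired, data=%lu\n", data);
 *	}
 *
 *	init_timer(&my_timer);
 *	my_timer.function = my_timer_fn;
 *	my_timer.data     = my_cookie;
 *	my_timer.expires  = jiffies + HZ;
 *	add_timer(&my_timer);
 *
 *	...
 *	del_timer_sync(&my_timer);	// on teardown, waits for the handler
 */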

/**
 * add_timer_on - start a timer on a particular CPU
 * @timer: the timer to be added
 * @cpu: the CPU to start it on
 *
 * This is not very scalable on SMP. Double adds are not possible.
 */
void add_timer_on(struct timer_list *timer, int cpu)
{
	struct tvec_base *base = per_cpu(tvec_bases, cpu);
	unsigned long flags;

	timer_stats_timer_set_start_info(timer);
	BUG_ON(timer_pending(timer) || !timer->function);
	spin_lock_irqsave(&base->lock, flags);
	timer_set_base(timer, base);
	debug_activate(timer, timer->expires);
	if (time_before(timer->expires, base->next_timer) &&
	    !tbase_get_deferrable(timer->base))
		base->next_timer = timer->expires;
	internal_add_timer(base, timer);
	/*
	 * Check whether the other CPU is idle and needs to be
	 * triggered to reevaluate the timer wheel when nohz is
	 * active. We are protected against the other CPU fiddling
	 * with the timer by holding the timer base lock. This also
	 * makes sure that a CPU on the way to idle can not evaluate
	 * the timer wheel.
	 */
	wake_up_idle_cpu(cpu);
	spin_unlock_irqrestore(&base->lock, flags);
}
EXPORT_SYMBOL_GPL(add_timer_on);

/**
 * del_timer - deactivate a timer.
 * @timer: the timer to be deactivated
 *
 * del_timer() deactivates a timer - this works on both active and inactive
 * timers.
 *
 * The function returns whether it has deactivated a pending timer or not.
 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
 * active timer returns 1.)
 */
int del_timer(struct timer_list *timer)
{
	struct tvec_base *base;
	unsigned long flags;
	int ret = 0;

	timer_stats_timer_clear_start_info(timer);
	if (timer_pending(timer)) {
		base = lock_timer_base(timer, &flags);
		if (timer_pending(timer)) {
			detach_timer(timer, 1);
			if (timer->expires == base->next_timer &&
			    !tbase_get_deferrable(timer->base))
				base->next_timer = base->timer_jiffies;
			ret = 1;
		}
		spin_unlock_irqrestore(&base->lock, flags);
	}

	return ret;
}
EXPORT_SYMBOL(del_timer);

#ifdef CONFIG_SMP
/**
 * try_to_del_timer_sync - Try to deactivate a timer
 * @timer: the timer to delete
 *
 * This function tries to deactivate a timer. Upon successful (ret >= 0)
 * exit the timer is not queued and the handler is not running on any CPU.
 *
 * It must not be called from interrupt contexts.
 */
int try_to_del_timer_sync(struct timer_list *timer)
{
	struct tvec_base *base;
	unsigned long flags;
	int ret = -1;

	base = lock_timer_base(timer, &flags);

	if (base->running_timer == timer)
		goto out;

	timer_stats_timer_clear_start_info(timer);
	ret = 0;
	if (timer_pending(timer)) {
		detach_timer(timer, 1);
		if (timer->expires == base->next_timer &&
		    !tbase_get_deferrable(timer->base))
			base->next_timer = base->timer_jiffies;
		ret = 1;
	}
out:
	spin_unlock_irqrestore(&base->lock, flags);

	return ret;
}
EXPORT_SYMBOL(try_to_del_timer_sync);

/**
 * del_timer_sync - deactivate a timer and wait for the handler to finish.
 * @timer: the timer to be deactivated
 *
 * This function only differs from del_timer() on SMP: besides deactivating
 * the timer it also makes sure the handler has finished executing on other
 * CPUs.
 *
 * Synchronization rules: Callers must prevent restarting of the timer,
 * otherwise this function is meaningless. It must not be called from
 * interrupt contexts. The caller must not hold locks which would prevent
 * completion of the timer's handler. The timer's handler must not call
 * add_timer_on(). Upon exit the timer is not queued and the handler is
 * not running on any CPU.
 *
 * The function returns whether it has deactivated a pending timer or not.
 */
int del_timer_sync(struct timer_list *timer)
{
#ifdef CONFIG_LOCKDEP
	unsigned long flags;

	local_irq_save(flags);
	lock_map_acquire(&timer->lockdep_map);
	lock_map_release(&timer->lockdep_map);
	local_irq_restore(flags);
#endif

	for (;;) {
		int ret = try_to_del_timer_sync(timer);
		if (ret >= 0)
			return ret;
		cpu_relax();
	}
}
EXPORT_SYMBOL(del_timer_sync);
#endif

static int cascade(struct tvec_base *base, struct tvec *tv, int index)
{
	/* cascade all the timers from tv up one level */
	struct timer_list *timer, *tmp;
	struct list_head tv_list;

	list_replace_init(tv->vec + index, &tv_list);

	/*
	 * We are removing _all_ timers from the list, so we
	 * don't have to detach them individually.
	 */
	list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
		BUG_ON(tbase_get_base(timer->base) != base);
		internal_add_timer(base, timer);
	}

	return index;
}

static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
			  unsigned long data)
{
	int preempt_count = preempt_count();

#ifdef CONFIG_LOCKDEP
	/*
	 * It is permissible to free the timer from inside the
	 * function that is called from it, this we need to take into
	 * account for lockdep too. To avoid bogus "held lock freed"
	 * warnings as well as problems when looking into
	 * timer->lockdep_map, make a copy and use that here.
	 */
	struct lockdep_map lockdep_map = timer->lockdep_map;
#endif
	/*
	 * Couple the lock chain with the lock chain at
	 * del_timer_sync() by acquiring the lock_map around the fn()
	 * call here and in del_timer_sync().
	 */
	lock_map_acquire(&lockdep_map);

	trace_timer_expire_entry(timer);
	fn(data);
	trace_timer_expire_exit(timer);

	lock_map_release(&lockdep_map);

	if (preempt_count != preempt_count()) {
		WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
			  fn, preempt_count, preempt_count());
		/*
		 * Restore the preempt count. That gives us a decent
		 * chance to survive and extract information. If the
		 * callback kept a lock held, bad luck, but not worse
		 * than the BUG() we had.
		 */
		preempt_count() = preempt_count;
	}
}

#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)

/**
 * __run_timers - run all expired timers (if any) on this CPU.
 * @base: the timer vector to be processed.
 *
 * This function cascades all vectors and executes all expired timer
 * vectors.
 */
static inline void __run_timers(struct tvec_base *base)
{
	struct timer_list *timer;

	spin_lock_irq(&base->lock);
	while (time_after_eq(jiffies, base->timer_jiffies)) {
		struct list_head work_list;
		struct list_head *head = &work_list;
		int index = base->timer_jiffies & TVR_MASK;

		/*
		 * Cascade timers:
		 */
		if (!index &&
			(!cascade(base, &base->tv2, INDEX(0))) &&
				(!cascade(base, &base->tv3, INDEX(1))) &&
					!cascade(base, &base->tv4, INDEX(2)))
			cascade(base, &base->tv5, INDEX(3));
		++base->timer_jiffies;
		list_replace_init(base->tv1.vec + index, &work_list);
		while (!list_empty(head)) {
			void (*fn)(unsigned long);
			unsigned long data;

			timer = list_first_entry(head, struct timer_list, entry);
			fn = timer->function;
			data = timer->data;

			timer_stats_account_timer(timer);

			set_running_timer(base, timer);
			detach_timer(timer, 1);

			spin_unlock_irq(&base->lock);
			call_timer_fn(timer, fn, data);
			spin_lock_irq(&base->lock);
		}
	}
	set_running_timer(base, NULL);
	spin_unlock_irq(&base->lock);
}

#ifdef CONFIG_NO_HZ
/*
 * Find out when the next timer event is due to happen. This
 * is used on S/390 to stop all activity when a CPU is idle.
 * This function needs to be called with interrupts disabled.
 */
static unsigned long __next_timer_interrupt(struct tvec_base *base)
{
	unsigned long timer_jiffies = base->timer_jiffies;
	unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
	int index, slot, array, found = 0;
	struct timer_list *nte;
	struct tvec *varray[4];

	/* Look for timer events in tv1. */
	index = slot = timer_jiffies & TVR_MASK;
	do {
		list_for_each_entry(nte, base->tv1.vec + slot, entry) {
			if (tbase_get_deferrable(nte->base))
				continue;

			found = 1;
			expires = nte->expires;
			/* Look at the cascade bucket(s)? */
			if (!index || slot < index)
				goto cascade;
			return expires;
		}
		slot = (slot + 1) & TVR_MASK;
	} while (slot != index);

cascade:
	/* Calculate the next cascade event */
	if (index)
		timer_jiffies += TVR_SIZE - index;
	timer_jiffies >>= TVR_BITS;

	/* Check tv2-tv5. */
	varray[0] = &base->tv2;
	varray[1] = &base->tv3;
	varray[2] = &base->tv4;
	varray[3] = &base->tv5;

	for (array = 0; array < 4; array++) {
		struct tvec *varp = varray[array];

		index = slot = timer_jiffies & TVN_MASK;
		do {
			list_for_each_entry(nte, varp->vec + slot, entry) {
				if (tbase_get_deferrable(nte->base))
					continue;

				found = 1;
				if (time_before(nte->expires, expires))
					expires = nte->expires;
			}
			/*
			 * Do we still search for the first timer or are
			 * we looking up the cascade buckets ?
			 */
			if (found) {
				/* Look at the cascade bucket(s)? */
				if (!index || slot < index)
					break;
				return expires;
			}
			slot = (slot + 1) & TVN_MASK;
		} while (slot != index);

		if (index)
			timer_jiffies += TVN_SIZE - index;
		timer_jiffies >>= TVN_BITS;
	}
	return expires;
}

/*
 * Check if the next hrtimer event is before the next timer wheel
 * event:
 */
static unsigned long cmp_next_hrtimer_event(unsigned long now,
					    unsigned long expires)
{
	ktime_t hr_delta = hrtimer_get_next_event();
	struct timespec tsdelta;
	unsigned long delta;

	if (hr_delta.tv64 == KTIME_MAX)
		return expires;

	/*
	 * Expired timer available, let it expire in the next tick
	 */
	if (hr_delta.tv64 <= 0)
		return now + 1;

	tsdelta = ktime_to_timespec(hr_delta);
	delta = timespec_to_jiffies(&tsdelta);

	/*
	 * Limit the delta to the max value, which is checked in
	 * tick_nohz_stop_sched_tick():
	 */
	if (delta > NEXT_TIMER_MAX_DELTA)
		delta = NEXT_TIMER_MAX_DELTA;

	/*
	 * Take rounding errors into account and make sure that it
	 * expires in the next tick. Otherwise we would go into an endless
	 * ping-pong due to tick_nohz_stop_sched_tick() retriggering
	 * the timer softirq.
	 */
	if (delta < 1)
		delta = 1;
	now += delta;
	if (time_before(now, expires))
		return now;
	return expires;
}

/**
 * get_next_timer_interrupt - return the jiffy of the next pending timer
 * @now: current time (in jiffies)
 */
unsigned long get_next_timer_interrupt(unsigned long now)
{
	struct tvec_base *base = __get_cpu_var(tvec_bases);
	unsigned long expires;

	spin_lock(&base->lock);
	if (time_before_eq(base->next_timer, base->timer_jiffies))
		base->next_timer = __next_timer_interrupt(base);
	expires = base->next_timer;
	spin_unlock(&base->lock);

	if (time_before_eq(expires, now))
		return now;

	return cmp_next_hrtimer_event(now, expires);
}
#endif

/*
 * Called from the timer interrupt handler to charge one tick to the current
 * process.  user_tick is 1 if the tick is user time, 0 for system.
 */
void update_process_times(int user_tick)
{
	struct task_struct *p = current;
	int cpu = smp_processor_id();

	/* Note: this timer irq context must be accounted for as well. */
	account_process_tick(p, user_tick);
	run_local_timers();
	rcu_check_callbacks(cpu, user_tick);
	printk_tick();
	perf_event_do_pending();
	scheduler_tick();
	run_posix_cpu_timers(p);
}

/*
 * This function runs timers and the timer-tq in bottom half context.
 */
static void run_timer_softirq(struct softirq_action *h)
{
	struct tvec_base *base = __get_cpu_var(tvec_bases);

	hrtimer_run_pending();

	if (time_after_eq(jiffies, base->timer_jiffies))
		__run_timers(base);
}

/*
 * Called by the local, per-CPU timer interrupt on SMP.
 */
void run_local_timers(void)
{
	hrtimer_run_queues();
	raise_softirq(TIMER_SOFTIRQ);
	softlockup_tick();
}

/*
 * The 64-bit jiffies value is not atomic - you MUST NOT read it
 * without sampling the sequence number in xtime_lock.
 * jiffies is defined in the linker script...
 */

void do_timer(unsigned long ticks)
{
	jiffies_64 += ticks;
	update_wall_time();
	calc_global_load();
}

#ifdef __ARCH_WANT_SYS_ALARM

/*
 * For backwards compatibility?  This can be done in libc so Alpha
 * and all newer ports shouldn't need it.
 */
SYSCALL_DEFINE1(alarm, unsigned int, seconds)
{
	return alarm_setitimer(seconds);
}

#endif

#ifndef __alpha__

/*
 * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
 * should be moved into arch/i386 instead?
 */

/**
 * sys_getpid - return the thread group id of the current process
 *
 * Note, despite the name, this returns the tgid not the pid.  The tgid and
 * the pid are identical unless CLONE_THREAD was specified on clone() in
 * which case the tgid is the same in all threads of the same group.
 *
 * This is SMP safe as current->tgid does not change.
 */
SYSCALL_DEFINE0(getpid)
{
	return task_tgid_vnr(current);
}

/*
 * Accessing ->real_parent is not SMP-safe, it could
 * change from under us. However, we can use a stale
 * value of ->real_parent under rcu_read_lock(), see
 * release_task()->call_rcu(delayed_put_task_struct).
 */
SYSCALL_DEFINE0(getppid)
{
	int pid;

	rcu_read_lock();
	pid = task_tgid_vnr(current->real_parent);
	rcu_read_unlock();

	return pid;
}

SYSCALL_DEFINE0(getuid)
{
	/* Only we change this so SMP safe */
	return current_uid();
}

SYSCALL_DEFINE0(geteuid)
{
	/* Only we change this so SMP safe */
	return current_euid();
}

SYSCALL_DEFINE0(getgid)
{
	/* Only we change this so SMP safe */
	return current_gid();
}

SYSCALL_DEFINE0(getegid)
{
	/* Only we change this so SMP safe */
	return current_egid();
}

#endif

static void process_timeout(unsigned long __data)
{
	wake_up_process((struct task_struct *)__data);
}

/**
 * schedule_timeout - sleep until timeout
 * @timeout: timeout value in jiffies
 *
 * Make the current task sleep until @timeout jiffies have
 * elapsed. The routine will return immediately unless
 * the current task state has been set (see set_current_state()).
 *
 * You can set the task state as follows -
 *
 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
 * pass before the routine returns. The routine will return 0.
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task. In this case the remaining time
 * in jiffies will be returned, or 0 if the timer expired in time.
 *
 * The current task state is guaranteed to be TASK_RUNNING when this
 * routine returns.
 *
 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
 * the CPU away without a bound on the timeout. In this case the return
 * value will be %MAX_SCHEDULE_TIMEOUT.
 *
 * In all cases the return value is guaranteed to be non-negative.
 */
signed long __sched schedule_timeout(signed long timeout)
{
	struct timer_list timer;
	unsigned long expire;

	switch (timeout)
	{
	case MAX_SCHEDULE_TIMEOUT:
		/*
		 * These two special cases are useful to be comfortable
		 * in the caller. Nothing more. We could take
		 * MAX_SCHEDULE_TIMEOUT from one of the negative values
		 * but I'd like to return a valid offset (>=0) to allow
		 * the caller to do everything it wants with the retval.
		 */
		schedule();
		goto out;
	default:
		/*
		 * Another bit of PARANOID. Note that the retval will be
		 * 0 since no piece of kernel is supposed to do a check
		 * for a negative retval of schedule_timeout() (since it
		 * should never happen anyway). You just have the printk()
		 * that will tell you if something has gone wrong and where.
		 */
		if (timeout < 0) {
			printk(KERN_ERR "schedule_timeout: wrong timeout "
				"value %lx\n", timeout);
			dump_stack();
			current->state = TASK_RUNNING;
			goto out;
		}
	}

	expire = timeout + jiffies;

	setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
	__mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
	schedule();
	del_singleshot_timer_sync(&timer);

	/* Remove the timer from the object tracker */
	destroy_timer_on_stack(&timer);

	timeout = expire - jiffies;

 out:
	return timeout < 0 ? 0 : timeout;
}
EXPORT_SYMBOL(schedule_timeout);
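
/*
 * Illustrative usage (added for this edit, not part of the original file):
 * sleeping for roughly two seconds while still reacting to signals. Note
 * that the task state must be set before calling schedule_timeout():
 *
 *	set_current_state(TASK_INTERRUPTIBLE);
 *	remaining = schedule_timeout(2 * HZ);
 *
 * Here remaining is a hypothetical variable; it is 0 if the full timeout
 * elapsed, or the number of jiffies left if a signal woke the task early.
 */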

/*
 * We can use __set_current_state() here because schedule_timeout() calls
 * schedule() unconditionally.
 */
signed long __sched schedule_timeout_interruptible(signed long timeout)
{
	__set_current_state(TASK_INTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_interruptible);

signed long __sched schedule_timeout_killable(signed long timeout)
{
	__set_current_state(TASK_KILLABLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_killable);

signed long __sched schedule_timeout_uninterruptible(signed long timeout)
{
	__set_current_state(TASK_UNINTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_uninterruptible);

/* Thread ID - the internal kernel "pid" */
SYSCALL_DEFINE0(gettid)
{
	return task_pid_vnr(current);
}

/**
 * do_sysinfo - fill in sysinfo struct
 * @info: pointer to buffer to fill
 */
int do_sysinfo(struct sysinfo *info)
{
	unsigned long mem_total, sav_total;
	unsigned int mem_unit, bitcount;
	struct timespec tp;

	memset(info, 0, sizeof(struct sysinfo));

	ktime_get_ts(&tp);
	monotonic_to_bootbased(&tp);
	info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);

	get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);

	info->procs = nr_threads;

	si_meminfo(info);
	si_swapinfo(info);

	/*
	 * If the sum of all the available memory (i.e. ram + swap)
	 * is less than can be stored in a 32 bit unsigned long then
	 * we can be binary compatible with 2.2.x kernels.  If not,
	 * well, in that case 2.2.x was broken anyways...
	 *
	 *  -Erik Andersen <andersee@debian.org>
	 */

	mem_total = info->totalram + info->totalswap;
	if (mem_total < info->totalram || mem_total < info->totalswap)
		goto out;
	bitcount = 0;
	mem_unit = info->mem_unit;
	while (mem_unit > 1) {
		bitcount++;
		mem_unit >>= 1;
		sav_total = mem_total;
		mem_total <<= 1;
		if (mem_total < sav_total)
			goto out;
	}

	/*
	 * If mem_total did not overflow, multiply all memory values by
	 * info->mem_unit and set it to 1.  This leaves things compatible
	 * with 2.2.x, and also retains compatibility with earlier 2.4.x
	 * kernels...
	 */

	info->mem_unit = 1;
	info->totalram <<= bitcount;
	info->freeram <<= bitcount;
	info->sharedram <<= bitcount;
	info->bufferram <<= bitcount;
	info->totalswap <<= bitcount;
	info->freeswap <<= bitcount;
	info->totalhigh <<= bitcount;
	info->freehigh <<= bitcount;

out:
	return 0;
}

SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
{
	struct sysinfo val;

	do_sysinfo(&val);

	if (copy_to_user(info, &val, sizeof(struct sysinfo)))
		return -EFAULT;

	return 0;
}

static int __cpuinit init_timers_cpu(int cpu)
{
	int j;
	struct tvec_base *base;
	static char __cpuinitdata tvec_base_done[NR_CPUS];

	if (!tvec_base_done[cpu]) {
		static char boot_done;

		if (boot_done) {
			/*
			 * The APs use this path later in boot
			 */
			base = kmalloc_node(sizeof(*base),
						GFP_KERNEL | __GFP_ZERO,
						cpu_to_node(cpu));
			if (!base)
				return -ENOMEM;

			/* Make sure that tvec_base is 2 byte aligned */
			if (tbase_get_deferrable(base)) {
				WARN_ON(1);
				kfree(base);
				return -ENOMEM;
			}
			per_cpu(tvec_bases, cpu) = base;
		} else {
			/*
			 * This is for the boot CPU - we use compile-time
			 * static initialisation because per-cpu memory isn't
			 * ready yet and because the memory allocators are not
			 * initialised either.
			 */
			boot_done = 1;
			base = &boot_tvec_bases;
		}
		tvec_base_done[cpu] = 1;
	} else {
		base = per_cpu(tvec_bases, cpu);
	}

	spin_lock_init(&base->lock);

	for (j = 0; j < TVN_SIZE; j++) {
		INIT_LIST_HEAD(base->tv5.vec + j);
		INIT_LIST_HEAD(base->tv4.vec + j);
		INIT_LIST_HEAD(base->tv3.vec + j);
		INIT_LIST_HEAD(base->tv2.vec + j);
	}
	for (j = 0; j < TVR_SIZE; j++)
		INIT_LIST_HEAD(base->tv1.vec + j);

	base->timer_jiffies = jiffies;
	base->next_timer = base->timer_jiffies;
	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
{
	struct timer_list *timer;

	while (!list_empty(head)) {
		timer = list_first_entry(head, struct timer_list, entry);
		detach_timer(timer, 0);
		timer_set_base(timer, new_base);
		if (time_before(timer->expires, new_base->next_timer) &&
		    !tbase_get_deferrable(timer->base))
			new_base->next_timer = timer->expires;
		internal_add_timer(new_base, timer);
	}
}

static void __cpuinit migrate_timers(int cpu)
{
	struct tvec_base *old_base;
	struct tvec_base *new_base;
	int i;

	BUG_ON(cpu_online(cpu));
	old_base = per_cpu(tvec_bases, cpu);
	new_base = get_cpu_var(tvec_bases);
	/*
	 * The caller is globally serialized and nobody else
	 * takes two locks at once, deadlock is not possible.
	 */
	spin_lock_irq(&new_base->lock);
	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);

	BUG_ON(old_base->running_timer);

	for (i = 0; i < TVR_SIZE; i++)
		migrate_timer_list(new_base, old_base->tv1.vec + i);
	for (i = 0; i < TVN_SIZE; i++) {
		migrate_timer_list(new_base, old_base->tv2.vec + i);
		migrate_timer_list(new_base, old_base->tv3.vec + i);
		migrate_timer_list(new_base, old_base->tv4.vec + i);
		migrate_timer_list(new_base, old_base->tv5.vec + i);
	}

	spin_unlock(&old_base->lock);
	spin_unlock_irq(&new_base->lock);
	put_cpu_var(tvec_bases);
}
#endif /* CONFIG_HOTPLUG_CPU */

static int __cpuinit timer_cpu_notify(struct notifier_block *self,
				unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;
	int err;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		err = init_timers_cpu(cpu);
		if (err < 0)
			return notifier_from_errno(err);
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		migrate_timers(cpu);
		break;
#endif
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata timers_nb = {
	.notifier_call	= timer_cpu_notify,
};


void __init init_timers(void)
{
	int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
				(void *)(long)smp_processor_id());

	init_timer_stats();

	BUG_ON(err == NOTIFY_BAD);
	register_cpu_notifier(&timers_nb);
	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}

/**
 * msleep - sleep safely even with waitqueue interruptions
 * @msecs: Time in milliseconds to sleep for
 */
void msleep(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	while (timeout)
		timeout = schedule_timeout_uninterruptible(timeout);
}

EXPORT_SYMBOL(msleep);

/**
 * msleep_interruptible - sleep waiting for signals
 * @msecs: Time in milliseconds to sleep for
 */
unsigned long msleep_interruptible(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	while (timeout && !signal_pending(current))
		timeout = schedule_timeout_interruptible(timeout);
	return jiffies_to_msecs(timeout);
}

EXPORT_SYMBOL(msleep_interruptible);