/*
 * linux/kernel/timer.c
 *
 * Kernel internal timers, basic process system calls
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 *
 * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
 *
 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
 *            "A Kernel Model for Precision Timekeeping" by Dave Mills
 * 1998-12-24 Fixed an xtime SMP race (we need the xtime_lock rw spinlock to
 *            serialize accesses to xtime/lost_ticks).
 *            Copyright (C) 1998 Andrea Arcangeli
 * 1999-03-10 Improved NTP compatibility by Ulrich Windl
 * 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love
 * 2000-10-05 Implemented scalable SMP per-CPU timer handling.
 *            Copyright (C) 2000, 2001, 2002 Ingo Molnar
 *            Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
 */

#include <linux/kernel_stat.h>
#include <linux/export.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/pid_namespace.h>
#include <linux/notifier.h>
#include <linux/thread_info.h>
#include <linux/time.h>
#include <linux/jiffies.h>
#include <linux/posix-timers.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
#include <linux/irq_work.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/div64.h>
#include <asm/timex.h>
#include <asm/io.h>

#define CREATE_TRACE_POINTS
#include <trace/events/timer.h>

u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;

EXPORT_SYMBOL(jiffies_64);

/*
 * per-CPU timer vector definitions:
 */
#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

struct tvec {
	struct list_head vec[TVN_SIZE];
};

struct tvec_root {
	struct list_head vec[TVR_SIZE];
};

struct tvec_base {
	spinlock_t lock;
	struct timer_list *running_timer;
	unsigned long timer_jiffies;
	unsigned long next_timer;
	struct tvec_root tv1;
	struct tvec tv2;
	struct tvec tv3;
	struct tvec tv4;
	struct tvec tv5;
} ____cacheline_aligned;

struct tvec_base boot_tvec_bases;
EXPORT_SYMBOL(boot_tvec_bases);
static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;

/* Functions below help us manage 'deferrable' flag */
static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
{
	return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
}

static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
{
	return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
}

static inline void timer_set_deferrable(struct timer_list *timer)
{
	timer->base = TBASE_MAKE_DEFERRED(timer->base);
}

static inline void
timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
{
	timer->base = (struct tvec_base *)((unsigned long)(new_base) |
				      tbase_get_deferrable(timer->base));
}

static unsigned long round_jiffies_common(unsigned long j, int cpu,
		bool force_up)
{
	int rem;
	unsigned long original = j;

	/*
	 * We don't want all cpus firing their timers at once hitting the
	 * same lock or cachelines, so we skew each extra cpu with an extra
	 * 3 jiffies. This 3 jiffies came originally from the mm/ code which
	 * already did this.
	 * The skew is done by adding 3*cpunr, then rounding, then subtracting
	 * this extra offset again.
	 */
	j += cpu * 3;

	rem = j % HZ;

	/*
	 * If the target jiffie is just after a whole second (which can happen
	 * due to delays of the timer irq, long irq off times etc etc) then
	 * we should round down to the whole second, not up. Use 1/4th second
	 * as cutoff for this rounding as an extreme upper bound for this.
	 * But never round down if @force_up is set.
	 */
	if (rem < HZ/4 && !force_up) /* round down */
		j = j - rem;
	else /* round up */
		j = j - rem + HZ;

	/* now that we have rounded, subtract the extra skew again */
	j -= cpu * 3;

	if (j <= jiffies) /* rounding ate our timeout entirely; */
		return original;
	return j;
}

/**
 * __round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * __round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The exact rounding is skewed for each processor to avoid all
 * processors firing at the exact same time, which could lead
 * to lock contention or spurious cache line bouncing.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long __round_jiffies(unsigned long j, int cpu)
{
	return round_jiffies_common(j, cpu, false);
}
EXPORT_SYMBOL_GPL(__round_jiffies);

/**
 * __round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * __round_jiffies_relative() rounds a time delta in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The exact rounding is skewed for each processor to avoid all
 * processors firing at the exact same time, which could lead
 * to lock contention or spurious cache line bouncing.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long __round_jiffies_relative(unsigned long j, int cpu)
{
	unsigned long j0 = jiffies;

	/* Use j0 because jiffies might change while we run */
	return round_jiffies_common(j + j0, cpu, false) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_relative);

/**
 * round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies(unsigned long j)
{
	return round_jiffies_common(j, raw_smp_processor_id(), false);
}
EXPORT_SYMBOL_GPL(round_jiffies);

/**
 * round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * round_jiffies_relative() rounds a time delta in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies_relative(unsigned long j)
{
	return __round_jiffies_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_relative);

/**
 * __round_jiffies_up - function to round jiffies up to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * This is the same as __round_jiffies() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long __round_jiffies_up(unsigned long j, int cpu)
{
	return round_jiffies_common(j, cpu, true);
}
EXPORT_SYMBOL_GPL(__round_jiffies_up);

/**
 * __round_jiffies_up_relative - function to round jiffies up to a full second
 * @j: the time in (relative) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * This is the same as __round_jiffies_relative() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
{
	unsigned long j0 = jiffies;

	/* Use j0 because jiffies might change while we run */
	return round_jiffies_common(j + j0, cpu, true) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);

/**
 * round_jiffies_up - function to round jiffies up to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * This is the same as round_jiffies() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long round_jiffies_up(unsigned long j)
{
	return round_jiffies_common(j, raw_smp_processor_id(), true);
}
EXPORT_SYMBOL_GPL(round_jiffies_up);

/**
 * round_jiffies_up_relative - function to round jiffies up to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * This is the same as round_jiffies_relative() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long round_jiffies_up_relative(unsigned long j)
{
	return __round_jiffies_up_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_up_relative);

/**
 * set_timer_slack - set the allowed slack for a timer
 * @timer: the timer to be modified
 * @slack_hz: the amount of time (in jiffies) allowed for rounding
 *
 * Set the amount of time, in jiffies, that a certain timer has
 * in terms of slack. By setting this value, the timer subsystem
 * will schedule the actual timer somewhere between
 * the time mod_timer() asks for, and that time plus the slack.
 *
 * By setting the slack to -1, a percentage of the delay is used
 * instead.
 */
void set_timer_slack(struct timer_list *timer, int slack_hz)
{
	timer->slack = slack_hz;
}
EXPORT_SYMBOL_GPL(set_timer_slack);

static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
	unsigned long expires = timer->expires;
	unsigned long idx = expires - base->timer_jiffies;
	struct list_head *vec;

	if (idx < TVR_SIZE) {
		int i = expires & TVR_MASK;
		vec = base->tv1.vec + i;
	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
		int i = (expires >> TVR_BITS) & TVN_MASK;
		vec = base->tv2.vec + i;
	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
		vec = base->tv3.vec + i;
	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
		vec = base->tv4.vec + i;
	} else if ((signed long) idx < 0) {
		/*
		 * Can happen if you add a timer with expires == jiffies,
		 * or you set a timer to go off in the past
		 */
		vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
	} else {
		int i;
		/* If the timeout is larger than 0xffffffff on 64-bit
		 * architectures then we use the maximum timeout:
		 */
		if (idx > 0xffffffffUL) {
			idx = 0xffffffffUL;
			expires = idx + base->timer_jiffies;
		}
		i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
		vec = base->tv5.vec + i;
	}
	/*
	 * Timers are FIFO:
	 */
	list_add_tail(&timer->entry, vec);
}

#ifdef CONFIG_TIMER_STATS
void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
{
	if (timer->start_site)
		return;

	timer->start_site = addr;
	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
	timer->start_pid = current->pid;
}

static void timer_stats_account_timer(struct timer_list *timer)
{
	unsigned int flag = 0;

	if (likely(!timer->start_site))
		return;
	if (unlikely(tbase_get_deferrable(timer->base)))
		flag |= TIMER_STATS_FLAG_DEFERRABLE;

	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
				 timer->function, timer->start_comm, flag);
}

#else
static void timer_stats_account_timer(struct timer_list *timer) {}
#endif

#ifdef CONFIG_DEBUG_OBJECTS_TIMERS

static struct debug_obj_descr timer_debug_descr;

static void *timer_debug_hint(void *addr)
{
	return ((struct timer_list *) addr)->function;
}

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static int timer_fixup_init(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		del_timer_sync(timer);
		debug_object_init(timer, &timer_debug_descr);
		return 1;
	default:
		return 0;
	}
}

/* Stub timer callback for improperly used timers. */
static void stub_timer(unsigned long data)
{
	WARN_ON(1);
}

/*
 * fixup_activate is called when:
 * - an active object is activated
 * - an unknown object is activated (might be a statically initialized object)
 */
static int timer_fixup_activate(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {

	case ODEBUG_STATE_NOTAVAILABLE:
		/*
		 * This is not really a fixup. The timer was
		 * statically initialized. We just make sure that it
		 * is tracked in the object tracker.
		 */
		if (timer->entry.next == NULL &&
		    timer->entry.prev == TIMER_ENTRY_STATIC) {
			debug_object_init(timer, &timer_debug_descr);
			debug_object_activate(timer, &timer_debug_descr);
			return 0;
		} else {
			setup_timer(timer, stub_timer, 0);
			return 1;
		}
		return 0;

	case ODEBUG_STATE_ACTIVE:
		WARN_ON(1);

	default:
		return 0;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static int timer_fixup_free(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		del_timer_sync(timer);
		debug_object_free(timer, &timer_debug_descr);
		return 1;
	default:
		return 0;
	}
}

/*
 * fixup_assert_init is called when:
 * - an untracked/uninit-ed object is found
 */
static int timer_fixup_assert_init(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_NOTAVAILABLE:
		if (timer->entry.prev == TIMER_ENTRY_STATIC) {
			/*
			 * This is not really a fixup. The timer was
			 * statically initialized. We just make sure that it
			 * is tracked in the object tracker.
			 */
			debug_object_init(timer, &timer_debug_descr);
			return 0;
		} else {
			setup_timer(timer, stub_timer, 0);
			return 1;
		}
	default:
		return 0;
	}
}

static struct debug_obj_descr timer_debug_descr = {
	.name			= "timer_list",
	.debug_hint		= timer_debug_hint,
	.fixup_init		= timer_fixup_init,
	.fixup_activate		= timer_fixup_activate,
	.fixup_free		= timer_fixup_free,
	.fixup_assert_init	= timer_fixup_assert_init,
};

static inline void debug_timer_init(struct timer_list *timer)
{
	debug_object_init(timer, &timer_debug_descr);
}

static inline void debug_timer_activate(struct timer_list *timer)
{
	debug_object_activate(timer, &timer_debug_descr);
}

static inline void debug_timer_deactivate(struct timer_list *timer)
{
	debug_object_deactivate(timer, &timer_debug_descr);
}

static inline void debug_timer_free(struct timer_list *timer)
{
	debug_object_free(timer, &timer_debug_descr);
}

static inline void debug_timer_assert_init(struct timer_list *timer)
{
	debug_object_assert_init(timer, &timer_debug_descr);
}

static void __init_timer(struct timer_list *timer,
			 const char *name,
			 struct lock_class_key *key);

void init_timer_on_stack_key(struct timer_list *timer,
			     const char *name,
			     struct lock_class_key *key)
{
	debug_object_init_on_stack(timer, &timer_debug_descr);
	__init_timer(timer, name, key);
}
EXPORT_SYMBOL_GPL(init_timer_on_stack_key);

void destroy_timer_on_stack(struct timer_list *timer)
{
	debug_object_free(timer, &timer_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_timer_on_stack);

#else
static inline void debug_timer_init(struct timer_list *timer) { }
static inline void debug_timer_activate(struct timer_list *timer) { }
static inline void debug_timer_deactivate(struct timer_list *timer) { }
static inline void debug_timer_assert_init(struct timer_list *timer) { }
#endif

static inline void debug_init(struct timer_list *timer)
{
	debug_timer_init(timer);
	trace_timer_init(timer);
}

static inline void
debug_activate(struct timer_list *timer, unsigned long expires)
{
	debug_timer_activate(timer);
	trace_timer_start(timer, expires);
}

static inline void debug_deactivate(struct timer_list *timer)
{
	debug_timer_deactivate(timer);
	trace_timer_cancel(timer);
}

static inline void debug_assert_init(struct timer_list *timer)
{
	debug_timer_assert_init(timer);
}

static void __init_timer(struct timer_list *timer,
			 const char *name,
			 struct lock_class_key *key)
{
	timer->entry.next = NULL;
	timer->base = __raw_get_cpu_var(tvec_bases);
	timer->slack = -1;
#ifdef CONFIG_TIMER_STATS
	timer->start_site = NULL;
	timer->start_pid = -1;
	memset(timer->start_comm, 0, TASK_COMM_LEN);
#endif
	lockdep_init_map(&timer->lockdep_map, name, key, 0);
}

void setup_deferrable_timer_on_stack_key(struct timer_list *timer,
					 const char *name,
					 struct lock_class_key *key,
					 void (*function)(unsigned long),
					 unsigned long data)
{
	timer->function = function;
	timer->data = data;
	init_timer_on_stack_key(timer, name, key);
	timer_set_deferrable(timer);
}
EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key);

/**
 * init_timer_key - initialize a timer
 * @timer: the timer to be initialized
 * @name: name of the timer
 * @key: lockdep class key of the fake lock used for tracking timer
 *       sync lock dependencies
 *
 * init_timer_key() must be done to a timer prior to calling *any* of the
 * other timer functions.
 */
void init_timer_key(struct timer_list *timer,
		    const char *name,
		    struct lock_class_key *key)
{
	debug_init(timer);
	__init_timer(timer, name, key);
}
EXPORT_SYMBOL(init_timer_key);

void init_timer_deferrable_key(struct timer_list *timer,
			       const char *name,
			       struct lock_class_key *key)
{
	init_timer_key(timer, name, key);
	timer_set_deferrable(timer);
}
EXPORT_SYMBOL(init_timer_deferrable_key);

static inline void detach_timer(struct timer_list *timer,
				int clear_pending)
{
	struct list_head *entry = &timer->entry;

	debug_deactivate(timer);

	__list_del(entry->prev, entry->next);
	if (clear_pending)
		entry->next = NULL;
	entry->prev = LIST_POISON2;
}

/*
 * We are using hashed locking: holding per_cpu(tvec_bases).lock
 * means that all timers which are tied to this base via timer->base are
 * locked, and the base itself is locked too.
 *
 * So __run_timers/migrate_timers can safely modify all timers which could
 * be found on ->tvX lists.
 *
 * When the timer's base is locked, and the timer removed from list, it is
 * possible to set timer->base = NULL and drop the lock: the timer remains
 * locked.
 */
static struct tvec_base *lock_timer_base(struct timer_list *timer,
					 unsigned long *flags)
	__acquires(timer->base->lock)
{
	struct tvec_base *base;

	for (;;) {
		struct tvec_base *prelock_base = timer->base;
		base = tbase_get_base(prelock_base);
		if (likely(base != NULL)) {
			spin_lock_irqsave(&base->lock, *flags);
			if (likely(prelock_base == timer->base))
				return base;
			/* The timer has migrated to another CPU */
			spin_unlock_irqrestore(&base->lock, *flags);
		}
		cpu_relax();
	}
}

static inline int
__mod_timer(struct timer_list *timer, unsigned long expires,
	    bool pending_only, int pinned)
{
	struct tvec_base *base, *new_base;
	unsigned long flags;
	int ret = 0, cpu;

	timer_stats_timer_set_start_info(timer);
	BUG_ON(!timer->function);

	base = lock_timer_base(timer, &flags);

	if (timer_pending(timer)) {
		detach_timer(timer, 0);
		if (timer->expires == base->next_timer &&
		    !tbase_get_deferrable(timer->base))
			base->next_timer = base->timer_jiffies;
		ret = 1;
	} else {
		if (pending_only)
			goto out_unlock;
	}

	debug_activate(timer, expires);

	cpu = smp_processor_id();

#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
		cpu = get_nohz_timer_target();
#endif
	new_base = per_cpu(tvec_bases, cpu);

	if (base != new_base) {
		/*
		 * We are trying to schedule the timer on the local CPU.
		 * However we can't change timer's base while it is running,
		 * otherwise del_timer_sync() can't detect that the timer's
		 * handler has not yet finished. This also guarantees that
		 * the timer is serialized wrt itself.
		 */
		if (likely(base->running_timer != timer)) {
			/* See the comment in lock_timer_base() */
			timer_set_base(timer, NULL);
			spin_unlock(&base->lock);
			base = new_base;
			spin_lock(&base->lock);
			timer_set_base(timer, base);
		}
	}

	timer->expires = expires;
	if (time_before(timer->expires, base->next_timer) &&
	    !tbase_get_deferrable(timer->base))
		base->next_timer = timer->expires;
	internal_add_timer(base, timer);

out_unlock:
	spin_unlock_irqrestore(&base->lock, flags);

	return ret;
}

/**
 * mod_timer_pending - modify a pending timer's timeout
 * @timer: the pending timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer_pending() is the same for pending timers as mod_timer(),
 * but will not re-activate and modify already deleted timers.
 *
 * It is useful for unserialized use of timers.
 */
int mod_timer_pending(struct timer_list *timer, unsigned long expires)
{
	return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
}
EXPORT_SYMBOL(mod_timer_pending);

/*
 * Decide where to put the timer while taking the slack into account
 *
 * Algorithm:
 *   1) calculate the maximum (absolute) time
 *   2) calculate the highest bit where the expires and new max are different
 *   3) use this bit to make a mask
 *   4) use the bitmask to round down the maximum time, so that all last
 *      bits are zeros
 */
static inline
unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
{
	unsigned long expires_limit, mask;
	int bit;

	if (timer->slack >= 0) {
		expires_limit = expires + timer->slack;
	} else {
		long delta = expires - jiffies;

		if (delta < 256)
			return expires;

		expires_limit = expires + delta / 256;
	}
	mask = expires ^ expires_limit;
	if (mask == 0)
		return expires;

	bit = find_last_bit(&mask, BITS_PER_LONG);

	mask = (1 << bit) - 1;

	expires_limit = expires_limit & ~(mask);

	return expires_limit;
}

/**
 * mod_timer - modify a timer's timeout
 * @timer: the timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer() is a more efficient way to update the expire field of an
 * active timer (if the timer is inactive it will be activated)
 *
 * mod_timer(timer, expires) is equivalent to:
 *
 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 *
 * Note that if there are multiple unserialized concurrent users of the
 * same timer, then mod_timer() is the only safe way to modify the timeout,
 * since add_timer() cannot modify an already running timer.
 *
 * The function returns whether it has modified a pending timer or not.
 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
 * active timer returns 1.)
 */
int mod_timer(struct timer_list *timer, unsigned long expires)
{
	expires = apply_slack(timer, expires);

	/*
	 * This is a common optimization triggered by the
	 * networking code - if the timer is re-modified
	 * to be the same thing then just return:
	 */
	if (timer_pending(timer) && timer->expires == expires)
		return 1;

	return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
}
EXPORT_SYMBOL(mod_timer);

/**
 * mod_timer_pinned - modify a timer's timeout
 * @timer: the timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer_pinned() is a way to update the expire field of an
 * active timer (if the timer is inactive it will be activated)
 * and to ensure that the timer is scheduled on the current CPU.
 *
 * Note that this does not prevent the timer from being migrated
 * when the current CPU goes offline. If this is a problem for
 * you, use CPU-hotplug notifiers to handle it correctly, for
 * example, cancelling the timer when the corresponding CPU goes
 * offline.
 *
 * mod_timer_pinned(timer, expires) is equivalent to:
 *
 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 */
int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
{
	if (timer->expires == expires && timer_pending(timer))
		return 1;

	return __mod_timer(timer, expires, false, TIMER_PINNED);
}
EXPORT_SYMBOL(mod_timer_pinned);

/**
 * add_timer - start a timer
 * @timer: the timer to be added
 *
 * The kernel will do a ->function(->data) callback from the
 * timer interrupt at the ->expires point in the future. The
 * current time is 'jiffies'.
 *
 * The timer's ->expires, ->function (and if the handler uses it, ->data)
 * fields must be set prior to calling this function.
 *
 * Timers with an ->expires field in the past will be executed in the next
 * timer tick.
 */
void add_timer(struct timer_list *timer)
{
	BUG_ON(timer_pending(timer));
	mod_timer(timer, timer->expires);
}
EXPORT_SYMBOL(add_timer);

/**
 * add_timer_on - start a timer on a particular CPU
 * @timer: the timer to be added
 * @cpu: the CPU to start it on
 *
 * This is not very scalable on SMP. Double adds are not possible.
 */
void add_timer_on(struct timer_list *timer, int cpu)
{
	struct tvec_base *base = per_cpu(tvec_bases, cpu);
	unsigned long flags;

	timer_stats_timer_set_start_info(timer);
	BUG_ON(timer_pending(timer) || !timer->function);
	spin_lock_irqsave(&base->lock, flags);
	timer_set_base(timer, base);
	debug_activate(timer, timer->expires);
	if (time_before(timer->expires, base->next_timer) &&
	    !tbase_get_deferrable(timer->base))
		base->next_timer = timer->expires;
	internal_add_timer(base, timer);
	/*
	 * Check whether the other CPU is idle and needs to be
	 * triggered to reevaluate the timer wheel when nohz is
	 * active. We are protected against the other CPU fiddling
	 * with the timer by holding the timer base lock. This also
	 * makes sure that a CPU on the way to idle can not evaluate
	 * the timer wheel.
	 */
	wake_up_idle_cpu(cpu);
	spin_unlock_irqrestore(&base->lock, flags);
}
EXPORT_SYMBOL_GPL(add_timer_on);

/**
 * del_timer - deactivate a timer.
 * @timer: the timer to be deactivated
 *
 * del_timer() deactivates a timer - this works on both active and inactive
 * timers.
 *
 * The function returns whether it has deactivated a pending timer or not.
 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
 * active timer returns 1.)
 */
int del_timer(struct timer_list *timer)
{
	struct tvec_base *base;
	unsigned long flags;
	int ret = 0;

	debug_assert_init(timer);

	timer_stats_timer_clear_start_info(timer);
	if (timer_pending(timer)) {
		base = lock_timer_base(timer, &flags);
		if (timer_pending(timer)) {
			detach_timer(timer, 1);
			if (timer->expires == base->next_timer &&
			    !tbase_get_deferrable(timer->base))
				base->next_timer = base->timer_jiffies;
			ret = 1;
		}
		spin_unlock_irqrestore(&base->lock, flags);
	}

	return ret;
}
EXPORT_SYMBOL(del_timer);

/**
 * try_to_del_timer_sync - Try to deactivate a timer
 * @timer: the timer to deactivate
 *
 * This function tries to deactivate a timer. Upon successful (ret >= 0)
 * exit the timer is not queued and the handler is not running on any CPU.
 */
int try_to_del_timer_sync(struct timer_list *timer)
{
	struct tvec_base *base;
	unsigned long flags;
	int ret = -1;

	debug_assert_init(timer);

	base = lock_timer_base(timer, &flags);

	if (base->running_timer == timer)
		goto out;

	timer_stats_timer_clear_start_info(timer);
	ret = 0;
	if (timer_pending(timer)) {
		detach_timer(timer, 1);
		if (timer->expires == base->next_timer &&
		    !tbase_get_deferrable(timer->base))
			base->next_timer = base->timer_jiffies;
		ret = 1;
	}
out:
	spin_unlock_irqrestore(&base->lock, flags);

	return ret;
}
EXPORT_SYMBOL(try_to_del_timer_sync);

#ifdef CONFIG_SMP
/**
 * del_timer_sync - deactivate a timer and wait for the handler to finish.
 * @timer: the timer to be deactivated
 *
 * This function only differs from del_timer() on SMP: besides deactivating
 * the timer it also makes sure the handler has finished executing on other
 * CPUs.
 *
 * Synchronization rules: Callers must prevent restarting of the timer,
 * otherwise this function is meaningless. It must not be called from
 * interrupt contexts. The caller must not hold locks which would prevent
 * completion of the timer's handler. The timer's handler must not call
 * add_timer_on(). Upon exit the timer is not queued and the handler is
 * not running on any CPU.
 *
 * Note: You must not hold locks that are held in interrupt context
 *   while calling this function. Even if the lock has nothing to do
 *   with the timer in question. Here's why:
 *
 *    CPU0                             CPU1
 *    ----                             ----
 *                                     <SOFTIRQ>
 *                                       call_timer_fn();
 *                                       base->running_timer = mytimer;
 *    spin_lock_irq(somelock);
 *                                     <IRQ>
 *                                        spin_lock(somelock);
 *    del_timer_sync(mytimer);
 *      while (base->running_timer == mytimer);
 *
 * Now del_timer_sync() will never return and never release somelock.
 * The interrupt on the other CPU is waiting to grab somelock but
 * it has interrupted the softirq that CPU0 is waiting to finish.
 *
 * The function returns whether it has deactivated a pending timer or not.
 */
int del_timer_sync(struct timer_list *timer)
{
#ifdef CONFIG_LOCKDEP
	unsigned long flags;

	/*
	 * If lockdep gives a backtrace here, please reference
	 * the synchronization rules above.
	 */
	local_irq_save(flags);
	lock_map_acquire(&timer->lockdep_map);
	lock_map_release(&timer->lockdep_map);
	local_irq_restore(flags);
#endif
	/*
	 * don't use it in hardirq context, because it
	 * could lead to deadlock.
	 */
	WARN_ON(in_irq());
	for (;;) {
		int ret = try_to_del_timer_sync(timer);
		if (ret >= 0)
			return ret;
		cpu_relax();
	}
}
EXPORT_SYMBOL(del_timer_sync);
#endif

static int cascade(struct tvec_base *base, struct tvec *tv, int index)
{
	/* cascade all the timers from tv up one level */
	struct timer_list *timer, *tmp;
	struct list_head tv_list;

	list_replace_init(tv->vec + index, &tv_list);

	/*
	 * We are removing _all_ timers from the list, so we
	 * don't have to detach them individually.
	 */
	list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
		BUG_ON(tbase_get_base(timer->base) != base);
		internal_add_timer(base, timer);
	}

	return index;
}

static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
			  unsigned long data)
{
	int preempt_count = preempt_count();

#ifdef CONFIG_LOCKDEP
	/*
	 * It is permissible to free the timer from inside the
	 * function that is called from it, this we need to take into
	 * account for lockdep too. To avoid bogus "held lock freed"
	 * warnings as well as problems when looking into
	 * timer->lockdep_map, make a copy and use that here.
	 */
	struct lockdep_map lockdep_map;

	lockdep_copy_map(&lockdep_map, &timer->lockdep_map);
#endif
	/*
	 * Couple the lock chain with the lock chain at
	 * del_timer_sync() by acquiring the lock_map around the fn()
	 * call here and in del_timer_sync().
	 */
	lock_map_acquire(&lockdep_map);

	trace_timer_expire_entry(timer);
	fn(data);
	trace_timer_expire_exit(timer);

	lock_map_release(&lockdep_map);

	if (preempt_count != preempt_count()) {
		WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
			  fn, preempt_count, preempt_count());
		/*
		 * Restore the preempt count. That gives us a decent
		 * chance to survive and extract information. If the
		 * callback kept a lock held, bad luck, but not worse
		 * than the BUG() we had.
		 */
		preempt_count() = preempt_count;
	}
}

#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)

/**
 * __run_timers - run all expired timers (if any) on this CPU.
 * @base: the timer vector to be processed.
 *
 * This function cascades all vectors and executes all expired timer
 * vectors.
 */
static inline void __run_timers(struct tvec_base *base)
{
	struct timer_list *timer;

	spin_lock_irq(&base->lock);
	while (time_after_eq(jiffies, base->timer_jiffies)) {
		struct list_head work_list;
		struct list_head *head = &work_list;
		int index = base->timer_jiffies & TVR_MASK;

		/*
		 * Cascade timers:
		 */
		if (!index &&
			(!cascade(base, &base->tv2, INDEX(0))) &&
				(!cascade(base, &base->tv3, INDEX(1))) &&
					!cascade(base, &base->tv4, INDEX(2)))
			cascade(base, &base->tv5, INDEX(3));
		++base->timer_jiffies;
		list_replace_init(base->tv1.vec + index, &work_list);
		while (!list_empty(head)) {
			void (*fn)(unsigned long);
			unsigned long data;

			timer = list_first_entry(head, struct timer_list, entry);
			fn = timer->function;
			data = timer->data;

			timer_stats_account_timer(timer);

			base->running_timer = timer;
			detach_timer(timer, 1);

			spin_unlock_irq(&base->lock);
			call_timer_fn(timer, fn, data);
			spin_lock_irq(&base->lock);
		}
	}
	base->running_timer = NULL;
	spin_unlock_irq(&base->lock);
}

#ifdef CONFIG_NO_HZ
/*
 * Find out when the next timer event is due to happen. This
 * is used on S/390 to stop all activity when a CPU is idle.
 * This function needs to be called with interrupts disabled.
 */
static unsigned long __next_timer_interrupt(struct tvec_base *base)
{
	unsigned long timer_jiffies = base->timer_jiffies;
	unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
	int index, slot, array, found = 0;
	struct timer_list *nte;
	struct tvec *varray[4];

	/* Look for timer events in tv1. */
	index = slot = timer_jiffies & TVR_MASK;
	do {
		list_for_each_entry(nte, base->tv1.vec + slot, entry) {
			if (tbase_get_deferrable(nte->base))
				continue;

			found = 1;
			expires = nte->expires;
			/* Look at the cascade bucket(s)? */
			if (!index || slot < index)
				goto cascade;
			return expires;
		}
		slot = (slot + 1) & TVR_MASK;
	} while (slot != index);

cascade:
	/* Calculate the next cascade event */
	if (index)
		timer_jiffies += TVR_SIZE - index;
	timer_jiffies >>= TVR_BITS;

	/* Check tv2-tv5. */
	varray[0] = &base->tv2;
	varray[1] = &base->tv3;
	varray[2] = &base->tv4;
	varray[3] = &base->tv5;

	for (array = 0; array < 4; array++) {
		struct tvec *varp = varray[array];

		index = slot = timer_jiffies & TVN_MASK;
		do {
			list_for_each_entry(nte, varp->vec + slot, entry) {
				if (tbase_get_deferrable(nte->base))
					continue;

				found = 1;
				if (time_before(nte->expires, expires))
					expires = nte->expires;
			}
			/*
			 * Do we still search for the first timer or are
			 * we looking up the cascade buckets?
			 */
			if (found) {
				/* Look at the cascade bucket(s)? */
				if (!index || slot < index)
					break;
				return expires;
			}
			slot = (slot + 1) & TVN_MASK;
		} while (slot != index);

		if (index)
			timer_jiffies += TVN_SIZE - index;
		timer_jiffies >>= TVN_BITS;
	}
	return expires;
}

/*
 * Check if the next hrtimer event is before the next timer wheel
 * event:
 */
static unsigned long cmp_next_hrtimer_event(unsigned long now,
					    unsigned long expires)
{
	ktime_t hr_delta = hrtimer_get_next_event();
	struct timespec tsdelta;
	unsigned long delta;

	if (hr_delta.tv64 == KTIME_MAX)
		return expires;

	/*
	 * Expired timer available, let it expire in the next tick
	 */
	if (hr_delta.tv64 <= 0)
		return now + 1;

	tsdelta = ktime_to_timespec(hr_delta);
	delta = timespec_to_jiffies(&tsdelta);

	/*
	 * Limit the delta to the max value, which is checked in
	 * tick_nohz_stop_sched_tick():
	 */
	if (delta > NEXT_TIMER_MAX_DELTA)
		delta = NEXT_TIMER_MAX_DELTA;

	/*
	 * Take rounding errors into account and make sure that it
	 * expires in the next tick. Otherwise we go into an endless
	 * ping pong due to tick_nohz_stop_sched_tick() retriggering
	 * the timer softirq.
	 */
	if (delta < 1)
		delta = 1;
	now += delta;
	if (time_before(now, expires))
		return now;
	return expires;
}

/**
 * get_next_timer_interrupt - return the jiffy of the next pending timer
 * @now: current time (in jiffies)
 */
unsigned long get_next_timer_interrupt(unsigned long now)
{
	struct tvec_base *base = __this_cpu_read(tvec_bases);
	unsigned long expires;

	/*
	 * Pretend that there is no timer pending if the cpu is offline.
	 * Possible pending timers will be migrated later to an active cpu.
	 */
	if (cpu_is_offline(smp_processor_id()))
		return now + NEXT_TIMER_MAX_DELTA;
	spin_lock(&base->lock);
	if (time_before_eq(base->next_timer, base->timer_jiffies))
		base->next_timer = __next_timer_interrupt(base);
	expires = base->next_timer;
	spin_unlock(&base->lock);

	if (time_before_eq(expires, now))
		return now;

	return cmp_next_hrtimer_event(now, expires);
}
#endif

/*
 * Called from the timer interrupt handler to charge one tick to the current
 * process. user_tick is 1 if the tick is user time, 0 for system.
 */
void update_process_times(int user_tick)
{
	struct task_struct *p = current;
	int cpu = smp_processor_id();

	/* Note: this timer irq context must be accounted for as well. */
	account_process_tick(p, user_tick);
	run_local_timers();
	rcu_check_callbacks(cpu, user_tick);
	printk_tick();
#ifdef CONFIG_IRQ_WORK
	if (in_irq())
		irq_work_run();
#endif
	scheduler_tick();
	run_posix_cpu_timers(p);
}

/*
 * This function runs timers and the timer-tq in bottom half context.
 */
static void run_timer_softirq(struct softirq_action *h)
{
	struct tvec_base *base = __this_cpu_read(tvec_bases);

	hrtimer_run_pending();

	if (time_after_eq(jiffies, base->timer_jiffies))
		__run_timers(base);
}

/*
 * Called by the local, per-CPU timer interrupt on SMP.
 */
void run_local_timers(void)
{
	hrtimer_run_queues();
	raise_softirq(TIMER_SOFTIRQ);
}

#ifdef __ARCH_WANT_SYS_ALARM

/*
 * For backwards compatibility? This can be done in libc so Alpha
 * and all newer ports shouldn't need it.
 */
SYSCALL_DEFINE1(alarm, unsigned int, seconds)
{
	return alarm_setitimer(seconds);
}

#endif

#ifndef __alpha__

/*
 * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
 * should be moved into arch/i386 instead?
 */

/**
 * sys_getpid - return the thread group id of the current process
 *
 * Note, despite the name, this returns the tgid not the pid. The tgid and
 * the pid are identical unless CLONE_THREAD was specified on clone() in
 * which case the tgid is the same in all threads of the same group.
 *
 * This is SMP safe as current->tgid does not change.
 */
SYSCALL_DEFINE0(getpid)
{
	return task_tgid_vnr(current);
}

/*
 * Accessing ->real_parent is not SMP-safe, it could
 * change from under us. However, we can use a stale
 * value of ->real_parent under rcu_read_lock(), see
 * release_task()->call_rcu(delayed_put_task_struct).
 */
SYSCALL_DEFINE0(getppid)
{
	int pid;

	rcu_read_lock();
	pid = task_tgid_vnr(rcu_dereference(current->real_parent));
	rcu_read_unlock();

	return pid;
}

SYSCALL_DEFINE0(getuid)
{
	/* Only we change this so SMP safe */
	return from_kuid_munged(current_user_ns(), current_uid());
}

SYSCALL_DEFINE0(geteuid)
{
	/* Only we change this so SMP safe */
	return from_kuid_munged(current_user_ns(), current_euid());
}

SYSCALL_DEFINE0(getgid)
{
	/* Only we change this so SMP safe */
	return from_kgid_munged(current_user_ns(), current_gid());
}

SYSCALL_DEFINE0(getegid)
{
	/* Only we change this so SMP safe */
	return from_kgid_munged(current_user_ns(), current_egid());
}

#endif

static void process_timeout(unsigned long __data)
{
	wake_up_process((struct task_struct *)__data);
}

/**
 * schedule_timeout - sleep until timeout
 * @timeout: timeout value in jiffies
 *
 * Make the current task sleep until @timeout jiffies have
 * elapsed. The routine will return immediately unless
 * the current task state has been set (see set_current_state()).
 *
 * You can set the task state as follows -
 *
 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
 * pass before the routine returns. The routine will return 0
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task. In this case the remaining time
 * in jiffies will be returned, or 0 if the timer expired in time
 *
 * The current task state is guaranteed to be TASK_RUNNING when this
 * routine returns.
 *
 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
 * the CPU away without a bound on the timeout. In this case the return
 * value will be %MAX_SCHEDULE_TIMEOUT.
 *
 * In all cases the return value is guaranteed to be non-negative.
 */
signed long __sched schedule_timeout(signed long timeout)
{
	struct timer_list timer;
	unsigned long expire;

	switch (timeout) {
	case MAX_SCHEDULE_TIMEOUT:
		/*
		 * These two special cases are useful to be comfortable
		 * in the caller. Nothing more. We could take
		 * MAX_SCHEDULE_TIMEOUT from one of the negative values
		 * but I'd like to return a valid offset (>=0) to allow
		 * the caller to do everything it wants with the retval.
		 */
		schedule();
		goto out;
	default:
		/*
		 * Another bit of PARANOID. Note that the retval will be
		 * 0 since no piece of kernel is supposed to do a check
		 * for a negative retval of schedule_timeout() (since it
		 * should never happen anyway). You just have the printk()
		 * that will tell you if something has gone wrong and where.
		 */
		if (timeout < 0) {
			printk(KERN_ERR "schedule_timeout: wrong timeout "
				"value %lx\n", timeout);
			dump_stack();
			current->state = TASK_RUNNING;
			goto out;
		}
	}

	expire = timeout + jiffies;

	setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
	__mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
	schedule();
	del_singleshot_timer_sync(&timer);

	/* Remove the timer from the object tracker */
	destroy_timer_on_stack(&timer);

	timeout = expire - jiffies;

 out:
	return timeout < 0 ? 0 : timeout;
}
EXPORT_SYMBOL(schedule_timeout);

/*
 * We can use __set_current_state() here because schedule_timeout() calls
 * schedule() unconditionally.
 */
signed long __sched schedule_timeout_interruptible(signed long timeout)
{
	__set_current_state(TASK_INTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_interruptible);

signed long __sched schedule_timeout_killable(signed long timeout)
{
	__set_current_state(TASK_KILLABLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_killable);

signed long __sched schedule_timeout_uninterruptible(signed long timeout)
{
	__set_current_state(TASK_UNINTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_uninterruptible);

/* Thread ID - the internal kernel "pid" */
SYSCALL_DEFINE0(gettid)
{
	return task_pid_vnr(current);
}

/**
 * do_sysinfo - fill in sysinfo struct
 * @info: pointer to buffer to fill
 */
int do_sysinfo(struct sysinfo *info)
{
	unsigned long mem_total, sav_total;
	unsigned int mem_unit, bitcount;
	struct timespec tp;

	memset(info, 0, sizeof(struct sysinfo));

	ktime_get_ts(&tp);
	monotonic_to_bootbased(&tp);
	info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);

	get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);

	info->procs = nr_threads;

	si_meminfo(info);
	si_swapinfo(info);

	/*
	 * If the sum of all the available memory (i.e. ram + swap)
	 * is less than can be stored in a 32 bit unsigned long then
	 * we can be binary compatible with 2.2.x kernels. If not,
	 * well, in that case 2.2.x was broken anyways...
	 *
	 *  -Erik Andersen <andersee@debian.org>
	 */

	mem_total = info->totalram + info->totalswap;
	if (mem_total < info->totalram || mem_total < info->totalswap)
		goto out;
	bitcount = 0;
	mem_unit = info->mem_unit;
	while (mem_unit > 1) {
		bitcount++;
		mem_unit >>= 1;
		sav_total = mem_total;
		mem_total <<= 1;
		if (mem_total < sav_total)
			goto out;
	}

	/*
	 * If mem_total did not overflow, multiply all memory values by
	 * info->mem_unit and set it to 1. This leaves things compatible
	 * with 2.2.x, and also retains compatibility with earlier 2.4.x
	 * kernels...
	 */

	info->mem_unit = 1;
	info->totalram <<= bitcount;
	info->freeram <<= bitcount;
	info->sharedram <<= bitcount;
	info->bufferram <<= bitcount;
	info->totalswap <<= bitcount;
	info->freeswap <<= bitcount;
	info->totalhigh <<= bitcount;
	info->freehigh <<= bitcount;

out:
	return 0;
}

SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
{
	struct sysinfo val;

	do_sysinfo(&val);

	if (copy_to_user(info, &val, sizeof(struct sysinfo)))
		return -EFAULT;

	return 0;
}

static int __cpuinit init_timers_cpu(int cpu)
{
	int j;
	struct tvec_base *base;
	static char __cpuinitdata tvec_base_done[NR_CPUS];

	if (!tvec_base_done[cpu]) {
		static char boot_done;

		if (boot_done) {
			/*
			 * The APs use this path later in boot
			 */
			base = kmalloc_node(sizeof(*base),
					    GFP_KERNEL | __GFP_ZERO,
					    cpu_to_node(cpu));
			if (!base)
				return -ENOMEM;

			/* Make sure that tvec_base is 2 byte aligned */
			if (tbase_get_deferrable(base)) {
				WARN_ON(1);
				kfree(base);
				return -ENOMEM;
			}
			per_cpu(tvec_bases, cpu) = base;
		} else {
			/*
			 * This is for the boot CPU - we use compile-time
			 * static initialisation because per-cpu memory isn't
			 * ready yet and because the memory allocators are not
			 * initialised either.
			 */
			boot_done = 1;
			base = &boot_tvec_bases;
		}
		tvec_base_done[cpu] = 1;
	} else {
		base = per_cpu(tvec_bases, cpu);
	}

	spin_lock_init(&base->lock);

	for (j = 0; j < TVN_SIZE; j++) {
		INIT_LIST_HEAD(base->tv5.vec + j);
		INIT_LIST_HEAD(base->tv4.vec + j);
		INIT_LIST_HEAD(base->tv3.vec + j);
		INIT_LIST_HEAD(base->tv2.vec + j);
	}
	for (j = 0; j < TVR_SIZE; j++)
		INIT_LIST_HEAD(base->tv1.vec + j);

	base->timer_jiffies = jiffies;
	base->next_timer = base->timer_jiffies;
	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
{
	struct timer_list *timer;

	while (!list_empty(head)) {
		timer = list_first_entry(head, struct timer_list, entry);
		detach_timer(timer, 0);
		timer_set_base(timer, new_base);
		if (time_before(timer->expires, new_base->next_timer) &&
		    !tbase_get_deferrable(timer->base))
			new_base->next_timer = timer->expires;
		internal_add_timer(new_base, timer);
	}
}

static void __cpuinit migrate_timers(int cpu)
{
	struct tvec_base *old_base;
	struct tvec_base *new_base;
	int i;

	BUG_ON(cpu_online(cpu));
	old_base = per_cpu(tvec_bases, cpu);
	new_base = get_cpu_var(tvec_bases);
	/*
	 * The caller is globally serialized and nobody else
	 * takes two locks at once, so deadlock is not possible.
	 */
	spin_lock_irq(&new_base->lock);
	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);

	BUG_ON(old_base->running_timer);

	for (i = 0; i < TVR_SIZE; i++)
		migrate_timer_list(new_base, old_base->tv1.vec + i);
	for (i = 0; i < TVN_SIZE; i++) {
		migrate_timer_list(new_base, old_base->tv2.vec + i);
		migrate_timer_list(new_base, old_base->tv3.vec + i);
		migrate_timer_list(new_base, old_base->tv4.vec + i);
		migrate_timer_list(new_base, old_base->tv5.vec + i);
	}

	spin_unlock(&old_base->lock);
	spin_unlock_irq(&new_base->lock);
	put_cpu_var(tvec_bases);
}
#endif /* CONFIG_HOTPLUG_CPU */

static int __cpuinit timer_cpu_notify(struct notifier_block *self,
				      unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;
	int err;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		err = init_timers_cpu(cpu);
		if (err < 0)
			return notifier_from_errno(err);
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		migrate_timers(cpu);
		break;
#endif
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata timers_nb = {
	.notifier_call	= timer_cpu_notify,
};


void __init init_timers(void)
{
	int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
				   (void *)(long)smp_processor_id());

	init_timer_stats();

	BUG_ON(err != NOTIFY_OK);
	register_cpu_notifier(&timers_nb);
	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}

/**
 * msleep - sleep safely even with waitqueue interruptions
 * @msecs: Time in milliseconds to sleep for
 */
void msleep(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	while (timeout)
		timeout = schedule_timeout_uninterruptible(timeout);
}

EXPORT_SYMBOL(msleep);

/**
 * msleep_interruptible - sleep waiting for signals
 * @msecs: Time in milliseconds to sleep for
 */
unsigned long msleep_interruptible(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	while (timeout && !signal_pending(current))
		timeout = schedule_timeout_interruptible(timeout);
	return jiffies_to_msecs(timeout);
}

EXPORT_SYMBOL(msleep_interruptible);

static int __sched do_usleep_range(unsigned long min, unsigned long max)
{
	ktime_t kmin;
	unsigned long delta;

	kmin = ktime_set(0, min * NSEC_PER_USEC);
	delta = (max - min) * NSEC_PER_USEC;
	return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
}

/**
 * usleep_range - Drop in replacement for udelay where wakeup is flexible
 * @min: Minimum time in usecs to sleep
 * @max: Maximum time in usecs to sleep
 */
void usleep_range(unsigned long min, unsigned long max)
{
	__set_current_state(TASK_UNINTERRUPTIBLE);
	do_usleep_range(min, max);
}
EXPORT_SYMBOL(usleep_range);