/*
 *  linux/kernel/timer.c
 *
 *  Kernel internal timers, basic process system calls
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
 *
 *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
 *              "A Kernel Model for Precision Timekeeping" by Dave Mills
 *  1998-12-24  Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
 *              serialize accesses to xtime/lost_ticks).
 *                              Copyright (C) 1998  Andrea Arcangeli
 *  1999-03-10  Improved NTP compatibility by Ulrich Windl
 *  2002-05-31  Move sys_sysinfo here and make its locking sane, Robert Love
 *  2000-10-05  Implemented scalable SMP per-CPU timer handling.
 *                              Copyright (C) 2000, 2001, 2002  Ingo Molnar
 *              Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
 */

#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/notifier.h>
#include <linux/thread_info.h>
#include <linux/time.h>
#include <linux/jiffies.h>
#include <linux/posix-timers.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>

#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/div64.h>
#include <asm/timex.h>
#include <asm/io.h>

u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;

EXPORT_SYMBOL(jiffies_64);

/*
 * per-CPU timer vector definitions:
 */
#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

typedef struct tvec_s {
	struct list_head vec[TVN_SIZE];
} tvec_t;

typedef struct tvec_root_s {
	struct list_head vec[TVR_SIZE];
} tvec_root_t;

struct tvec_t_base_s {
	spinlock_t lock;
	struct timer_list *running_timer;
	unsigned long timer_jiffies;
	tvec_root_t tv1;
	tvec_t tv2;
	tvec_t tv3;
	tvec_t tv4;
	tvec_t tv5;
} ____cacheline_aligned;

typedef struct tvec_t_base_s tvec_base_t;

tvec_base_t boot_tvec_bases;
EXPORT_SYMBOL(boot_tvec_bases);
static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
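
/*
 * Illustrative note (editorial, not from the original source): with the
 * default !CONFIG_BASE_SMALL geometry, TVR_BITS == 8 and TVN_BITS == 6,
 * so the cascading wheel covers:
 *
 *	tv1: jiffies [0, 256)			(256 buckets, 1 jiffy each)
 *	tv2: jiffies [256, 256*64)		(64 buckets, 256 jiffies each)
 *	tv3: jiffies [256*64, 256*64^2)		(64 buckets)
 *	tv4: jiffies [256*64^2, 256*64^3)	(64 buckets)
 *	tv5: jiffies [256*64^3, 256*64^4)	(64 buckets)
 *
 * 256 * 64^4 == 2^32, which is why internal_add_timer() below clamps very
 * distant timeouts to 0xffffffff on 64-bit machines.
 */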

/*
 * Note that all tvec_bases are 2 byte aligned and the lower bit of
 * base in timer_list is guaranteed to be zero. Use the LSB for
 * the new flag to indicate whether the timer is deferrable.
 */
#define TBASE_DEFERRABLE_FLAG		(0x1)

/* Functions below help us manage 'deferrable' flag */
static inline unsigned int tbase_get_deferrable(tvec_base_t *base)
{
	return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
}

static inline tvec_base_t *tbase_get_base(tvec_base_t *base)
{
	return ((tvec_base_t *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
}

static inline void timer_set_deferrable(struct timer_list *timer)
{
	timer->base = ((tvec_base_t *)((unsigned long)(timer->base) |
				       TBASE_DEFERRABLE_FLAG));
}

static inline void
timer_set_base(struct timer_list *timer, tvec_base_t *new_base)
{
	timer->base = (tvec_base_t *)((unsigned long)(new_base) |
				      tbase_get_deferrable(timer->base));
}

/**
 * __round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * __round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The exact rounding is skewed for each processor to avoid all
 * processors firing at the exact same time, which could lead
 * to lock contention or spurious cache line bouncing.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long __round_jiffies(unsigned long j, int cpu)
{
	int rem;
	unsigned long original = j;

	/*
	 * We don't want all cpus firing their timers at once hitting the
	 * same lock or cachelines, so we skew each extra cpu with an extra
	 * 3 jiffies. This 3 jiffies came originally from the mm/ code which
	 * already did this.
	 * The skew is done by adding 3*cpunr, then rounding, then subtracting
	 * this extra offset again.
	 */
	j += cpu * 3;

	rem = j % HZ;

	/*
	 * If the target jiffy is just after a whole second (which can happen
	 * due to delays of the timer irq, long irq off times etc etc) then
	 * we should round down to the whole second, not up. Use 1/4th second
	 * as cutoff for this rounding as an extreme upper bound for this.
	 */
	if (rem < HZ/4) /* round down */
		j = j - rem;
	else /* round up */
		j = j - rem + HZ;

	/* now that we have rounded, subtract the extra skew again */
	j -= cpu * 3;

	if (j <= jiffies) /* rounding ate our timeout entirely; */
		return original;
	return j;
}
EXPORT_SYMBOL_GPL(__round_jiffies);
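
/*
 * Worked example (editorial, assuming HZ == 250 and cpu == 0): if
 * jiffies == 1000 and j == 1010, then rem == 1010 % 250 == 10, which is
 * below HZ/4 == 62, so the value rounds *down* to 1000 == jiffies and the
 * original 1010 is returned, because the rounded value would not lie in
 * the future. For j == 1100, rem == 100 >= 62, so it rounds up to 1250,
 * i.e. the next full second boundary.
 */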

/**
 * __round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * __round_jiffies_relative() rounds a time delta in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The exact rounding is skewed for each processor to avoid all
 * processors firing at the exact same time, which could lead
 * to lock contention or spurious cache line bouncing.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long __round_jiffies_relative(unsigned long j, int cpu)
{
	/*
	 * In theory the following code can skip a jiffy in case jiffies
	 * increments right between the addition and the later subtraction.
	 * However since the entire point of this function is to use
	 * approximate timeouts, it's entirely ok to not handle that.
	 */
	return __round_jiffies(j + jiffies, cpu) - jiffies;
}
EXPORT_SYMBOL_GPL(__round_jiffies_relative);

/**
 * round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies(unsigned long j)
{
	return __round_jiffies(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies);

/**
 * round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * round_jiffies_relative() rounds a time delta in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies_relative(unsigned long j)
{
	return __round_jiffies_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_relative);
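
/*
 * Editorial usage sketch (not part of the original file): a driver whose
 * periodic housekeeping only needs ~second granularity can round its
 * timeout so it fires together with other per-second timers. The
 * identifiers below (housekeeping_timer, housekeeping_fn, do_housekeeping)
 * are hypothetical.
 *
 *	static struct timer_list housekeeping_timer;
 *
 *	static void housekeeping_fn(unsigned long data)
 *	{
 *		do_housekeeping();
 *		mod_timer(&housekeeping_timer,
 *			  round_jiffies(jiffies + 5 * HZ));
 *	}
 */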

static inline void set_running_timer(tvec_base_t *base,
				     struct timer_list *timer)
{
#ifdef CONFIG_SMP
	base->running_timer = timer;
#endif
}

static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
{
	unsigned long expires = timer->expires;
	unsigned long idx = expires - base->timer_jiffies;
	struct list_head *vec;

	if (idx < TVR_SIZE) {
		int i = expires & TVR_MASK;
		vec = base->tv1.vec + i;
	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
		int i = (expires >> TVR_BITS) & TVN_MASK;
		vec = base->tv2.vec + i;
	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
		vec = base->tv3.vec + i;
	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
		vec = base->tv4.vec + i;
	} else if ((signed long) idx < 0) {
		/*
		 * Can happen if you add a timer with expires == jiffies,
		 * or you set a timer to go off in the past
		 */
		vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
	} else {
		int i;
		/* If the timeout is larger than 0xffffffff on 64-bit
		 * architectures then we use the maximum timeout:
		 */
		if (idx > 0xffffffffUL) {
			idx = 0xffffffffUL;
			expires = idx + base->timer_jiffies;
		}
		i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
		vec = base->tv5.vec + i;
	}
	/*
	 * Timers are FIFO:
	 */
	list_add_tail(&timer->entry, vec);
}
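
/*
 * Worked example (editorial, for the default TVR_BITS == 8, TVN_BITS == 6):
 * with base->timer_jiffies == 1000 and timer->expires == 1100, idx == 100
 * is below TVR_SIZE (256), so the timer lands in tv1 bucket
 * 1100 & 255 == 76. With expires == 10000, idx == 9000 falls in
 * [256, 16384), so it lands in tv2 bucket (10000 >> 8) & 63 == 39, and it
 * will be cascaded back into tv1 once timer_jiffies catches up to that
 * range.
 */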

#ifdef CONFIG_TIMER_STATS
void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
{
	if (timer->start_site)
		return;

	timer->start_site = addr;
	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
	timer->start_pid = current->pid;
}
#endif

/**
 * init_timer - initialize a timer.
 * @timer: the timer to be initialized
 *
 * init_timer() must be done to a timer prior to calling *any* of the
 * other timer functions.
 */
void fastcall init_timer(struct timer_list *timer)
{
	timer->entry.next = NULL;
	timer->base = __raw_get_cpu_var(tvec_bases);
#ifdef CONFIG_TIMER_STATS
	timer->start_site = NULL;
	timer->start_pid = -1;
	memset(timer->start_comm, 0, TASK_COMM_LEN);
#endif
}
EXPORT_SYMBOL(init_timer);

void fastcall init_timer_deferrable(struct timer_list *timer)
{
	init_timer(timer);
	timer_set_deferrable(timer);
}
EXPORT_SYMBOL(init_timer_deferrable);

static inline void detach_timer(struct timer_list *timer,
				int clear_pending)
{
	struct list_head *entry = &timer->entry;

	__list_del(entry->prev, entry->next);
	if (clear_pending)
		entry->next = NULL;
	entry->prev = LIST_POISON2;
}

/*
 * We are using hashed locking: holding per_cpu(tvec_bases).lock
 * means that all timers which are tied to this base via timer->base are
 * locked, and the base itself is locked too.
 *
 * So __run_timers/migrate_timers can safely modify all timers which could
 * be found on ->tvX lists.
 *
 * When the timer's base is locked, and the timer removed from list, it is
 * possible to set timer->base = NULL and drop the lock: the timer remains
 * locked.
 */
static tvec_base_t *lock_timer_base(struct timer_list *timer,
				    unsigned long *flags)
	__acquires(timer->base->lock)
{
	tvec_base_t *base;

	for (;;) {
		tvec_base_t *prelock_base = timer->base;
		base = tbase_get_base(prelock_base);
		if (likely(base != NULL)) {
			spin_lock_irqsave(&base->lock, *flags);
			if (likely(prelock_base == timer->base))
				return base;
			/* The timer has migrated to another CPU */
			spin_unlock_irqrestore(&base->lock, *flags);
		}
		cpu_relax();
	}
}

int __mod_timer(struct timer_list *timer, unsigned long expires)
{
	tvec_base_t *base, *new_base;
	unsigned long flags;
	int ret = 0;

	timer_stats_timer_set_start_info(timer);
	BUG_ON(!timer->function);

	base = lock_timer_base(timer, &flags);

	if (timer_pending(timer)) {
		detach_timer(timer, 0);
		ret = 1;
	}

	new_base = __get_cpu_var(tvec_bases);

	if (base != new_base) {
		/*
		 * We are trying to schedule the timer on the local CPU.
		 * However we can't change the timer's base while it is
		 * running, otherwise del_timer_sync() can't detect that the
		 * timer's handler has not finished yet. This also guarantees
		 * that the timer is serialized wrt itself.
		 */
		if (likely(base->running_timer != timer)) {
			/* See the comment in lock_timer_base() */
			timer_set_base(timer, NULL);
			spin_unlock(&base->lock);
			base = new_base;
			spin_lock(&base->lock);
			timer_set_base(timer, base);
		}
	}

	timer->expires = expires;
	internal_add_timer(base, timer);
	spin_unlock_irqrestore(&base->lock, flags);

	return ret;
}

EXPORT_SYMBOL(__mod_timer);

/**
 * add_timer_on - start a timer on a particular CPU
 * @timer: the timer to be added
 * @cpu: the CPU to start it on
 *
 * This is not very scalable on SMP. Double adds are not possible.
 */
void add_timer_on(struct timer_list *timer, int cpu)
{
	tvec_base_t *base = per_cpu(tvec_bases, cpu);
	unsigned long flags;

	timer_stats_timer_set_start_info(timer);
	BUG_ON(timer_pending(timer) || !timer->function);
	spin_lock_irqsave(&base->lock, flags);
	timer_set_base(timer, base);
	internal_add_timer(base, timer);
	spin_unlock_irqrestore(&base->lock, flags);
}


/**
 * mod_timer - modify a timer's timeout
 * @timer: the timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer() is a more efficient way to update the expire field of an
 * active timer (if the timer is inactive it will be activated)
 *
 * mod_timer(timer, expires) is equivalent to:
 *
 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 *
 * Note that if there are multiple unserialized concurrent users of the
 * same timer, then mod_timer() is the only safe way to modify the timeout,
 * since add_timer() cannot modify an already running timer.
 *
 * The function returns whether it has modified a pending timer or not.
 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
 * active timer returns 1.)
 */
int mod_timer(struct timer_list *timer, unsigned long expires)
{
	BUG_ON(!timer->function);

	timer_stats_timer_set_start_info(timer);
	/*
	 * This is a common optimization triggered by the
	 * networking code - if the timer is re-modified
	 * to be the same thing then just return:
	 */
	if (timer->expires == expires && timer_pending(timer))
		return 1;

	return __mod_timer(timer, expires);
}

EXPORT_SYMBOL(mod_timer);
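
/*
 * Editorial usage sketch (not part of the original file): the typical
 * lifecycle of a timer armed with mod_timer() and torn down with
 * del_timer_sync(). The identifiers (my_watchdog, my_watchdog_fn,
 * handle_watchdog_timeout) are hypothetical.
 *
 *	static struct timer_list my_watchdog;
 *
 *	static void my_watchdog_fn(unsigned long data)
 *	{
 *		handle_watchdog_timeout(data);
 *	}
 *
 *	// arm (or push back) the watchdog to one second from now:
 *	setup_timer(&my_watchdog, my_watchdog_fn, 0);
 *	mod_timer(&my_watchdog, jiffies + HZ);
 *
 *	// on teardown, make sure the handler is not running anywhere:
 *	del_timer_sync(&my_watchdog);
 */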

/**
 * del_timer - deactivate a timer.
 * @timer: the timer to be deactivated
 *
 * del_timer() deactivates a timer - this works on both active and inactive
 * timers.
 *
 * The function returns whether it has deactivated a pending timer or not.
 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
 * active timer returns 1.)
 */
int del_timer(struct timer_list *timer)
{
	tvec_base_t *base;
	unsigned long flags;
	int ret = 0;

	timer_stats_timer_clear_start_info(timer);
	if (timer_pending(timer)) {
		base = lock_timer_base(timer, &flags);
		if (timer_pending(timer)) {
			detach_timer(timer, 1);
			ret = 1;
		}
		spin_unlock_irqrestore(&base->lock, flags);
	}

	return ret;
}

EXPORT_SYMBOL(del_timer);

#ifdef CONFIG_SMP
/**
 * try_to_del_timer_sync - Try to deactivate a timer
 * @timer: the timer to deactivate
 *
 * This function tries to deactivate a timer. Upon successful (ret >= 0)
 * exit the timer is not queued and the handler is not running on any CPU.
 *
 * It must not be called from interrupt contexts.
 */
int try_to_del_timer_sync(struct timer_list *timer)
{
	tvec_base_t *base;
	unsigned long flags;
	int ret = -1;

	base = lock_timer_base(timer, &flags);

	if (base->running_timer == timer)
		goto out;

	ret = 0;
	if (timer_pending(timer)) {
		detach_timer(timer, 1);
		ret = 1;
	}
out:
	spin_unlock_irqrestore(&base->lock, flags);

	return ret;
}

EXPORT_SYMBOL(try_to_del_timer_sync);

/**
 * del_timer_sync - deactivate a timer and wait for the handler to finish.
 * @timer: the timer to be deactivated
 *
 * This function only differs from del_timer() on SMP: besides deactivating
 * the timer it also makes sure the handler has finished executing on other
 * CPUs.
 *
 * Synchronization rules: Callers must prevent restarting of the timer,
 * otherwise this function is meaningless. It must not be called from
 * interrupt contexts. The caller must not hold locks which would prevent
 * completion of the timer's handler. The timer's handler must not call
 * add_timer_on(). Upon exit the timer is not queued and the handler is
 * not running on any CPU.
 *
 * The function returns whether it has deactivated a pending timer or not.
 */
int del_timer_sync(struct timer_list *timer)
{
	for (;;) {
		int ret = try_to_del_timer_sync(timer);
		if (ret >= 0)
			return ret;
		cpu_relax();
	}
}

EXPORT_SYMBOL(del_timer_sync);
#endif

static int cascade(tvec_base_t *base, tvec_t *tv, int index)
{
	/* cascade all the timers from tv up one level */
	struct timer_list *timer, *tmp;
	struct list_head tv_list;

	list_replace_init(tv->vec + index, &tv_list);

	/*
	 * We are removing _all_ timers from the list, so we
	 * don't have to detach them individually.
	 */
	list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
		BUG_ON(tbase_get_base(timer->base) != base);
		internal_add_timer(base, timer);
	}

	return index;
}

#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
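
/*
 * Editorial note on the cascade cadence (default geometry): the tv1 index
 * in __run_timers() below wraps to 0 every TVR_SIZE == 256 jiffies, at
 * which point one tv2 bucket (INDEX(0)) is cascaded down into tv1. tv2's
 * own index in turn wraps every 256 * 64 == 16384 jiffies, triggering a
 * tv3 cascade, and so on for tv4 and tv5 - the higher levels are touched
 * exponentially less often.
 */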

/**
 * __run_timers - run all expired timers (if any) on this CPU.
 * @base: the timer vector to be processed.
 *
 * This function cascades all vectors and executes all expired timer
 * vectors.
 */
static inline void __run_timers(tvec_base_t *base)
{
	struct timer_list *timer;

	spin_lock_irq(&base->lock);
	while (time_after_eq(jiffies, base->timer_jiffies)) {
		struct list_head work_list;
		struct list_head *head = &work_list;
		int index = base->timer_jiffies & TVR_MASK;

		/*
		 * Cascade timers:
		 */
		if (!index &&
			(!cascade(base, &base->tv2, INDEX(0))) &&
				(!cascade(base, &base->tv3, INDEX(1))) &&
					!cascade(base, &base->tv4, INDEX(2)))
			cascade(base, &base->tv5, INDEX(3));
		++base->timer_jiffies;
		list_replace_init(base->tv1.vec + index, &work_list);
		while (!list_empty(head)) {
			void (*fn)(unsigned long);
			unsigned long data;

			timer = list_first_entry(head, struct timer_list, entry);
			fn = timer->function;
			data = timer->data;

			timer_stats_account_timer(timer);

			set_running_timer(base, timer);
			detach_timer(timer, 1);
			spin_unlock_irq(&base->lock);
			{
				int preempt_count = preempt_count();
				fn(data);
				if (preempt_count != preempt_count()) {
					printk(KERN_WARNING "huh, entered %p "
					       "with preempt_count %08x, exited"
					       " with %08x?\n",
					       fn, preempt_count,
					       preempt_count());
					BUG();
				}
			}
			spin_lock_irq(&base->lock);
		}
	}
	set_running_timer(base, NULL);
	spin_unlock_irq(&base->lock);
}

#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ)
/*
 * Find out when the next timer event is due to happen. This
 * is used on S/390 to stop all activity when a CPU is idle.
 * This function needs to be called with interrupts disabled.
 */
static unsigned long __next_timer_interrupt(tvec_base_t *base)
{
	unsigned long timer_jiffies = base->timer_jiffies;
	unsigned long expires = timer_jiffies + (LONG_MAX >> 1);
	int index, slot, array, found = 0;
	struct timer_list *nte;
	tvec_t *varray[4];

	/* Look for timer events in tv1. */
	index = slot = timer_jiffies & TVR_MASK;
	do {
		list_for_each_entry(nte, base->tv1.vec + slot, entry) {
			if (tbase_get_deferrable(nte->base))
				continue;

			found = 1;
			expires = nte->expires;
			/* Look at the cascade bucket(s)? */
			if (!index || slot < index)
				goto cascade;
			return expires;
		}
		slot = (slot + 1) & TVR_MASK;
	} while (slot != index);

cascade:
	/* Calculate the next cascade event */
	if (index)
		timer_jiffies += TVR_SIZE - index;
	timer_jiffies >>= TVR_BITS;

	/* Check tv2-tv5. */
	varray[0] = &base->tv2;
	varray[1] = &base->tv3;
	varray[2] = &base->tv4;
	varray[3] = &base->tv5;

	for (array = 0; array < 4; array++) {
		tvec_t *varp = varray[array];

		index = slot = timer_jiffies & TVN_MASK;
		do {
			list_for_each_entry(nte, varp->vec + slot, entry) {
				found = 1;
				if (time_before(nte->expires, expires))
					expires = nte->expires;
			}
			/*
			 * Do we still search for the first timer or are
			 * we looking up the cascade buckets ?
			 */
			if (found) {
				/* Look at the cascade bucket(s)? */
				if (!index || slot < index)
					break;
				return expires;
			}
			slot = (slot + 1) & TVN_MASK;
		} while (slot != index);

		if (index)
			timer_jiffies += TVN_SIZE - index;
		timer_jiffies >>= TVN_BITS;
	}
	return expires;
}

/*
 * Check, if the next hrtimer event is before the next timer wheel
 * event:
 */
static unsigned long cmp_next_hrtimer_event(unsigned long now,
					    unsigned long expires)
{
	ktime_t hr_delta = hrtimer_get_next_event();
	struct timespec tsdelta;
	unsigned long delta;

	if (hr_delta.tv64 == KTIME_MAX)
		return expires;

	/*
	 * Expired timer available, let it expire in the next tick
	 */
	if (hr_delta.tv64 <= 0)
		return now + 1;

	tsdelta = ktime_to_timespec(hr_delta);
	delta = timespec_to_jiffies(&tsdelta);
	/*
	 * Take rounding errors into account and make sure that it
	 * expires in the next tick. Otherwise we go into an endless
	 * ping pong due to tick_nohz_stop_sched_tick() retriggering
	 * the timer softirq
	 */
	if (delta < 1)
		delta = 1;
	now += delta;
	if (time_before(now, expires))
		return now;
	return expires;
}

/**
 * get_next_timer_interrupt - return the jiffy of the next pending timer
 * @now: current time (in jiffies)
 */
unsigned long get_next_timer_interrupt(unsigned long now)
{
	tvec_base_t *base = __get_cpu_var(tvec_bases);
	unsigned long expires;

	spin_lock(&base->lock);
	expires = __next_timer_interrupt(base);
	spin_unlock(&base->lock);

	if (time_before_eq(expires, now))
		return now;

	return cmp_next_hrtimer_event(now, expires);
}

#ifdef CONFIG_NO_IDLE_HZ
unsigned long next_timer_interrupt(void)
{
	return get_next_timer_interrupt(jiffies);
}
#endif

#endif

/*
 * Called from the timer interrupt handler to charge one tick to the current
 * process. user_tick is 1 if the tick is user time, 0 for system.
 */
void update_process_times(int user_tick)
{
	struct task_struct *p = current;
	int cpu = smp_processor_id();

	/* Note: this timer irq context must be accounted for as well. */
	if (user_tick)
		account_user_time(p, jiffies_to_cputime(1));
	else
		account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1));
	run_local_timers();
	if (rcu_pending(cpu))
		rcu_check_callbacks(cpu, user_tick);
	scheduler_tick();
	run_posix_cpu_timers(p);
}

/*
 * Nr of active tasks - counted in fixed-point numbers
 */
static unsigned long count_active_tasks(void)
{
	return nr_active() * FIXED_1;
}

/*
 * Hmm.. Changed this, as the GNU make sources (load.c) seem to
 * imply that avenrun[] is the standard name for this kind of thing.
 * Nothing else seems to be standardized: the fractional size etc
 * all seem to differ on different machines.
 *
 * Requires xtime_lock to access.
 */
unsigned long avenrun[3];

EXPORT_SYMBOL(avenrun);

/*
 * calc_load - given tick count, update the avenrun load estimates.
 * This is called while holding a write_lock on xtime_lock.
 */
static inline void calc_load(unsigned long ticks)
{
	unsigned long active_tasks; /* fixed-point */
	static int count = LOAD_FREQ;

	count -= ticks;
	if (unlikely(count < 0)) {
		active_tasks = count_active_tasks();
		do {
			CALC_LOAD(avenrun[0], EXP_1, active_tasks);
			CALC_LOAD(avenrun[1], EXP_5, active_tasks);
			CALC_LOAD(avenrun[2], EXP_15, active_tasks);
			count += LOAD_FREQ;
		} while (count < 0);
	}
}
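
/*
 * Editorial note on the fixed-point format: avenrun[] holds the load
 * averages scaled by FIXED_1 == 1 << FSHIFT (2048 with FSHIFT == 11).
 * CALC_LOAD() computes an exponentially weighted moving average,
 * load = load * exp + active * (1 - exp), in that fixed-point form.
 * For example, a stored value of 5120 corresponds to a load average of
 * 5120 / 2048 = 2.50, which is how LOAD_INT()/LOAD_FRAC() present it in
 * /proc/loadavg.
 */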

/*
 * This function runs timers and the timer-tq in bottom half context.
 */
static void run_timer_softirq(struct softirq_action *h)
{
	tvec_base_t *base = __get_cpu_var(tvec_bases);

	hrtimer_run_queues();

	if (time_after_eq(jiffies, base->timer_jiffies))
		__run_timers(base);
}

/*
 * Called by the local, per-CPU timer interrupt on SMP.
 */
void run_local_timers(void)
{
	raise_softirq(TIMER_SOFTIRQ);
	softlockup_tick();
}

/*
 * Called by the timer interrupt. xtime_lock must already be taken
 * by the timer IRQ!
 */
static inline void update_times(unsigned long ticks)
{
	update_wall_time();
	calc_load(ticks);
}

/*
 * The 64-bit jiffies value is not atomic - you MUST NOT read it
 * without sampling the sequence number in xtime_lock.
 * jiffies is defined in the linker script...
 */

void do_timer(unsigned long ticks)
{
	jiffies_64 += ticks;
	update_times(ticks);
}

#ifdef __ARCH_WANT_SYS_ALARM

/*
 * For backwards compatibility?  This can be done in libc so Alpha
 * and all newer ports shouldn't need it.
 */
asmlinkage unsigned long sys_alarm(unsigned int seconds)
{
	return alarm_setitimer(seconds);
}

#endif

#ifndef __alpha__

/*
 * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
 * should be moved into arch/i386 instead?
 */

/**
 * sys_getpid - return the thread group id of the current process
 *
 * Note, despite the name, this returns the tgid not the pid.  The tgid and
 * the pid are identical unless CLONE_THREAD was specified on clone() in
 * which case the tgid is the same in all threads of the same group.
 *
 * This is SMP safe as current->tgid does not change.
 */
asmlinkage long sys_getpid(void)
{
	return current->tgid;
}

/*
 * Accessing ->real_parent is not SMP-safe, it could
 * change from under us. However, we can use a stale
 * value of ->real_parent under rcu_read_lock(), see
 * release_task()->call_rcu(delayed_put_task_struct).
 */
asmlinkage long sys_getppid(void)
{
	int pid;

	rcu_read_lock();
	pid = rcu_dereference(current->real_parent)->tgid;
	rcu_read_unlock();

	return pid;
}

asmlinkage long sys_getuid(void)
{
	/* Only we change this so SMP safe */
	return current->uid;
}

asmlinkage long sys_geteuid(void)
{
	/* Only we change this so SMP safe */
	return current->euid;
}

asmlinkage long sys_getgid(void)
{
	/* Only we change this so SMP safe */
	return current->gid;
}

asmlinkage long sys_getegid(void)
{
	/* Only we change this so SMP safe */
	return current->egid;
}

#endif

static void process_timeout(unsigned long __data)
{
	wake_up_process((struct task_struct *)__data);
}

/**
 * schedule_timeout - sleep until timeout
 * @timeout: timeout value in jiffies
 *
 * Make the current task sleep until @timeout jiffies have
 * elapsed. The routine will return immediately unless
 * the current task state has been set (see set_current_state()).
 *
 * You can set the task state as follows -
 *
 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
 * pass before the routine returns. The routine will return 0.
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task. In this case the remaining time
 * in jiffies will be returned, or 0 if the timer expired in time.
 *
 * The current task state is guaranteed to be TASK_RUNNING when this
 * routine returns.
 *
 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
 * the CPU away without a bound on the timeout. In this case the return
 * value will be %MAX_SCHEDULE_TIMEOUT.
 *
 * In all cases the return value is guaranteed to be non-negative.
 */
fastcall signed long __sched schedule_timeout(signed long timeout)
{
	struct timer_list timer;
	unsigned long expire;

	switch (timeout)
	{
	case MAX_SCHEDULE_TIMEOUT:
		/*
		 * These two special cases are useful to be comfortable
		 * in the caller. Nothing more. We could take
		 * MAX_SCHEDULE_TIMEOUT from one of the negative values
		 * but I'd like to return a valid offset (>=0) to allow
		 * the caller to do everything it wants with the retval.
		 */
		schedule();
		goto out;
	default:
		/*
		 * Another bit of PARANOID. Note that the retval will be
		 * 0 since no piece of kernel is supposed to do a check
		 * for a negative retval of schedule_timeout() (since it
		 * should never happen anyway). You just have the printk()
		 * that will tell you if something has gone wrong and where.
		 */
		if (timeout < 0) {
			printk(KERN_ERR "schedule_timeout: wrong timeout "
				"value %lx\n", timeout);
			dump_stack();
			current->state = TASK_RUNNING;
			goto out;
		}
	}

	expire = timeout + jiffies;

	setup_timer(&timer, process_timeout, (unsigned long)current);
	__mod_timer(&timer, expire);
	schedule();
	del_singleshot_timer_sync(&timer);

	timeout = expire - jiffies;

 out:
	return timeout < 0 ? 0 : timeout;
}
EXPORT_SYMBOL(schedule_timeout);
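
/*
 * Editorial usage sketch (not part of the original file): callers must set
 * the task state themselves before calling schedule_timeout(), e.g. to
 * wait interruptibly for up to 100ms:
 *
 *	signed long remaining;
 *
 *	set_current_state(TASK_INTERRUPTIBLE);
 *	remaining = schedule_timeout(msecs_to_jiffies(100));
 *	if (remaining)
 *		// woken early, by a signal or an explicit wake_up
 *		handle_early_wakeup();	// hypothetical helper
 *
 * The two wrappers below fold the set_current_state() call in.
 */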

/*
 * We can use __set_current_state() here because schedule_timeout() calls
 * schedule() unconditionally.
 */
signed long __sched schedule_timeout_interruptible(signed long timeout)
{
	__set_current_state(TASK_INTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_interruptible);

signed long __sched schedule_timeout_uninterruptible(signed long timeout)
{
	__set_current_state(TASK_UNINTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_uninterruptible);

/* Thread ID - the internal kernel "pid" */
asmlinkage long sys_gettid(void)
{
	return current->pid;
}

/**
 * do_sysinfo - fill in sysinfo struct
 * @info: pointer to buffer to fill
 */
int do_sysinfo(struct sysinfo *info)
{
	unsigned long mem_total, sav_total;
	unsigned int mem_unit, bitcount;
	unsigned long seq;

	memset(info, 0, sizeof(struct sysinfo));

	do {
		struct timespec tp;
		seq = read_seqbegin(&xtime_lock);

		/*
		 * This is annoying. The below is the same thing
		 * posix_get_clock_monotonic() does, but it wants to take
		 * the lock, while we want the same lock to also cover the
		 * load-average reads below.
		 */

		getnstimeofday(&tp);
		tp.tv_sec += wall_to_monotonic.tv_sec;
		tp.tv_nsec += wall_to_monotonic.tv_nsec;
		if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
			tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
			tp.tv_sec++;
		}
		info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);

		info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
		info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
		info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);

		info->procs = nr_threads;
	} while (read_seqretry(&xtime_lock, seq));

	si_meminfo(info);
	si_swapinfo(info);

	/*
	 * If the sum of all the available memory (i.e. ram + swap)
	 * is less than can be stored in a 32 bit unsigned long then
	 * we can be binary compatible with 2.2.x kernels.  If not,
	 * well, in that case 2.2.x was broken anyways...
	 *
	 *  -Erik Andersen <andersee@debian.org>
	 */

	mem_total = info->totalram + info->totalswap;
	if (mem_total < info->totalram || mem_total < info->totalswap)
		goto out;
	bitcount = 0;
	mem_unit = info->mem_unit;
	while (mem_unit > 1) {
		bitcount++;
		mem_unit >>= 1;
		sav_total = mem_total;
		mem_total <<= 1;
		if (mem_total < sav_total)
			goto out;
	}

	/*
	 * If mem_total did not overflow, multiply all memory values by
	 * info->mem_unit and set it to 1.  This leaves things compatible
	 * with 2.2.x, and also retains compatibility with earlier 2.4.x
	 * kernels...
	 */

	info->mem_unit = 1;
	info->totalram <<= bitcount;
	info->freeram <<= bitcount;
	info->sharedram <<= bitcount;
	info->bufferram <<= bitcount;
	info->totalswap <<= bitcount;
	info->freeswap <<= bitcount;
	info->totalhigh <<= bitcount;
	info->freehigh <<= bitcount;

out:
	return 0;
}

asmlinkage long sys_sysinfo(struct sysinfo __user *info)
{
	struct sysinfo val;

	do_sysinfo(&val);

	if (copy_to_user(info, &val, sizeof(struct sysinfo)))
		return -EFAULT;

	return 0;
}

/*
 * lockdep: we want to track each per-CPU base as a separate lock-class,
 * but timer-bases are kmalloc()-ed, so we need to attach separate
 * keys to them:
 */
static struct lock_class_key base_lock_keys[NR_CPUS];

static int __devinit init_timers_cpu(int cpu)
{
	int j;
	tvec_base_t *base;
	static char __devinitdata tvec_base_done[NR_CPUS];

	if (!tvec_base_done[cpu]) {
		static char boot_done;

		if (boot_done) {
			/*
			 * The APs use this path later in boot
			 */
			base = kmalloc_node(sizeof(*base), GFP_KERNEL,
					    cpu_to_node(cpu));
			if (!base)
				return -ENOMEM;

			/* Make sure that tvec_base is 2 byte aligned */
			if (tbase_get_deferrable(base)) {
				WARN_ON(1);
				kfree(base);
				return -ENOMEM;
			}
			memset(base, 0, sizeof(*base));
			per_cpu(tvec_bases, cpu) = base;
		} else {
			/*
			 * This is for the boot CPU - we use compile-time
			 * static initialisation because per-cpu memory isn't
			 * ready yet and because the memory allocators are not
			 * initialised either.
			 */
			boot_done = 1;
			base = &boot_tvec_bases;
		}
		tvec_base_done[cpu] = 1;
	} else {
		base = per_cpu(tvec_bases, cpu);
	}

	spin_lock_init(&base->lock);
	lockdep_set_class(&base->lock, base_lock_keys + cpu);

	for (j = 0; j < TVN_SIZE; j++) {
		INIT_LIST_HEAD(base->tv5.vec + j);
		INIT_LIST_HEAD(base->tv4.vec + j);
		INIT_LIST_HEAD(base->tv3.vec + j);
		INIT_LIST_HEAD(base->tv2.vec + j);
	}
	for (j = 0; j < TVR_SIZE; j++)
		INIT_LIST_HEAD(base->tv1.vec + j);

	base->timer_jiffies = jiffies;
	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
{
	struct timer_list *timer;

	while (!list_empty(head)) {
		timer = list_first_entry(head, struct timer_list, entry);
		detach_timer(timer, 0);
		timer_set_base(timer, new_base);
		internal_add_timer(new_base, timer);
	}
}

static void __devinit migrate_timers(int cpu)
{
	tvec_base_t *old_base;
	tvec_base_t *new_base;
	int i;

	BUG_ON(cpu_online(cpu));
	old_base = per_cpu(tvec_bases, cpu);
	new_base = get_cpu_var(tvec_bases);

	local_irq_disable();
	double_spin_lock(&new_base->lock, &old_base->lock,
			 smp_processor_id() < cpu);

	BUG_ON(old_base->running_timer);

	for (i = 0; i < TVR_SIZE; i++)
		migrate_timer_list(new_base, old_base->tv1.vec + i);
	for (i = 0; i < TVN_SIZE; i++) {
		migrate_timer_list(new_base, old_base->tv2.vec + i);
		migrate_timer_list(new_base, old_base->tv3.vec + i);
		migrate_timer_list(new_base, old_base->tv4.vec + i);
		migrate_timer_list(new_base, old_base->tv5.vec + i);
	}

	double_spin_unlock(&new_base->lock, &old_base->lock,
			   smp_processor_id() < cpu);
	local_irq_enable();
	put_cpu_var(tvec_bases);
}
#endif /* CONFIG_HOTPLUG_CPU */

static int __cpuinit timer_cpu_notify(struct notifier_block *self,
				      unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;
	switch(action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		if (init_timers_cpu(cpu) < 0)
			return NOTIFY_BAD;
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		migrate_timers(cpu);
		break;
#endif
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata timers_nb = {
	.notifier_call	= timer_cpu_notify,
};


void __init init_timers(void)
{
	int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
				   (void *)(long)smp_processor_id());

	init_timer_stats();

	BUG_ON(err == NOTIFY_BAD);
	register_cpu_notifier(&timers_nb);
	open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL);
}

#ifdef CONFIG_TIME_INTERPOLATION

struct time_interpolator *time_interpolator __read_mostly;
static struct time_interpolator *time_interpolator_list __read_mostly;
static DEFINE_SPINLOCK(time_interpolator_lock);

static inline cycles_t time_interpolator_get_cycles(unsigned int src)
{
	unsigned long (*x)(void);

	switch (src)
	{
	case TIME_SOURCE_FUNCTION:
		x = time_interpolator->addr;
		return x();

	case TIME_SOURCE_MMIO64:
		return readq_relaxed((void __iomem *)time_interpolator->addr);

	case TIME_SOURCE_MMIO32:
		return readl_relaxed((void __iomem *)time_interpolator->addr);

	default:
		return get_cycles();
	}
}

static inline u64 time_interpolator_get_counter(int writelock)
{
	unsigned int src = time_interpolator->source;

	if (time_interpolator->jitter)
	{
		cycles_t lcycle;
		cycles_t now;

		do {
			lcycle = time_interpolator->last_cycle;
			now = time_interpolator_get_cycles(src);
			if (lcycle && time_after(lcycle, now))
				return lcycle;

			/*
			 * When holding the xtime write lock, there's no need
			 * to add the overhead of the cmpxchg. Readers are
			 * forced to retry until the write lock is released.
			 */
			if (writelock) {
				time_interpolator->last_cycle = now;
				return now;
			}
			/*
			 * Keep track of the last timer value returned.
			 * The use of cmpxchg here will cause contention
			 * in an SMP environment.
			 */
		} while (unlikely(cmpxchg(&time_interpolator->last_cycle, lcycle, now) != lcycle));
		return now;
	}
	else
		return time_interpolator_get_cycles(src);
}

void time_interpolator_reset(void)
{
	time_interpolator->offset = 0;
	time_interpolator->last_counter = time_interpolator_get_counter(1);
}

#define GET_TI_NSECS(count,i) (((((count) - i->last_counter) & (i)->mask) * (i)->nsec_per_cyc) >> (i)->shift)

unsigned long time_interpolator_get_offset(void)
{
	/* If we do not have a time interpolator set up then just return zero */
	if (!time_interpolator)
		return 0;

	return time_interpolator->offset +
		GET_TI_NSECS(time_interpolator_get_counter(0), time_interpolator);
}

#define INTERPOLATOR_ADJUST 65536
#define INTERPOLATOR_MAX_SKIP 10*INTERPOLATOR_ADJUST

void time_interpolator_update(long delta_nsec)
{
	u64 counter;
	unsigned long offset;

	/* If there is no time interpolator set up then do nothing */
	if (!time_interpolator)
		return;

	/*
	 * The interpolator compensates for late ticks by accumulating the late
	 * time in time_interpolator->offset. A tick earlier than expected will
	 * lead to a reset of the offset and a corresponding jump of the clock
	 * forward. Again this only works if the interpolator clock is running
	 * slightly slower than the regular clock and the tuning logic ensures
	 * that.
	 */

	counter = time_interpolator_get_counter(1);
	offset = time_interpolator->offset +
			GET_TI_NSECS(counter, time_interpolator);

	if (delta_nsec < 0 || (unsigned long) delta_nsec < offset)
		time_interpolator->offset = offset - delta_nsec;
	else {
		time_interpolator->skips++;
		time_interpolator->ns_skipped += delta_nsec - offset;
		time_interpolator->offset = 0;
	}
	time_interpolator->last_counter = counter;

	/*
	 * Tuning logic for time interpolator invoked every minute or so.
	 * Decrease interpolator clock speed if no skips occurred and an
	 * offset is carried. Increase interpolator clock speed if we skip
	 * too much time.
	 */
	if (jiffies % INTERPOLATOR_ADJUST == 0)
	{
		if (time_interpolator->skips == 0 && time_interpolator->offset > tick_nsec)
			time_interpolator->nsec_per_cyc--;
		if (time_interpolator->ns_skipped > INTERPOLATOR_MAX_SKIP && time_interpolator->offset == 0)
			time_interpolator->nsec_per_cyc++;
		time_interpolator->skips = 0;
		time_interpolator->ns_skipped = 0;
	}
}

static inline int
is_better_time_interpolator(struct time_interpolator *new)
{
	if (!time_interpolator)
		return 1;
	return new->frequency > 2*time_interpolator->frequency ||
	    (unsigned long)new->drift < (unsigned long)time_interpolator->drift;
}

void
register_time_interpolator(struct time_interpolator *ti)
{
	unsigned long flags;

	/* Sanity check */
	BUG_ON(ti->frequency == 0 || ti->mask == 0);

	ti->nsec_per_cyc = ((u64)NSEC_PER_SEC << ti->shift) / ti->frequency;
	spin_lock(&time_interpolator_lock);
	write_seqlock_irqsave(&xtime_lock, flags);
	if (is_better_time_interpolator(ti)) {
		time_interpolator = ti;
		time_interpolator_reset();
	}
	write_sequnlock_irqrestore(&xtime_lock, flags);

	ti->next = time_interpolator_list;
	time_interpolator_list = ti;
	spin_unlock(&time_interpolator_lock);
}

void
unregister_time_interpolator(struct time_interpolator *ti)
{
	struct time_interpolator *curr, **prev;
	unsigned long flags;

	spin_lock(&time_interpolator_lock);
	prev = &time_interpolator_list;
	for (curr = *prev; curr; curr = curr->next) {
		if (curr == ti) {
			*prev = curr->next;
			break;
		}
		prev = &curr->next;
	}

	write_seqlock_irqsave(&xtime_lock, flags);
	if (ti == time_interpolator) {
		/* we lost the best time-interpolator: */
		time_interpolator = NULL;
		/* find the next-best interpolator */
		for (curr = time_interpolator_list; curr; curr = curr->next)
			if (is_better_time_interpolator(curr))
				time_interpolator = curr;
		time_interpolator_reset();
	}
	write_sequnlock_irqrestore(&xtime_lock, flags);
	spin_unlock(&time_interpolator_lock);
}
#endif /* CONFIG_TIME_INTERPOLATION */

/**
 * msleep - sleep safely even with waitqueue interruptions
 * @msecs: Time in milliseconds to sleep for
 */
void msleep(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	while (timeout)
		timeout = schedule_timeout_uninterruptible(timeout);
}

EXPORT_SYMBOL(msleep);

/**
 * msleep_interruptible - sleep waiting for signals
 * @msecs: Time in milliseconds to sleep for
 */
unsigned long msleep_interruptible(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	while (timeout && !signal_pending(current))
		timeout = schedule_timeout_interruptible(timeout);
	return jiffies_to_msecs(timeout);
}

EXPORT_SYMBOL(msleep_interruptible);
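
/*
 * Editorial usage sketch (not part of the original file): unlike msleep(),
 * msleep_interruptible() can return early, reporting how much of the
 * requested delay was left when a signal arrived:
 *
 *	unsigned long left = msleep_interruptible(500);
 *
 *	if (left)
 *		// a signal cut the 500ms sleep short; 'left' ms remained
 *		return -ERESTARTSYS;	// one common way to handle it
 */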