Merge branch 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6

* 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6:
[IA64] Fix large MCA bootmem allocation
[IA64] Simplify cpu_idle_wait
[IA64] Synchronize RBS on PTRACE_ATTACH
[IA64] Synchronize kernel RSE to user-space and back
[IA64] Rename TIF_PERFMON_WORK back to TIF_NOTIFY_RESUME
[IA64] Wire up timerfd_{create,settime,gettime} syscalls

 8 files changed, 225 insertions(+), 81 deletions(-)
+4 -1
arch/ia64/kernel/entry.S
···
         data8 sys_fchmodat
         data8 sys_faccessat
         data8 sys_pselect6
-        data8 sys_ppoll
+        data8 sys_ppoll                         // 1295
         data8 sys_unshare
         data8 sys_splice
         data8 sys_set_robust_list
···
         data8 sys_signalfd
         data8 sys_ni_syscall
         data8 sys_eventfd
+        data8 sys_timerfd_create                // 1310
+        data8 sys_timerfd_settime
+        data8 sys_timerfd_gettime
 
         .org sys_call_table + 8*NR_syscalls     // guard against failures to increase NR_syscalls
+26 -29
arch/ia64/kernel/mca.c
···
  * Copyright (C) 2000 Intel
  * Copyright (C) Chuck Fleckenstein <cfleck@co.intel.com>
  *
- * Copyright (C) 1999, 2004 Silicon Graphics, Inc.
+ * Copyright (C) 1999, 2004-2008 Silicon Graphics, Inc.
  * Copyright (C) Vijay Chander <vijay@engr.sgi.com>
  *
  * Copyright (C) 2006 FUJITSU LIMITED
···
 /* Caller prevents this from being called after init */
 static void * __init_refok mca_bootmem(void)
 {
-        void *p;
-
-        p = alloc_bootmem(sizeof(struct ia64_mca_cpu) * NR_CPUS +
-                          KERNEL_STACK_SIZE);
-        return (void *)ALIGN((unsigned long)p, KERNEL_STACK_SIZE);
+        return __alloc_bootmem(sizeof(struct ia64_mca_cpu),
+                               KERNEL_STACK_SIZE, 0);
 }
 
 /* Do per-CPU MCA-related initialization. */
···
 ia64_mca_cpu_init(void *cpu_data)
 {
         void *pal_vaddr;
+        void *data;
+        long sz = sizeof(struct ia64_mca_cpu);
+        int cpu = smp_processor_id();
         static int first_time = 1;
 
-        if (first_time) {
-                void *mca_data;
-                int cpu;
-
-                first_time = 0;
-                mca_data = mca_bootmem();
-                for (cpu = 0; cpu < NR_CPUS; cpu++) {
-                        format_mca_init_stack(mca_data,
-                                offsetof(struct ia64_mca_cpu, mca_stack),
-                                "MCA", cpu);
-                        format_mca_init_stack(mca_data,
-                                offsetof(struct ia64_mca_cpu, init_stack),
-                                "INIT", cpu);
-                        __per_cpu_mca[cpu] = __pa(mca_data);
-                        mca_data += sizeof(struct ia64_mca_cpu);
-                }
-        }
-
         /*
-         * The MCA info structure was allocated earlier and its
-         * physical address saved in __per_cpu_mca[cpu]. Copy that
-         * address * to ia64_mca_data so we can access it as a per-CPU
-         * variable.
+         * Structure will already be allocated if cpu has been online,
+         * then offlined.
          */
-        __get_cpu_var(ia64_mca_data) = __per_cpu_mca[smp_processor_id()];
+        if (__per_cpu_mca[cpu]) {
+                data = __va(__per_cpu_mca[cpu]);
+        } else {
+                if (first_time) {
+                        data = mca_bootmem();
+                        first_time = 0;
+                } else
+                        data = page_address(alloc_pages_node(numa_node_id(),
+                                        GFP_KERNEL, get_order(sz)));
+                if (!data)
+                        panic("Could not allocate MCA memory for cpu %d\n",
+                                        cpu);
+        }
+        format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, mca_stack),
+                "MCA", cpu);
+        format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, init_stack),
+                "INIT", cpu);
+        __get_cpu_var(ia64_mca_data) = __per_cpu_mca[cpu] = __pa(data);
 
         /*
          * Stash away a copy of the PTE needed to map the per-CPU page.
+3 -18
arch/ia64/kernel/perfmon.c
···
 }
 
 static inline void
-pfm_set_task_notify(struct task_struct *task)
-{
-        struct thread_info *info;
-
-        info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE);
-        set_bit(TIF_PERFMON_WORK, &info->flags);
-}
-
-static inline void
-pfm_clear_task_notify(void)
-{
-        clear_thread_flag(TIF_PERFMON_WORK);
-}
-
-static inline void
 pfm_reserve_page(unsigned long a)
 {
         SetPageReserved(vmalloc_to_page((void *)a));
···
 
         PFM_SET_WORK_PENDING(task, 1);
 
-        pfm_set_task_notify(task);
+        tsk_set_notify_resume(task);
 
         /*
          * XXX: send reschedule if task runs on another CPU
···
 
         PFM_SET_WORK_PENDING(current, 0);
 
-        pfm_clear_task_notify();
+        tsk_clear_notify_resume(current);
 
         regs = task_pt_regs(current);
 
···
                  * when coming from ctxsw, current still points to the
                  * previous task, therefore we must work with task and not current.
                  */
-                pfm_set_task_notify(task);
+                tsk_set_notify_resume(task);
         }
         /*
          * defer until state is changed (shorten spin window). the context is locked
+30 -29
arch/ia64/kernel/process.c
···
 #include "sigframe.h"
 
 void (*ia64_mark_idle)(int);
-static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
 
 unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
···
         show_stack(NULL, NULL);
 }
 
+void tsk_clear_notify_resume(struct task_struct *tsk)
+{
+#ifdef CONFIG_PERFMON
+        if (tsk->thread.pfm_needs_checking)
+                return;
+#endif
+        if (test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_RSE))
+                return;
+        clear_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME);
+}
+
 void
 do_notify_resume_user (sigset_t *unused, struct sigscratch *scr, long in_syscall)
 {
···
         /* deal with pending signal delivery */
         if (test_thread_flag(TIF_SIGPENDING)||test_thread_flag(TIF_RESTORE_SIGMASK))
                 ia64_do_signal(scr, in_syscall);
+
+        /* copy user rbs to kernel rbs */
+        if (unlikely(test_thread_flag(TIF_RESTORE_RSE)))
+                ia64_sync_krbs();
 }
 
 static int pal_halt = 1;
···
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
 void cpu_idle_wait(void)
 {
-        unsigned int cpu, this_cpu = get_cpu();
-        cpumask_t map;
-        cpumask_t tmp = current->cpus_allowed;
-
-        set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
-        put_cpu();
-
-        cpus_clear(map);
-        for_each_online_cpu(cpu) {
-                per_cpu(cpu_idle_state, cpu) = 1;
-                cpu_set(cpu, map);
-        }
-
-        __get_cpu_var(cpu_idle_state) = 0;
-
-        wmb();
-        do {
-                ssleep(1);
-                for_each_online_cpu(cpu) {
-                        if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
-                                cpu_clear(cpu, map);
-                }
-                cpus_and(map, map, cpu_online_map);
-        } while (!cpus_empty(map));
-        set_cpus_allowed(current, tmp);
+        smp_mb();
+        /* kick all the CPUs so that they exit out of pm_idle */
+        smp_call_function(do_nothing, NULL, 0, 1);
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
···
 #ifdef CONFIG_SMP
                 min_xtp();
 #endif
-                if (__get_cpu_var(cpu_idle_state))
-                        __get_cpu_var(cpu_idle_state) = 0;
-
                 rmb();
                 if (mark_idle)
                         (*mark_idle)(1);
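The kernel-doc comment added above states a caller contract that is not enforced anywhere in code, so here is a minimal sketch of the intended usage; switch_idle_handler() and my_new_idle() are hypothetical names invented for the illustration, only pm_idle and cpu_idle_wait() come from the patch itself.

/* Hypothetical caller, not part of this merge. */
extern void (*pm_idle)(void);           /* normally declared by the pm headers */
extern void cpu_idle_wait(void);

static void my_new_idle(void)
{
        /* ... new low-power idle behaviour would go here ... */
}

static void switch_idle_handler(void)
{
        pm_idle = my_new_idle;  /* publish the new handler first ...        */
        cpu_idle_wait();        /* ... then kick every CPU out of the old
                                 * handler so it is never entered again     */
}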
+139
arch/ia64/kernel/ptrace.c
···
         return 0;
 }
 
+static long
+ia64_sync_kernel_rbs (struct task_struct *child, struct switch_stack *sw,
+                unsigned long user_rbs_start, unsigned long user_rbs_end)
+{
+        unsigned long addr, val;
+        long ret;
+
+        /* now copy word for word from user rbs to kernel rbs: */
+        for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) {
+                if (access_process_vm(child, addr, &val, sizeof(val), 0)
+                                != sizeof(val))
+                        return -EIO;
+
+                ret = ia64_poke(child, sw, user_rbs_end, addr, val);
+                if (ret < 0)
+                        return ret;
+        }
+        return 0;
+}
+
+typedef long (*syncfunc_t)(struct task_struct *, struct switch_stack *,
+                unsigned long, unsigned long);
+
+static void do_sync_rbs(struct unw_frame_info *info, void *arg)
+{
+        struct pt_regs *pt;
+        unsigned long urbs_end;
+        syncfunc_t fn = arg;
+
+        if (unw_unwind_to_user(info) < 0)
+                return;
+        pt = task_pt_regs(info->task);
+        urbs_end = ia64_get_user_rbs_end(info->task, pt, NULL);
+
+        fn(info->task, info->sw, pt->ar_bspstore, urbs_end);
+}
+
+/*
+ * when a thread is stopped (ptraced), debugger might change thread's user
+ * stack (change memory directly), and we must avoid the RSE stored in kernel
+ * to override user stack (user space's RSE is newer than kernel's in the
+ * case). To workaround the issue, we copy kernel RSE to user RSE before the
+ * task is stopped, so user RSE has updated data. we then copy user RSE to
+ * kernel after the task is resummed from traced stop and kernel will use the
+ * newer RSE to return to user. TIF_RESTORE_RSE is the flag to indicate we need
+ * synchronize user RSE to kernel.
+ */
+void ia64_ptrace_stop(void)
+{
+        if (test_and_set_tsk_thread_flag(current, TIF_RESTORE_RSE))
+                return;
+        tsk_set_notify_resume(current);
+        unw_init_running(do_sync_rbs, ia64_sync_user_rbs);
+}
+
+/*
+ * This is called to read back the register backing store.
+ */
+void ia64_sync_krbs(void)
+{
+        clear_tsk_thread_flag(current, TIF_RESTORE_RSE);
+        tsk_clear_notify_resume(current);
+
+        unw_init_running(do_sync_rbs, ia64_sync_kernel_rbs);
+}
+
+/*
+ * After PTRACE_ATTACH, a thread's register backing store area in user
+ * space is assumed to contain correct data whenever the thread is
+ * stopped. arch_ptrace_stop takes care of this on tracing stops.
+ * But if the child was already stopped for job control when we attach
+ * to it, then it might not ever get into ptrace_stop by the time we
+ * want to examine the user memory containing the RBS.
+ */
+void
+ptrace_attach_sync_user_rbs (struct task_struct *child)
+{
+        int stopped = 0;
+        struct unw_frame_info info;
+
+        /*
+         * If the child is in TASK_STOPPED, we need to change that to
+         * TASK_TRACED momentarily while we operate on it. This ensures
+         * that the child won't be woken up and return to user mode while
+         * we are doing the sync. (It can only be woken up for SIGKILL.)
+         */
+
+        read_lock(&tasklist_lock);
+        if (child->signal) {
+                spin_lock_irq(&child->sighand->siglock);
+                if (child->state == TASK_STOPPED &&
+                    !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) {
+                        tsk_set_notify_resume(child);
+
+                        child->state = TASK_TRACED;
+                        stopped = 1;
+                }
+                spin_unlock_irq(&child->sighand->siglock);
+        }
+        read_unlock(&tasklist_lock);
+
+        if (!stopped)
+                return;
+
+        unw_init_from_blocked_task(&info, child);
+        do_sync_rbs(&info, ia64_sync_user_rbs);
+
+        /*
+         * Now move the child back into TASK_STOPPED if it should be in a
+         * job control stop, so that SIGCONT can be used to wake it up.
+         */
+        read_lock(&tasklist_lock);
+        if (child->signal) {
+                spin_lock_irq(&child->sighand->siglock);
+                if (child->state == TASK_TRACED &&
+                    (child->signal->flags & SIGNAL_STOP_STOPPED)) {
+                        child->state = TASK_STOPPED;
+                }
+                spin_unlock_irq(&child->sighand->siglock);
+        }
+        read_unlock(&tasklist_lock);
+}
+
 static inline int
 thread_matches (struct task_struct *thread, unsigned long addr)
 {
···
         struct task_struct *child;
         struct switch_stack *sw;
         long ret;
+        struct unw_frame_info info;
 
         lock_kernel();
         ret = -EPERM;
···
 
         if (request == PTRACE_ATTACH) {
                 ret = ptrace_attach(child);
+                if (!ret)
+                        arch_ptrace_attach(child);
                 goto out_tsk;
         }
 
···
                 /* write the word at location addr */
                 urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
                 ret = ia64_poke(child, sw, urbs_end, addr, data);
+
+                /* Make sure user RBS has the latest data */
+                unw_init_from_blocked_task(&info, child);
+                do_sync_rbs(&info, ia64_sync_user_rbs);
+
                 goto out_tsk;
 
         case PTRACE_PEEKUSR:
···
             && (current->ptrace & PT_PTRACED))
                 syscall_trace();
 
+        /* copy user rbs to kernel rbs */
+        if (test_thread_flag(TIF_RESTORE_RSE))
+                ia64_sync_krbs();
+
         if (unlikely(current->audit_context)) {
                 long syscall;
                 int arch;
···
             || test_thread_flag(TIF_SINGLESTEP))
             && (current->ptrace & PT_PTRACED))
                 syscall_trace();
+
+        /* copy user rbs to kernel rbs */
+        if (test_thread_flag(TIF_RESTORE_RSE))
+                ia64_sync_krbs();
 }
+11
include/asm-ia64/ptrace.h
···
                                                 unsigned long, long);
 extern void ia64_flush_fph (struct task_struct *);
 extern void ia64_sync_fph (struct task_struct *);
+extern void ia64_sync_krbs(void);
 extern long ia64_sync_user_rbs (struct task_struct *, struct switch_stack *,
                                 unsigned long, unsigned long);
 
···
 
 extern void ia64_increment_ip (struct pt_regs *pt);
 extern void ia64_decrement_ip (struct pt_regs *pt);
+
+extern void ia64_ptrace_stop(void);
+#define arch_ptrace_stop(code, info) \
+        ia64_ptrace_stop()
+#define arch_ptrace_stop_needed(code, info) \
+        (!test_thread_flag(TIF_RESTORE_RSE))
+
+extern void ptrace_attach_sync_user_rbs (struct task_struct *);
+#define arch_ptrace_attach(child) \
+        ptrace_attach_sync_user_rbs(child)
 
 #endif /* !__KERNEL__ */
 
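The arch_ptrace_stop()/arch_ptrace_stop_needed() macros above are the hooks consumed by the generic tracing-stop path in kernel/signal.c. The sketch below is illustrative only: example_tracing_stop() is a made-up stand-in for that caller, and only the two macros (and ia64_ptrace_stop() behind them) come from this patch.

#include <linux/sched.h>
#include <linux/signal.h>
#include <asm/ptrace.h>

/* Hypothetical stand-in for the real caller; name, arguments and the
 * surrounding locking of the real ptrace_stop() are omitted here. */
static void example_tracing_stop(int exit_code, siginfo_t *info)
{
        /* On ia64 this is true only while TIF_RESTORE_RSE is not yet set,
         * so the register backing store is flushed at most once per stop. */
        if (arch_ptrace_stop_needed(exit_code, info))
                arch_ptrace_stop(exit_code, info);      /* -> ia64_ptrace_stop() */

        /* ... the task is then put into TASK_TRACED for the debugger ... */
}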
+8 -3
include/asm-ia64/thread_info.h
···
 #define alloc_task_struct()     ((struct task_struct *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER))
 #define free_task_struct(tsk)   free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER)
 
+#define tsk_set_notify_resume(tsk) \
+        set_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME)
+extern void tsk_clear_notify_resume(struct task_struct *tsk);
 #endif /* !__ASSEMBLY */
 
 /*
···
 #define TIF_SYSCALL_AUDIT       3       /* syscall auditing active */
 #define TIF_SINGLESTEP          4       /* restore singlestep on return to user mode */
 #define TIF_RESTORE_SIGMASK     5       /* restore signal mask in do_signal() */
-#define TIF_PERFMON_WORK        6       /* work for pfm_handle_work() */
+#define TIF_NOTIFY_RESUME       6       /* resumption notification requested */
 #define TIF_POLLING_NRFLAG      16      /* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE              17
 #define TIF_MCA_INIT            18      /* this task is processing MCA or INIT */
 #define TIF_DB_DISABLED         19      /* debug trap disabled for fsyscall */
 #define TIF_FREEZE              20      /* is freezing for suspend */
+#define TIF_RESTORE_RSE         21      /* user RBS is newer than kernel RBS */
 
 #define _TIF_SYSCALL_TRACE      (1 << TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT      (1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SINGLESTEP         (1 << TIF_SINGLESTEP)
 #define _TIF_SYSCALL_TRACEAUDIT (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP)
 #define _TIF_RESTORE_SIGMASK    (1 << TIF_RESTORE_SIGMASK)
-#define _TIF_PERFMON_WORK       (1 << TIF_PERFMON_WORK)
+#define _TIF_NOTIFY_RESUME      (1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING         (1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED       (1 << TIF_NEED_RESCHED)
 #define _TIF_POLLING_NRFLAG     (1 << TIF_POLLING_NRFLAG)
 #define _TIF_MCA_INIT           (1 << TIF_MCA_INIT)
 #define _TIF_DB_DISABLED        (1 << TIF_DB_DISABLED)
 #define _TIF_FREEZE             (1 << TIF_FREEZE)
+#define _TIF_RESTORE_RSE        (1 << TIF_RESTORE_RSE)
 
 /* "work to do on user-return" bits */
-#define TIF_ALLWORK_MASK        (_TIF_SIGPENDING|_TIF_PERFMON_WORK|_TIF_SYSCALL_AUDIT|\
+#define TIF_ALLWORK_MASK        (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\
                                  _TIF_NEED_RESCHED| _TIF_SYSCALL_TRACE|\
                                  _TIF_RESTORE_SIGMASK)
 /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */
+4 -1
include/asm-ia64/unistd.h
···
 #define __NR_signalfd                   1307
 #define __NR_timerfd                    1308
 #define __NR_eventfd                    1309
+#define __NR_timerfd_create             1310
+#define __NR_timerfd_settime            1311
+#define __NR_timerfd_gettime            1312
 
 #ifdef __KERNEL__
 
 
-#define NR_syscalls                     286 /* length of syscall table */
+#define NR_syscalls                     289 /* length of syscall table */
 
 /*
  * The following defines stop scripts/checksyscalls.sh from complaining about
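With the three numbers above wired into the syscall table in entry.S, a quick user-space smoke test on ia64 could look like the sketch below. It deliberately uses raw syscall(2) with the constants from this header; a sufficiently new glibc would provide timerfd_create()/timerfd_settime() wrappers instead, and the NR_IA64_* names are local to the example.

#define _GNU_SOURCE
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

/* ia64 syscall numbers taken from include/asm-ia64/unistd.h above */
#define NR_IA64_TIMERFD_CREATE  1310
#define NR_IA64_TIMERFD_SETTIME 1311

int main(void)
{
        struct itimerspec its = { .it_value = { .tv_sec = 1 } };
        long fd = syscall(NR_IA64_TIMERFD_CREATE, CLOCK_MONOTONIC, 0);

        if (fd < 0) {
                perror("timerfd_create");
                return 1;
        }
        if (syscall(NR_IA64_TIMERFD_SETTIME, fd, 0, &its, NULL) < 0) {
                perror("timerfd_settime");
                return 1;
        }
        printf("timerfd %ld armed to expire in 1 second\n", fd);
        return 0;
}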