Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf_counter: Add forward/backward attribute ABI compatibility

Provide a means of extending the perf_counter_attr structure in a 'natural' way.

We allow growing the structure by appending fields at the end by specifying
the full structure size inside it.

When a new kernel sees a smaller (old) structure, it will zero-pad the tail.
When an old kernel sees a larger (new) structure, it will verify that the tail
consists of zeros, and fail otherwise.

If we fail due to a size mismatch, we return -E2BIG and write the kernel's
native attribute size back into the provided structure.

Furthermore, add some attribute verification, so that we'll fail counter
creation when unknown bits are present (in the PERF_SAMPLE or PERF_FORMAT
flags, or in the __reserved fields).

(This ABI detail is introduced while keeping the existing syscall ABI.)

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

authored by

Peter Zijlstra and committed by
Ingo Molnar
974802ea bbd36e5e

+105 -10
+15 -4
include/linux/perf_counter.h
··· 120 120 PERF_SAMPLE_ID = 1U << 6, 121 121 PERF_SAMPLE_CPU = 1U << 7, 122 122 PERF_SAMPLE_PERIOD = 1U << 8, 123 + 124 + PERF_SAMPLE_MAX = 1U << 9, /* non-ABI */ 123 125 }; 124 126 125 127 /* ··· 133 131 PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, 134 132 PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, 135 133 PERF_FORMAT_ID = 1U << 2, 134 + 135 + PERF_FORMAT_MAX = 1U << 3, /* non-ABI */ 136 136 }; 137 + 138 + #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ 137 139 138 140 /* 139 141 * Hardware event to monitor via a performance monitoring counter: 140 142 */ 141 143 struct perf_counter_attr { 144 + 142 145 /* 143 146 * Major type: hardware/software/tracepoint/etc. 144 147 */ 145 148 __u32 type; 146 - __u32 __reserved_1; 149 + 150 + /* 151 + * Size of the attr structure, for fwd/bwd compat. 152 + */ 153 + __u32 size; 147 154 148 155 /* 149 156 * Type specific configuration information. ··· 179 168 comm : 1, /* include comm data */ 180 169 freq : 1, /* use freq, not period */ 181 170 182 - __reserved_2 : 53; 171 + __reserved_1 : 53; 183 172 184 173 __u32 wakeup_events; /* wakeup every n events */ 185 - __u32 __reserved_3; 174 + __u32 __reserved_2; 186 175 187 - __u64 __reserved_4; 176 + __u64 __reserved_3; 188 177 }; 189 178 190 179 /*
+1 -1
include/linux/syscalls.h
··· 758 758 759 759 760 760 asmlinkage long sys_perf_counter_open( 761 - const struct perf_counter_attr __user *attr_uptr, 761 + struct perf_counter_attr __user *attr_uptr, 762 762 pid_t pid, int cpu, int group_fd, unsigned long flags); 763 763 #endif
+86 -3
kernel/perf_counter.c
··· 3584 3584 case PERF_TYPE_TRACEPOINT: 3585 3585 pmu = tp_perf_counter_init(counter); 3586 3586 break; 3587 + 3588 + default: 3589 + break; 3587 3590 } 3588 3591 done: 3589 3592 err = 0; ··· 3613 3610 return counter; 3614 3611 } 3615 3612 3613 + static int perf_copy_attr(struct perf_counter_attr __user *uattr, 3614 + struct perf_counter_attr *attr) 3615 + { 3616 + int ret; 3617 + u32 size; 3618 + 3619 + if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0)) 3620 + return -EFAULT; 3621 + 3622 + /* 3623 + * zero the full structure, so that a short copy will be nice. 3624 + */ 3625 + memset(attr, 0, sizeof(*attr)); 3626 + 3627 + ret = get_user(size, &uattr->size); 3628 + if (ret) 3629 + return ret; 3630 + 3631 + if (size > PAGE_SIZE) /* silly large */ 3632 + goto err_size; 3633 + 3634 + if (!size) /* abi compat */ 3635 + size = PERF_ATTR_SIZE_VER0; 3636 + 3637 + if (size < PERF_ATTR_SIZE_VER0) 3638 + goto err_size; 3639 + 3640 + /* 3641 + * If we're handed a bigger struct than we know of, 3642 + * ensure all the unknown bits are 0. 3643 + */ 3644 + if (size > sizeof(*attr)) { 3645 + unsigned long val; 3646 + unsigned long __user *addr; 3647 + unsigned long __user *end; 3648 + 3649 + addr = PTR_ALIGN((void __user *)uattr + sizeof(*attr), 3650 + sizeof(unsigned long)); 3651 + end = PTR_ALIGN((void __user *)uattr + size, 3652 + sizeof(unsigned long)); 3653 + 3654 + for (; addr < end; addr += sizeof(unsigned long)) { 3655 + ret = get_user(val, addr); 3656 + if (ret) 3657 + return ret; 3658 + if (val) 3659 + goto err_size; 3660 + } 3661 + } 3662 + 3663 + ret = copy_from_user(attr, uattr, size); 3664 + if (ret) 3665 + return -EFAULT; 3666 + 3667 + /* 3668 + * If the type exists, the corresponding creation will verify 3669 + * the attr->config. 
3670 + */ 3671 + if (attr->type >= PERF_TYPE_MAX) 3672 + return -EINVAL; 3673 + 3674 + if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) 3675 + return -EINVAL; 3676 + 3677 + if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) 3678 + return -EINVAL; 3679 + 3680 + if (attr->read_format & ~(PERF_FORMAT_MAX-1)) 3681 + return -EINVAL; 3682 + 3683 + out: 3684 + return ret; 3685 + 3686 + err_size: 3687 + put_user(sizeof(*attr), &uattr->size); 3688 + ret = -E2BIG; 3689 + goto out; 3690 + } 3691 + 3616 3692 /** 3617 3693 * sys_perf_counter_open - open a performance counter, associate it to a task/cpu 3618 3694 * ··· 3701 3619 * @group_fd: group leader counter fd 3702 3620 */ 3703 3621 SYSCALL_DEFINE5(perf_counter_open, 3704 - const struct perf_counter_attr __user *, attr_uptr, 3622 + struct perf_counter_attr __user *, attr_uptr, 3705 3623 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) 3706 3624 { 3707 3625 struct perf_counter *counter, *group_leader; ··· 3717 3635 if (flags) 3718 3636 return -EINVAL; 3719 3637 3720 - if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0) 3721 - return -EFAULT; 3638 + ret = perf_copy_attr(attr_uptr, &attr); 3639 + if (ret) 3640 + return ret; 3722 3641 3723 3642 if (!attr.exclude_kernel) { 3724 3643 if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+3 -2
tools/perf/perf.h
··· 53 53 _min1 < _min2 ? _min1 : _min2; }) 54 54 55 55 static inline int 56 - sys_perf_counter_open(struct perf_counter_attr *attr_uptr, 56 + sys_perf_counter_open(struct perf_counter_attr *attr, 57 57 pid_t pid, int cpu, int group_fd, 58 58 unsigned long flags) 59 59 { 60 - return syscall(__NR_perf_counter_open, attr_uptr, pid, cpu, 60 + attr->size = sizeof(*attr); 61 + return syscall(__NR_perf_counter_open, attr, pid, cpu, 61 62 group_fd, flags); 62 63 } 63 64