tangled
alpha
login
or
join now
tjh.dev
/
kernel
1
fork
atom
Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1
fork
atom
overview
issues
pulls
pipelines
Merge branch 'perf/x86-ibs' into perf/core
Ingo Molnar
14 years ago
ad8537cd
149936a0
+438
-7
3 changed files
expand all
collapse all
unified
split
arch
x86
include
asm
msr-index.h
perf_event.h
kernel
cpu
perf_event_amd_ibs.c
+5
arch/x86/include/asm/msr-index.h
reviewed
···
134
134
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
135
135
#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
136
136
#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
137
137
+
#define MSR_AMD64_IBSFETCH_REG_COUNT 3
138
138
+
#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
137
139
#define MSR_AMD64_IBSOPCTL 0xc0011033
138
140
#define MSR_AMD64_IBSOPRIP 0xc0011034
139
141
#define MSR_AMD64_IBSOPDATA 0xc0011035
···
143
141
#define MSR_AMD64_IBSOPDATA3 0xc0011037
144
142
#define MSR_AMD64_IBSDCLINAD 0xc0011038
145
143
#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
144
144
+
#define MSR_AMD64_IBSOP_REG_COUNT 7
145
145
+
#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
146
146
#define MSR_AMD64_IBSCTL 0xc001103a
147
147
#define MSR_AMD64_IBSBRTARGET 0xc001103b
148
148
+
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
148
149
149
150
/* Fam 15h MSRs */
150
151
#define MSR_F15H_PERF_CTL 0xc0010200
+2
arch/x86/include/asm/perf_event.h
reviewed
···
178
178
#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
179
179
180
180
/* IbsOpCtl bits */
181
181
+
/* lower 4 bits of the current count are ignored: */
182
182
+
#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32)
181
183
#define IBS_OP_CNT_CTL (1ULL<<19)
182
184
#define IBS_OP_VAL (1ULL<<18)
183
185
#define IBS_OP_ENABLE (1ULL<<17)
+431
-7
arch/x86/kernel/cpu/perf_event_amd_ibs.c
reviewed
···
16
16
17
17
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
18
18
19
19
-
static struct pmu perf_ibs;
19
19
+
#include <linux/kprobes.h>
20
20
+
#include <linux/hardirq.h>
21
21
+
22
22
+
#include <asm/nmi.h>
23
23
+
24
24
+
#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
25
25
+
#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
26
26
+
27
27
+
/*
 * Bit numbers within cpu_perf_ibs.state.
 *
 * IBS_ENABLED  - set by perf_ibs_add(), cleared by perf_ibs_del()
 * IBS_STARTED  - set by perf_ibs_start(), cleared by perf_ibs_stop()
 * IBS_STOPPING - set by perf_ibs_stop(), consumed by the NMI handler
 */
enum ibs_states {
	IBS_ENABLED,
	IBS_STARTED,
	IBS_STOPPING,

	/* number of state bits; sizes the state bitmap */
	IBS_MAX_STATES,
};
34
34
+
35
35
+
struct cpu_perf_ibs {
36
36
+
struct perf_event *event;
37
37
+
unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)];
38
38
+
};
39
39
+
40
40
+
struct perf_ibs {
41
41
+
struct pmu pmu;
42
42
+
unsigned int msr;
43
43
+
u64 config_mask;
44
44
+
u64 cnt_mask;
45
45
+
u64 enable_mask;
46
46
+
u64 valid_mask;
47
47
+
u64 max_period;
48
48
+
unsigned long offset_mask[1];
49
49
+
int offset_max;
50
50
+
struct cpu_perf_ibs __percpu *pcpu;
51
51
+
u64 (*get_count)(u64 config);
52
52
+
};
53
53
+
54
54
+
struct perf_ibs_data {
55
55
+
u32 size;
56
56
+
union {
57
57
+
u32 data[0]; /* data buffer starts here */
58
58
+
u32 caps;
59
59
+
};
60
60
+
u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
61
61
+
};
62
62
+
63
63
+
static int
64
64
+
perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *count)
65
65
+
{
66
66
+
s64 left = local64_read(&hwc->period_left);
67
67
+
s64 period = hwc->sample_period;
68
68
+
int overflow = 0;
69
69
+
70
70
+
/*
71
71
+
* If we are way outside a reasonable range then just skip forward:
72
72
+
*/
73
73
+
if (unlikely(left <= -period)) {
74
74
+
left = period;
75
75
+
local64_set(&hwc->period_left, left);
76
76
+
hwc->last_period = period;
77
77
+
overflow = 1;
78
78
+
}
79
79
+
80
80
+
if (unlikely(left <= 0)) {
81
81
+
left += period;
82
82
+
local64_set(&hwc->period_left, left);
83
83
+
hwc->last_period = period;
84
84
+
overflow = 1;
85
85
+
}
86
86
+
87
87
+
if (unlikely(left < min))
88
88
+
left = min;
89
89
+
90
90
+
if (left > max)
91
91
+
left = max;
92
92
+
93
93
+
*count = (u64)left;
94
94
+
95
95
+
return overflow;
96
96
+
}
97
97
+
98
98
+
static int
99
99
+
perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
100
100
+
{
101
101
+
struct hw_perf_event *hwc = &event->hw;
102
102
+
int shift = 64 - width;
103
103
+
u64 prev_raw_count;
104
104
+
u64 delta;
105
105
+
106
106
+
/*
107
107
+
* Careful: an NMI might modify the previous event value.
108
108
+
*
109
109
+
* Our tactic to handle this is to first atomically read and
110
110
+
* exchange a new raw count - then add that new-prev delta
111
111
+
* count to the generic event atomically:
112
112
+
*/
113
113
+
prev_raw_count = local64_read(&hwc->prev_count);
114
114
+
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
115
115
+
new_raw_count) != prev_raw_count)
116
116
+
return 0;
117
117
+
118
118
+
/*
119
119
+
* Now we have the new raw value and have updated the prev
120
120
+
* timestamp already. We can now calculate the elapsed delta
121
121
+
* (event-)time and add that to the generic event.
122
122
+
*
123
123
+
* Careful, not all hw sign-extends above the physical width
124
124
+
* of the count.
125
125
+
*/
126
126
+
delta = (new_raw_count << shift) - (prev_raw_count << shift);
127
127
+
delta >>= shift;
128
128
+
129
129
+
local64_add(delta, &event->count);
130
130
+
local64_sub(delta, &hwc->period_left);
131
131
+
132
132
+
return 1;
133
133
+
}
134
134
+
135
135
+
static struct perf_ibs perf_ibs_fetch;
136
136
+
static struct perf_ibs perf_ibs_op;
137
137
+
138
138
+
static struct perf_ibs *get_ibs_pmu(int type)
139
139
+
{
140
140
+
if (perf_ibs_fetch.pmu.type == type)
141
141
+
return &perf_ibs_fetch;
142
142
+
if (perf_ibs_op.pmu.type == type)
143
143
+
return &perf_ibs_op;
144
144
+
return NULL;
145
145
+
}
20
146
21
147
static int perf_ibs_init(struct perf_event *event)
22
148
{
23
23
-
if (perf_ibs.type != event->attr.type)
149
149
+
struct hw_perf_event *hwc = &event->hw;
150
150
+
struct perf_ibs *perf_ibs;
151
151
+
u64 max_cnt, config;
152
152
+
153
153
+
perf_ibs = get_ibs_pmu(event->attr.type);
154
154
+
if (!perf_ibs)
24
155
return -ENOENT;
156
156
+
157
157
+
config = event->attr.config;
158
158
+
if (config & ~perf_ibs->config_mask)
159
159
+
return -EINVAL;
160
160
+
161
161
+
if (hwc->sample_period) {
162
162
+
if (config & perf_ibs->cnt_mask)
163
163
+
/* raw max_cnt may not be set */
164
164
+
return -EINVAL;
165
165
+
if (hwc->sample_period & 0x0f)
166
166
+
/* lower 4 bits can not be set in ibs max cnt */
167
167
+
return -EINVAL;
168
168
+
} else {
169
169
+
max_cnt = config & perf_ibs->cnt_mask;
170
170
+
config &= ~perf_ibs->cnt_mask;
171
171
+
event->attr.sample_period = max_cnt << 4;
172
172
+
hwc->sample_period = event->attr.sample_period;
173
173
+
}
174
174
+
175
175
+
if (!hwc->sample_period)
176
176
+
return -EINVAL;
177
177
+
178
178
+
hwc->config_base = perf_ibs->msr;
179
179
+
hwc->config = config;
180
180
+
25
181
return 0;
182
182
+
}
183
183
+
184
184
+
/*
 * Compute the next period for an IBS event and reset prev_count to 0.
 * Returns what perf_event_set_period() returns, the period is stored
 * in *period.
 */
static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
			       struct hw_perf_event *hwc, u64 *period)
{
	int overflow;

	/* the lower 4 bits are ignored, so the minimum is 1 << 4 */
	overflow = perf_event_set_period(hwc, 1 << 4, perf_ibs->max_period,
					 period);
	local64_set(&hwc->prev_count, 0);

	return overflow;
}
195
195
+
196
196
+
/* perf_ibs::get_count for ibs_fetch: IBS_FETCH_CNT field, shifted down by 12 */
static u64 get_ibs_fetch_count(u64 config)
{
	u64 cnt = config & IBS_FETCH_CNT;

	return cnt >> 12;
}
200
200
+
201
201
+
/* perf_ibs::get_count for ibs_op: IBS_OP_CUR_CNT field, shifted down by 32 */
static u64 get_ibs_op_count(u64 config)
{
	u64 cnt = config & IBS_OP_CUR_CNT;

	return cnt >> 32;
}
205
205
+
206
206
+
static void
207
207
+
perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
208
208
+
u64 config)
209
209
+
{
210
210
+
u64 count = perf_ibs->get_count(config);
211
211
+
212
212
+
while (!perf_event_try_update(event, count, 20)) {
213
213
+
rdmsrl(event->hw.config_base, config);
214
214
+
count = perf_ibs->get_count(config);
215
215
+
}
216
216
+
}
217
217
+
218
218
+
/* Note: The enable mask must be encoded in the config argument. */
219
219
+
static inline void perf_ibs_enable_event(struct hw_perf_event *hwc, u64 config)
220
220
+
{
221
221
+
wrmsrl(hwc->config_base, hwc->config | config);
222
222
+
}
223
223
+
224
224
+
/*
225
225
+
* We cannot restore the ibs pmu state, so we always needs to update
226
226
+
* the event while stopping it and then reset the state when starting
227
227
+
* again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in
228
228
+
* perf_ibs_start()/perf_ibs_stop() and instead always do it.
229
229
+
*/
230
230
+
static void perf_ibs_start(struct perf_event *event, int flags)
231
231
+
{
232
232
+
struct hw_perf_event *hwc = &event->hw;
233
233
+
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
234
234
+
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
235
235
+
u64 config;
236
236
+
237
237
+
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
238
238
+
return;
239
239
+
240
240
+
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
241
241
+
hwc->state = 0;
242
242
+
243
243
+
perf_ibs_set_period(perf_ibs, hwc, &config);
244
244
+
config = (config >> 4) | perf_ibs->enable_mask;
245
245
+
set_bit(IBS_STARTED, pcpu->state);
246
246
+
perf_ibs_enable_event(hwc, config);
247
247
+
248
248
+
perf_event_update_userpage(event);
249
249
+
}
250
250
+
251
251
+
static void perf_ibs_stop(struct perf_event *event, int flags)
252
252
+
{
253
253
+
struct hw_perf_event *hwc = &event->hw;
254
254
+
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
255
255
+
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
256
256
+
u64 val;
257
257
+
int stopping;
258
258
+
259
259
+
stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
260
260
+
261
261
+
if (!stopping && (hwc->state & PERF_HES_UPTODATE))
262
262
+
return;
263
263
+
264
264
+
rdmsrl(hwc->config_base, val);
265
265
+
266
266
+
if (stopping) {
267
267
+
set_bit(IBS_STOPPING, pcpu->state);
268
268
+
val &= ~perf_ibs->enable_mask;
269
269
+
wrmsrl(hwc->config_base, val);
270
270
+
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
271
271
+
hwc->state |= PERF_HES_STOPPED;
272
272
+
}
273
273
+
274
274
+
if (hwc->state & PERF_HES_UPTODATE)
275
275
+
return;
276
276
+
277
277
+
perf_ibs_event_update(perf_ibs, event, val);
278
278
+
hwc->state |= PERF_HES_UPTODATE;
26
279
}
27
280
28
281
static int perf_ibs_add(struct perf_event *event, int flags)
29
282
{
283
283
+
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
284
284
+
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
285
285
+
286
286
+
if (test_and_set_bit(IBS_ENABLED, pcpu->state))
287
287
+
return -ENOSPC;
288
288
+
289
289
+
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
290
290
+
291
291
+
pcpu->event = event;
292
292
+
293
293
+
if (flags & PERF_EF_START)
294
294
+
perf_ibs_start(event, PERF_EF_RELOAD);
295
295
+
30
296
return 0;
31
297
}
32
298
33
299
static void perf_ibs_del(struct perf_event *event, int flags)
34
300
{
301
301
+
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
302
302
+
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
303
303
+
304
304
+
if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
305
305
+
return;
306
306
+
307
307
+
perf_ibs_stop(event, PERF_EF_UPDATE);
308
308
+
309
309
+
pcpu->event = NULL;
310
310
+
311
311
+
perf_event_update_userpage(event);
35
312
}
36
313
37
37
-
static struct pmu perf_ibs = {
38
38
-
.event_init= perf_ibs_init,
39
39
-
.add= perf_ibs_add,
40
40
-
.del= perf_ibs_del,
314
314
+
/* pmu::read callback, intentionally empty */
static void perf_ibs_read(struct perf_event *event)
{
}
315
315
+
316
316
+
static struct perf_ibs perf_ibs_fetch = {
317
317
+
.pmu = {
318
318
+
.task_ctx_nr = perf_invalid_context,
319
319
+
320
320
+
.event_init = perf_ibs_init,
321
321
+
.add = perf_ibs_add,
322
322
+
.del = perf_ibs_del,
323
323
+
.start = perf_ibs_start,
324
324
+
.stop = perf_ibs_stop,
325
325
+
.read = perf_ibs_read,
326
326
+
},
327
327
+
.msr = MSR_AMD64_IBSFETCHCTL,
328
328
+
.config_mask = IBS_FETCH_CONFIG_MASK,
329
329
+
.cnt_mask = IBS_FETCH_MAX_CNT,
330
330
+
.enable_mask = IBS_FETCH_ENABLE,
331
331
+
.valid_mask = IBS_FETCH_VAL,
332
332
+
.max_period = IBS_FETCH_MAX_CNT << 4,
333
333
+
.offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
334
334
+
.offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
335
335
+
336
336
+
.get_count = get_ibs_fetch_count,
41
337
};
338
338
+
339
339
+
static struct perf_ibs perf_ibs_op = {
340
340
+
.pmu = {
341
341
+
.task_ctx_nr = perf_invalid_context,
342
342
+
343
343
+
.event_init = perf_ibs_init,
344
344
+
.add = perf_ibs_add,
345
345
+
.del = perf_ibs_del,
346
346
+
.start = perf_ibs_start,
347
347
+
.stop = perf_ibs_stop,
348
348
+
.read = perf_ibs_read,
349
349
+
},
350
350
+
.msr = MSR_AMD64_IBSOPCTL,
351
351
+
.config_mask = IBS_OP_CONFIG_MASK,
352
352
+
.cnt_mask = IBS_OP_MAX_CNT,
353
353
+
.enable_mask = IBS_OP_ENABLE,
354
354
+
.valid_mask = IBS_OP_VAL,
355
355
+
.max_period = IBS_OP_MAX_CNT << 4,
356
356
+
.offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
357
357
+
.offset_max = MSR_AMD64_IBSOP_REG_COUNT,
358
358
+
359
359
+
.get_count = get_ibs_op_count,
360
360
+
};
361
361
+
362
362
+
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
363
363
+
{
364
364
+
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
365
365
+
struct perf_event *event = pcpu->event;
366
366
+
struct hw_perf_event *hwc = &event->hw;
367
367
+
struct perf_sample_data data;
368
368
+
struct perf_raw_record raw;
369
369
+
struct pt_regs regs;
370
370
+
struct perf_ibs_data ibs_data;
371
371
+
int offset, size, overflow, reenable;
372
372
+
unsigned int msr;
373
373
+
u64 *buf, config;
374
374
+
375
375
+
if (!test_bit(IBS_STARTED, pcpu->state)) {
376
376
+
/* Catch spurious interrupts after stopping IBS: */
377
377
+
if (!test_and_clear_bit(IBS_STOPPING, pcpu->state))
378
378
+
return 0;
379
379
+
rdmsrl(perf_ibs->msr, *ibs_data.regs);
380
380
+
return (*ibs_data.regs & perf_ibs->valid_mask) ? 1 : 0;
381
381
+
}
382
382
+
383
383
+
msr = hwc->config_base;
384
384
+
buf = ibs_data.regs;
385
385
+
rdmsrl(msr, *buf);
386
386
+
if (!(*buf++ & perf_ibs->valid_mask))
387
387
+
return 0;
388
388
+
389
389
+
perf_sample_data_init(&data, 0);
390
390
+
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
391
391
+
ibs_data.caps = ibs_caps;
392
392
+
size = 1;
393
393
+
offset = 1;
394
394
+
do {
395
395
+
rdmsrl(msr + offset, *buf++);
396
396
+
size++;
397
397
+
offset = find_next_bit(perf_ibs->offset_mask,
398
398
+
perf_ibs->offset_max,
399
399
+
offset + 1);
400
400
+
} while (offset < perf_ibs->offset_max);
401
401
+
raw.size = sizeof(u32) + sizeof(u64) * size;
402
402
+
raw.data = ibs_data.data;
403
403
+
data.raw = &raw;
404
404
+
}
405
405
+
406
406
+
regs = *iregs; /* XXX: update ip from ibs sample */
407
407
+
408
408
+
/*
409
409
+
* Emulate IbsOpCurCnt in MSRC001_1033 (IbsOpCtl), not
410
410
+
* supported in all cpus. As this triggered an interrupt, we
411
411
+
* set the current count to the max count.
412
412
+
*/
413
413
+
config = ibs_data.regs[0];
414
414
+
if (perf_ibs == &perf_ibs_op && !(ibs_caps & IBS_CAPS_RDWROPCNT)) {
415
415
+
config &= ~IBS_OP_CUR_CNT;
416
416
+
config |= (config & IBS_OP_MAX_CNT) << 36;
417
417
+
}
418
418
+
419
419
+
perf_ibs_event_update(perf_ibs, event, config);
420
420
+
421
421
+
overflow = perf_ibs_set_period(perf_ibs, hwc, &config);
422
422
+
reenable = !(overflow && perf_event_overflow(event, &data, ®s));
423
423
+
config = (config >> 4) | (reenable ? perf_ibs->enable_mask : 0);
424
424
+
perf_ibs_enable_event(hwc, config);
425
425
+
426
426
+
perf_event_update_userpage(event);
427
427
+
428
428
+
return 1;
429
429
+
}
430
430
+
431
431
+
static int __kprobes
432
432
+
perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
433
433
+
{
434
434
+
int handled = 0;
435
435
+
436
436
+
handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
437
437
+
handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
438
438
+
439
439
+
if (handled)
440
440
+
inc_irq_stat(apic_perf_irqs);
441
441
+
442
442
+
return handled;
443
443
+
}
444
444
+
445
445
+
static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
446
446
+
{
447
447
+
struct cpu_perf_ibs __percpu *pcpu;
448
448
+
int ret;
449
449
+
450
450
+
pcpu = alloc_percpu(struct cpu_perf_ibs);
451
451
+
if (!pcpu)
452
452
+
return -ENOMEM;
453
453
+
454
454
+
perf_ibs->pcpu = pcpu;
455
455
+
456
456
+
ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
457
457
+
if (ret) {
458
458
+
perf_ibs->pcpu = NULL;
459
459
+
free_percpu(pcpu);
460
460
+
}
461
461
+
462
462
+
return ret;
463
463
+
}
42
464
43
465
static __init int perf_event_ibs_init(void)
44
466
{
45
467
if (!ibs_caps)
46
468
return -ENODEV; /* ibs not supported by the cpu */
47
469
48
48
-
perf_pmu_register(&perf_ibs, "ibs", -1);
470
470
+
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
471
471
+
perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
472
472
+
register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
49
473
printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
50
474
51
475
return 0;