Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf events, x86: Fix Intel Nehalem and Westmere last level cache event definitions

The Intel Nehalem offcore bits implemented in:

e994d7d23a0b: perf: Fix LLC-* events on Intel Nehalem/Westmere

... are wrong: they implemented _ACCESS as _HIT and counted OTHER_CORE_HIT* as
MISS even though it's clearly documented as an L3 hit ...

Fix them and the Westmere definitions as well.

Cc: Andi Kleen <ak@linux.intel.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1299119690-13991-3-git-send-email-ming.m.lin@intel.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Authored by Peter Zijlstra and committed by Ingo Molnar
63b6a675 925f83c0

+52 -35
+52 -35
arch/x86/kernel/cpu/perf_event_intel.c
··· 184 184 }, 185 185 }, 186 186 [ C(LL ) ] = { 187 - /* 188 - * TBD: Need Off-core Response Performance Monitoring support 189 - */ 190 187 [ C(OP_READ) ] = { 191 - /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ 188 + /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ 192 189 [ C(RESULT_ACCESS) ] = 0x01b7, 193 - /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ 194 - [ C(RESULT_MISS) ] = 0x01bb, 190 + /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ 191 + [ C(RESULT_MISS) ] = 0x01b7, 195 192 }, 196 193 [ C(OP_WRITE) ] = { 197 - /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */ 194 + /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ 198 195 [ C(RESULT_ACCESS) ] = 0x01b7, 199 - /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */ 200 - [ C(RESULT_MISS) ] = 0x01bb, 196 + /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ 197 + [ C(RESULT_MISS) ] = 0x01b7, 201 198 }, 202 199 [ C(OP_PREFETCH) ] = { 203 - /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ 200 + /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ 204 201 [ C(RESULT_ACCESS) ] = 0x01b7, 205 - /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ 206 - [ C(RESULT_MISS) ] = 0x01bb, 202 + /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ 203 + [ C(RESULT_MISS) ] = 0x01b7, 207 204 }, 208 205 }, 209 206 [ C(DTLB) ] = { ··· 282 285 }, 283 286 [ C(LL ) ] = { 284 287 [ C(OP_READ) ] = { 285 - /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ 288 + /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ 286 289 [ C(RESULT_ACCESS) ] = 0x01b7, 287 - /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ 288 - [ C(RESULT_MISS) ] = 0x01bb, 290 + /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ 291 + [ C(RESULT_MISS) ] = 0x01b7, 289 292 }, 290 293 /* 291 294 * Use RFO, not WRITEBACK, because a write miss would typically occur 292 295 * on RFO. 
293 296 */ 294 297 [ C(OP_WRITE) ] = { 295 - /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */ 296 - [ C(RESULT_ACCESS) ] = 0x01bb, 297 - /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */ 298 + /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ 299 + [ C(RESULT_ACCESS) ] = 0x01b7, 300 + /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ 298 301 [ C(RESULT_MISS) ] = 0x01b7, 299 302 }, 300 303 [ C(OP_PREFETCH) ] = { 301 - /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ 304 + /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ 302 305 [ C(RESULT_ACCESS) ] = 0x01b7, 303 - /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ 304 - [ C(RESULT_MISS) ] = 0x01bb, 306 + /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ 307 + [ C(RESULT_MISS) ] = 0x01b7, 305 308 }, 306 309 }, 307 310 [ C(DTLB) ] = { ··· 349 352 }; 350 353 351 354 /* 352 - * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3 355 + * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits; 356 + * See IA32 SDM Vol 3B 30.6.1.3 353 357 */ 354 358 355 - #define DMND_DATA_RD (1 << 0) 356 - #define DMND_RFO (1 << 1) 357 - #define DMND_WB (1 << 3) 358 - #define PF_DATA_RD (1 << 4) 359 - #define PF_DATA_RFO (1 << 5) 360 - #define RESP_UNCORE_HIT (1 << 8) 361 - #define RESP_MISS (0xf600) /* non uncore hit */ 359 + #define NHM_DMND_DATA_RD (1 << 0) 360 + #define NHM_DMND_RFO (1 << 1) 361 + #define NHM_DMND_IFETCH (1 << 2) 362 + #define NHM_DMND_WB (1 << 3) 363 + #define NHM_PF_DATA_RD (1 << 4) 364 + #define NHM_PF_DATA_RFO (1 << 5) 365 + #define NHM_PF_IFETCH (1 << 6) 366 + #define NHM_OFFCORE_OTHER (1 << 7) 367 + #define NHM_UNCORE_HIT (1 << 8) 368 + #define NHM_OTHER_CORE_HIT_SNP (1 << 9) 369 + #define NHM_OTHER_CORE_HITM (1 << 10) 370 + /* reserved */ 371 + #define NHM_REMOTE_CACHE_FWD (1 << 12) 372 + #define NHM_REMOTE_DRAM (1 << 13) 373 + #define NHM_LOCAL_DRAM (1 << 14) 374 + #define NHM_NON_DRAM (1 << 15) 375 + 376 + #define NHM_ALL_DRAM (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM) 377 + 378 + #define NHM_DMND_READ (NHM_DMND_DATA_RD) 379 + #define NHM_DMND_WRITE 
(NHM_DMND_RFO|NHM_DMND_WB) 380 + #define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO) 381 + 382 + #define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM) 383 + #define NHM_L3_MISS (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD) 384 + #define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS) 362 385 363 386 static __initconst const u64 nehalem_hw_cache_extra_regs 364 387 [PERF_COUNT_HW_CACHE_MAX] ··· 387 370 { 388 371 [ C(LL ) ] = { 389 372 [ C(OP_READ) ] = { 390 - [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT, 391 - [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS, 373 + [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS, 374 + [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS, 392 375 }, 393 376 [ C(OP_WRITE) ] = { 394 - [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT, 395 - [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS, 377 + [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS, 378 + [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS, 396 379 }, 397 380 [ C(OP_PREFETCH) ] = { 398 - [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT, 399 - [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS, 381 + [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS, 382 + [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS, 400 383 }, 401 384 } 402 385 };