Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tracing: add trace event for memory-failure

RAS user-space tools such as rasdaemon, which are based on trace events,
can receive MCE error events, but not memory recovery result events. So, I
want to add this event to make this scenario complete.

This patch adds an event to the ras group for memory-failure.

The output looks like this:
# tracer: nop
#
# entries-in-buffer/entries-written: 2/2 #P:24
#
# _-----=> irqs-off
# / _----=> need-resched
# | / _---=> hardirq/softirq
# || / _--=> preempt-depth
# ||| / delay
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
# | | | |||| | |
mce-inject-13150 [001] .... 277.019359: memory_failure_event: pfn 0x19869: recovery action for free buddy page: Delayed

[xiexiuqi@huawei.com: fix build error]
Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Chen Gong <gong.chen@linux.intel.com>
Cc: Jim Davis <jim.epost@gmail.com>
Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Xie XiuQi and committed by
Linus Torvalds
97f0b134 cc3e2af4

+89
+85
include/ras/ras_event.h
··· 11 11 #include <linux/pci.h> 12 12 #include <linux/aer.h> 13 13 #include <linux/cper.h> 14 + #include <linux/mm.h> 14 15 15 16 /* 16 17 * MCE Extended Error Log trace event ··· 233 232 __print_flags(__entry->status, "|", aer_uncorrectable_errors)) 234 233 ); 235 234 235 + /* 236 + * memory-failure recovery action result event 237 + * 238 + * unsigned long pfn - Page Frame Number of the corrupted page 239 + * int type - Page types of the corrupted page 240 + * int result - Result of recovery action 241 + */ 242 + 243 + #ifdef CONFIG_MEMORY_FAILURE 244 + #define MF_ACTION_RESULT \ 245 + EM ( MF_IGNORED, "Ignored" ) \ 246 + EM ( MF_FAILED, "Failed" ) \ 247 + EM ( MF_DELAYED, "Delayed" ) \ 248 + EMe ( MF_RECOVERED, "Recovered" ) 249 + 250 + #define MF_PAGE_TYPE \ 251 + EM ( MF_MSG_KERNEL, "reserved kernel page" ) \ 252 + EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" ) \ 253 + EM ( MF_MSG_SLAB, "kernel slab page" ) \ 254 + EM ( MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking" ) \ 255 + EM ( MF_MSG_POISONED_HUGE, "huge page already hardware poisoned" ) \ 256 + EM ( MF_MSG_HUGE, "huge page" ) \ 257 + EM ( MF_MSG_FREE_HUGE, "free huge page" ) \ 258 + EM ( MF_MSG_UNMAP_FAILED, "unmapping failed page" ) \ 259 + EM ( MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page" ) \ 260 + EM ( MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page" ) \ 261 + EM ( MF_MSG_DIRTY_MLOCKED_LRU, "dirty mlocked LRU page" ) \ 262 + EM ( MF_MSG_CLEAN_MLOCKED_LRU, "clean mlocked LRU page" ) \ 263 + EM ( MF_MSG_DIRTY_UNEVICTABLE_LRU, "dirty unevictable LRU page" ) \ 264 + EM ( MF_MSG_CLEAN_UNEVICTABLE_LRU, "clean unevictable LRU page" ) \ 265 + EM ( MF_MSG_DIRTY_LRU, "dirty LRU page" ) \ 266 + EM ( MF_MSG_CLEAN_LRU, "clean LRU page" ) \ 267 + EM ( MF_MSG_TRUNCATED_LRU, "already truncated LRU page" ) \ 268 + EM ( MF_MSG_BUDDY, "free buddy page" ) \ 269 + EM ( MF_MSG_BUDDY_2ND, "free buddy page (2nd try)" ) \ 270 + EMe ( MF_MSG_UNKNOWN, "unknown page" ) 271 + 272 + /* 273 + * First 
define the enums in MF_ACTION_RESULT to be exported to userspace 274 + * via TRACE_DEFINE_ENUM(). 275 + */ 276 + #undef EM 277 + #undef EMe 278 + #define EM(a, b) TRACE_DEFINE_ENUM(a); 279 + #define EMe(a, b) TRACE_DEFINE_ENUM(a); 280 + 281 + MF_ACTION_RESULT 282 + MF_PAGE_TYPE 283 + 284 + /* 285 + * Now redefine the EM() and EMe() macros to map the enums to the strings 286 + * that will be printed in the output. 287 + */ 288 + #undef EM 289 + #undef EMe 290 + #define EM(a, b) { a, b }, 291 + #define EMe(a, b) { a, b } 292 + 293 + TRACE_EVENT(memory_failure_event, 294 + TP_PROTO(unsigned long pfn, 295 + int type, 296 + int result), 297 + 298 + TP_ARGS(pfn, type, result), 299 + 300 + TP_STRUCT__entry( 301 + __field(unsigned long, pfn) 302 + __field(int, type) 303 + __field(int, result) 304 + ), 305 + 306 + TP_fast_assign( 307 + __entry->pfn = pfn; 308 + __entry->type = type; 309 + __entry->result = result; 310 + ), 311 + 312 + TP_printk("pfn %#lx: recovery action for %s: %s", 313 + __entry->pfn, 314 + __print_symbolic(__entry->type, MF_PAGE_TYPE), 315 + __print_symbolic(__entry->result, MF_ACTION_RESULT) 316 + ) 317 + ); 318 + #endif /* CONFIG_MEMORY_FAILURE */ 236 319 #endif /* _TRACE_HW_EVENT_MC_H */ 237 320 238 321 /* This part must be outside protection */
+1
mm/Kconfig
··· 368 368 depends on ARCH_SUPPORTS_MEMORY_FAILURE 369 369 bool "Enable recovery from hardware memory errors" 370 370 select MEMORY_ISOLATION 371 + select RAS 371 372 help 372 373 Enables code to recover from some memory failures on systems 373 374 with MCA recovery. This allows a system to continue running
+3
mm/memory-failure.c
··· 57 57 #include <linux/mm_inline.h> 58 58 #include <linux/kfifo.h> 59 59 #include "internal.h" 60 + #include "ras/ras_event.h" 60 61 61 62 int sysctl_memory_failure_early_kill __read_mostly = 0; 62 63 ··· 856 855 static void action_result(unsigned long pfn, enum mf_action_page_type type, 857 856 enum mf_result result) 858 857 { 858 + trace_memory_failure_event(pfn, type, result); 859 + 859 860 pr_err("MCE %#lx: recovery action for %s: %s\n", 860 861 pfn, action_page_types[type], action_name[result]); 861 862 }