Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[S390] kernel: Add z/VM LGR detection

Currently the following mechanisms are available to move active
Linux on System z instances between machines:
* z/VM 6.2 SSI (Single System Image)
* Suspend/resume
In this patch the term LGR (Linux Guest Relocation) is used for moving
Linux instances. Because such an operation is critical, it should be
detectable from Linux. With this patch, the information about LGRs is
accessible for both a live system and a kernel dump.
To identify a guest, stsi and stfle data is used. A new function
lgr_info_log() compares the current data (lgr_info_cur) with the
last recorded one (lgr_info_last). In case the two data sets differ,
lgr_info_cur is logged to the "lgr" s390dbf.

The following trigger points call lgr_info_log():
* panic
* die
* kdump
* LGR timer
* PSW restart
* QDIO recovery
* resume

This patch also changes the s390dbf hex_ascii view: only printable ASCII
characters are shown now; every other byte is displayed as '.'.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

authored by

Michael Holzheu and committed by
Martin Schwidefsky
3ab121ab fde15c3a

+297 -31
+1
arch/s390/include/asm/debug.h
··· 131 131 132 132 void debug_set_level(debug_info_t* id, int new_level); 133 133 134 + void debug_set_critical(void); 134 135 void debug_stop_all(void); 135 136 136 137 static inline debug_entry_t*
+1
arch/s390/include/asm/ipl.h
··· 169 169 extern int diag308(unsigned long subcode, void *addr); 170 170 extern void diag308_reset(void); 171 171 extern void store_status(void); 172 + extern void lgr_info_log(void); 172 173 173 174 #endif /* _ASM_S390_IPL_H */
+34
arch/s390/include/asm/system.h
··· 7 7 #ifndef __ASM_SYSTEM_H 8 8 #define __ASM_SYSTEM_H 9 9 10 + #include <linux/preempt.h> 10 11 #include <linux/kernel.h> 11 12 #include <linux/errno.h> 13 + #include <linux/string.h> 12 14 #include <asm/types.h> 13 15 #include <asm/ptrace.h> 14 16 #include <asm/setup.h> ··· 248 246 return 0; 249 247 ptr = (unsigned char *) &S390_lowcore.stfle_fac_list + (nr >> 3); 250 248 return (*ptr & (0x80 >> (nr & 7))) != 0; 249 + } 250 + 251 + /** 252 + * stfle - Store facility list extended 253 + * @stfle_fac_list: array where facility list can be stored 254 + * @size: size of passed in array in double words 255 + */ 256 + static inline void stfle(u64 *stfle_fac_list, int size) 257 + { 258 + unsigned long nr; 259 + 260 + preempt_disable(); 261 + S390_lowcore.stfl_fac_list = 0; 262 + asm volatile( 263 + " .insn s,0xb2b10000,0(0)\n" /* stfl */ 264 + "0:\n" 265 + EX_TABLE(0b, 0b) 266 + : "=m" (S390_lowcore.stfl_fac_list)); 267 + nr = 4; /* bytes stored by stfl */ 268 + memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); 269 + if (S390_lowcore.stfl_fac_list & 0x01000000) { 270 + /* More facility bits available with stfle */ 271 + register unsigned long reg0 asm("0") = size - 1; 272 + 273 + asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */ 274 + : "+d" (reg0) 275 + : "a" (stfle_fac_list) 276 + : "memory", "cc"); 277 + nr = min_t(unsigned long, (reg0 + 1) * 8, size * 8); /* # bytes stored by stfle; clamped because reg0 reports the double words needed, which may exceed @size and would make the memset length below wrap */ 278 + } 279 + memset((char *) stfle_fac_list + nr, 0, size * 8 - nr); 280 + preempt_enable(); 251 281 } 252 282 253 283 static inline unsigned short stap(void)
+1 -1
arch/s390/kernel/Makefile
··· 23 23 obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o \ 24 24 processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o \ 25 25 debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o \ 26 - sysinfo.o jump_label.o 26 + sysinfo.o jump_label.o lgr.o 27 27 28 28 obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) 29 29 obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
+31 -9
arch/s390/kernel/debug.c
··· 2 2 * arch/s390/kernel/debug.c 3 3 * S/390 debug facility 4 4 * 5 - * Copyright (C) 1999, 2000 IBM Deutschland Entwicklung GmbH, 6 - * IBM Corporation 5 + * Copyright IBM Corp. 1999, 2012 6 + * 7 7 * Author(s): Michael Holzheu (holzheu@de.ibm.com), 8 8 * Holger Smolinski (Holger.Smolinski@de.ibm.com) 9 9 * ··· 167 167 static DEFINE_MUTEX(debug_mutex); 168 168 169 169 static int initialized; 170 + static int debug_critical; 170 171 171 172 static const struct file_operations debug_file_ops = { 172 173 .owner = THIS_MODULE, ··· 933 932 } 934 933 935 934 935 + void debug_set_critical(void) 936 + { 937 + debug_critical = 1; 938 + } 939 + 936 940 /* 937 941 * debug_event_common: 938 942 * - write debug entry with given size ··· 951 945 952 946 if (!debug_active || !id->areas) 953 947 return NULL; 954 - spin_lock_irqsave(&id->lock, flags); 948 + if (debug_critical) { 949 + if (!spin_trylock_irqsave(&id->lock, flags)) 950 + return NULL; 951 + } else 952 + spin_lock_irqsave(&id->lock, flags); 955 953 active = get_active_entry(id); 956 954 memset(DEBUG_DATA(active), 0, id->buf_size); 957 955 memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); ··· 978 968 979 969 if (!debug_active || !id->areas) 980 970 return NULL; 981 - spin_lock_irqsave(&id->lock, flags); 971 + if (debug_critical) { 972 + if (!spin_trylock_irqsave(&id->lock, flags)) 973 + return NULL; 974 + } else 975 + spin_lock_irqsave(&id->lock, flags); 982 976 active = get_active_entry(id); 983 977 memset(DEBUG_DATA(active), 0, id->buf_size); 984 978 memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); ··· 1027 1013 return NULL; 1028 1014 numargs=debug_count_numargs(string); 1029 1015 1030 - spin_lock_irqsave(&id->lock, flags); 1016 + if (debug_critical) { 1017 + if (!spin_trylock_irqsave(&id->lock, flags)) 1018 + return NULL; 1019 + } else 1020 + spin_lock_irqsave(&id->lock, flags); 1031 1021 active = get_active_entry(id); 1032 1022 curr_event=(debug_sprintf_entry_t *) DEBUG_DATA(active); 1033 1023 
va_start(ap,string); ··· 1065 1047 1066 1048 numargs=debug_count_numargs(string); 1067 1049 1068 - spin_lock_irqsave(&id->lock, flags); 1050 + if (debug_critical) { 1051 + if (!spin_trylock_irqsave(&id->lock, flags)) 1052 + return NULL; 1053 + } else 1054 + spin_lock_irqsave(&id->lock, flags); 1069 1055 active = get_active_entry(id); 1070 1056 curr_event=(debug_sprintf_entry_t *)DEBUG_DATA(active); 1071 1057 va_start(ap,string); ··· 1450 1428 rc += sprintf(out_buf + rc, "| "); 1451 1429 for (i = 0; i < id->buf_size; i++) { 1452 1430 unsigned char c = in_buf[i]; 1453 - if (!isprint(c)) 1454 - rc += sprintf(out_buf + rc, "."); 1455 - else 1431 + if (isascii(c) && isprint(c)) 1456 1432 rc += sprintf(out_buf + rc, "%c", c); 1433 + else 1434 + rc += sprintf(out_buf + rc, "."); 1457 1435 } 1458 1436 rc += sprintf(out_buf + rc, "\n"); 1459 1437 return rc;
+3 -19
arch/s390/kernel/early.c
··· 29 29 #include <asm/sysinfo.h> 30 30 #include <asm/cpcmd.h> 31 31 #include <asm/sclp.h> 32 + #include <asm/system.h> 32 33 #include "entry.h" 33 34 34 35 /* ··· 263 262 264 263 static noinline __init void setup_facility_list(void) 265 264 { 266 - unsigned long nr; 267 - 268 - S390_lowcore.stfl_fac_list = 0; 269 - asm volatile( 270 - " .insn s,0xb2b10000,0(0)\n" /* stfl */ 271 - "0:\n" 272 - EX_TABLE(0b,0b) : "=m" (S390_lowcore.stfl_fac_list)); 273 - memcpy(&S390_lowcore.stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); 274 - nr = 4; /* # bytes stored by stfl */ 275 - if (test_facility(7)) { 276 - /* More facility bits available with stfle */ 277 - register unsigned long reg0 asm("0") = MAX_FACILITY_BIT/64 - 1; 278 - asm volatile(".insn s,0xb2b00000,%0" /* stfle */ 279 - : "=m" (S390_lowcore.stfle_fac_list), "+d" (reg0) 280 - : : "cc"); 281 - nr = (reg0 + 1) * 8; /* # bytes stored by stfle */ 282 - } 283 - memset((char *) S390_lowcore.stfle_fac_list + nr, 0, 284 - MAX_FACILITY_BIT/8 - nr); 265 + stfle(S390_lowcore.stfle_fac_list, 266 + ARRAY_SIZE(S390_lowcore.stfle_fac_list)); 285 267 } 286 268 287 269 static noinline __init void setup_hpage(void)
+6
arch/s390/kernel/ipl.c
··· 17 17 #include <linux/fs.h> 18 18 #include <linux/gfp.h> 19 19 #include <linux/crash_dump.h> 20 + #include <linux/debug_locks.h> 20 21 #include <asm/ipl.h> 21 22 #include <asm/smp.h> 22 23 #include <asm/setup.h> ··· 27 26 #include <asm/reset.h> 28 27 #include <asm/sclp.h> 29 28 #include <asm/checksum.h> 29 + #include <asm/debug.h> 30 30 #include "entry.h" 31 31 32 32 #define IPL_PARM_BLOCK_VERSION 0 ··· 1694 1692 1695 1693 static void do_panic(void) 1696 1694 { 1695 + lgr_info_log(); 1697 1696 on_panic_trigger.action->fn(&on_panic_trigger); 1698 1697 stop_run(&on_panic_trigger); 1699 1698 } ··· 1732 1729 1733 1730 void do_restart(void) 1734 1731 { 1732 + tracing_off(); 1733 + debug_locks_off(); 1734 + lgr_info_log(); 1735 1735 smp_call_online_cpu(__do_restart, NULL); 1736 1736 } 1737 1737
+200
arch/s390/kernel/lgr.c
··· 1 + /* 2 + * Linux Guest Relocation (LGR) detection 3 + * 4 + * Copyright IBM Corp. 2012 5 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> 6 + */ 7 + 8 + #include <linux/module.h> 9 + #include <linux/timer.h> 10 + #include <linux/slab.h> 11 + #include <asm/sysinfo.h> 12 + #include <asm/ebcdic.h> 13 + #include <asm/system.h> 14 + #include <asm/debug.h> 15 + #include <asm/ipl.h> 16 + 17 + #define LGR_TIMER_INTERVAL_SECS (30 * 60) 18 + #define VM_LEVEL_MAX 2 /* Maximum is 8, but we only record two levels */ 19 + 20 + /* 21 + * LGR info: Contains stfle and stsi data 22 + */ 23 + struct lgr_info { 24 + /* Bit field with facility information: 4 DWORDs are stored */ 25 + u64 stfle_fac_list[4]; 26 + /* Level of system (1 = CEC, 2 = LPAR, 3 = z/VM */ 27 + u32 level; 28 + /* Level 1: CEC info (stsi 1.1.1) */ 29 + char manufacturer[16]; 30 + char type[4]; 31 + char sequence[16]; 32 + char plant[4]; 33 + char model[16]; 34 + /* Level 2: LPAR info (stsi 2.2.2) */ 35 + u16 lpar_number; 36 + char name[8]; 37 + /* Level 3: VM info (stsi 3.2.2) */ 38 + u8 vm_count; 39 + struct { 40 + char name[8]; 41 + char cpi[16]; 42 + } vm[VM_LEVEL_MAX]; 43 + } __packed __aligned(8); 44 + 45 + /* 46 + * LGR globals 47 + */ 48 + static void *lgr_page; 49 + static struct lgr_info lgr_info_last; 50 + static struct lgr_info lgr_info_cur; 51 + static struct debug_info *lgr_dbf; 52 + 53 + /* 54 + * Return number of valid stsi levels 55 + */ 56 + static inline int stsi_0(void) 57 + { 58 + int rc = stsi(NULL, 0, 0, 0); 59 + 60 + return rc == -ENOSYS ? 
rc : (((unsigned int) rc) >> 28); 61 + } 62 + 63 + /* 64 + * Copy buffer and then convert it to ASCII 65 + */ 66 + static void cpascii(char *dst, char *src, int size) 67 + { 68 + memcpy(dst, src, size); 69 + EBCASC(dst, size); 70 + } 71 + 72 + /* 73 + * Fill LGR info with 1.1.1 stsi data 74 + */ 75 + static void lgr_stsi_1_1_1(struct lgr_info *lgr_info) 76 + { 77 + struct sysinfo_1_1_1 *si = lgr_page; 78 + 79 + if (stsi(si, 1, 1, 1) == -ENOSYS) 80 + return; 81 + cpascii(lgr_info->manufacturer, si->manufacturer, 82 + sizeof(si->manufacturer)); 83 + cpascii(lgr_info->type, si->type, sizeof(si->type)); 84 + cpascii(lgr_info->model, si->model, sizeof(si->model)); 85 + cpascii(lgr_info->sequence, si->sequence, sizeof(si->sequence)); 86 + cpascii(lgr_info->plant, si->plant, sizeof(si->plant)); 87 + } 88 + 89 + /* 90 + * Fill LGR info with 2.2.2 stsi data 91 + */ 92 + static void lgr_stsi_2_2_2(struct lgr_info *lgr_info) 93 + { 94 + struct sysinfo_2_2_2 *si = lgr_page; 95 + 96 + if (stsi(si, 2, 2, 2) == -ENOSYS) 97 + return; 98 + cpascii(lgr_info->name, si->name, sizeof(si->name)); 99 + memcpy(&lgr_info->lpar_number, &si->lpar_number, 100 + sizeof(lgr_info->lpar_number)); 101 + } 102 + 103 + /* 104 + * Fill LGR info with 3.2.2 stsi data 105 + */ 106 + static void lgr_stsi_3_2_2(struct lgr_info *lgr_info) 107 + { 108 + struct sysinfo_3_2_2 *si = lgr_page; 109 + int i; 110 + 111 + if (stsi(si, 3, 2, 2) == -ENOSYS) 112 + return; 113 + for (i = 0; i < min_t(u8, si->count, VM_LEVEL_MAX); i++) { 114 + cpascii(lgr_info->vm[i].name, si->vm[i].name, 115 + sizeof(si->vm[i].name)); 116 + cpascii(lgr_info->vm[i].cpi, si->vm[i].cpi, 117 + sizeof(si->vm[i].cpi)); 118 + } 119 + lgr_info->vm_count = si->count; 120 + } 121 + 122 + /* 123 + * Fill LGR info with current data 124 + */ 125 + static void lgr_info_get(struct lgr_info *lgr_info) 126 + { 127 + memset(lgr_info, 0, sizeof(*lgr_info)); 128 + stfle(lgr_info->stfle_fac_list, ARRAY_SIZE(lgr_info->stfle_fac_list)); 129 + lgr_info->level 
= stsi_0(); 130 + if (lgr_info->level == -ENOSYS) 131 + return; 132 + if (lgr_info->level >= 1) 133 + lgr_stsi_1_1_1(lgr_info); 134 + if (lgr_info->level >= 2) 135 + lgr_stsi_2_2_2(lgr_info); 136 + if (lgr_info->level >= 3) 137 + lgr_stsi_3_2_2(lgr_info); 138 + } 139 + 140 + /* 141 + * Check if LGR info has changed and if yes log new LGR info to s390dbf 142 + */ 143 + void lgr_info_log(void) 144 + { 145 + static DEFINE_SPINLOCK(lgr_info_lock); 146 + unsigned long flags; 147 + 148 + if (!spin_trylock_irqsave(&lgr_info_lock, flags)) 149 + return; 150 + lgr_info_get(&lgr_info_cur); 151 + if (memcmp(&lgr_info_last, &lgr_info_cur, sizeof(lgr_info_cur)) != 0) { 152 + debug_event(lgr_dbf, 1, &lgr_info_cur, sizeof(lgr_info_cur)); 153 + lgr_info_last = lgr_info_cur; 154 + } 155 + spin_unlock_irqrestore(&lgr_info_lock, flags); 156 + } 157 + EXPORT_SYMBOL_GPL(lgr_info_log); 158 + 159 + static void lgr_timer_set(void); 160 + 161 + /* 162 + * LGR timer callback 163 + */ 164 + static void lgr_timer_fn(unsigned long ignored) 165 + { 166 + lgr_info_log(); 167 + lgr_timer_set(); 168 + } 169 + 170 + static struct timer_list lgr_timer = 171 + TIMER_DEFERRED_INITIALIZER(lgr_timer_fn, 0, 0); 172 + 173 + /* 174 + * Setup next LGR timer 175 + */ 176 + static void lgr_timer_set(void) 177 + { 178 + mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ); 179 + } 180 + 181 + /* 182 + * Initialize LGR: Add s390dbf, write initial lgr_info and setup timer 183 + */ 184 + static int __init lgr_init(void) 185 + { 186 + lgr_page = (void *) __get_free_pages(GFP_KERNEL, 0); 187 + if (!lgr_page) 188 + return -ENOMEM; 189 + lgr_dbf = debug_register("lgr", 1, 1, sizeof(struct lgr_info)); 190 + if (!lgr_dbf) { 191 + free_page((unsigned long) lgr_page); 192 + return -ENOMEM; 193 + } 194 + debug_register_view(lgr_dbf, &debug_hex_ascii_view); 195 + lgr_info_get(&lgr_info_last); 196 + debug_event(lgr_dbf, 1, &lgr_info_last, sizeof(lgr_info_last)); 197 + lgr_timer_set(); 198 + return 0; 199 + } 200 + 
module_init(lgr_init);
+7 -2
arch/s390/kernel/machine_kexec.c
··· 14 14 #include <linux/delay.h> 15 15 #include <linux/reboot.h> 16 16 #include <linux/ftrace.h> 17 + #include <linux/debug_locks.h> 17 18 #include <asm/cio.h> 18 19 #include <asm/setup.h> 19 20 #include <asm/pgtable.h> ··· 210 209 struct kimage *image = data; 211 210 212 211 pfault_fini(); 213 - if (image->type == KEXEC_TYPE_CRASH) 212 + tracing_off(); 213 + debug_locks_off(); 214 + if (image->type == KEXEC_TYPE_CRASH) { 215 + lgr_info_log(); 214 216 s390_reset_system(__do_machine_kdump, data); 215 - else 217 + } else { 216 218 s390_reset_system(__do_machine_kexec, data); 219 + } 217 220 disabled_wait((unsigned long) __builtin_return_address(0)); 218 221 } 219 222
+2
arch/s390/kernel/smp.c
··· 40 40 #include <asm/lowcore.h> 41 41 #include <asm/sclp.h> 42 42 #include <asm/vdso.h> 43 + #include <asm/debug.h> 43 44 #include "entry.h" 44 45 45 46 enum { ··· 407 406 __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); 408 407 trace_hardirqs_off(); 409 408 409 + debug_set_critical(); 410 410 cpumask_copy(&cpumask, cpu_online_mask); 411 411 cpumask_clear_cpu(smp_processor_id(), &cpumask); 412 412
+3
arch/s390/kernel/swsusp_asm64.S
··· 257 257 lghi %r2,0 258 258 brasl %r14,arch_set_page_states 259 259 260 + /* Log potential guest relocation */ 261 + brasl %r14,lgr_info_log 262 + 260 263 /* Reinitialize the channel subsystem */ 261 264 brasl %r14,channel_subsystem_reinit 262 265
+2
arch/s390/kernel/traps.c
··· 41 41 #include <asm/cpcmd.h> 42 42 #include <asm/lowcore.h> 43 43 #include <asm/debug.h> 44 + #include <asm/ipl.h> 44 45 #include "entry.h" 45 46 46 47 void (*pgm_check_table[128])(struct pt_regs *regs); ··· 240 239 static int die_counter; 241 240 242 241 oops_enter(); 242 + lgr_info_log(); 243 243 debug_stop_all(); 244 244 console_verbose(); 245 245 spin_lock_irq(&die_lock);
+6
drivers/s390/cio/qdio_main.c
··· 18 18 #include <linux/atomic.h> 19 19 #include <asm/debug.h> 20 20 #include <asm/qdio.h> 21 + #include <asm/ipl.h> 21 22 22 23 #include "cio.h" 23 24 #include "css.h" ··· 1094 1093 q->nr, q->first_to_kick, count, irq_ptr->int_parm); 1095 1094 no_handler: 1096 1095 qdio_set_state(irq_ptr, QDIO_IRQ_STATE_STOPPED); 1096 + /* 1097 + * In case of z/VM LGR (Live Guest Migration) QDIO recovery will happen. 1098 + * Therefore we call the LGR detection function here. 1099 + */ 1100 + lgr_info_log(); 1097 1101 } 1098 1102 1099 1103 static void qdio_establish_handle_irq(struct ccw_device *cdev, int cstat,