Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at v4.13
/*
 * store hypervisor information instruction emulation functions.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 * Copyright IBM Corp. 2016
 * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com>
 */
#include <linux/kvm_host.h>
#include <linux/errno.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/ratelimit.h>

#include <asm/kvm_host.h>
#include <asm/asm-offsets.h>
#include <asm/sclp.h>
#include <asm/diag.h>
#include <asm/sysinfo.h>
#include <asm/ebcdic.h>

#include "kvm-s390.h"
#include "gaccess.h"
#include "trace.h"

#define DED_WEIGHT 0xffff
/*
 * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string
 * as they are justified with spaces.
 */
#define CP  0xc3d7404040404040UL
#define IFL 0xc9c6d34040404040UL

enum hdr_flags {
        HDR_NOT_LPAR   = 0x10,
        HDR_STACK_INCM = 0x20,
        HDR_STSI_UNAV  = 0x40,
        HDR_PERF_UNAV  = 0x80,
};

enum mac_validity {
        MAC_NAME_VLD = 0x20,
        MAC_ID_VLD   = 0x40,
        MAC_CNT_VLD  = 0x80,
};

enum par_flag {
        PAR_MT_EN = 0x80,
};

enum par_validity {
        PAR_GRP_VLD  = 0x08,
        PAR_ID_VLD   = 0x10,
        PAR_ABS_VLD  = 0x20,
        PAR_WGHT_VLD = 0x40,
        PAR_PCNT_VLD = 0x80,
};

struct hdr_sctn {
        u8 infhflg1;
        u8 infhflg2; /* reserved */
        u8 infhval1; /* reserved */
        u8 infhval2; /* reserved */
        u8 reserved[3];
        u8 infhygct;
        u16 infhtotl;
        u16 infhdln;
        u16 infmoff;
        u16 infmlen;
        u16 infpoff;
        u16 infplen;
        u16 infhoff1;
        u16 infhlen1;
        u16 infgoff1;
        u16 infglen1;
        u16 infhoff2;
        u16 infhlen2;
        u16 infgoff2;
        u16 infglen2;
        u16 infhoff3;
        u16 infhlen3;
        u16 infgoff3;
        u16 infglen3;
        u8 reserved2[4];
} __packed;

struct mac_sctn {
        u8 infmflg1; /* reserved */
        u8 infmflg2; /* reserved */
        u8 infmval1;
        u8 infmval2; /* reserved */
        u16 infmscps;
        u16 infmdcps;
        u16 infmsifl;
        u16 infmdifl;
        char infmname[8];
        char infmtype[4];
        char infmmanu[16];
        char infmseq[16];
        char infmpman[4];
        u8 reserved[4];
} __packed;

struct par_sctn {
        u8 infpflg1;
        u8 infpflg2; /* reserved */
        u8 infpval1;
        u8 infpval2; /* reserved */
        u16 infppnum;
        u16 infpscps;
        u16 infpdcps;
        u16 infpsifl;
        u16 infpdifl;
        u16 reserved;
        char infppnam[8];
        u32 infpwbcp;
        u32 infpabcp;
        u32 infpwbif;
        u32 infpabif;
        char infplgnm[8];
        u32 infplgcp;
        u32 infplgif;
} __packed;

struct sthyi_sctns {
        struct hdr_sctn hdr;
        struct mac_sctn mac;
        struct par_sctn par;
} __packed;

struct cpu_inf {
        u64 lpar_cap;
        u64 lpar_grp_cap;
        u64 lpar_weight;
        u64 all_weight;
        int cpu_num_ded;
        int cpu_num_shd;
};

struct lpar_cpu_inf {
        struct cpu_inf cp;
        struct cpu_inf ifl;
};

static inline u64 cpu_id(u8 ctidx, void *diag224_buf)
{
        return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN));
}

/*
 * Scales the cpu capping from the lpar range to the one expected in
 * sthyi data.
 *
 * diag204 reports a cap in hundredths of processor units.
 * z/VM's range for one core is 0 - 0x10000.
 */
static u32 scale_cap(u32 in)
{
        return (0x10000 * in) / 100;
}
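
/*
 * For example, given the ranges above: a diag204 cap of 100 (one full
 * core) scales to 0x10000, and a cap of 150 scales to 0x18000.
 */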

static void fill_hdr(struct sthyi_sctns *sctns)
{
        sctns->hdr.infhdln = sizeof(sctns->hdr);
        sctns->hdr.infmoff = sizeof(sctns->hdr);
        sctns->hdr.infmlen = sizeof(sctns->mac);
        sctns->hdr.infplen = sizeof(sctns->par);
        sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen;
        sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen;
}

static void fill_stsi_mac(struct sthyi_sctns *sctns,
                          struct sysinfo_1_1_1 *sysinfo)
{
        if (stsi(sysinfo, 1, 1, 1))
                return;

        sclp_ocf_cpc_name_copy(sctns->mac.infmname);

        memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype));
        memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu));
        memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman));
        memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq));

        sctns->mac.infmval1 |= MAC_ID_VLD | MAC_NAME_VLD;
}

static void fill_stsi_par(struct sthyi_sctns *sctns,
                          struct sysinfo_2_2_2 *sysinfo)
{
        if (stsi(sysinfo, 2, 2, 2))
                return;

        sctns->par.infppnum = sysinfo->lpar_number;
        memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam));

        sctns->par.infpval1 |= PAR_ID_VLD;
}

static void fill_stsi(struct sthyi_sctns *sctns)
{
        void *sysinfo;

        /* Errors are handled through the validity bits in the response. */
        sysinfo = (void *)__get_free_page(GFP_KERNEL);
        if (!sysinfo)
                return;

        fill_stsi_mac(sctns, sysinfo);
        fill_stsi_par(sctns, sysinfo);

        free_pages((unsigned long)sysinfo, 0);
}

static void fill_diag_mac(struct sthyi_sctns *sctns,
                          struct diag204_x_phys_block *block,
                          void *diag224_buf)
{
        int i;

        for (i = 0; i < block->hdr.cpus; i++) {
                switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
                case CP:
                        if (block->cpus[i].weight == DED_WEIGHT)
                                sctns->mac.infmdcps++;
                        else
                                sctns->mac.infmscps++;
                        break;
                case IFL:
                        if (block->cpus[i].weight == DED_WEIGHT)
                                sctns->mac.infmdifl++;
                        else
                                sctns->mac.infmsifl++;
                        break;
                }
        }
        sctns->mac.infmval1 |= MAC_CNT_VLD;
}
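
/*
 * The partition blocks in the diag204 data are variable length: a
 * header followed by one entry per reported cpu. Walking them therefore
 * means advancing past however many cpu entries the current block
 * contains, which is what the helper below does.
 */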

/* Returns a pointer to the next partition block. */
static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf,
                                                 bool this_lpar,
                                                 void *diag224_buf,
                                                 struct diag204_x_part_block *block)
{
        int i, capped = 0, weight_cp = 0, weight_ifl = 0;
        struct cpu_inf *cpu_inf;

        for (i = 0; i < block->hdr.rcpus; i++) {
                if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE))
                        continue;

                switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
                case CP:
                        cpu_inf = &part_inf->cp;
                        if (block->cpus[i].cur_weight < DED_WEIGHT)
                                weight_cp |= block->cpus[i].cur_weight;
                        break;
                case IFL:
                        cpu_inf = &part_inf->ifl;
                        if (block->cpus[i].cur_weight < DED_WEIGHT)
                                weight_ifl |= block->cpus[i].cur_weight;
                        break;
                default:
                        continue;
                }

                if (!this_lpar)
                        continue;

                capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED;
                cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap;
                cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap;

                if (block->cpus[i].weight == DED_WEIGHT)
                        cpu_inf->cpu_num_ded += 1;
                else
                        cpu_inf->cpu_num_shd += 1;
        }

        if (this_lpar && capped) {
                part_inf->cp.lpar_weight = weight_cp;
                part_inf->ifl.lpar_weight = weight_ifl;
        }
        part_inf->cp.all_weight += weight_cp;
        part_inf->ifl.all_weight += weight_ifl;
        return (struct diag204_x_part_block *)&block->cpus[i];
}

static void fill_diag(struct sthyi_sctns *sctns)
{
        int i, r, pages;
        bool this_lpar;
        void *diag204_buf;
        void *diag224_buf = NULL;
        struct diag204_x_info_blk_hdr *ti_hdr;
        struct diag204_x_part_block *part_block;
        struct diag204_x_phys_block *phys_block;
        struct lpar_cpu_inf lpar_inf = {};

        /* Errors are handled through the validity bits in the response. */
        pages = diag204((unsigned long)DIAG204_SUBC_RSI |
                        (unsigned long)DIAG204_INFO_EXT, 0, NULL);
        if (pages <= 0)
                return;

        diag204_buf = vmalloc(PAGE_SIZE * pages);
        if (!diag204_buf)
                return;

        r = diag204((unsigned long)DIAG204_SUBC_STIB7 |
                    (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf);
        if (r < 0)
                goto out;

        diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
        if (!diag224_buf || diag224(diag224_buf))
                goto out;

        ti_hdr = diag204_buf;
        part_block = diag204_buf + sizeof(*ti_hdr);

        for (i = 0; i < ti_hdr->npar; i++) {
                /*
                 * For the calling lpar we also need to get the cpu
                 * caps and weights. The time information block header
                 * specifies the offset to the partition block of the
                 * caller lpar, so we know when we process its data.
                 */
                this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part;
                part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf,
                                          part_block);
        }
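
        /*
         * After the loop part_block points just past the last partition
         * block, i.e. at the physical block. The caller's own partition
         * block is re-derived from the header offset to check whether
         * multithreading is enabled.
         */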
        phys_block = (struct diag204_x_phys_block *)part_block;
        part_block = diag204_buf + ti_hdr->this_part;
        if (part_block->hdr.mtid)
                sctns->par.infpflg1 = PAR_MT_EN;

        sctns->par.infpval1 |= PAR_GRP_VLD;
        sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap);
        sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap);
        memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name,
               sizeof(sctns->par.infplgnm));

        sctns->par.infpscps = lpar_inf.cp.cpu_num_shd;
        sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded;
        sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd;
        sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded;
        sctns->par.infpval1 |= PAR_PCNT_VLD;

        sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap);
        sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap);
        sctns->par.infpval1 |= PAR_ABS_VLD;

        /*
         * Everything below needs global performance data to be
         * meaningful.
         */
        if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) {
                sctns->hdr.infhflg1 |= HDR_PERF_UNAV;
                goto out;
        }

        fill_diag_mac(sctns, phys_block, diag224_buf);

        if (lpar_inf.cp.lpar_weight) {
                sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 *
                        lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight;
        }

        if (lpar_inf.ifl.lpar_weight) {
                sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 *
                        lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight;
        }
        sctns->par.infpval1 |= PAR_WGHT_VLD;

out:
        free_page((unsigned long)diag224_buf);
        vfree(diag204_buf);
}
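
/*
 * Execute the real STHYI instruction (opcode 0xb256) as used below: the
 * function code goes in general register 0 (always 0 here) and the
 * address of the 4k response buffer in general register 2. The register
 * following the address register receives a return code, hence the
 * register 3 clobber; ipm/srl extract the resulting condition code.
 */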
static int sthyi(u64 vaddr)
{
        register u64 code asm("0") = 0;
        register u64 addr asm("2") = vaddr;
        int cc;

        asm volatile(
                ".insn rre,0xB2560000,%[code],%[addr]\n"
                "ipm %[cc]\n"
                "srl %[cc],28\n"
                : [cc] "=d" (cc)
                : [code] "d" (code), [addr] "a" (addr)
                : "3", "memory", "cc");
        return cc;
}

int handle_sthyi(struct kvm_vcpu *vcpu)
{
        int reg1, reg2, r = 0;
        u64 code, addr, cc = 0;
        struct sthyi_sctns *sctns = NULL;

        if (!test_kvm_facility(vcpu->kvm, 74))
                return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);

        /*
         * STHYI requires extensive locking in the higher hypervisors
         * and is very expensive in both computation and memory.
         * Therefore we ratelimit the executions per VM.
         */
        if (!__ratelimit(&vcpu->kvm->arch.sthyi_limit)) {
                kvm_s390_retry_instr(vcpu);
                return 0;
        }

        kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
        code = vcpu->run->s.regs.gprs[reg1];
        addr = vcpu->run->s.regs.gprs[reg2];

        vcpu->stat.instruction_sthyi++;
        VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr);
        trace_kvm_s390_handle_sthyi(vcpu, code, addr);

        if (reg1 == reg2 || reg1 & 1 || reg2 & 1)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

        if (code & 0xffff) {
                cc = 3;
                goto out;
        }

        if (addr & ~PAGE_MASK)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

        /*
         * If the page has not yet been faulted in, we want to do that
         * now and not after all the expensive calculations.
         */
        r = write_guest(vcpu, addr, reg2, &cc, 1);
        if (r)
                return kvm_s390_inject_prog_cond(vcpu, r);

        sctns = (void *)get_zeroed_page(GFP_KERNEL);
        if (!sctns)
                return -ENOMEM;

        /*
         * If we are a guest, we don't want to emulate an emulated
         * instruction. We ask the hypervisor to provide the data.
         */
        if (test_facility(74)) {
                cc = sthyi((u64)sctns);
                goto out;
        }

        fill_hdr(sctns);
        fill_stsi(sctns);
        fill_diag(sctns);

out:
        if (!cc) {
                r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
                if (r) {
                        free_page((unsigned long)sctns);
                        return kvm_s390_inject_prog_cond(vcpu, r);
                }
        }

        free_page((unsigned long)sctns);
        vcpu->run->s.regs.gprs[reg2 + 1] = cc ? 4 : 0;
        kvm_s390_set_psw_cc(vcpu, cc);
        return r;
}
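
/*
 * Guest-visible behaviour of the emulation above: the guest issues STHYI
 * with a function code in an even register and a 4k-aligned response
 * buffer address in a second, distinct even register. For function code 0
 * the response page is written to the guest buffer, the register following
 * the address register is set to 0 and the condition code is 0; an unknown
 * function code yields return code 4 and condition code 3; malformed
 * register or address operands are rejected with a specification exception.
 */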