Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ocxl: Driver code for 'generic' opencapi devices

Add an ocxl driver to handle generic opencapi devices. Of course, it's
not meant to be the only opencapi driver; any device is free to
implement its own. But if a host application only needs basic services
like attaching to an opencapi adapter, having translation faults handled,
or allocating AFU interrupts, it should suffice.

The AFU config space must follow the opencapi specification and use
the expected vendor/device ID to be seen by the generic driver.

The driver exposes the device AFUs as a char device in /dev/ocxl/

Note that the driver currently doesn't handle memory attached to the
opencapi device.

Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

authored by

Frederic Barrat and committed by
Michael Ellerman
5ef3166e 2cb3d64b

+3043
+712
drivers/misc/ocxl/config.c
// SPDX-License-Identifier: GPL-2.0+
// Copyright 2017 IBM Corp.
#include <linux/pci.h>
#include <asm/pnv-ocxl.h>
#include <misc/ocxl-config.h>
#include "ocxl_internal.h"

/* Extract one bit / an inclusive bit range [s, e] from a register value */
#define EXTRACT_BIT(val, bit) (!!(val & BIT(bit)))
#define EXTRACT_BITS(val, s, e) ((val & GENMASK(e, s)) >> s)

#define OCXL_DVSEC_AFU_IDX_MASK		GENMASK(5, 0)
#define OCXL_DVSEC_ACTAG_MASK		GENMASK(11, 0)
#define OCXL_DVSEC_PASID_MASK		GENMASK(19, 0)
#define OCXL_DVSEC_PASID_LOG_MASK	GENMASK(4, 0)

/* Offsets of the fields within the AFU descriptor template */
#define OCXL_DVSEC_TEMPL_VERSION	0x0
#define OCXL_DVSEC_TEMPL_NAME		0x4
#define OCXL_DVSEC_TEMPL_AFU_VERSION	0x1C
#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL	0x20
#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ	0x28
#define OCXL_DVSEC_TEMPL_MMIO_PP	0x30
#define OCXL_DVSEC_TEMPL_MMIO_PP_SZ	0x38
#define OCXL_DVSEC_TEMPL_MEM_SZ		0x3C
#define OCXL_DVSEC_TEMPL_WWID		0x40

#define OCXL_MAX_AFU_PER_FUNCTION 64
#define OCXL_TEMPL_LEN		0x58
#define OCXL_TEMPL_NAME_LEN	24
#define OCXL_CFG_TIMEOUT	3	/* seconds, for config-space polling loops */

/*
 * Walk the extended capability list looking for an IBM DVSEC with the
 * given DVSEC id. Returns the config-space position, or 0 if not found.
 */
static int find_dvsec(struct pci_dev *dev, int dvsec_id)
{
	int vsec = 0;
	u16 vendor, id;

	while ((vsec = pci_find_next_ext_capability(dev, vsec,
						    OCXL_EXT_CAP_ID_DVSEC))) {
		pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
				&vendor);
		pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
		if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
			return vsec;
	}
	return 0;
}

/*
 * Like find_dvsec(), but for the AFU control DVSEC: there is one per
 * AFU, so also match on the AFU index stored in the DVSEC.
 */
static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
{
	int vsec = 0;
	u16 vendor, id;
	u8 idx;

	while ((vsec = pci_find_next_ext_capability(dev, vsec,
						    OCXL_EXT_CAP_ID_DVSEC))) {
		pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
				&vendor);
		pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);

		if (vendor == PCI_VENDOR_ID_IBM &&
			id == OCXL_DVSEC_AFU_CTRL_ID) {
			pci_read_config_byte(dev,
					vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
					&idx);
			if (idx == afu_idx)
				return vsec;
		}
	}
	return 0;
}

/*
 * Read the max PASID width from the PCIe PASID capability. A missing
 * capability is not an error (max_pasid_log is set to -1), but then the
 * function must not define any AFU (checked in validate_function()).
 */
static int read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn)
{
	u16 val;
	int pos;

	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_PASID);
	if (!pos) {
		/*
		 * PASID capability is not mandatory, but there
		 * shouldn't be any AFU
		 */
		dev_dbg(&dev->dev, "Function doesn't require any PASID\n");
		fn->max_pasid_log = -1;
		goto out;
	}
	pci_read_config_word(dev, pos + PCI_PASID_CAP, &val);
	fn->max_pasid_log = EXTRACT_BITS(val, 8, 12);

out:
	dev_dbg(&dev->dev, "PASID capability:\n");
	dev_dbg(&dev->dev, "  Max PASID log = %d\n", fn->max_pasid_log);
	return 0;
}

/*
 * Locate the Transaction Layer DVSEC. It is mandatory on function 0 and
 * forbidden elsewhere.
 */
static int read_dvsec_tl(struct pci_dev *dev, struct ocxl_fn_config *fn)
{
	int pos;

	pos = find_dvsec(dev, OCXL_DVSEC_TL_ID);
	if (!pos && PCI_FUNC(dev->devfn) == 0) {
		dev_err(&dev->dev, "Can't find TL DVSEC\n");
		return -ENODEV;
	}
	if (pos && PCI_FUNC(dev->devfn) != 0) {
		dev_err(&dev->dev, "TL DVSEC is only allowed on function 0\n");
		return -ENODEV;
	}
	fn->dvsec_tl_pos = pos;
	return 0;
}

/*
 * Read the function DVSEC (mandatory) and extract the highest AFU index
 * implemented by the function; -1 means no AFU at all.
 */
static int read_dvsec_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
{
	int pos, afu_present;
	u32 val;

	pos = find_dvsec(dev, OCXL_DVSEC_FUNC_ID);
	if (!pos) {
		dev_err(&dev->dev, "Can't find function DVSEC\n");
		return -ENODEV;
	}
	fn->dvsec_function_pos = pos;

	pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
	afu_present = EXTRACT_BIT(val, 31);
	if (!afu_present) {
		fn->max_afu_index = -1;
		dev_dbg(&dev->dev, "Function doesn't define any AFU\n");
		goto out;
	}
	fn->max_afu_index = EXTRACT_BITS(val, 24, 29);

out:
	dev_dbg(&dev->dev, "Function DVSEC:\n");
	dev_dbg(&dev->dev, "  Max AFU index = %d\n", fn->max_afu_index);
	return 0;
}

/*
 * Locate the AFU information DVSEC, which is the indirect window used
 * by read_afu_info() to read the AFU descriptor templates. Only
 * required if the function defines at least one AFU.
 */
static int read_dvsec_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn)
{
	int pos;

	if (fn->max_afu_index < 0) {
		fn->dvsec_afu_info_pos = -1;
		return 0;
	}

	pos = find_dvsec(dev, OCXL_DVSEC_AFU_INFO_ID);
	if (!pos) {
		dev_err(&dev->dev, "Can't find AFU information DVSEC\n");
		return -ENODEV;
	}
	fn->dvsec_afu_info_pos = pos;
	return 0;
}

/* Dump (debug only) the logic block versions from the vendor DVSEC */
static int read_dvsec_vendor(struct pci_dev *dev)
{
	int pos;
	u32 cfg, tlx, dlx;

	/*
	 * vendor specific DVSEC is optional
	 *
	 * It's currently only used on function 0 to specify the
	 * version of some logic blocks. Some older images may not
	 * even have it so we ignore any errors
	 */
	if (PCI_FUNC(dev->devfn) != 0)
		return 0;

	pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID);
	if (!pos)
		return 0;

	pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_CFG_VERS, &cfg);
	pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_TLX_VERS, &tlx);
	pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_DLX_VERS, &dlx);

	dev_dbg(&dev->dev, "Vendor specific DVSEC:\n");
	dev_dbg(&dev->dev, "  CFG version = 0x%x\n", cfg);
	dev_dbg(&dev->dev, "  TLX version = 0x%x\n", tlx);
	dev_dbg(&dev->dev, "  DLX version = 0x%x\n", dlx);
	return 0;
}

/* Cross-check the values gathered by ocxl_config_read_function() */
static int validate_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
{
	if (fn->max_pasid_log == -1 && fn->max_afu_index >= 0) {
		dev_err(&dev->dev,
			"AFUs are defined but no PASIDs are requested\n");
		return -EINVAL;
	}

	if (fn->max_afu_index > OCXL_MAX_AFU_PER_FUNCTION) {
		dev_err(&dev->dev,
			"Max AFU index out of architectural limit (%d vs %d)\n",
			fn->max_afu_index, OCXL_MAX_AFU_PER_FUNCTION);
		return -EINVAL;
	}
	return 0;
}

/*
 * Read and validate the whole per-function opencapi configuration:
 * PASID capability, TL/function/AFU-info/vendor DVSECs. Fills *fn.
 * Returns 0 on success, -ENODEV/-EINVAL on a malformed config space.
 */
int ocxl_config_read_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
{
	int rc;

	rc = read_pasid(dev, fn);
	if (rc) {
		dev_err(&dev->dev, "Invalid PASID configuration: %d\n", rc);
		return -ENODEV;
	}

	rc = read_dvsec_tl(dev, fn);
	if (rc) {
		dev_err(&dev->dev,
			"Invalid Transaction Layer DVSEC configuration: %d\n",
			rc);
		return -ENODEV;
	}

	rc = read_dvsec_function(dev, fn);
	if (rc) {
		dev_err(&dev->dev,
			"Invalid Function DVSEC configuration: %d\n", rc);
		return -ENODEV;
	}

	rc = read_dvsec_afu_info(dev, fn);
	if (rc) {
		dev_err(&dev->dev, "Invalid AFU configuration: %d\n", rc);
		return -ENODEV;
	}

	rc = read_dvsec_vendor(dev);
	if (rc) {
		dev_err(&dev->dev,
			"Invalid vendor specific DVSEC configuration: %d\n",
			rc);
		return -ENODEV;
	}

	rc = validate_function(dev, fn);
	return rc;
}

/*
 * Indirect read of one dword of the AFU descriptor template through the
 * AFU information DVSEC: write the offset, poll the 'data valid' bit
 * (bit 31 of the offset register) with a timeout, then read the data
 * register. The offset/poll/read sequence must not be reordered.
 */
static int read_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn,
			int offset, u32 *data)
{
	u32 val;
	unsigned long timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT);
	int pos = fn->dvsec_afu_info_pos;

	/* Protect 'data valid' bit */
	if (EXTRACT_BIT(offset, 31)) {
		dev_err(&dev->dev, "Invalid offset in AFU info DVSEC\n");
		return -EINVAL;
	}

	pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, offset);
	pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val);
	while (!EXTRACT_BIT(val, 31)) {
		if (time_after_eq(jiffies, timeout)) {
			dev_err(&dev->dev,
				"Timeout while reading AFU info DVSEC (offset=%d)\n",
				offset);
			return -EBUSY;
		}
		cpu_relax();
		pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF,
				&val);
	}
	pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_DATA, data);
	return 0;
}

/*
 * Check whether an AFU exists at the given index.
 * Returns 1 if an AFU descriptor is present, 0 if the index is a hole
 * in the AFU map, or a negative error code.
 */
int ocxl_config_check_afu_index(struct pci_dev *dev,
				struct ocxl_fn_config *fn, int afu_idx)
{
	u32 val;
	int rc, templ_major, templ_minor, len;

	/*
	 * NOTE(review): this writes the AFU index as a word at the base of
	 * the AFU info DVSEC, whereas ocxl_config_read_afu() writes a byte
	 * at pos + OCXL_DVSEC_AFU_INFO_AFU_IDX. Verify the intended offset
	 * and access width against the OpenCAPI configuration spec.
	 */
	pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx);
	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val);
	if (rc)
		return rc;

	/* AFU index map can have holes */
	if (!val)
		return 0;

	templ_major = EXTRACT_BITS(val, 8, 15);
	templ_minor = EXTRACT_BITS(val, 0, 7);
	dev_dbg(&dev->dev, "AFU descriptor template version %d.%d\n",
		templ_major, templ_minor);

	len = EXTRACT_BITS(val, 16, 31);
	if (len != OCXL_TEMPL_LEN) {
		dev_warn(&dev->dev,
			"Unexpected template length in AFU information (%#x)\n",
			len);
	}
	return 1;
}

/*
 * Read the AFU name from the descriptor template, 4 bytes at a time,
 * and force NUL termination.
 */
static int read_afu_name(struct pci_dev *dev, struct ocxl_fn_config *fn,
			struct ocxl_afu_config *afu)
{
	int i, rc;
	u32 val, *ptr;

	BUILD_BUG_ON(OCXL_AFU_NAME_SZ < OCXL_TEMPL_NAME_LEN);
	for (i = 0; i < OCXL_TEMPL_NAME_LEN; i += 4) {
		rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_NAME + i, &val);
		if (rc)
			return rc;
		ptr = (u32 *) &afu->name[i];
		*ptr = val;
	}
	afu->name[OCXL_AFU_NAME_SZ - 1] = '\0'; /* play safe */
	return 0;
}

/*
 * Read the global and per-process MMIO ranges (BAR number, 64-bit
 * offset assembled from two dwords, and size/stride) from the template.
 */
static int read_afu_mmio(struct pci_dev *dev, struct ocxl_fn_config *fn,
			struct ocxl_afu_config *afu)
{
	int rc;
	u32 val;

	/*
	 * Global MMIO
	 */
	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL, &val);
	if (rc)
		return rc;
	afu->global_mmio_bar = EXTRACT_BITS(val, 0, 2);
	afu->global_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16;

	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL + 4, &val);
	if (rc)
		return rc;
	afu->global_mmio_offset += (u64) val << 32;

	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ, &val);
	if (rc)
		return rc;
	afu->global_mmio_size = val;

	/*
	 * Per-process MMIO
	 */
	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP, &val);
	if (rc)
		return rc;
	afu->pp_mmio_bar = EXTRACT_BITS(val, 0, 2);
	afu->pp_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16;

	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP + 4, &val);
	if (rc)
		return rc;
	afu->pp_mmio_offset += (u64) val << 32;

	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP_SZ, &val);
	if (rc)
		return rc;
	afu->pp_mmio_stride = val;

	return 0;
}

/*
 * Find the AFU control DVSEC for this AFU and read the supported PASID
 * count (log2) and actag count from it.
 */
static int read_afu_control(struct pci_dev *dev, struct ocxl_afu_config *afu)
{
	int pos;
	u8 val8;
	u16 val16;

	pos = find_dvsec_afu_ctrl(dev, afu->idx);
	if (!pos) {
		dev_err(&dev->dev, "Can't find AFU control DVSEC for AFU %d\n",
			afu->idx);
		return -ENODEV;
	}
	afu->dvsec_afu_control_pos = pos;

	pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_SUP, &val8);
	afu->pasid_supported_log = EXTRACT_BITS(val8, 0, 4);

	pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, &val16);
	afu->actag_supported = EXTRACT_BITS(val16, 0, 11);
	return 0;
}

/* Character set allowed in an AFU name (NUL padding included) */
static bool char_allowed(int c)
{
	/*
	 * Permitted Characters : Alphanumeric, hyphen, underscore, comma
	 */
	if ((c >= 0x30 && c <= 0x39) /* digits */ ||
		(c >= 0x41 && c <= 0x5A) /* upper case */ ||
		(c >= 0x61 && c <= 0x7A) /* lower case */ ||
		c == 0 /* NULL */ ||
		c == 0x2D /* - */ ||
		c == 0x5F /* _ */ ||
		c == 0x2C /* , */)
		return true;
	return false;
}

/* Sanity-check an AFU configuration: name charset and MMIO BAR numbers */
static int validate_afu(struct pci_dev *dev, struct ocxl_afu_config *afu)
{
	int i;

	if (!afu->name[0]) {
		dev_err(&dev->dev, "Empty AFU name\n");
		return -EINVAL;
	}
	for (i = 0; i < OCXL_TEMPL_NAME_LEN; i++) {
		if (!char_allowed(afu->name[i])) {
			dev_err(&dev->dev,
				"Invalid character in AFU name\n");
			return -EINVAL;
		}
	}

	/* only the 64-bit BARs 0, 2 and 4 can back MMIO ranges */
	if (afu->global_mmio_bar != 0 &&
		afu->global_mmio_bar != 2 &&
		afu->global_mmio_bar != 4) {
		dev_err(&dev->dev, "Invalid global MMIO bar number\n");
		return -EINVAL;
	}
	if (afu->pp_mmio_bar != 0 &&
		afu->pp_mmio_bar != 2 &&
		afu->pp_mmio_bar != 4) {
		dev_err(&dev->dev, "Invalid per-process MMIO bar number\n");
		return -EINVAL;
	}
	return 0;
}

/*
 * Read and validate the full configuration of one AFU (descriptor
 * template + AFU control DVSEC). Fills *afu. Returns 0 on success or a
 * negative error code.
 */
int ocxl_config_read_afu(struct pci_dev *dev, struct ocxl_fn_config *fn,
			struct ocxl_afu_config *afu, u8 afu_idx)
{
	int rc;
	u32 val32;

	/*
	 * First, we need to write the AFU idx for the AFU we want to
	 * access.
	 */
	WARN_ON((afu_idx & OCXL_DVSEC_AFU_IDX_MASK) != afu_idx);
	afu->idx = afu_idx;
	pci_write_config_byte(dev,
			fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX,
			afu->idx);

	rc = read_afu_name(dev, fn, afu);
	if (rc)
		return rc;

	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_AFU_VERSION, &val32);
	if (rc)
		return rc;
	afu->version_major = EXTRACT_BITS(val32, 24, 31);
	afu->version_minor = EXTRACT_BITS(val32, 16, 23);
	afu->afuc_type = EXTRACT_BITS(val32, 14, 15);
	afu->afum_type = EXTRACT_BITS(val32, 12, 13);
	afu->profile = EXTRACT_BITS(val32, 0, 7);

	rc = read_afu_mmio(dev, fn, afu);
	if (rc)
		return rc;

	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MEM_SZ, &val32);
	if (rc)
		return rc;
	afu->log_mem_size = EXTRACT_BITS(val32, 0, 7);

	rc = read_afu_control(dev, afu);
	if (rc)
		return rc;

	dev_dbg(&dev->dev, "AFU configuration:\n");
	dev_dbg(&dev->dev, "  name = %s\n", afu->name);
	dev_dbg(&dev->dev, "  version = %d.%d\n", afu->version_major,
		afu->version_minor);
	dev_dbg(&dev->dev, "  global mmio bar = %hhu\n", afu->global_mmio_bar);
	dev_dbg(&dev->dev, "  global mmio offset = %#llx\n",
		afu->global_mmio_offset);
	dev_dbg(&dev->dev, "  global mmio size = %#x\n", afu->global_mmio_size);
	dev_dbg(&dev->dev, "  pp mmio bar = %hhu\n", afu->pp_mmio_bar);
	dev_dbg(&dev->dev, "  pp mmio offset = %#llx\n", afu->pp_mmio_offset);
	dev_dbg(&dev->dev, "  pp mmio stride = %#x\n", afu->pp_mmio_stride);
	dev_dbg(&dev->dev, "  mem size (log) = %hhu\n", afu->log_mem_size);
	dev_dbg(&dev->dev, "  pasid supported (log) = %u\n",
		afu->pasid_supported_log);
	dev_dbg(&dev->dev, "  actag supported = %u\n",
		afu->actag_supported);

	rc = validate_afu(dev, afu);
	return rc;
}

/*
 * Return the actag range (base/enabled/supported counts) assigned to
 * the function by the platform.
 */
int ocxl_config_get_actag_info(struct pci_dev *dev, u16 *base, u16 *enabled,
			u16 *supported)
{
	int rc;

	/*
	 * This is really a simple wrapper for the kernel API, to
	 * avoid an external driver using ocxl as a library to call
	 * platform-dependent code
	 */
	rc = pnv_ocxl_get_actag(dev, base, enabled, supported);
	if (rc) {
		dev_err(&dev->dev, "Can't get actag for device: %d\n", rc);
		return rc;
	}
	return 0;
}

/* Program the actag base and enabled count in the AFU control DVSEC */
void ocxl_config_set_afu_actag(struct pci_dev *dev, int pos, int actag_base,
			int actag_count)
{
	u16 val;

	val = actag_count & OCXL_DVSEC_ACTAG_MASK;
	/*
	 * NOTE(review): the enabled actag count is masked to 12 bits but
	 * written with a byte access, which would truncate counts >= 256.
	 * Presumably intentional if the register is byte-wide at this
	 * offset -- verify against the OpenCAPI configuration spec.
	 */
	pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_EN, val);

	val = actag_base & OCXL_DVSEC_ACTAG_MASK;
	pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_BASE, val);
}

/* Number of PASIDs the platform grants to the function */
int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count)
{
	return pnv_ocxl_get_pasid_count(dev, count);
}

/*
 * Program the PASID range (log2 count + base) in the AFU control DVSEC.
 * The base is read-modify-written to preserve the non-PASID bits of the
 * register.
 */
void ocxl_config_set_afu_pasid(struct pci_dev *dev, int pos, int pasid_base,
			u32 pasid_count_log)
{
	u8 val8;
	u32 val32;

	val8 = pasid_count_log & OCXL_DVSEC_PASID_LOG_MASK;
	pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_EN, val8);

	pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE,
			&val32);
	val32 &= ~OCXL_DVSEC_PASID_MASK;
	val32 |= pasid_base & OCXL_DVSEC_PASID_MASK;
	pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE,
			val32);
}

/* Set or clear the AFU enable bit (bit 0 of the enable register) */
void ocxl_config_set_afu_state(struct pci_dev *dev, int pos, int enable)
{
	u8 val;

	pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, &val);
	if (enable)
		val |= 1;
	else
		val &= 0xFE;
	pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, val);
}

/*
 * Negotiate the Transaction Layer configuration between host and
 * device: exchange supported template capabilities and sending rates in
 * both directions, then set the long back off timer. Only meaningful on
 * function 0, where the TL DVSEC lives.
 */
int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec)
{
	u32 val;
	__be32 *be32ptr;
	u8 timers;
	int i, rc;
	long recv_cap;
	char *recv_rate;

	/*
	 * Skip on function != 0, as the TL can only be defined on 0
	 */
	if (PCI_FUNC(dev->devfn) != 0)
		return 0;

	recv_rate = kzalloc(PNV_OCXL_TL_RATE_BUF_SIZE, GFP_KERNEL);
	if (!recv_rate)
		return -ENOMEM;
	/*
	 * The spec defines 64 templates for messages in the
	 * Transaction Layer (TL).
	 *
	 * The host and device each support a subset, so we need to
	 * configure the transmitters on each side to send only
	 * templates the receiver understands, at a rate the receiver
	 * can process. Per the spec, template 0 must be supported by
	 * everybody. That's the template which has been used by the
	 * host and device so far.
	 *
	 * The sending rate limit must be set before the template is
	 * enabled.
	 */

	/*
	 * Device -> host
	 */
	rc = pnv_ocxl_get_tl_cap(dev, &recv_cap, recv_rate,
				PNV_OCXL_TL_RATE_BUF_SIZE);
	if (rc)
		goto out;

	for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
		be32ptr = (__be32 *) &recv_rate[i];
		pci_write_config_dword(dev,
				tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i,
				be32_to_cpu(*be32ptr));
	}
	val = recv_cap >> 32;
	pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP, val);
	val = recv_cap & GENMASK(31, 0);
	pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP + 4, val);

	/*
	 * Host -> device
	 */
	for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
		pci_read_config_dword(dev,
				tl_dvsec + OCXL_DVSEC_TL_RECV_RATE + i,
				&val);
		be32ptr = (__be32 *) &recv_rate[i];
		*be32ptr = cpu_to_be32(val);
	}
	pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP, &val);
	recv_cap = (long) val << 32;
	pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP + 4, &val);
	recv_cap |= val;

	rc = pnv_ocxl_set_tl_conf(dev, recv_cap, __pa(recv_rate),
				PNV_OCXL_TL_RATE_BUF_SIZE);
	if (rc)
		goto out;

	/*
	 * Opencapi commands needing to be retried are classified per
	 * the TL in 2 groups: short and long commands.
	 *
	 * The short back off timer it not used for now. It will be
	 * for opencapi 4.0.
	 *
	 * The long back off timer is typically used when an AFU hits
	 * a page fault but the NPU is already processing one. So the
	 * AFU needs to wait before it can resubmit. Having a value
	 * too low doesn't break anything, but can generate extra
	 * traffic on the link.
	 * We set it to 1.6 us for now. It's shorter than, but in the
	 * same order of magnitude as the time spent to process a page
	 * fault.
	 */
	timers = 0x2 << 4; /* long timer = 1.6 us */
	pci_write_config_byte(dev, tl_dvsec + OCXL_DVSEC_TL_BACKOFF_TIMERS,
			timers);

	rc = 0;
out:
	kfree(recv_rate);
	return rc;
}

/*
 * Ask the AFU to terminate a PASID: set the 'terminate' bit (bit 20)
 * together with the PASID, then poll until the AFU clears it. Returns
 * -EBUSY if a previous termination is pending or on timeout -- in the
 * timeout case the caller must NOT reuse the PASID (see the comment in
 * ocxl_context_detach()).
 */
int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, int pasid)
{
	u32 val;
	unsigned long timeout;

	pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
			&val);
	if (EXTRACT_BIT(val, 20)) {
		dev_err(&dev->dev,
			"Can't terminate PASID %#x, previous termination didn't complete\n",
			pasid);
		return -EBUSY;
	}

	val &= ~OCXL_DVSEC_PASID_MASK;
	val |= pasid & OCXL_DVSEC_PASID_MASK;
	val |= BIT(20);
	pci_write_config_dword(dev,
			afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
			val);

	timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT);
	pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
			&val);
	while (EXTRACT_BIT(val, 20)) {
		if (time_after_eq(jiffies, timeout)) {
			dev_err(&dev->dev,
				"Timeout while waiting for AFU to terminate PASID %#x\n",
				pasid);
			return -EBUSY;
		}
		cpu_relax();
		pci_read_config_dword(dev,
				afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
				&val);
	}
	return 0;
}

/* Program the actag range (first tag + count) in the function DVSEC */
void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec, u32 tag_first,
			u32 tag_count)
{
	u32 val;

	val = (tag_first & OCXL_DVSEC_ACTAG_MASK) << 16;
	val |= tag_count & OCXL_DVSEC_ACTAG_MASK;
	pci_write_config_dword(dev, func_dvsec + OCXL_DVSEC_FUNC_OFF_ACTAG,
			val);
}
+230
drivers/misc/ocxl/context.c
// SPDX-License-Identifier: GPL-2.0+
// Copyright 2017 IBM Corp.
#include <linux/sched/mm.h>
#include "ocxl_internal.h"

/* Allocate a zeroed context; initialized later by ocxl_context_init() */
struct ocxl_context *ocxl_context_alloc(void)
{
	return kzalloc(sizeof(struct ocxl_context), GFP_KERNEL);
}

/*
 * Initialize a freshly allocated context: allocate a PASID from the
 * AFU's range, set up the locks/waitqueue, and take a reference on the
 * AFU. Returns 0 or the negative idr_alloc() error.
 */
int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
		struct address_space *mapping)
{
	int pasid;

	ctx->afu = afu;
	mutex_lock(&afu->contexts_lock);
	pasid = idr_alloc(&afu->contexts_idr, ctx, afu->pasid_base,
			afu->pasid_base + afu->pasid_max, GFP_KERNEL);
	if (pasid < 0) {
		mutex_unlock(&afu->contexts_lock);
		return pasid;
	}
	afu->pasid_count++;
	mutex_unlock(&afu->contexts_lock);

	ctx->pasid = pasid;
	ctx->status = OPENED;
	mutex_init(&ctx->status_mutex);
	ctx->mapping = mapping;
	mutex_init(&ctx->mapping_lock);
	init_waitqueue_head(&ctx->events_wq);
	mutex_init(&ctx->xsl_error_lock);
	/*
	 * Keep a reference on the AFU to make sure it's valid for the
	 * duration of the life of the context
	 */
	ocxl_afu_get(afu);
	return 0;
}

/*
 * Callback for when a translation fault triggers an error
 * data: a pointer to the context which triggered the fault
 * addr: the address that triggered the error
 * dsisr: the value of the PPC64 dsisr register
 *
 * Records the fault under xsl_error_lock and wakes up any poll()ers.
 */
static void xsl_fault_error(void *data, u64 addr, u64 dsisr)
{
	struct ocxl_context *ctx = (struct ocxl_context *) data;

	mutex_lock(&ctx->xsl_error_lock);
	ctx->xsl_error.addr = addr;
	ctx->xsl_error.dsisr = dsisr;
	ctx->xsl_error.count++;
	mutex_unlock(&ctx->xsl_error_lock);

	wake_up_all(&ctx->events_wq);
}

/*
 * Attach the current process to the context: add a Process Element for
 * (pasid, current mm) on the link so the device can translate for us.
 * amr: the Authority Mask Register value to apply.
 * Fails with -EIO if the context is not in the OPENED state.
 */
int ocxl_context_attach(struct ocxl_context *ctx, u64 amr)
{
	int rc;

	mutex_lock(&ctx->status_mutex);
	if (ctx->status != OPENED) {
		rc = -EIO;
		goto out;
	}

	rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid,
			current->mm->context.id, 0, amr, current->mm,
			xsl_fault_error, ctx);
	if (rc)
		goto out;

	ctx->status = ATTACHED;
out:
	mutex_unlock(&ctx->status_mutex);
	return rc;
}

/*
 * Map one page of the context's per-process MMIO area at the faulting
 * address. The pp MMIO slot is selected by the PASID offset within the
 * AFU's PASID range. Returns a VM_FAULT_* code.
 */
static int map_pp_mmio(struct vm_area_struct *vma, unsigned long address,
		u64 offset, struct ocxl_context *ctx)
{
	u64 pp_mmio_addr;
	int pasid_off;

	if (offset >= ctx->afu->config.pp_mmio_stride)
		return VM_FAULT_SIGBUS;

	mutex_lock(&ctx->status_mutex);
	if (ctx->status != ATTACHED) {
		mutex_unlock(&ctx->status_mutex);
		pr_debug("%s: Context not attached, failing mmio mmap\n",
			__func__);
		return VM_FAULT_SIGBUS;
	}

	pasid_off = ctx->pasid - ctx->afu->pasid_base;
	pp_mmio_addr = ctx->afu->pp_mmio_start +
		pasid_off * ctx->afu->config.pp_mmio_stride +
		offset;

	vm_insert_pfn(vma, address, pp_mmio_addr >> PAGE_SHIFT);
	mutex_unlock(&ctx->status_mutex);
	return VM_FAULT_NOPAGE;
}

/* Page-fault handler for the context's MMIO vma */
static int ocxl_mmap_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct ocxl_context *ctx = vma->vm_file->private_data;
	u64 offset;
	int rc;

	offset = vmf->pgoff << PAGE_SHIFT;
	pr_debug("%s: pasid %d address 0x%lx offset 0x%llx\n", __func__,
		ctx->pasid, vmf->address, offset);

	rc = map_pp_mmio(vma, vmf->address, offset, ctx);
	return rc;
}

static const struct vm_operations_struct ocxl_vmops = {
	.fault = ocxl_mmap_fault,
};

/* Refuse mappings extending past the per-process MMIO stride */
static int check_mmap_mmio(struct ocxl_context *ctx,
			struct vm_area_struct *vma)
{
	if ((vma_pages(vma) + vma->vm_pgoff) >
		(ctx->afu->config.pp_mmio_stride >> PAGE_SHIFT))
		return -EINVAL;
	return 0;
}

/*
 * mmap() backend for the context: set up an uncached, fault-populated
 * PFN mapping of the per-process MMIO area.
 */
int ocxl_context_mmap(struct ocxl_context *ctx, struct vm_area_struct *vma)
{
	int rc;

	rc = check_mmap_mmio(ctx, vma);
	if (rc)
		return rc;

	vma->vm_flags |= VM_IO | VM_PFNMAP;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	vma->vm_ops = &ocxl_vmops;
	return 0;
}

/*
 * Detach the context from the AFU: ask the AFU to terminate the PASID,
 * then remove the Process Element. On termination timeout (-EBUSY) the
 * PE is deliberately left in place -- see the comment below.
 */
int ocxl_context_detach(struct ocxl_context *ctx)
{
	struct pci_dev *dev;
	int afu_control_pos;
	enum ocxl_context_status status;
	int rc;

	mutex_lock(&ctx->status_mutex);
	status = ctx->status;
	ctx->status = CLOSED;
	mutex_unlock(&ctx->status_mutex);
	if (status != ATTACHED)
		return 0;

	dev = to_pci_dev(ctx->afu->fn->dev.parent);
	afu_control_pos = ctx->afu->config.dvsec_afu_control_pos;

	mutex_lock(&ctx->afu->afu_control_lock);
	rc = ocxl_config_terminate_pasid(dev, afu_control_pos, ctx->pasid);
	mutex_unlock(&ctx->afu->afu_control_lock);
	if (rc) {
		/*
		 * If we timeout waiting for the AFU to terminate the
		 * pasid, then it's dangerous to clean up the Process
		 * Element entry in the SPA, as it may be referenced
		 * in the future by the AFU. In which case, we would
		 * checkstop because of an invalid PE access (FIR
		 * register 2, bit 42). So leave the PE
		 * defined. Caller shouldn't free the context so that
		 * PASID remains allocated.
		 *
		 * A link reset will be required to cleanup the AFU
		 * and the SPA.
		 */
		if (rc == -EBUSY)
			return rc;
	}
	rc = ocxl_link_remove_pe(ctx->afu->fn->link, ctx->pasid);
	if (rc) {
		dev_warn(&ctx->afu->dev,
			"Couldn't remove PE entry cleanly: %d\n", rc);
	}
	return 0;
}

/*
 * Force-detach every context of an AFU, e.g. on driver unbind, and tear
 * down their MMIO mappings so userspace can't touch the card afterward.
 */
void ocxl_context_detach_all(struct ocxl_afu *afu)
{
	struct ocxl_context *ctx;
	int tmp;

	mutex_lock(&afu->contexts_lock);
	idr_for_each_entry(&afu->contexts_idr, ctx, tmp) {
		ocxl_context_detach(ctx);
		/*
		 * We are force detaching - remove any active mmio
		 * mappings so userspace cannot interfere with the
		 * card if it comes back. Easiest way to exercise
		 * this is to unbind and rebind the driver via sysfs
		 * while it is in use.
		 */
		mutex_lock(&ctx->mapping_lock);
		if (ctx->mapping)
			unmap_mapping_range(ctx->mapping, 0, 0, 1);
		mutex_unlock(&ctx->mapping_lock);
	}
	mutex_unlock(&afu->contexts_lock);
}

/*
 * Free a context: release its PASID back to the AFU's range, drop the
 * AFU reference taken in ocxl_context_init(), and free the memory.
 */
void ocxl_context_free(struct ocxl_context *ctx)
{
	mutex_lock(&ctx->afu->contexts_lock);
	ctx->afu->pasid_count--;
	idr_remove(&ctx->afu->contexts_idr, ctx->pasid);
	mutex_unlock(&ctx->afu->contexts_lock);

	/* reference to the AFU taken in ocxl_context_init */
	ocxl_afu_put(ctx->afu);
	kfree(ctx);
}
+398
drivers/misc/ocxl/file.c
// SPDX-License-Identifier: GPL-2.0+
// Copyright 2017 IBM Corp.
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
#include <uapi/misc/ocxl.h>
#include "ocxl_internal.h"


#define OCXL_NUM_MINORS 256 /* Total to reserve */

static dev_t ocxl_dev;
static struct class *ocxl_class;
static struct mutex minors_idr_lock;	/* protects minors_idr */
static struct idr minors_idr;		/* minor number -> struct ocxl_afu */

/*
 * Look up the AFU for a device number and take a reference on it.
 * Returns NULL if no AFU is registered under that minor.
 */
static struct ocxl_afu *find_and_get_afu(dev_t devno)
{
	struct ocxl_afu *afu;
	int afu_minor;

	afu_minor = MINOR(devno);
	/*
	 * We don't declare an RCU critical section here, as our AFU
	 * is protected by a reference counter on the device. By the time the
	 * minor number of a device is removed from the idr, the ref count of
	 * the device is already at 0, so no user API will access that AFU and
	 * this function can't return it.
	 */
	afu = idr_find(&minors_idr, afu_minor);
	if (afu)
		ocxl_afu_get(afu);
	return afu;
}

/* Reserve a char device minor for the AFU; returns the minor or < 0 */
static int allocate_afu_minor(struct ocxl_afu *afu)
{
	int minor;

	mutex_lock(&minors_idr_lock);
	minor = idr_alloc(&minors_idr, afu, 0, OCXL_NUM_MINORS, GFP_KERNEL);
	mutex_unlock(&minors_idr_lock);
	return minor;
}

/* Release the minor reserved by allocate_afu_minor() */
static void free_afu_minor(struct ocxl_afu *afu)
{
	mutex_lock(&minors_idr_lock);
	idr_remove(&minors_idr, MINOR(afu->dev.devt));
	mutex_unlock(&minors_idr_lock);
}

/*
 * open() on the AFU char device: allocate and initialize a context,
 * which becomes the file's private data. The AFU reference taken by
 * find_and_get_afu() is dropped here in all cases; the context holds
 * its own reference (taken in ocxl_context_init()).
 */
static int afu_open(struct inode *inode, struct file *file)
{
	struct ocxl_afu *afu;
	struct ocxl_context *ctx;
	int rc;

	pr_debug("%s for device %x\n", __func__, inode->i_rdev);

	afu = find_and_get_afu(inode->i_rdev);
	if (!afu)
		return -ENODEV;

	ctx = ocxl_context_alloc();
	if (!ctx) {
		rc = -ENOMEM;
		goto put_afu;
	}

	rc = ocxl_context_init(ctx, afu, inode->i_mapping);
	if (rc)
		goto put_afu;
	file->private_data = ctx;
	ocxl_afu_put(afu);
	return 0;

put_afu:
	ocxl_afu_put(afu);
	return rc;
}

/*
 * OCXL_IOCTL_ATTACH handler: copy in the attach arguments and attach
 * the calling process to the context. The requested AMR is restricted
 * by the user-accessible keys (UAMOR).
 */
static long afu_ioctl_attach(struct ocxl_context *ctx,
			struct ocxl_ioctl_attach __user *uarg)
{
	struct ocxl_ioctl_attach arg;
	u64 amr = 0;
	int rc;

	pr_debug("%s for context %d\n", __func__, ctx->pasid);

	if (copy_from_user(&arg, uarg, sizeof(arg)))
		return -EFAULT;

	/* Make sure reserved fields are not set for forward compatibility */
	if (arg.reserved1 || arg.reserved2 || arg.reserved3)
		return -EINVAL;

	amr = arg.amr & mfspr(SPRN_UAMOR);
	rc = ocxl_context_attach(ctx, amr);
	return rc;
}

/* ioctl command name, for debug tracing only */
#define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" : \
			"UNKNOWN")

/* ioctl() dispatch for the AFU char device */
static long afu_ioctl(struct file *file, unsigned int cmd,
		unsigned long args)
{
	struct ocxl_context *ctx = file->private_data;
	long rc;

	pr_debug("%s for context %d, command %s\n", __func__, ctx->pasid,
		CMD_STR(cmd));

	if (ctx->status == CLOSED)
		return -EIO;

	switch (cmd) {
	case OCXL_IOCTL_ATTACH:
		rc = afu_ioctl_attach(ctx,
				(struct ocxl_ioctl_attach __user *) args);
		break;

	default:
		rc = -EINVAL;
	}
	return rc;
}

/* The ioctl ABI uses no pointers/longs, so compat == native */
static long afu_compat_ioctl(struct file *file, unsigned int cmd,
			unsigned long args)
{
	return afu_ioctl(file, cmd, args);
}

/* mmap() of the context's per-process MMIO area */
static int afu_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct ocxl_context *ctx = file->private_data;

	pr_debug("%s for context %d\n", __func__, ctx->pasid);
	return ocxl_context_mmap(ctx, vma);
}

/* Is an XSL (translation) error pending on the context? */
static bool has_xsl_error(struct ocxl_context *ctx)
{
	bool ret;

	mutex_lock(&ctx->xsl_error_lock);
	ret = !!ctx->xsl_error.addr;
	mutex_unlock(&ctx->xsl_error_lock);

	return ret;
}

/*
 * Are there any events pending on the AFU
 * ctx: The AFU context
 * Returns: true if there are events pending
 */
static bool afu_events_pending(struct ocxl_context *ctx)
{
	if (has_xsl_error(ctx))
		return true;
	return false;
}

/* poll() backend: readable when an event is pending, error when closed */
static unsigned int afu_poll(struct file *file, struct poll_table_struct *wait)
{
	struct ocxl_context *ctx = file->private_data;
	unsigned int mask = 0;
	bool closed;

	pr_debug("%s for context %d\n", __func__, ctx->pasid);

	poll_wait(file, &ctx->events_wq, wait);

	mutex_lock(&ctx->status_mutex);
	closed = (ctx->status == CLOSED);
	mutex_unlock(&ctx->status_mutex);

	if (afu_events_pending(ctx))
		mask = POLLIN | POLLRDNORM;
	else if (closed)
		mask = POLLERR;

	return mask;
}

/*
 * Populate the supplied buffer with a single XSL error
 * ctx: The AFU context to report the error from
 * header: the event header to populate
 * buf: The buffer to write the body into (should be at least
 * AFU_EVENT_BODY_XSL_ERROR_SIZE)
 * Return: the amount of buffer that was populated
 */
static ssize_t append_xsl_error(struct ocxl_context *ctx,
				struct ocxl_kernel_event_header *header,
				char __user *buf)
{
	struct ocxl_kernel_event_xsl_fault_error body;

	memset(&body, 0, sizeof(body));

	mutex_lock(&ctx->xsl_error_lock);
	if (!ctx->xsl_error.addr) {
		mutex_unlock(&ctx->xsl_error_lock);
		return 0;
	}

	body.addr = ctx->xsl_error.addr;
	body.dsisr = ctx->xsl_error.dsisr;
	body.count = ctx->xsl_error.count;

	/* consume the error so it is reported only once */
	ctx->xsl_error.addr = 0;
	ctx->xsl_error.dsisr = 0;
	ctx->xsl_error.count = 0;

	mutex_unlock(&ctx->xsl_error_lock);

	header->type = 
OCXL_AFU_EVENT_XSL_FAULT_ERROR; 224 + 225 + if (copy_to_user(buf, &body, sizeof(body))) 226 + return -EFAULT; 227 + 228 + return sizeof(body); 229 + } 230 + 231 + #define AFU_EVENT_BODY_MAX_SIZE sizeof(struct ocxl_kernel_event_xsl_fault_error) 232 + 233 + /* 234 + * Reports events on the AFU 235 + * Format: 236 + * Header (struct ocxl_kernel_event_header) 237 + * Body (struct ocxl_kernel_event_*) 238 + * Header... 239 + */ 240 + static ssize_t afu_read(struct file *file, char __user *buf, size_t count, 241 + loff_t *off) 242 + { 243 + struct ocxl_context *ctx = file->private_data; 244 + struct ocxl_kernel_event_header header; 245 + ssize_t rc; 246 + size_t used = 0; 247 + DEFINE_WAIT(event_wait); 248 + 249 + memset(&header, 0, sizeof(header)); 250 + 251 + /* Require offset to be 0 */ 252 + if (*off != 0) 253 + return -EINVAL; 254 + 255 + if (count < (sizeof(struct ocxl_kernel_event_header) + 256 + AFU_EVENT_BODY_MAX_SIZE)) 257 + return -EINVAL; 258 + 259 + for (;;) { 260 + prepare_to_wait(&ctx->events_wq, &event_wait, 261 + TASK_INTERRUPTIBLE); 262 + 263 + if (afu_events_pending(ctx)) 264 + break; 265 + 266 + if (ctx->status == CLOSED) 267 + break; 268 + 269 + if (file->f_flags & O_NONBLOCK) { 270 + finish_wait(&ctx->events_wq, &event_wait); 271 + return -EAGAIN; 272 + } 273 + 274 + if (signal_pending(current)) { 275 + finish_wait(&ctx->events_wq, &event_wait); 276 + return -ERESTARTSYS; 277 + } 278 + 279 + schedule(); 280 + } 281 + 282 + finish_wait(&ctx->events_wq, &event_wait); 283 + 284 + if (has_xsl_error(ctx)) { 285 + used = append_xsl_error(ctx, &header, buf + sizeof(header)); 286 + if (used < 0) 287 + return used; 288 + } 289 + 290 + if (!afu_events_pending(ctx)) 291 + header.flags |= OCXL_KERNEL_EVENT_FLAG_LAST; 292 + 293 + if (copy_to_user(buf, &header, sizeof(header))) 294 + return -EFAULT; 295 + 296 + used += sizeof(header); 297 + 298 + rc = (ssize_t) used; 299 + return rc; 300 + } 301 + 302 + static int afu_release(struct inode *inode, struct file 
*file) 303 + { 304 + struct ocxl_context *ctx = file->private_data; 305 + int rc; 306 + 307 + pr_debug("%s for device %x\n", __func__, inode->i_rdev); 308 + rc = ocxl_context_detach(ctx); 309 + mutex_lock(&ctx->mapping_lock); 310 + ctx->mapping = NULL; 311 + mutex_unlock(&ctx->mapping_lock); 312 + wake_up_all(&ctx->events_wq); 313 + if (rc != -EBUSY) 314 + ocxl_context_free(ctx); 315 + return 0; 316 + } 317 + 318 + static const struct file_operations ocxl_afu_fops = { 319 + .owner = THIS_MODULE, 320 + .open = afu_open, 321 + .unlocked_ioctl = afu_ioctl, 322 + .compat_ioctl = afu_compat_ioctl, 323 + .mmap = afu_mmap, 324 + .poll = afu_poll, 325 + .read = afu_read, 326 + .release = afu_release, 327 + }; 328 + 329 + int ocxl_create_cdev(struct ocxl_afu *afu) 330 + { 331 + int rc; 332 + 333 + cdev_init(&afu->cdev, &ocxl_afu_fops); 334 + rc = cdev_add(&afu->cdev, afu->dev.devt, 1); 335 + if (rc) { 336 + dev_err(&afu->dev, "Unable to add afu char device: %d\n", rc); 337 + return rc; 338 + } 339 + return 0; 340 + } 341 + 342 + void ocxl_destroy_cdev(struct ocxl_afu *afu) 343 + { 344 + cdev_del(&afu->cdev); 345 + } 346 + 347 + int ocxl_register_afu(struct ocxl_afu *afu) 348 + { 349 + int minor; 350 + 351 + minor = allocate_afu_minor(afu); 352 + if (minor < 0) 353 + return minor; 354 + afu->dev.devt = MKDEV(MAJOR(ocxl_dev), minor); 355 + afu->dev.class = ocxl_class; 356 + return device_register(&afu->dev); 357 + } 358 + 359 + void ocxl_unregister_afu(struct ocxl_afu *afu) 360 + { 361 + free_afu_minor(afu); 362 + } 363 + 364 + static char *ocxl_devnode(struct device *dev, umode_t *mode) 365 + { 366 + return kasprintf(GFP_KERNEL, "ocxl/%s", dev_name(dev)); 367 + } 368 + 369 + int ocxl_file_init(void) 370 + { 371 + int rc; 372 + 373 + mutex_init(&minors_idr_lock); 374 + idr_init(&minors_idr); 375 + 376 + rc = alloc_chrdev_region(&ocxl_dev, 0, OCXL_NUM_MINORS, "ocxl"); 377 + if (rc) { 378 + pr_err("Unable to allocate ocxl major number: %d\n", rc); 379 + return rc; 380 + } 381 + 
382 + ocxl_class = class_create(THIS_MODULE, "ocxl"); 383 + if (IS_ERR(ocxl_class)) { 384 + pr_err("Unable to create ocxl class\n"); 385 + unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS); 386 + return PTR_ERR(ocxl_class); 387 + } 388 + 389 + ocxl_class->devnode = ocxl_devnode; 390 + return 0; 391 + } 392 + 393 + void ocxl_file_exit(void) 394 + { 395 + class_destroy(ocxl_class); 396 + unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS); 397 + idr_destroy(&minors_idr); 398 + }
+603
drivers/misc/ocxl/link.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + // Copyright 2017 IBM Corp. 3 + #include <linux/sched/mm.h> 4 + #include <linux/mutex.h> 5 + #include <linux/mmu_context.h> 6 + #include <asm/copro.h> 7 + #include <asm/pnv-ocxl.h> 8 + #include "ocxl_internal.h" 9 + 10 + 11 + #define SPA_PASID_BITS 15 12 + #define SPA_PASID_MAX ((1 << SPA_PASID_BITS) - 1) 13 + #define SPA_PE_MASK SPA_PASID_MAX 14 + #define SPA_SPA_SIZE_LOG 22 /* Each SPA is 4 Mb */ 15 + 16 + #define SPA_CFG_SF (1ull << (63-0)) 17 + #define SPA_CFG_TA (1ull << (63-1)) 18 + #define SPA_CFG_HV (1ull << (63-3)) 19 + #define SPA_CFG_UV (1ull << (63-4)) 20 + #define SPA_CFG_XLAT_hpt (0ull << (63-6)) /* Hashed page table (HPT) mode */ 21 + #define SPA_CFG_XLAT_roh (2ull << (63-6)) /* Radix on HPT mode */ 22 + #define SPA_CFG_XLAT_ror (3ull << (63-6)) /* Radix on Radix mode */ 23 + #define SPA_CFG_PR (1ull << (63-49)) 24 + #define SPA_CFG_TC (1ull << (63-54)) 25 + #define SPA_CFG_DR (1ull << (63-59)) 26 + 27 + #define SPA_XSL_TF (1ull << (63-3)) /* Translation fault */ 28 + #define SPA_XSL_S (1ull << (63-38)) /* Store operation */ 29 + 30 + #define SPA_PE_VALID 0x80000000 31 + 32 + 33 + struct pe_data { 34 + struct mm_struct *mm; 35 + /* callback to trigger when a translation fault occurs */ 36 + void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr); 37 + /* opaque pointer to be passed to the above callback */ 38 + void *xsl_err_data; 39 + struct rcu_head rcu; 40 + }; 41 + 42 + struct spa { 43 + struct ocxl_process_element *spa_mem; 44 + int spa_order; 45 + struct mutex spa_lock; 46 + struct radix_tree_root pe_tree; /* Maps PE handles to pe_data */ 47 + char *irq_name; 48 + int virq; 49 + void __iomem *reg_dsisr; 50 + void __iomem *reg_dar; 51 + void __iomem *reg_tfc; 52 + void __iomem *reg_pe_handle; 53 + /* 54 + * The following field are used by the memory fault 55 + * interrupt handler. We can only have one interrupt at a 56 + * time. 
The NPU won't raise another interrupt until the 57 + * previous one has been ack'd by writing to the TFC register 58 + */ 59 + struct xsl_fault { 60 + struct work_struct fault_work; 61 + u64 pe; 62 + u64 dsisr; 63 + u64 dar; 64 + struct pe_data pe_data; 65 + } xsl_fault; 66 + }; 67 + 68 + /* 69 + * A opencapi link can be used be by several PCI functions. We have 70 + * one link per device slot. 71 + * 72 + * A linked list of opencapi links should suffice, as there's a 73 + * limited number of opencapi slots on a system and lookup is only 74 + * done when the device is probed 75 + */ 76 + struct link { 77 + struct list_head list; 78 + struct kref ref; 79 + int domain; 80 + int bus; 81 + int dev; 82 + atomic_t irq_available; 83 + struct spa *spa; 84 + void *platform_data; 85 + }; 86 + static struct list_head links_list = LIST_HEAD_INIT(links_list); 87 + static DEFINE_MUTEX(links_list_lock); 88 + 89 + enum xsl_response { 90 + CONTINUE, 91 + ADDRESS_ERROR, 92 + RESTART, 93 + }; 94 + 95 + 96 + static void read_irq(struct spa *spa, u64 *dsisr, u64 *dar, u64 *pe) 97 + { 98 + u64 reg; 99 + 100 + *dsisr = in_be64(spa->reg_dsisr); 101 + *dar = in_be64(spa->reg_dar); 102 + reg = in_be64(spa->reg_pe_handle); 103 + *pe = reg & SPA_PE_MASK; 104 + } 105 + 106 + static void ack_irq(struct spa *spa, enum xsl_response r) 107 + { 108 + u64 reg = 0; 109 + 110 + /* continue is not supported */ 111 + if (r == RESTART) 112 + reg = PPC_BIT(31); 113 + else if (r == ADDRESS_ERROR) 114 + reg = PPC_BIT(30); 115 + else 116 + WARN(1, "Invalid irq response %d\n", r); 117 + 118 + if (reg) 119 + out_be64(spa->reg_tfc, reg); 120 + } 121 + 122 + static void xsl_fault_handler_bh(struct work_struct *fault_work) 123 + { 124 + unsigned int flt = 0; 125 + unsigned long access, flags, inv_flags = 0; 126 + enum xsl_response r; 127 + struct xsl_fault *fault = container_of(fault_work, struct xsl_fault, 128 + fault_work); 129 + struct spa *spa = container_of(fault, struct spa, xsl_fault); 130 + 131 + int rc; 
132 + 133 + /* 134 + * We need to release a reference on the mm whenever exiting this 135 + * function (taken in the memory fault interrupt handler) 136 + */ 137 + rc = copro_handle_mm_fault(fault->pe_data.mm, fault->dar, fault->dsisr, 138 + &flt); 139 + if (rc) { 140 + pr_debug("copro_handle_mm_fault failed: %d\n", rc); 141 + if (fault->pe_data.xsl_err_cb) { 142 + fault->pe_data.xsl_err_cb( 143 + fault->pe_data.xsl_err_data, 144 + fault->dar, fault->dsisr); 145 + } 146 + r = ADDRESS_ERROR; 147 + goto ack; 148 + } 149 + 150 + if (!radix_enabled()) { 151 + /* 152 + * update_mmu_cache() will not have loaded the hash 153 + * since current->trap is not a 0x400 or 0x300, so 154 + * just call hash_page_mm() here. 155 + */ 156 + access = _PAGE_PRESENT | _PAGE_READ; 157 + if (fault->dsisr & SPA_XSL_S) 158 + access |= _PAGE_WRITE; 159 + 160 + if (REGION_ID(fault->dar) != USER_REGION_ID) 161 + access |= _PAGE_PRIVILEGED; 162 + 163 + local_irq_save(flags); 164 + hash_page_mm(fault->pe_data.mm, fault->dar, access, 0x300, 165 + inv_flags); 166 + local_irq_restore(flags); 167 + } 168 + r = RESTART; 169 + ack: 170 + mmdrop(fault->pe_data.mm); 171 + ack_irq(spa, r); 172 + } 173 + 174 + static irqreturn_t xsl_fault_handler(int irq, void *data) 175 + { 176 + struct link *link = (struct link *) data; 177 + struct spa *spa = link->spa; 178 + u64 dsisr, dar, pe_handle; 179 + struct pe_data *pe_data; 180 + struct ocxl_process_element *pe; 181 + int lpid, pid, tid; 182 + 183 + read_irq(spa, &dsisr, &dar, &pe_handle); 184 + 185 + WARN_ON(pe_handle > SPA_PE_MASK); 186 + pe = spa->spa_mem + pe_handle; 187 + lpid = be32_to_cpu(pe->lpid); 188 + pid = be32_to_cpu(pe->pid); 189 + tid = be32_to_cpu(pe->tid); 190 + /* We could be reading all null values here if the PE is being 191 + * removed while an interrupt kicks in. It's not supposed to 192 + * happen if the driver notified the AFU to terminate the 193 + * PASID, and the AFU waited for pending operations before 194 + * acknowledging. 
But even if it happens, we won't find a 195 + * memory context below and fail silently, so it should be ok. 196 + */ 197 + if (!(dsisr & SPA_XSL_TF)) { 198 + WARN(1, "Invalid xsl interrupt fault register %#llx\n", dsisr); 199 + ack_irq(spa, ADDRESS_ERROR); 200 + return IRQ_HANDLED; 201 + } 202 + 203 + rcu_read_lock(); 204 + pe_data = radix_tree_lookup(&spa->pe_tree, pe_handle); 205 + if (!pe_data) { 206 + /* 207 + * Could only happen if the driver didn't notify the 208 + * AFU about PASID termination before removing the PE, 209 + * or the AFU didn't wait for all memory access to 210 + * have completed. 211 + * 212 + * Either way, we fail early, but we shouldn't log an 213 + * error message, as it is a valid (if unexpected) 214 + * scenario 215 + */ 216 + rcu_read_unlock(); 217 + pr_debug("Unknown mm context for xsl interrupt\n"); 218 + ack_irq(spa, ADDRESS_ERROR); 219 + return IRQ_HANDLED; 220 + } 221 + WARN_ON(pe_data->mm->context.id != pid); 222 + 223 + spa->xsl_fault.pe = pe_handle; 224 + spa->xsl_fault.dar = dar; 225 + spa->xsl_fault.dsisr = dsisr; 226 + spa->xsl_fault.pe_data = *pe_data; 227 + mmgrab(pe_data->mm); /* mm count is released by bottom half */ 228 + 229 + rcu_read_unlock(); 230 + schedule_work(&spa->xsl_fault.fault_work); 231 + return IRQ_HANDLED; 232 + } 233 + 234 + static void unmap_irq_registers(struct spa *spa) 235 + { 236 + pnv_ocxl_unmap_xsl_regs(spa->reg_dsisr, spa->reg_dar, spa->reg_tfc, 237 + spa->reg_pe_handle); 238 + } 239 + 240 + static int map_irq_registers(struct pci_dev *dev, struct spa *spa) 241 + { 242 + return pnv_ocxl_map_xsl_regs(dev, &spa->reg_dsisr, &spa->reg_dar, 243 + &spa->reg_tfc, &spa->reg_pe_handle); 244 + } 245 + 246 + static int setup_xsl_irq(struct pci_dev *dev, struct link *link) 247 + { 248 + struct spa *spa = link->spa; 249 + int rc; 250 + int hwirq; 251 + 252 + rc = pnv_ocxl_get_xsl_irq(dev, &hwirq); 253 + if (rc) 254 + return rc; 255 + 256 + rc = map_irq_registers(dev, spa); 257 + if (rc) 258 + return rc; 259 + 
260 + spa->irq_name = kasprintf(GFP_KERNEL, "ocxl-xsl-%x-%x-%x", 261 + link->domain, link->bus, link->dev); 262 + if (!spa->irq_name) { 263 + unmap_irq_registers(spa); 264 + dev_err(&dev->dev, "Can't allocate name for xsl interrupt\n"); 265 + return -ENOMEM; 266 + } 267 + /* 268 + * At some point, we'll need to look into allowing a higher 269 + * number of interrupts. Could we have an IRQ domain per link? 270 + */ 271 + spa->virq = irq_create_mapping(NULL, hwirq); 272 + if (!spa->virq) { 273 + kfree(spa->irq_name); 274 + unmap_irq_registers(spa); 275 + dev_err(&dev->dev, 276 + "irq_create_mapping failed for translation interrupt\n"); 277 + return -EINVAL; 278 + } 279 + 280 + dev_dbg(&dev->dev, "hwirq %d mapped to virq %d\n", hwirq, spa->virq); 281 + 282 + rc = request_irq(spa->virq, xsl_fault_handler, 0, spa->irq_name, 283 + link); 284 + if (rc) { 285 + irq_dispose_mapping(spa->virq); 286 + kfree(spa->irq_name); 287 + unmap_irq_registers(spa); 288 + dev_err(&dev->dev, 289 + "request_irq failed for translation interrupt: %d\n", 290 + rc); 291 + return -EINVAL; 292 + } 293 + return 0; 294 + } 295 + 296 + static void release_xsl_irq(struct link *link) 297 + { 298 + struct spa *spa = link->spa; 299 + 300 + if (spa->virq) { 301 + free_irq(spa->virq, link); 302 + irq_dispose_mapping(spa->virq); 303 + } 304 + kfree(spa->irq_name); 305 + unmap_irq_registers(spa); 306 + } 307 + 308 + static int alloc_spa(struct pci_dev *dev, struct link *link) 309 + { 310 + struct spa *spa; 311 + 312 + spa = kzalloc(sizeof(struct spa), GFP_KERNEL); 313 + if (!spa) 314 + return -ENOMEM; 315 + 316 + mutex_init(&spa->spa_lock); 317 + INIT_RADIX_TREE(&spa->pe_tree, GFP_KERNEL); 318 + INIT_WORK(&spa->xsl_fault.fault_work, xsl_fault_handler_bh); 319 + 320 + spa->spa_order = SPA_SPA_SIZE_LOG - PAGE_SHIFT; 321 + spa->spa_mem = (struct ocxl_process_element *) 322 + __get_free_pages(GFP_KERNEL | __GFP_ZERO, spa->spa_order); 323 + if (!spa->spa_mem) { 324 + dev_err(&dev->dev, "Can't allocate Shared 
Process Area\n"); 325 + kfree(spa); 326 + return -ENOMEM; 327 + } 328 + pr_debug("Allocated SPA for %x:%x:%x at %p\n", link->domain, link->bus, 329 + link->dev, spa->spa_mem); 330 + 331 + link->spa = spa; 332 + return 0; 333 + } 334 + 335 + static void free_spa(struct link *link) 336 + { 337 + struct spa *spa = link->spa; 338 + 339 + pr_debug("Freeing SPA for %x:%x:%x\n", link->domain, link->bus, 340 + link->dev); 341 + 342 + if (spa && spa->spa_mem) { 343 + free_pages((unsigned long) spa->spa_mem, spa->spa_order); 344 + kfree(spa); 345 + link->spa = NULL; 346 + } 347 + } 348 + 349 + static int alloc_link(struct pci_dev *dev, int PE_mask, struct link **out_link) 350 + { 351 + struct link *link; 352 + int rc; 353 + 354 + link = kzalloc(sizeof(struct link), GFP_KERNEL); 355 + if (!link) 356 + return -ENOMEM; 357 + 358 + kref_init(&link->ref); 359 + link->domain = pci_domain_nr(dev->bus); 360 + link->bus = dev->bus->number; 361 + link->dev = PCI_SLOT(dev->devfn); 362 + atomic_set(&link->irq_available, MAX_IRQ_PER_LINK); 363 + 364 + rc = alloc_spa(dev, link); 365 + if (rc) 366 + goto err_free; 367 + 368 + rc = setup_xsl_irq(dev, link); 369 + if (rc) 370 + goto err_spa; 371 + 372 + /* platform specific hook */ 373 + rc = pnv_ocxl_spa_setup(dev, link->spa->spa_mem, PE_mask, 374 + &link->platform_data); 375 + if (rc) 376 + goto err_xsl_irq; 377 + 378 + *out_link = link; 379 + return 0; 380 + 381 + err_xsl_irq: 382 + release_xsl_irq(link); 383 + err_spa: 384 + free_spa(link); 385 + err_free: 386 + kfree(link); 387 + return rc; 388 + } 389 + 390 + static void free_link(struct link *link) 391 + { 392 + release_xsl_irq(link); 393 + free_spa(link); 394 + kfree(link); 395 + } 396 + 397 + int ocxl_link_setup(struct pci_dev *dev, int PE_mask, void **link_handle) 398 + { 399 + int rc = 0; 400 + struct link *link; 401 + 402 + mutex_lock(&links_list_lock); 403 + list_for_each_entry(link, &links_list, list) { 404 + /* The functions of a device all share the same link */ 405 + if 
(link->domain == pci_domain_nr(dev->bus) && 406 + link->bus == dev->bus->number && 407 + link->dev == PCI_SLOT(dev->devfn)) { 408 + kref_get(&link->ref); 409 + *link_handle = link; 410 + goto unlock; 411 + } 412 + } 413 + rc = alloc_link(dev, PE_mask, &link); 414 + if (rc) 415 + goto unlock; 416 + 417 + list_add(&link->list, &links_list); 418 + *link_handle = link; 419 + unlock: 420 + mutex_unlock(&links_list_lock); 421 + return rc; 422 + } 423 + 424 + static void release_xsl(struct kref *ref) 425 + { 426 + struct link *link = container_of(ref, struct link, ref); 427 + 428 + list_del(&link->list); 429 + /* call platform code before releasing data */ 430 + pnv_ocxl_spa_release(link->platform_data); 431 + free_link(link); 432 + } 433 + 434 + void ocxl_link_release(struct pci_dev *dev, void *link_handle) 435 + { 436 + struct link *link = (struct link *) link_handle; 437 + 438 + mutex_lock(&links_list_lock); 439 + kref_put(&link->ref, release_xsl); 440 + mutex_unlock(&links_list_lock); 441 + } 442 + 443 + static u64 calculate_cfg_state(bool kernel) 444 + { 445 + u64 state; 446 + 447 + state = SPA_CFG_DR; 448 + if (mfspr(SPRN_LPCR) & LPCR_TC) 449 + state |= SPA_CFG_TC; 450 + if (radix_enabled()) 451 + state |= SPA_CFG_XLAT_ror; 452 + else 453 + state |= SPA_CFG_XLAT_hpt; 454 + state |= SPA_CFG_HV; 455 + if (kernel) { 456 + if (mfmsr() & MSR_SF) 457 + state |= SPA_CFG_SF; 458 + } else { 459 + state |= SPA_CFG_PR; 460 + if (!test_tsk_thread_flag(current, TIF_32BIT)) 461 + state |= SPA_CFG_SF; 462 + } 463 + return state; 464 + } 465 + 466 + int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, 467 + u64 amr, struct mm_struct *mm, 468 + void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr), 469 + void *xsl_err_data) 470 + { 471 + struct link *link = (struct link *) link_handle; 472 + struct spa *spa = link->spa; 473 + struct ocxl_process_element *pe; 474 + int pe_handle, rc = 0; 475 + struct pe_data *pe_data; 476 + 477 + BUILD_BUG_ON(sizeof(struct 
ocxl_process_element) != 128); 478 + if (pasid > SPA_PASID_MAX) 479 + return -EINVAL; 480 + 481 + mutex_lock(&spa->spa_lock); 482 + pe_handle = pasid & SPA_PE_MASK; 483 + pe = spa->spa_mem + pe_handle; 484 + 485 + if (pe->software_state) { 486 + rc = -EBUSY; 487 + goto unlock; 488 + } 489 + 490 + pe_data = kmalloc(sizeof(*pe_data), GFP_KERNEL); 491 + if (!pe_data) { 492 + rc = -ENOMEM; 493 + goto unlock; 494 + } 495 + 496 + pe_data->mm = mm; 497 + pe_data->xsl_err_cb = xsl_err_cb; 498 + pe_data->xsl_err_data = xsl_err_data; 499 + 500 + memset(pe, 0, sizeof(struct ocxl_process_element)); 501 + pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0)); 502 + pe->lpid = cpu_to_be32(mfspr(SPRN_LPID)); 503 + pe->pid = cpu_to_be32(pidr); 504 + pe->tid = cpu_to_be32(tidr); 505 + pe->amr = cpu_to_be64(amr); 506 + pe->software_state = cpu_to_be32(SPA_PE_VALID); 507 + 508 + mm_context_add_copro(mm); 509 + /* 510 + * Barrier is to make sure PE is visible in the SPA before it 511 + * is used by the device. It also helps with the global TLBI 512 + * invalidation 513 + */ 514 + mb(); 515 + radix_tree_insert(&spa->pe_tree, pe_handle, pe_data); 516 + 517 + /* 518 + * The mm must stay valid for as long as the device uses it. We 519 + * lower the count when the context is removed from the SPA. 520 + * 521 + * We grab mm_count (and not mm_users), as we don't want to 522 + * end up in a circular dependency if a process mmaps its 523 + * mmio, therefore incrementing the file ref count when 524 + * calling mmap(), and forgets to unmap before exiting. In 525 + * that scenario, when the kernel handles the death of the 526 + * process, the file is not cleaned because unmap was not 527 + * called, and the mm wouldn't be freed because we would still 528 + * have a reference on mm_users. Incrementing mm_count solves 529 + * the problem. 
530 + */ 531 + mmgrab(mm); 532 + unlock: 533 + mutex_unlock(&spa->spa_lock); 534 + return rc; 535 + } 536 + 537 + int ocxl_link_remove_pe(void *link_handle, int pasid) 538 + { 539 + struct link *link = (struct link *) link_handle; 540 + struct spa *spa = link->spa; 541 + struct ocxl_process_element *pe; 542 + struct pe_data *pe_data; 543 + int pe_handle, rc; 544 + 545 + if (pasid > SPA_PASID_MAX) 546 + return -EINVAL; 547 + 548 + /* 549 + * About synchronization with our memory fault handler: 550 + * 551 + * Before removing the PE, the driver is supposed to have 552 + * notified the AFU, which should have cleaned up and make 553 + * sure the PASID is no longer in use, including pending 554 + * interrupts. However, there's no way to be sure... 555 + * 556 + * We clear the PE and remove the context from our radix 557 + * tree. From that point on, any new interrupt for that 558 + * context will fail silently, which is ok. As mentioned 559 + * above, that's not expected, but it could happen if the 560 + * driver or AFU didn't do the right thing. 561 + * 562 + * There could still be a bottom half running, but we don't 563 + * need to wait/flush, as it is managing a reference count on 564 + * the mm it reads from the radix tree. 565 + */ 566 + pe_handle = pasid & SPA_PE_MASK; 567 + pe = spa->spa_mem + pe_handle; 568 + 569 + mutex_lock(&spa->spa_lock); 570 + 571 + if (!(be32_to_cpu(pe->software_state) & SPA_PE_VALID)) { 572 + rc = -EINVAL; 573 + goto unlock; 574 + } 575 + 576 + memset(pe, 0, sizeof(struct ocxl_process_element)); 577 + /* 578 + * The barrier makes sure the PE is removed from the SPA 579 + * before we clear the NPU context cache below, so that the 580 + * old PE cannot be reloaded erroneously. 581 + */ 582 + mb(); 583 + 584 + /* 585 + * hook to platform code 586 + * On powerpc, the entry needs to be cleared from the context 587 + * cache of the NPU. 
588 + */ 589 + rc = pnv_ocxl_spa_remove_pe(link->platform_data, pe_handle); 590 + WARN_ON(rc); 591 + 592 + pe_data = radix_tree_delete(&spa->pe_tree, pe_handle); 593 + if (!pe_data) { 594 + WARN(1, "Couldn't find pe data when removing PE\n"); 595 + } else { 596 + mm_context_remove_copro(pe_data->mm); 597 + mmdrop(pe_data->mm); 598 + kfree_rcu(pe_data, rcu); 599 + } 600 + unlock: 601 + mutex_unlock(&spa->spa_lock); 602 + return rc; 603 + }
+33
drivers/misc/ocxl/main.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + // Copyright 2017 IBM Corp. 3 + #include <linux/module.h> 4 + #include <linux/pci.h> 5 + #include "ocxl_internal.h" 6 + 7 + static int __init init_ocxl(void) 8 + { 9 + int rc = 0; 10 + 11 + rc = ocxl_file_init(); 12 + if (rc) 13 + return rc; 14 + 15 + rc = pci_register_driver(&ocxl_pci_driver); 16 + if (rc) { 17 + ocxl_file_exit(); 18 + return rc; 19 + } 20 + return 0; 21 + } 22 + 23 + static void exit_ocxl(void) 24 + { 25 + pci_unregister_driver(&ocxl_pci_driver); 26 + ocxl_file_exit(); 27 + } 28 + 29 + module_init(init_ocxl); 30 + module_exit(exit_ocxl); 31 + 32 + MODULE_DESCRIPTION("Open Coherent Accelerator"); 33 + MODULE_LICENSE("GPL");
+193
drivers/misc/ocxl/ocxl_internal.h
// SPDX-License-Identifier: GPL-2.0+
// Copyright 2017 IBM Corp.
#ifndef _OCXL_INTERNAL_H_
#define _OCXL_INTERNAL_H_

#include <linux/pci.h>
#include <linux/cdev.h>
#include <linux/list.h>

#define OCXL_AFU_NAME_SZ      (24+1) /* add 1 for NULL termination */
#define MAX_IRQ_PER_LINK	2000
#define MAX_IRQ_PER_CONTEXT	MAX_IRQ_PER_LINK

#define to_ocxl_function(d) container_of(d, struct ocxl_fn, dev)
#define to_ocxl_afu(d) container_of(d, struct ocxl_afu, dev)

extern struct pci_driver ocxl_pci_driver;

/*
 * The following 2 structures are a fairly generic way of representing
 * the configuration data for a function and AFU, as read from the
 * configuration space.
 */
struct ocxl_afu_config {
	u8 idx;				/* AFU index within the function */
	int dvsec_afu_control_pos;	/* offset of the AFU control DVSEC in config space */
	char name[OCXL_AFU_NAME_SZ];	/* NULL-terminated AFU name */
	u8 version_major;
	u8 version_minor;
	u8 afuc_type;
	u8 afum_type;
	u8 profile;
	u8 global_mmio_bar;		/* BAR number holding the global MMIO area */
	u64 global_mmio_offset;
	u32 global_mmio_size;
	u8 pp_mmio_bar;			/* BAR number holding the per-process MMIO area */
	u64 pp_mmio_offset;
	u32 pp_mmio_stride;		/* per-process MMIO size for one context */
	u8 log_mem_size;
	u8 pasid_supported_log;		/* log2 of the number of PASIDs the AFU supports */
	u16 actag_supported;
};

struct ocxl_fn_config {
	int dvsec_tl_pos;		/* offset of the transaction layer DVSEC */
	int dvsec_function_pos;		/* offset of the function DVSEC */
	int dvsec_afu_info_pos;		/* offset of the AFU information DVSEC */
	s8 max_pasid_log;
	s8 max_afu_index;		/* highest AFU index, or negative if no AFU */
};

/* One opencapi PCI function, with its AFUs and PASID/actag ranges */
struct ocxl_fn {
	struct device dev;
	int bar_used[3];
	struct ocxl_fn_config config;
	struct list_head afu_list;	/* AFUs of this function */
	int pasid_base;
	int actag_base;
	int actag_enabled;
	int actag_supported;
	struct list_head pasid_list;
	struct list_head actag_list;
	void *link;			/* opaque handle from ocxl_link_setup() */
};

/* One AFU, exposed to userspace as a char device */
struct ocxl_afu {
	struct ocxl_fn *fn;		/* owning function */
	struct list_head list;		/* entry in the function's afu_list */
	struct device dev;
	struct cdev cdev;		/* char device backing /dev/ocxl/<name> */
	struct ocxl_afu_config config;
	int pasid_base;
	int pasid_count;		/* opened contexts */
	int pasid_max;			/* maximum number of contexts */
	int actag_base;
	int actag_enabled;
	struct mutex contexts_lock;	/* protects contexts_idr */
	struct idr contexts_idr;
	struct mutex afu_control_lock;	/* serializes AFU control DVSEC accesses */
	u64 global_mmio_start;
	u64 irq_base_offset;
	void __iomem *global_mmio_ptr;
	u64 pp_mmio_start;
	struct bin_attribute attr_global_mmio; /* sysfs access to the global MMIO area */
};

enum ocxl_context_status {
	CLOSED,
	OPENED,
	ATTACHED,
};

// Contains metadata about a translation fault
struct ocxl_xsl_error {
	u64 addr; // The address that triggered the fault
	u64 dsisr; // the value of the dsisr register
	u64 count; // The number of times this fault has been triggered
};

/* Userspace context on an AFU, one per open of the char device */
struct ocxl_context {
	struct ocxl_afu *afu;
	int pasid;			/* PASID assigned to this context */
	struct mutex status_mutex;	/* protects status */
	enum ocxl_context_status status;
	struct address_space *mapping;	/* mmap mapping, cleared on release */
	struct mutex mapping_lock;
	wait_queue_head_t events_wq;	/* readers/pollers wait here for events */
	struct mutex xsl_error_lock;	/* protects xsl_error */
	struct ocxl_xsl_error xsl_error;
	struct mutex irq_lock;		/* protects irq_idr */
	struct idr irq_idr;
};

/*
 * Process element as laid out in the Shared Process Area. Fields are
 * big-endian as seen by the device; the layout must be exactly 128
 * bytes (checked with BUILD_BUG_ON in link.c).
 */
struct ocxl_process_element {
	__be64 config_state;
	__be32 reserved1[11];
	__be32 lpid;
	__be32 tid;
	__be32 pid;
	__be32 reserved2[10];
	__be64 amr;
	__be32 reserved3[3];
	__be32 software_state;
};


extern struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu);
extern void ocxl_afu_put(struct ocxl_afu *afu);

extern int ocxl_create_cdev(struct ocxl_afu *afu);
extern void ocxl_destroy_cdev(struct ocxl_afu *afu);
extern int ocxl_register_afu(struct ocxl_afu *afu);
extern void ocxl_unregister_afu(struct ocxl_afu *afu);

extern int ocxl_file_init(void);
extern void ocxl_file_exit(void);

extern int ocxl_config_read_function(struct pci_dev *dev,
				struct ocxl_fn_config *fn);

extern int ocxl_config_check_afu_index(struct pci_dev *dev,
				struct ocxl_fn_config *fn, int afu_idx);
143 + extern int ocxl_config_read_afu(struct pci_dev *dev, 144 + struct ocxl_fn_config *fn, 145 + struct ocxl_afu_config *afu, 146 + u8 afu_idx); 147 + extern int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count); 148 + extern void ocxl_config_set_afu_pasid(struct pci_dev *dev, 149 + int afu_control, 150 + int pasid_base, u32 pasid_count_log); 151 + extern int ocxl_config_get_actag_info(struct pci_dev *dev, 152 + u16 *base, u16 *enabled, u16 *supported); 153 + extern void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec, 154 + u32 tag_first, u32 tag_count); 155 + extern void ocxl_config_set_afu_actag(struct pci_dev *dev, int afu_control, 156 + int actag_base, int actag_count); 157 + extern void ocxl_config_set_afu_state(struct pci_dev *dev, int afu_control, 158 + int enable); 159 + extern int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec); 160 + extern int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, 161 + int pasid); 162 + 163 + extern int ocxl_link_setup(struct pci_dev *dev, int PE_mask, 164 + void **link_handle); 165 + extern void ocxl_link_release(struct pci_dev *dev, void *link_handle); 166 + extern int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, 167 + u64 amr, struct mm_struct *mm, 168 + void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr), 169 + void *xsl_err_data); 170 + extern int ocxl_link_remove_pe(void *link_handle, int pasid); 171 + extern int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, 172 + u64 *addr); 173 + extern void ocxl_link_free_irq(void *link_handle, int hw_irq); 174 + 175 + extern int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size); 176 + extern void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size); 177 + extern int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size); 178 + extern void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size); 179 + 180 + extern struct ocxl_context *ocxl_context_alloc(void); 181 + extern int ocxl_context_init(struct 
ocxl_context *ctx, struct ocxl_afu *afu, 182 + struct address_space *mapping); 183 + extern int ocxl_context_attach(struct ocxl_context *ctx, u64 amr); 184 + extern int ocxl_context_mmap(struct ocxl_context *ctx, 185 + struct vm_area_struct *vma); 186 + extern int ocxl_context_detach(struct ocxl_context *ctx); 187 + extern void ocxl_context_detach_all(struct ocxl_afu *afu); 188 + extern void ocxl_context_free(struct ocxl_context *ctx); 189 + 190 + extern int ocxl_sysfs_add_afu(struct ocxl_afu *afu); 191 + extern void ocxl_sysfs_remove_afu(struct ocxl_afu *afu); 192 + 193 + #endif /* _OCXL_INTERNAL_H_ */
+107
drivers/misc/ocxl/pasid.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + // Copyright 2017 IBM Corp. 3 + #include "ocxl_internal.h" 4 + 5 + 6 + struct id_range { 7 + struct list_head list; 8 + u32 start; 9 + u32 end; 10 + }; 11 + 12 + #ifdef DEBUG 13 + static void dump_list(struct list_head *head, char *type_str) 14 + { 15 + struct id_range *cur; 16 + 17 + pr_debug("%s ranges allocated:\n", type_str); 18 + list_for_each_entry(cur, head, list) { 19 + pr_debug("Range %d->%d\n", cur->start, cur->end); 20 + } 21 + } 22 + #endif 23 + 24 + static int range_alloc(struct list_head *head, u32 size, int max_id, 25 + char *type_str) 26 + { 27 + struct list_head *pos; 28 + struct id_range *cur, *new; 29 + int rc, last_end; 30 + 31 + new = kmalloc(sizeof(struct id_range), GFP_KERNEL); 32 + if (!new) 33 + return -ENOMEM; 34 + 35 + pos = head; 36 + last_end = -1; 37 + list_for_each_entry(cur, head, list) { 38 + if ((cur->start - last_end) > size) 39 + break; 40 + last_end = cur->end; 41 + pos = &cur->list; 42 + } 43 + 44 + new->start = last_end + 1; 45 + new->end = new->start + size - 1; 46 + 47 + if (new->end > max_id) { 48 + kfree(new); 49 + rc = -ENOSPC; 50 + } else { 51 + list_add(&new->list, pos); 52 + rc = new->start; 53 + } 54 + 55 + #ifdef DEBUG 56 + dump_list(head, type_str); 57 + #endif 58 + return rc; 59 + } 60 + 61 + static void range_free(struct list_head *head, u32 start, u32 size, 62 + char *type_str) 63 + { 64 + bool found = false; 65 + struct id_range *cur, *tmp; 66 + 67 + list_for_each_entry_safe(cur, tmp, head, list) { 68 + if (cur->start == start && cur->end == (start + size - 1)) { 69 + found = true; 70 + list_del(&cur->list); 71 + kfree(cur); 72 + break; 73 + } 74 + } 75 + WARN_ON(!found); 76 + #ifdef DEBUG 77 + dump_list(head, type_str); 78 + #endif 79 + } 80 + 81 + int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size) 82 + { 83 + int max_pasid; 84 + 85 + if (fn->config.max_pasid_log < 0) 86 + return -ENOSPC; 87 + max_pasid = 1 << fn->config.max_pasid_log; 88 + return 
range_alloc(&fn->pasid_list, size, max_pasid, "afu pasid"); 89 + } 90 + 91 + void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size) 92 + { 93 + return range_free(&fn->pasid_list, start, size, "afu pasid"); 94 + } 95 + 96 + int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size) 97 + { 98 + int max_actag; 99 + 100 + max_actag = fn->actag_enabled; 101 + return range_alloc(&fn->actag_list, size, max_actag, "afu actag"); 102 + } 103 + 104 + void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size) 105 + { 106 + return range_free(&fn->actag_list, start, size, "afu actag"); 107 + }
+585
drivers/misc/ocxl/pci.c
// SPDX-License-Identifier: GPL-2.0+
// Copyright 2017 IBM Corp.
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/idr.h>
#include <asm/pnv-ocxl.h>
#include "ocxl_internal.h"

/*
 * Any opencapi device which wants to use this 'generic' driver should
 * use the 0x062B device ID. Vendors should define the subsystem
 * vendor/device ID to help differentiate devices.
 */
static const struct pci_device_id ocxl_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x062B), },
	{ }
};
MODULE_DEVICE_TABLE(pci, ocxl_pci_tbl);


/* Take a reference on the function device; returns NULL on failure */
static struct ocxl_fn *ocxl_fn_get(struct ocxl_fn *fn)
{
	return (get_device(&fn->dev) == NULL) ? NULL : fn;
}

static void ocxl_fn_put(struct ocxl_fn *fn)
{
	put_device(&fn->dev);
}

/* Take a reference on the AFU device; returns NULL on failure */
struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu)
{
	return (get_device(&afu->dev) == NULL) ? NULL : afu;
}

void ocxl_afu_put(struct ocxl_afu *afu)
{
	put_device(&afu->dev);
}

/*
 * Allocate and minimally initialize an AFU structure. Takes a
 * reference on the owning function, dropped in free_afu().
 */
static struct ocxl_afu *alloc_afu(struct ocxl_fn *fn)
{
	struct ocxl_afu *afu;

	afu = kzalloc(sizeof(struct ocxl_afu), GFP_KERNEL);
	if (!afu)
		return NULL;

	mutex_init(&afu->contexts_lock);
	mutex_init(&afu->afu_control_lock);
	idr_init(&afu->contexts_idr);
	afu->fn = fn;
	ocxl_fn_get(fn);
	return afu;
}

static void free_afu(struct ocxl_afu *afu)
{
	idr_destroy(&afu->contexts_idr);
	ocxl_fn_put(afu->fn);
	kfree(afu);
}

/* device release callback: called when the last reference is dropped */
static void free_afu_dev(struct device *dev)
{
	struct ocxl_afu *afu = to_ocxl_afu(dev);

	ocxl_unregister_afu(afu);
	free_afu(afu);
}

/*
 * Set up the AFU's struct device: parent, release callback and name
 * (<afu name>.<pci location>.<afu index>).
 */
static int set_afu_device(struct ocxl_afu *afu, const char *location)
{
	struct ocxl_fn *fn = afu->fn;
	int rc;

	afu->dev.parent = &fn->dev;
	afu->dev.release = free_afu_dev;
	rc = dev_set_name(&afu->dev, "%s.%s.%hhu", afu->config.name, location,
		afu->config.idx);
	return rc;
}

/*
 * Carve out this AFU's share of the function's actags and program it
 * in the AFU control DVSEC.
 */
static int assign_afu_actag(struct ocxl_afu *afu, struct pci_dev *dev)
{
	struct ocxl_fn *fn = afu->fn;
	int actag_count, actag_offset;

	/*
	 * if there were not enough actags for the function, each afu
	 * reduces its count as well
	 */
	actag_count = afu->config.actag_supported *
		fn->actag_enabled / fn->actag_supported;
	actag_offset = ocxl_actag_afu_alloc(fn, actag_count);
	if (actag_offset < 0) {
		dev_err(&afu->dev, "Can't allocate %d actags for AFU: %d\n",
			actag_count, actag_offset);
		return actag_offset;
	}
	afu->actag_base = fn->actag_base + actag_offset;
	afu->actag_enabled = actag_count;

	ocxl_config_set_afu_actag(dev, afu->config.dvsec_afu_control_pos,
				afu->actag_base, afu->actag_enabled);
	dev_dbg(&afu->dev, "actag base=%d enabled=%d\n",
		afu->actag_base, afu->actag_enabled);
	return 0;
}

static void reclaim_afu_actag(struct ocxl_afu *afu)
{
	struct ocxl_fn *fn = afu->fn;
	int start_offset, size;

	/* the allocator works in offsets relative to the function base */
	start_offset = afu->actag_base - fn->actag_base;
	size = afu->actag_enabled;
	ocxl_actag_afu_free(afu->fn, start_offset, size);
}

/*
 * Carve out this AFU's PASID range from the function's and program it
 * in the AFU control DVSEC.
 */
static int assign_afu_pasid(struct ocxl_afu *afu, struct pci_dev *dev)
{
	struct ocxl_fn *fn = afu->fn;
	int pasid_count, pasid_offset;

	/*
	 * We only support the case where the function configuration
	 * requested enough PASIDs to cover all AFUs.
	 */
	pasid_count = 1 << afu->config.pasid_supported_log;
	pasid_offset = ocxl_pasid_afu_alloc(fn, pasid_count);
	if (pasid_offset < 0) {
		dev_err(&afu->dev, "Can't allocate %d PASIDs for AFU: %d\n",
			pasid_count, pasid_offset);
		return pasid_offset;
	}
	afu->pasid_base = fn->pasid_base + pasid_offset;
	afu->pasid_count = 0;
	afu->pasid_max = pasid_count;

	ocxl_config_set_afu_pasid(dev, afu->config.dvsec_afu_control_pos,
				afu->pasid_base,
				afu->config.pasid_supported_log);
	dev_dbg(&afu->dev, "PASID base=%d, enabled=%d\n",
		afu->pasid_base, pasid_count);
	return 0;
}

static void reclaim_afu_pasid(struct ocxl_afu *afu)
{
	struct ocxl_fn *fn = afu->fn;
	int start_offset, size;

	/* the allocator works in offsets relative to the function base */
	start_offset = afu->pasid_base - fn->pasid_base;
	size = 1 << afu->config.pasid_supported_log;
	ocxl_pasid_afu_free(afu->fn, start_offset, size);
}

/*
 * Request a PCI BAR region, refcounted so several AFUs can share the
 * same BAR. Only the 64-bit BARs 0, 2 and 4 are valid.
 */
static int reserve_fn_bar(struct ocxl_fn *fn, int bar)
{
	struct pci_dev *dev = to_pci_dev(fn->dev.parent);
	int rc, idx;

	if (bar != 0 && bar != 2 && bar != 4)
		return -EINVAL;

	idx = bar >> 1;
	if (fn->bar_used[idx]++ == 0) {
		rc = pci_request_region(dev, bar, "ocxl");
		if (rc)
			return rc;
	}
	return 0;
}

/* Drop a reference on a BAR region; releases it on the last user */
static void release_fn_bar(struct ocxl_fn *fn, int bar)
{
	struct pci_dev *dev = to_pci_dev(fn->dev.parent);
	int idx;

	if (bar != 0 && bar != 2 && bar != 4)
		return;

	idx = bar >> 1;
	if (--fn->bar_used[idx] == 0)
		pci_release_region(dev, bar);
	WARN_ON(fn->bar_used[idx] < 0);
}

/*
 * Compute the physical addresses of the AFU's mmio areas, and map the
 * global area in the kernel. The per-process area is only mapped per
 * context, at mmap time.
 */
static int map_mmio_areas(struct ocxl_afu *afu, struct pci_dev *dev)
{
	int rc;

	rc = reserve_fn_bar(afu->fn, afu->config.global_mmio_bar);
	if (rc)
		return rc;

	rc = reserve_fn_bar(afu->fn, afu->config.pp_mmio_bar);
	if (rc) {
		release_fn_bar(afu->fn, afu->config.global_mmio_bar);
		return rc;
	}

	afu->global_mmio_start =
		pci_resource_start(dev, afu->config.global_mmio_bar) +
		afu->config.global_mmio_offset;
	afu->pp_mmio_start =
		pci_resource_start(dev, afu->config.pp_mmio_bar) +
		afu->config.pp_mmio_offset;

	afu->global_mmio_ptr = ioremap(afu->global_mmio_start,
				afu->config.global_mmio_size);
	if (!afu->global_mmio_ptr) {
		release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
		release_fn_bar(afu->fn, afu->config.global_mmio_bar);
		dev_err(&dev->dev, "Error mapping global mmio area\n");
		return -ENOMEM;
	}

	/*
	 * Leave an empty page between the per-process mmio area and
	 * the AFU interrupt mappings
	 */
	afu->irq_base_offset = afu->config.pp_mmio_stride + PAGE_SIZE;
	return 0;
}

static void unmap_mmio_areas(struct ocxl_afu *afu)
{
	if (afu->global_mmio_ptr) {
		iounmap(afu->global_mmio_ptr);
		afu->global_mmio_ptr = NULL;
	}
	afu->global_mmio_start = 0;
	afu->pp_mmio_start = 0;
	release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
	release_fn_bar(afu->fn, afu->config.global_mmio_bar);
}

/*
 * Read the AFU config space, assign its actags/PASIDs and map its
 * mmio areas. Undone by deconfigure_afu().
 */
static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev)
{
	int rc;

	rc = ocxl_config_read_afu(dev, &afu->fn->config, &afu->config, afu_idx);
	if (rc)
		return rc;

	rc = set_afu_device(afu, dev_name(&dev->dev));
	if (rc)
		return rc;

	rc = assign_afu_actag(afu, dev);
	if (rc)
		return rc;

	rc = assign_afu_pasid(afu, dev);
	if (rc) {
		reclaim_afu_actag(afu);
		return rc;
	}

	rc = map_mmio_areas(afu, dev);
	if (rc) {
		reclaim_afu_pasid(afu);
		reclaim_afu_actag(afu);
		return rc;
	}
	return 0;
}

/* Reverse of configure_afu() */
static void deconfigure_afu(struct ocxl_afu *afu)
{
	unmap_mmio_areas(afu);
	reclaim_afu_pasid(afu);
	reclaim_afu_actag(afu);
}

/* Enable the AFU in its control DVSEC and expose it to userspace */
static int activate_afu(struct pci_dev *dev, struct ocxl_afu *afu)
{
	int rc;

	ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 1);
	/*
	 * Char device creation is the last step, as processes can
	 * call our driver immediately, so all our inits must be finished.
	 */
	rc = ocxl_create_cdev(afu);
	if (rc)
		return rc;
	return 0;
}

static void deactivate_afu(struct ocxl_afu *afu)
{
	struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent);

	/* remove the cdev first so no new process can open the AFU */
	ocxl_destroy_cdev(afu);
	ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 0);
}

/*
 * Bring up one AFU of the function: configure, register, add sysfs
 * files, activate, and link it on the function's AFU list.
 */
static int init_afu(struct pci_dev *dev, struct ocxl_fn *fn, u8 afu_idx)
{
	int rc;
	struct ocxl_afu *afu;

	afu = alloc_afu(fn);
	if (!afu)
		return -ENOMEM;

	rc = configure_afu(afu, afu_idx, dev);
	if (rc) {
		free_afu(afu);
		return rc;
	}

	rc = ocxl_register_afu(afu);
	if (rc)
		goto err;

	rc = ocxl_sysfs_add_afu(afu);
	if (rc)
		goto err;

	rc = activate_afu(dev, afu);
	if (rc)
		goto err_sys;

	list_add_tail(&afu->list, &fn->afu_list);
	return 0;

err_sys:
	ocxl_sysfs_remove_afu(afu);
err:
	/*
	 * device_unregister() drops the last reference; free_afu_dev()
	 * then frees the AFU, so no explicit free_afu() here
	 */
	deconfigure_afu(afu);
	device_unregister(&afu->dev);
	return rc;
}

/* Reverse of init_afu(); the AFU memory is freed by free_afu_dev() */
static void remove_afu(struct ocxl_afu *afu)
{
	list_del(&afu->list);
	ocxl_context_detach_all(afu);
	deactivate_afu(afu);
	ocxl_sysfs_remove_afu(afu);
	deconfigure_afu(afu);
	device_unregister(&afu->dev);
}

static struct ocxl_fn *alloc_function(struct pci_dev *dev)
{
	struct ocxl_fn *fn;

	fn = kzalloc(sizeof(struct ocxl_fn), GFP_KERNEL);
	if (!fn)
		return NULL;

	INIT_LIST_HEAD(&fn->afu_list);
	INIT_LIST_HEAD(&fn->pasid_list);
	INIT_LIST_HEAD(&fn->actag_list);
	return fn;
}

static void free_function(struct ocxl_fn *fn)
{
	/* all AFUs and their PASID ranges must be gone by now */
	WARN_ON(!list_empty(&fn->afu_list));
	WARN_ON(!list_empty(&fn->pasid_list));
	kfree(fn);
}

/* device release callback for the function device */
static void free_function_dev(struct device *dev)
{
	struct ocxl_fn *fn = to_ocxl_function(dev);

	free_function(fn);
}

static int set_function_device(struct ocxl_fn *fn, struct pci_dev *dev)
{
	int rc;

	fn->dev.parent = &dev->dev;
	fn->dev.release = free_function_dev;
	rc = dev_set_name(&fn->dev, "ocxlfn.%s", dev_name(&dev->dev));
	if (rc)
		return rc;
	pci_set_drvdata(dev, fn);
	return 0;
}

/*
 * Read the actags granted to the function by the platform and enable
 * them in the function DVSEC.
 */
static int assign_function_actag(struct ocxl_fn *fn)
{
	struct pci_dev *dev = to_pci_dev(fn->dev.parent);
	u16 base, enabled, supported;
	int rc;

	rc = ocxl_config_get_actag_info(dev, &base, &enabled, &supported);
	if (rc)
		return rc;

	fn->actag_base = base;
	fn->actag_enabled = enabled;
	fn->actag_supported = supported;

	ocxl_config_set_actag(dev, fn->config.dvsec_function_pos,
			fn->actag_base, fn->actag_enabled);
	dev_dbg(&fn->dev, "actag range starting at %d, enabled %d\n",
		fn->actag_base, fn->actag_enabled);
	return 0;
}

/*
 * Check the function's PASID needs against what the platform provides
 * and record the function's PASID base.
 */
static int set_function_pasid(struct ocxl_fn *fn)
{
	struct pci_dev *dev = to_pci_dev(fn->dev.parent);
	int rc, desired_count, max_count;

	/* A function may not require any PASID */
	if (fn->config.max_pasid_log < 0)
		return 0;

	rc = ocxl_config_get_pasid_info(dev, &max_count);
	if (rc)
		return rc;

	desired_count = 1 << fn->config.max_pasid_log;

	if (desired_count > max_count) {
		dev_err(&fn->dev,
			"Function requires more PASIDs than is available (%d vs. %d)\n",
			desired_count, max_count);
		return -ENOSPC;
	}

	fn->pasid_base = 0;
	return 0;
}

/*
 * Enable the PCI function, read its opencapi config, set up its
 * device, actags, PASIDs, platform link and transaction layer.
 * Undone by deconfigure_function().
 */
static int configure_function(struct ocxl_fn *fn, struct pci_dev *dev)
{
	int rc;

	rc = pci_enable_device(dev);
	if (rc) {
		dev_err(&dev->dev, "pci_enable_device failed: %d\n", rc);
		return rc;
	}

	/*
	 * Once it has been confirmed to work on our hardware, we
	 * should reset the function, to force the adapter to restart
	 * from scratch.
	 * A function reset would also reset all its AFUs.
	 *
	 * Some hints for implementation:
	 *
	 * - there's not status bit to know when the reset is done. We
	 *   should try reading the config space to know when it's
	 *   done.
	 * - probably something like:
	 *	Reset
	 *	wait 100ms
	 *	issue config read
	 *	allow device up to 1 sec to return success on config
	 *	read before declaring it broken
	 *
	 * Some shared logic on the card (CFG, TLX) won't be reset, so
	 * there's no guarantee that it will be enough.
	 */
	rc = ocxl_config_read_function(dev, &fn->config);
	if (rc)
		return rc;

	rc = set_function_device(fn, dev);
	if (rc)
		return rc;

	rc = assign_function_actag(fn);
	if (rc)
		return rc;

	rc = set_function_pasid(fn);
	if (rc)
		return rc;

	rc = ocxl_link_setup(dev, 0, &fn->link);
	if (rc)
		return rc;

	rc = ocxl_config_set_TL(dev, fn->config.dvsec_tl_pos);
	if (rc) {
		ocxl_link_release(dev, fn->link);
		return rc;
	}
	return 0;
}

static void deconfigure_function(struct ocxl_fn *fn)
{
	struct pci_dev *dev = to_pci_dev(fn->dev.parent);

	ocxl_link_release(dev, fn->link);
	pci_disable_device(dev);
}

/* Allocate, configure and register the function device */
static struct ocxl_fn *init_function(struct pci_dev *dev)
{
	struct ocxl_fn *fn;
	int rc;

	fn = alloc_function(dev);
	if (!fn)
		return ERR_PTR(-ENOMEM);

	rc = configure_function(fn, dev);
	if (rc) {
		free_function(fn);
		return ERR_PTR(rc);
	}

	rc = device_register(&fn->dev);
	if (rc) {
		deconfigure_function(fn);
		/*
		 * NOTE(review): device_unregister() after a failed
		 * device_register() — presumably to drop the initial
		 * reference so free_function_dev() runs; confirm
		 * against the device core rules for failed registration
		 */
		device_unregister(&fn->dev);
		return ERR_PTR(rc);
	}
	return fn;
}

static void remove_function(struct ocxl_fn *fn)
{
	deconfigure_function(fn);
	device_unregister(&fn->dev);
}

/*
 * probe: set up the function, then bring up every AFU it declares.
 * A failing AFU is skipped; probe succeeds even with 0 AFUs configured.
 */
static int ocxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
	int rc, afu_count = 0;
	u8 afu;
	struct ocxl_fn *fn;

	/* the opencapi support code requires the radix MMU */
	if (!radix_enabled()) {
		dev_err(&dev->dev, "Unsupported memory model (hash)\n");
		return -ENODEV;
	}

	fn = init_function(dev);
	if (IS_ERR(fn)) {
		dev_err(&dev->dev, "function init failed: %li\n",
			PTR_ERR(fn));
		return PTR_ERR(fn);
	}

	for (afu = 0; afu <= fn->config.max_afu_index; afu++) {
		rc = ocxl_config_check_afu_index(dev, &fn->config, afu);
		if (rc > 0) {
			rc = init_afu(dev, fn, afu);
			if (rc) {
				dev_err(&dev->dev,
					"Can't initialize AFU index %d\n", afu);
				continue;
			}
			afu_count++;
		}
	}
	dev_info(&dev->dev, "%d AFU(s) configured\n", afu_count);
	return 0;
}

static void ocxl_remove(struct pci_dev *dev)
{
	struct ocxl_afu *afu, *tmp;
	struct ocxl_fn *fn = pci_get_drvdata(dev);

	list_for_each_entry_safe(afu, tmp, &fn->afu_list, list) {
		remove_afu(afu);
	}
	remove_function(fn);
}

struct pci_driver ocxl_pci_driver = {
	.name = "ocxl",
	.id_table = ocxl_pci_tbl,
	.probe = ocxl_probe,
	.remove = ocxl_remove,
	.shutdown = ocxl_remove,
};
+142
drivers/misc/ocxl/sysfs.c
// SPDX-License-Identifier: GPL-2.0+
// Copyright 2017 IBM Corp.
#include <linux/sysfs.h>
#include "ocxl_internal.h"

/* sysfs: size in bytes of the AFU's global mmio area */
static ssize_t global_mmio_size_show(struct device *device,
				struct device_attribute *attr,
				char *buf)
{
	struct ocxl_afu *afu = to_ocxl_afu(device);

	return scnprintf(buf, PAGE_SIZE, "%d\n",
			afu->config.global_mmio_size);
}

/* sysfs: per-context size (stride) of the per-process mmio area */
static ssize_t pp_mmio_size_show(struct device *device,
				struct device_attribute *attr,
				char *buf)
{
	struct ocxl_afu *afu = to_ocxl_afu(device);

	return scnprintf(buf, PAGE_SIZE, "%d\n",
			afu->config.pp_mmio_stride);
}

/* sysfs: AFU version as <major>:<minor> */
static ssize_t afu_version_show(struct device *device,
				struct device_attribute *attr,
				char *buf)
{
	struct ocxl_afu *afu = to_ocxl_afu(device);

	return scnprintf(buf, PAGE_SIZE, "%hhu:%hhu\n",
			afu->config.version_major,
			afu->config.version_minor);
}

/* sysfs: opened contexts vs. maximum, as <count>/<max> */
static ssize_t contexts_show(struct device *device,
			struct device_attribute *attr,
			char *buf)
{
	struct ocxl_afu *afu = to_ocxl_afu(device);

	return scnprintf(buf, PAGE_SIZE, "%d/%d\n",
			afu->pasid_count, afu->pasid_max);
}

static struct device_attribute afu_attrs[] = {
	__ATTR_RO(global_mmio_size),
	__ATTR_RO(pp_mmio_size),
	__ATTR_RO(afu_version),
	__ATTR_RO(contexts),
};

/*
 * read() handler for the "global_mmio_area" binary file.
 * NOTE(review): count is not clamped to the remaining area here —
 * presumably the sysfs core limits accesses to the bin_attribute size
 * set in ocxl_sysfs_add_afu(); confirm
 */
static ssize_t global_mmio_read(struct file *filp, struct kobject *kobj,
				struct bin_attribute *bin_attr, char *buf,
				loff_t off, size_t count)
{
	struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj));

	if (count == 0 || off < 0 ||
		off >= afu->config.global_mmio_size)
		return 0;
	memcpy_fromio(buf, afu->global_mmio_ptr + off, count);
	return count;
}

/* fault handler: map one page of the global mmio area into userspace */
static int global_mmio_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct ocxl_afu *afu = vma->vm_private_data;
	unsigned long offset;

	if (vmf->pgoff >= (afu->config.global_mmio_size >> PAGE_SHIFT))
		return VM_FAULT_SIGBUS;

	/* pfn of the faulting page within the global mmio area */
	offset = vmf->pgoff;
	offset += (afu->global_mmio_start >> PAGE_SHIFT);
	vm_insert_pfn(vma, vmf->address, offset);
	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct global_mmio_vmops = {
	.fault = global_mmio_fault,
};

/* mmap() handler for the "global_mmio_area" binary file */
static int global_mmio_mmap(struct file *filp, struct kobject *kobj,
			struct bin_attribute *bin_attr,
			struct vm_area_struct *vma)
{
	struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj));

	/* refuse mappings extending past the end of the area */
	if ((vma_pages(vma) + vma->vm_pgoff) >
		(afu->config.global_mmio_size >> PAGE_SHIFT))
		return -EINVAL;

	vma->vm_flags |= VM_IO | VM_PFNMAP;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	vma->vm_ops = &global_mmio_vmops;
	vma->vm_private_data = afu;
	return 0;
}

/*
 * Create the AFU's sysfs attributes and the "global_mmio_area" binary
 * file. On error, previously-created attributes are removed.
 */
int ocxl_sysfs_add_afu(struct ocxl_afu *afu)
{
	int i, rc;

	for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) {
		rc = device_create_file(&afu->dev, &afu_attrs[i]);
		if (rc)
			goto err;
	}

	sysfs_attr_init(&afu->attr_global_mmio.attr);
	afu->attr_global_mmio.attr.name = "global_mmio_area";
	afu->attr_global_mmio.attr.mode = 0600;
	afu->attr_global_mmio.size = afu->config.global_mmio_size;
	afu->attr_global_mmio.read = global_mmio_read;
	afu->attr_global_mmio.mmap = global_mmio_mmap;
	rc = device_create_bin_file(&afu->dev, &afu->attr_global_mmio);
	if (rc) {
		dev_err(&afu->dev,
			"Unable to create global mmio attr for afu: %d\n",
			rc);
		goto err;
	}

	return 0;

err:
	/* unwind only the attributes created so far */
	for (i--; i >= 0; i--)
		device_remove_file(&afu->dev, &afu_attrs[i]);
	return rc;
}

void ocxl_sysfs_remove_afu(struct ocxl_afu *afu)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(afu_attrs); i++)
		device_remove_file(&afu->dev, &afu_attrs[i]);
	device_remove_bin_file(&afu->dev, &afu->attr_global_mmio);
}
+40
include/uapi/misc/ocxl.h
/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/* Copyright 2017 IBM Corp. */
#ifndef _UAPI_MISC_OCXL_H
#define _UAPI_MISC_OCXL_H

#include <linux/types.h>
#include <linux/ioctl.h>

/* Event types reported through the AFU file descriptor */
enum ocxl_event_type {
	OCXL_AFU_EVENT_XSL_FAULT_ERROR = 0,
};

#define OCXL_KERNEL_EVENT_FLAG_LAST 0x0001  /* This is the last event pending */

/* Header common to all events; type is an ocxl_event_type value */
struct ocxl_kernel_event_header {
	__u16 type;
	__u16 flags;
	__u32 reserved;
};

/* Payload for OCXL_AFU_EVENT_XSL_FAULT_ERROR events */
struct ocxl_kernel_event_xsl_fault_error {
	__u64 addr;	/* faulting address */
	__u64 dsisr;	/* dsisr register value at fault time */
	__u64 count;	/* number of times the fault was triggered */
	__u64 reserved;
};

/* Argument for OCXL_IOCTL_ATTACH; reserved fields must be zero */
struct ocxl_ioctl_attach {
	__u64 amr;
	__u64 reserved1;
	__u64 reserved2;
	__u64 reserved3;
};

/* ioctl numbers */
#define OCXL_MAGIC 0xCA
/* AFU devices */
#define OCXL_IOCTL_ATTACH _IOW(OCXL_MAGIC, 0x10, struct ocxl_ioctl_attach)

#endif /* _UAPI_MISC_OCXL_H */