Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

PCI: Add Virtual Channel to save/restore support

While we don't really have any infrastructure for making use of VC
support, the system BIOS can configure the topology to non-default
VC values prior to boot. This may be due to silicon bugs, desire to
reserve traffic classes, or perhaps just BIOS bugs. When we reset
devices, the VC configuration may return to default values, which can
be incompatible with devices upstream. For instance, Nvidia GRID
cards provide a PCIe switch and some number of GPUs, all supporting
VC. The power-on default for VC is to support TC0-7 across VC0,
however some platforms will only enable TC0/VC0 mapping across the
topology. When we do a secondary bus reset on the downstream switch
port, the GPU is reset to a TC0-7/VC0 mapping while the opposite end
of the link only enables TC0/VC0. If the GPU attempts to use TC1-7,
it fails.

This patch attempts to provide complete support for VC save/restore,
even beyond the minimally required use case above. This includes
save/restore and reload of the arbitration table, save/restore and
reload of the port arbitration tables, and re-enabling of the
channels for VC, VC9, and MFVC capabilities.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>

authored by

Alex Williamson and committed by
Bjorn Helgaas
425c1b22 fd0f7f73

+466 -5
+1 -1
drivers/pci/Makefile
··· 4 4 5 5 obj-y += access.o bus.o probe.o host-bridge.o remove.o pci.o \ 6 6 pci-driver.o search.o pci-sysfs.o rom.o setup-res.o \ 7 - irq.o vpd.o setup-bus.o 7 + irq.o vpd.o setup-bus.o vc.o 8 8 obj-$(CONFIG_PROC_FS) += proc.o 9 9 obj-$(CONFIG_SYSFS) += slot.o 10 10
+5
drivers/pci/pci.c
··· 984 984 return i; 985 985 if ((i = pci_save_pcix_state(dev)) != 0) 986 986 return i; 987 + if ((i = pci_save_vc_state(dev)) != 0) 988 + return i; 987 989 return 0; 988 990 } 989 991 ··· 1048 1046 /* PCI Express register must be restored first */ 1049 1047 pci_restore_pcie_state(dev); 1050 1048 pci_restore_ats_state(dev); 1049 + pci_restore_vc_state(dev); 1051 1050 1052 1051 pci_restore_config_space(dev); 1053 1052 ··· 2121 2118 if (error) 2122 2119 dev_err(&dev->dev, 2123 2120 "unable to preallocate PCI-X save buffer\n"); 2121 + 2122 + pci_allocate_vc_save_buffers(dev); 2124 2123 } 2125 2124 2126 2125 void pci_free_cap_save_buffers(struct pci_dev *dev)
+434
drivers/pci/vc.c
··· 1 + /* 2 + * PCI Virtual Channel support 3 + * 4 + * Copyright (C) 2013 Red Hat, Inc. All rights reserved. 5 + * Author: Alex Williamson <alex.williamson@redhat.com> 6 + * 7 + * This program is free software; you can redistribute it and/or modify 8 + * it under the terms of the GNU General Public License version 2 as 9 + * published by the Free Software Foundation. 10 + */ 11 + 12 + #include <linux/device.h> 13 + #include <linux/kernel.h> 14 + #include <linux/module.h> 15 + #include <linux/pci.h> 16 + #include <linux/pci_regs.h> 17 + #include <linux/types.h> 18 + 19 + /** 20 + * pci_vc_save_restore_dwords - Save or restore a series of dwords 21 + * @dev: device 22 + * @pos: starting config space position 23 + * @buf: buffer to save to or restore from 24 + * @dwords: number of dwords to save/restore 25 + * @save: whether to save or restore 26 + */ 27 + static void pci_vc_save_restore_dwords(struct pci_dev *dev, int pos, 28 + u32 *buf, int dwords, bool save) 29 + { 30 + int i; 31 + 32 + for (i = 0; i < dwords; i++, buf++) { 33 + if (save) 34 + pci_read_config_dword(dev, pos + (i * 4), buf); 35 + else 36 + pci_write_config_dword(dev, pos + (i * 4), *buf); 37 + } 38 + } 39 + 40 + /** 41 + * pci_vc_load_arb_table - load and wait for VC arbitration table 42 + * @dev: device 43 + * @pos: starting position of VC capability (VC/VC9/MFVC) 44 + * 45 + * Set Load VC Arbitration Table bit requesting hardware to apply the VC 46 + * Arbitration Table (previously loaded). When the VC Arbitration Table 47 + * Status clears, hardware has latched the table into VC arbitration logic. 48 + */ 49 + static void pci_vc_load_arb_table(struct pci_dev *dev, int pos) 50 + { 51 + u16 ctrl; 52 + 53 + pci_read_config_word(dev, pos + PCI_VC_PORT_CTRL, &ctrl); 54 + pci_write_config_word(dev, pos + PCI_VC_PORT_CTRL, 55 + ctrl | PCI_VC_PORT_CTRL_LOAD_TABLE); 56 + if (pci_wait_for_pending(dev, pos + PCI_VC_PORT_STATUS, 57 + PCI_VC_PORT_STATUS_TABLE)) 58 + return; 59 + 60 + dev_err(&dev->dev, "VC arbitration table failed to load\n"); 61 + } 62 + 63 + /** 64 + * pci_vc_load_port_arb_table - Load and wait for VC port arbitration table 65 + * @dev: device 66 + * @pos: starting position of VC capability (VC/VC9/MFVC) 67 + * @res: VC resource number, ie. VCn (0-7) 68 + * 69 + * Set Load Port Arbitration Table bit requesting hardware to apply the Port 70 + * Arbitration Table (previously loaded). When the Port Arbitration Table 71 + * Status clears, hardware has latched the table into port arbitration logic. 72 + */ 73 + static void pci_vc_load_port_arb_table(struct pci_dev *dev, int pos, int res) 74 + { 75 + int ctrl_pos, status_pos; 76 + u32 ctrl; 77 + 78 + ctrl_pos = pos + PCI_VC_RES_CTRL + (res * PCI_CAP_VC_PER_VC_SIZEOF); 79 + status_pos = pos + PCI_VC_RES_STATUS + (res * PCI_CAP_VC_PER_VC_SIZEOF); 80 + 81 + pci_read_config_dword(dev, ctrl_pos, &ctrl); 82 + pci_write_config_dword(dev, ctrl_pos, 83 + ctrl | PCI_VC_RES_CTRL_LOAD_TABLE); 84 + 85 + if (pci_wait_for_pending(dev, status_pos, PCI_VC_RES_STATUS_TABLE)) 86 + return; 87 + 88 + dev_err(&dev->dev, "VC%d port arbitration table failed to load\n", res); 89 + } 90 + 91 + /** 92 + * pci_vc_enable - Enable virtual channel 93 + * @dev: device 94 + * @pos: starting position of VC capability (VC/VC9/MFVC) 95 + * @res: VC res number, ie. VCn (0-7) 96 + * 97 + * A VC is enabled by setting the enable bit in matching resource control 98 + * registers on both sides of a link. We therefore need to find the opposite 99 + * end of the link. To keep this simple we enable from the downstream device. 100 + * RC devices do not have an upstream device, nor does it seem that VC9 do 101 + * (spec is unclear). Once we find the upstream device, match the VC ID to 102 + * get the correct resource, disable and enable on both ends. 103 + */ 104 + static void pci_vc_enable(struct pci_dev *dev, int pos, int res) 105 + { 106 + int ctrl_pos, status_pos, id, pos2, evcc, i, ctrl_pos2, status_pos2; 107 + u32 ctrl, header, reg1, ctrl2; 108 + struct pci_dev *link = NULL; 109 + 110 + /* Enable VCs from the downstream device */ 111 + if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || 112 + pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) 113 + return; 114 + 115 + ctrl_pos = pos + PCI_VC_RES_CTRL + (res * PCI_CAP_VC_PER_VC_SIZEOF); 116 + status_pos = pos + PCI_VC_RES_STATUS + (res * PCI_CAP_VC_PER_VC_SIZEOF); 117 + 118 + pci_read_config_dword(dev, ctrl_pos, &ctrl); 119 + id = ctrl & PCI_VC_RES_CTRL_ID; 120 + 121 + pci_read_config_dword(dev, pos, &header); 122 + 123 + /* If there is no opposite end of the link, skip to enable */ 124 + if (PCI_EXT_CAP_ID(header) == PCI_EXT_CAP_ID_VC9 || 125 + pci_is_root_bus(dev->bus)) 126 + goto enable; 127 + 128 + pos2 = pci_find_ext_capability(dev->bus->self, PCI_EXT_CAP_ID_VC); 129 + if (!pos2) 130 + goto enable; 131 + 132 + pci_read_config_dword(dev->bus->self, pos2 + PCI_VC_PORT_REG1, &reg1); 133 + evcc = reg1 & PCI_VC_REG1_EVCC; 134 + 135 + /* VC0 is hardwired enabled, so we can start with 1 */ 136 + for (i = 1; i < evcc + 1; i++) { 137 + ctrl_pos2 = pos2 + PCI_VC_RES_CTRL + 138 + (i * PCI_CAP_VC_PER_VC_SIZEOF); 139 + status_pos2 = pos2 + PCI_VC_RES_STATUS + 140 + (i * PCI_CAP_VC_PER_VC_SIZEOF); 141 + pci_read_config_dword(dev->bus->self, ctrl_pos2, &ctrl2); 142 + if ((ctrl2 & PCI_VC_RES_CTRL_ID) == id) { 143 + link = dev->bus->self; 144 + break; 145 + } 146 + } 147 + 148 + if (!link) 149 + goto enable; 150 + 151 + /* Disable if enabled */ 152 + if (ctrl2 & PCI_VC_RES_CTRL_ENABLE) { 153 + ctrl2 &= ~PCI_VC_RES_CTRL_ENABLE; 154 + pci_write_config_dword(link, ctrl_pos2, ctrl2); 155 + } 156 + 157 + /* Enable on both ends */ 158 + ctrl2 |= PCI_VC_RES_CTRL_ENABLE; 159 + pci_write_config_dword(link, ctrl_pos2, ctrl2); 160 + enable: 161 + ctrl |= PCI_VC_RES_CTRL_ENABLE; 162 + pci_write_config_dword(dev, ctrl_pos, ctrl); 163 + 164 + if (!pci_wait_for_pending(dev, status_pos, PCI_VC_RES_STATUS_NEGO)) 165 + dev_err(&dev->dev, "VC%d negotiation stuck pending\n", id); 166 + 167 + if (link && !pci_wait_for_pending(link, status_pos2, 168 + PCI_VC_RES_STATUS_NEGO)) 169 + dev_err(&link->dev, "VC%d negotiation stuck pending\n", id); 170 + } 171 + 172 + /** 173 + * pci_vc_do_save_buffer - Size, save, or restore VC state 174 + * @dev: device 175 + * @pos: starting position of VC capability (VC/VC9/MFVC) 176 + * @save_state: buffer for save/restore 177 + * @name: for error message 178 + * @save: if provided a buffer, this indicates what to do with it 179 + * 180 + * Walking Virtual Channel config space to size, save, or restore it 181 + * is complicated, so we do it all from one function to reduce code and 182 + * guarantee ordering matches in the buffer. When called with NULL 183 + * @save_state, return the size of the necessary save buffer. When called 184 + * with a non-NULL @save_state, @save determines whether we save to the 185 + * buffer or restore from it. 186 + */ 187 + static int pci_vc_do_save_buffer(struct pci_dev *dev, int pos, 188 + struct pci_cap_saved_state *save_state, 189 + bool save) 190 + { 191 + u32 reg1; 192 + char evcc, lpevcc, parb_size; 193 + int i, len = 0; 194 + u8 *buf = save_state ? (u8 *)save_state->cap.data : NULL; 195 + 196 + /* Sanity check buffer size for save/restore */ 197 + if (buf && save_state->cap.size != 198 + pci_vc_do_save_buffer(dev, pos, NULL, save)) { 199 + dev_err(&dev->dev, 200 + "VC save buffer size does not match @0x%x\n", pos); 201 + return -ENOMEM; 202 + } 203 + 204 + pci_read_config_dword(dev, pos + PCI_VC_PORT_REG1, &reg1); 205 + /* Extended VC Count (not counting VC0) */ 206 + evcc = reg1 & PCI_VC_REG1_EVCC; 207 + /* Low Priority Extended VC Count (not counting VC0) */ 208 + lpevcc = (reg1 & PCI_VC_REG1_LPEVCC) >> 4; 209 + /* Port Arbitration Table Entry Size (bits) */ 210 + parb_size = 1 << ((reg1 & PCI_VC_REG1_ARB_SIZE) >> 10); 211 + 212 + /* 213 + * Port VC Control Register contains VC Arbitration Select, which 214 + * cannot be modified when more than one LPVC is in operation. We 215 + * therefore save/restore it first, as only VC0 should be enabled 216 + * after device reset. 217 + */ 218 + if (buf) { 219 + if (save) 220 + pci_read_config_word(dev, pos + PCI_VC_PORT_CTRL, 221 + (u16 *)buf); 222 + else 223 + pci_write_config_word(dev, pos + PCI_VC_PORT_CTRL, 224 + *(u16 *)buf); 225 + buf += 2; 226 + } 227 + len += 2; 228 + 229 + /* 230 + * If we have any Low Priority VCs and a VC Arbitration Table Offset 231 + * in Port VC Capability Register 2 then save/restore it next. 232 + */ 233 + if (lpevcc) { 234 + u32 reg2; 235 + int vcarb_offset; 236 + 237 + pci_read_config_dword(dev, pos + PCI_VC_PORT_REG2, &reg2); 238 + vcarb_offset = ((reg2 & PCI_VC_REG2_ARB_OFF) >> 24) * 16; 239 + 240 + if (vcarb_offset) { 241 + int size, vcarb_phases = 0; 242 + 243 + if (reg2 & PCI_VC_REG2_128_PHASE) 244 + vcarb_phases = 128; 245 + else if (reg2 & PCI_VC_REG2_64_PHASE) 246 + vcarb_phases = 64; 247 + else if (reg2 & PCI_VC_REG2_32_PHASE) 248 + vcarb_phases = 32; 249 + 250 + /* Fixed 4 bits per phase per lpevcc (plus VC0) */ 251 + size = ((lpevcc + 1) * vcarb_phases * 4) / 8; 252 + 253 + if (size && buf) { 254 + pci_vc_save_restore_dwords(dev, 255 + pos + vcarb_offset, 256 + (u32 *)buf, 257 + size / 4, save); 258 + /* 259 + * On restore, we need to signal hardware to 260 + * re-load the VC Arbitration Table. 261 + */ 262 + if (!save) 263 + pci_vc_load_arb_table(dev, pos); 264 + 265 + buf += size; 266 + } 267 + len += size; 268 + } 269 + } 270 + 271 + /* 272 + * In addition to each VC Resource Control Register, we may have a 273 + * Port Arbitration Table attached to each VC. The Port Arbitration 274 + * Table Offset in each VC Resource Capability Register tells us if 275 + * it exists. The entry size is global from the Port VC Capability 276 + * Register1 above. The number of phases is determined per VC. 277 + */ 278 + for (i = 0; i < evcc + 1; i++) { 279 + u32 cap; 280 + int parb_offset; 281 + 282 + pci_read_config_dword(dev, pos + PCI_VC_RES_CAP + 283 + (i * PCI_CAP_VC_PER_VC_SIZEOF), &cap); 284 + parb_offset = ((cap & PCI_VC_RES_CAP_ARB_OFF) >> 24) * 16; 285 + if (parb_offset) { 286 + int size, parb_phases = 0; 287 + 288 + if (cap & PCI_VC_RES_CAP_256_PHASE) 289 + parb_phases = 256; 290 + else if (cap & (PCI_VC_RES_CAP_128_PHASE | 291 + PCI_VC_RES_CAP_128_PHASE_TB)) 292 + parb_phases = 128; 293 + else if (cap & PCI_VC_RES_CAP_64_PHASE) 294 + parb_phases = 64; 295 + else if (cap & PCI_VC_RES_CAP_32_PHASE) 296 + parb_phases = 32; 297 + 298 + size = (parb_size * parb_phases) / 8; 299 + 300 + if (size && buf) { 301 + pci_vc_save_restore_dwords(dev, 302 + pos + parb_offset, 303 + (u32 *)buf, 304 + size / 4, save); 305 + buf += size; 306 + } 307 + len += size; 308 + } 309 + 310 + /* VC Resource Control Register */ 311 + if (buf) { 312 + int ctrl_pos = pos + PCI_VC_RES_CTRL + 313 + (i * PCI_CAP_VC_PER_VC_SIZEOF); 314 + if (save) 315 + pci_read_config_dword(dev, ctrl_pos, 316 + (u32 *)buf); 317 + else { 318 + u32 tmp, ctrl = *(u32 *)buf; 319 + /* 320 + * For an FLR case, the VC config may remain. 321 + * Preserve enable bit, restore the rest. 322 + */ 323 + pci_read_config_dword(dev, ctrl_pos, &tmp); 324 + tmp &= PCI_VC_RES_CTRL_ENABLE; 325 + tmp |= ctrl & ~PCI_VC_RES_CTRL_ENABLE; 326 + pci_write_config_dword(dev, ctrl_pos, tmp); 327 + /* Load port arbitration table if used */ 328 + if (ctrl & PCI_VC_RES_CTRL_ARB_SELECT) 329 + pci_vc_load_port_arb_table(dev, pos, i); 330 + /* Re-enable if needed */ 331 + if ((ctrl ^ tmp) & PCI_VC_RES_CTRL_ENABLE) 332 + pci_vc_enable(dev, pos, i); 333 + } 334 + buf += 4; 335 + } 336 + len += 4; 337 + } 338 + 339 + return buf ? 0 : len; 340 + } 341 + 342 + static struct { 343 + u16 id; 344 + const char *name; 345 + } vc_caps[] = { { PCI_EXT_CAP_ID_MFVC, "MFVC" }, 346 + { PCI_EXT_CAP_ID_VC, "VC" }, 347 + { PCI_EXT_CAP_ID_VC9, "VC9" } }; 348 + 349 + /** 350 + * pci_save_vc_state - Save VC state to pre-allocate save buffer 351 + * @dev: device 352 + * 353 + * For each type of VC capability, VC/VC9/MFVC, find the capability and 354 + * save it to the pre-allocated save buffer. 355 + */ 356 + int pci_save_vc_state(struct pci_dev *dev) 357 + { 358 + int i; 359 + 360 + for (i = 0; i < ARRAY_SIZE(vc_caps); i++) { 361 + int pos, ret; 362 + struct pci_cap_saved_state *save_state; 363 + 364 + pos = pci_find_ext_capability(dev, vc_caps[i].id); 365 + if (!pos) 366 + continue; 367 + 368 + save_state = pci_find_saved_ext_cap(dev, vc_caps[i].id); 369 + if (!save_state) { 370 + dev_err(&dev->dev, "%s buffer not found in %s\n", 371 + vc_caps[i].name, __func__); 372 + return -ENOMEM; 373 + } 374 + 375 + ret = pci_vc_do_save_buffer(dev, pos, save_state, true); 376 + if (ret) { 377 + dev_err(&dev->dev, "%s save unsuccessful %s\n", 378 + vc_caps[i].name, __func__); 379 + return ret; 380 + } 381 + } 382 + 383 + return 0; 384 + } 385 + 386 + /** 387 + * pci_restore_vc_state - Restore VC state from save buffer 388 + * @dev: device 389 + * 390 + * For each type of VC capability, VC/VC9/MFVC, find the capability and 391 + * restore it from the previously saved buffer. 392 + */ 393 + void pci_restore_vc_state(struct pci_dev *dev) 394 + { 395 + int i; 396 + 397 + for (i = 0; i < ARRAY_SIZE(vc_caps); i++) { 398 + int pos; 399 + struct pci_cap_saved_state *save_state; 400 + 401 + pos = pci_find_ext_capability(dev, vc_caps[i].id); 402 + save_state = pci_find_saved_ext_cap(dev, vc_caps[i].id); 403 + if (!save_state || !pos) 404 + continue; 405 + 406 + pci_vc_do_save_buffer(dev, pos, save_state, false); 407 + } 408 + } 409 + 410 + /** 411 + * pci_allocate_vc_save_buffers - Allocate save buffers for VC caps 412 + * @dev: device 413 + * 414 + * For each type of VC capability, VC/VC9/MFVC, find the capability, size 415 + * it, and allocate a buffer for save/restore. 416 + */ 417 + 418 + void pci_allocate_vc_save_buffers(struct pci_dev *dev) 419 + { 420 + int i; 421 + 422 + for (i = 0; i < ARRAY_SIZE(vc_caps); i++) { 423 + int len, pos = pci_find_ext_capability(dev, vc_caps[i].id); 424 + 425 + if (!pos) 426 + continue; 427 + 428 + len = pci_vc_do_save_buffer(dev, pos, NULL, false); 429 + if (pci_add_ext_cap_save_buffer(dev, vc_caps[i].id, len)) 430 + dev_err(&dev->dev, 431 + "unable to preallocate %s save buffer\n", 432 + vc_caps[i].name); 433 + } 434 + }
+5
include/linux/pci.h
··· 1005 1005 return __pci_enable_wake(dev, state, false, enable); 1006 1006 } 1007 1007 1008 + /* PCI Virtual Channel */ 1009 + int pci_save_vc_state(struct pci_dev *dev); 1010 + void pci_restore_vc_state(struct pci_dev *dev); 1011 + void pci_allocate_vc_save_buffers(struct pci_dev *dev); 1012 + 1008 1013 #define PCI_EXP_IDO_REQUEST (1<<0) 1009 1014 #define PCI_EXP_IDO_COMPLETION (1<<1) 1010 1015 void pci_enable_ido(struct pci_dev *dev, unsigned long type);
+21 -4
include/uapi/linux/pci_regs.h
··· 678 678 679 679 /* Virtual Channel */ 680 680 #define PCI_VC_PORT_REG1 4 681 - #define PCI_VC_REG1_EVCC 0x7 /* extended VC count */ 681 + #define PCI_VC_REG1_EVCC 0x00000007 /* extended VC count */ 682 + #define PCI_VC_REG1_LPEVCC 0x00000070 /* low prio extended VC count */ 683 + #define PCI_VC_REG1_ARB_SIZE 0x00000c00 682 684 #define PCI_VC_PORT_REG2 8 683 - #define PCI_VC_REG2_32_PHASE 0x2 684 - #define PCI_VC_REG2_64_PHASE 0x4 685 - #define PCI_VC_REG2_128_PHASE 0x8 685 + #define PCI_VC_REG2_32_PHASE 0x00000002 686 + #define PCI_VC_REG2_64_PHASE 0x00000004 687 + #define PCI_VC_REG2_128_PHASE 0x00000008 688 + #define PCI_VC_REG2_ARB_OFF 0xff000000 686 689 #define PCI_VC_PORT_CTRL 12 690 + #define PCI_VC_PORT_CTRL_LOAD_TABLE 0x00000001 687 691 #define PCI_VC_PORT_STATUS 14 692 + #define PCI_VC_PORT_STATUS_TABLE 0x00000001 688 693 #define PCI_VC_RES_CAP 16 694 + #define PCI_VC_RES_CAP_32_PHASE 0x00000002 695 + #define PCI_VC_RES_CAP_64_PHASE 0x00000004 696 + #define PCI_VC_RES_CAP_128_PHASE 0x00000008 697 + #define PCI_VC_RES_CAP_128_PHASE_TB 0x00000010 698 + #define PCI_VC_RES_CAP_256_PHASE 0x00000020 699 + #define PCI_VC_RES_CAP_ARB_OFF 0xff000000 689 700 #define PCI_VC_RES_CTRL 20 701 + #define PCI_VC_RES_CTRL_LOAD_TABLE 0x00010000 702 + #define PCI_VC_RES_CTRL_ARB_SELECT 0x000e0000 703 + #define PCI_VC_RES_CTRL_ID 0x07000000 704 + #define PCI_VC_RES_CTRL_ENABLE 0x80000000 690 705 #define PCI_VC_RES_STATUS 26 706 + #define PCI_VC_RES_STATUS_TABLE 0x00000001 707 + #define PCI_VC_RES_STATUS_NEGO 0x00000002 691 708 #define PCI_CAP_VC_BASE_SIZEOF 0x10 692 709 #define PCI_CAP_VC_PER_VC_SIZEOF 0x0C 693 710