Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/pseries: Add driver for PAPR SCM regions

Adds a driver that implements support for enabling and accessing PAPR
SCM regions. Unfortunately due to how the PAPR interface works we can't
use the existing of_pmem driver (yet) because:

a) The guest is required to use the H_SCM_BIND_MEM h-call to
add the SCM region to its physical address space, and
b) There is currently no mechanism for relating a bare of_pmem region
to the backing DIMM (or not-a-DIMM for our case).

Both of these are easily handled by rolling the functionality into a
separate driver so here we are...

Acked-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

authored by

Oliver O'Halloran and committed by
Michael Ellerman
b5beae5e 4c5d87db

+353
+7
arch/powerpc/platforms/pseries/Kconfig
··· 138 138 bool "Support for GX bus based adapters" 139 139 help 140 140 Bus device driver for GX bus based adapters. 141 + 142 + config PAPR_SCM 143 + depends on PPC_PSERIES && MEMORY_HOTPLUG 144 + select LIBNVDIMM 145 + tristate "Support for the PAPR Storage Class Memory interface" 146 + help 147 + Enable access to hypervisor provided storage class memory.
+1
arch/powerpc/platforms/pseries/Makefile
··· 24 24 obj-$(CONFIG_LPARCFG) += lparcfg.o 25 25 obj-$(CONFIG_IBMVIO) += vio.o 26 26 obj-$(CONFIG_IBMEBUS) += ibmebus.o 27 + obj-$(CONFIG_PAPR_SCM) += papr_scm.o 27 28 28 29 ifdef CONFIG_PPC_PSERIES 29 30 obj-$(CONFIG_SUSPEND) += suspend.o
+345
arch/powerpc/platforms/pseries/papr_scm.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #define pr_fmt(fmt) "papr-scm: " fmt 4 + 5 + #include <linux/of.h> 6 + #include <linux/kernel.h> 7 + #include <linux/module.h> 8 + #include <linux/ioport.h> 9 + #include <linux/slab.h> 10 + #include <linux/ndctl.h> 11 + #include <linux/sched.h> 12 + #include <linux/libnvdimm.h> 13 + #include <linux/platform_device.h> 14 + 15 + #include <asm/plpar_wrappers.h> 16 + 17 + #define BIND_ANY_ADDR (~0ul) 18 + 19 + #define PAPR_SCM_DIMM_CMD_MASK \ 20 + ((1ul << ND_CMD_GET_CONFIG_SIZE) | \ 21 + (1ul << ND_CMD_GET_CONFIG_DATA) | \ 22 + (1ul << ND_CMD_SET_CONFIG_DATA)) 23 + 24 + struct papr_scm_priv { 25 + struct platform_device *pdev; 26 + struct device_node *dn; 27 + uint32_t drc_index; 28 + uint64_t blocks; 29 + uint64_t block_size; 30 + int metadata_size; 31 + 32 + uint64_t bound_addr; 33 + 34 + struct nvdimm_bus_descriptor bus_desc; 35 + struct nvdimm_bus *bus; 36 + struct nvdimm *nvdimm; 37 + struct resource res; 38 + struct nd_region *region; 39 + struct nd_interleave_set nd_set; 40 + }; 41 + 42 + static int drc_pmem_bind(struct papr_scm_priv *p) 43 + { 44 + unsigned long ret[PLPAR_HCALL_BUFSIZE]; 45 + uint64_t rc, token; 46 + 47 + /* 48 + * When the hypervisor cannot map all the requested memory in a single 49 + * hcall it returns H_BUSY and we call again with the token until 50 + * we get H_SUCCESS. Aborting the retry loop before getting H_SUCCESS 51 + * leave the system in an undefined state, so we wait. 
52 + */ 53 + token = 0; 54 + 55 + do { 56 + rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0, 57 + p->blocks, BIND_ANY_ADDR, token); 58 + token = be64_to_cpu(ret[0]); 59 + cond_resched(); 60 + } while (rc == H_BUSY); 61 + 62 + if (rc) { 63 + dev_err(&p->pdev->dev, "bind err: %lld\n", rc); 64 + return -ENXIO; 65 + } 66 + 67 + p->bound_addr = be64_to_cpu(ret[1]); 68 + 69 + dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res); 70 + 71 + return 0; 72 + } 73 + 74 + static int drc_pmem_unbind(struct papr_scm_priv *p) 75 + { 76 + unsigned long ret[PLPAR_HCALL_BUFSIZE]; 77 + uint64_t rc, token; 78 + 79 + token = 0; 80 + 81 + /* NB: unbind has the same retry requirements mentioned above */ 82 + do { 83 + rc = plpar_hcall(H_SCM_UNBIND_MEM, ret, p->drc_index, 84 + p->bound_addr, p->blocks, token); 85 + token = be64_to_cpu(ret); 86 + cond_resched(); 87 + } while (rc == H_BUSY); 88 + 89 + if (rc) 90 + dev_err(&p->pdev->dev, "unbind error: %lld\n", rc); 91 + 92 + return !!rc; 93 + } 94 + 95 + static int papr_scm_meta_get(struct papr_scm_priv *p, 96 + struct nd_cmd_get_config_data_hdr *hdr) 97 + { 98 + unsigned long data[PLPAR_HCALL_BUFSIZE]; 99 + int64_t ret; 100 + 101 + if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1) 102 + return -EINVAL; 103 + 104 + ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index, 105 + hdr->in_offset, 1); 106 + 107 + if (ret == H_PARAMETER) /* bad DRC index */ 108 + return -ENODEV; 109 + if (ret) 110 + return -EINVAL; /* other invalid parameter */ 111 + 112 + hdr->out_buf[0] = data[0] & 0xff; 113 + 114 + return 0; 115 + } 116 + 117 + static int papr_scm_meta_set(struct papr_scm_priv *p, 118 + struct nd_cmd_set_config_hdr *hdr) 119 + { 120 + int64_t ret; 121 + 122 + if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1) 123 + return -EINVAL; 124 + 125 + ret = plpar_hcall_norets(H_SCM_WRITE_METADATA, 126 + p->drc_index, hdr->in_offset, hdr->in_buf[0], 1); 127 + 128 + if (ret == H_PARAMETER) /* bad DRC 
index */ 129 + return -ENODEV; 130 + if (ret) 131 + return -EINVAL; /* other invalid parameter */ 132 + 133 + return 0; 134 + } 135 + 136 + int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, 137 + unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) 138 + { 139 + struct nd_cmd_get_config_size *get_size_hdr; 140 + struct papr_scm_priv *p; 141 + 142 + /* Only dimm-specific calls are supported atm */ 143 + if (!nvdimm) 144 + return -EINVAL; 145 + 146 + p = nvdimm_provider_data(nvdimm); 147 + 148 + switch (cmd) { 149 + case ND_CMD_GET_CONFIG_SIZE: 150 + get_size_hdr = buf; 151 + 152 + get_size_hdr->status = 0; 153 + get_size_hdr->max_xfer = 1; 154 + get_size_hdr->config_size = p->metadata_size; 155 + *cmd_rc = 0; 156 + break; 157 + 158 + case ND_CMD_GET_CONFIG_DATA: 159 + *cmd_rc = papr_scm_meta_get(p, buf); 160 + break; 161 + 162 + case ND_CMD_SET_CONFIG_DATA: 163 + *cmd_rc = papr_scm_meta_set(p, buf); 164 + break; 165 + 166 + default: 167 + return -EINVAL; 168 + } 169 + 170 + dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc); 171 + 172 + return 0; 173 + } 174 + 175 + static const struct attribute_group *region_attr_groups[] = { 176 + &nd_region_attribute_group, 177 + &nd_device_attribute_group, 178 + &nd_mapping_attribute_group, 179 + &nd_numa_attribute_group, 180 + NULL, 181 + }; 182 + 183 + static const struct attribute_group *bus_attr_groups[] = { 184 + &nvdimm_bus_attribute_group, 185 + NULL, 186 + }; 187 + 188 + static const struct attribute_group *papr_scm_dimm_groups[] = { 189 + &nvdimm_attribute_group, 190 + &nd_device_attribute_group, 191 + NULL, 192 + }; 193 + 194 + static int papr_scm_nvdimm_init(struct papr_scm_priv *p) 195 + { 196 + struct device *dev = &p->pdev->dev; 197 + struct nd_mapping_desc mapping; 198 + struct nd_region_desc ndr_desc; 199 + unsigned long dimm_flags; 200 + 201 + p->bus_desc.ndctl = papr_scm_ndctl; 202 + p->bus_desc.module = THIS_MODULE; 203 + p->bus_desc.of_node = 
p->pdev->dev.of_node; 204 + p->bus_desc.attr_groups = bus_attr_groups; 205 + p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL); 206 + 207 + if (!p->bus_desc.provider_name) 208 + return -ENOMEM; 209 + 210 + p->bus = nvdimm_bus_register(NULL, &p->bus_desc); 211 + if (!p->bus) { 212 + dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn); 213 + return -ENXIO; 214 + } 215 + 216 + dimm_flags = 0; 217 + set_bit(NDD_ALIASING, &dimm_flags); 218 + 219 + p->nvdimm = nvdimm_create(p->bus, p, papr_scm_dimm_groups, 220 + dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL); 221 + if (!p->nvdimm) { 222 + dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn); 223 + goto err; 224 + } 225 + 226 + /* now add the region */ 227 + 228 + memset(&mapping, 0, sizeof(mapping)); 229 + mapping.nvdimm = p->nvdimm; 230 + mapping.start = 0; 231 + mapping.size = p->blocks * p->block_size; // XXX: potential overflow? 232 + 233 + memset(&ndr_desc, 0, sizeof(ndr_desc)); 234 + ndr_desc.attr_groups = region_attr_groups; 235 + ndr_desc.numa_node = dev_to_node(&p->pdev->dev); 236 + ndr_desc.res = &p->res; 237 + ndr_desc.of_node = p->dn; 238 + ndr_desc.provider_data = p; 239 + ndr_desc.mapping = &mapping; 240 + ndr_desc.num_mappings = 1; 241 + ndr_desc.nd_set = &p->nd_set; 242 + set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); 243 + 244 + p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc); 245 + if (!p->region) { 246 + dev_err(dev, "Error registering region %pR from %pOF\n", 247 + ndr_desc.res, p->dn); 248 + goto err; 249 + } 250 + 251 + return 0; 252 + 253 + err: nvdimm_bus_unregister(p->bus); 254 + kfree(p->bus_desc.provider_name); 255 + return -ENXIO; 256 + } 257 + 258 + static int papr_scm_probe(struct platform_device *pdev) 259 + { 260 + uint32_t drc_index, metadata_size, unit_cap[2]; 261 + struct device_node *dn = pdev->dev.of_node; 262 + struct papr_scm_priv *p; 263 + int rc; 264 + 265 + /* check we have all the required DT properties */ 266 + if (of_property_read_u32(dn, 
"ibm,my-drc-index", &drc_index)) { 267 + dev_err(&pdev->dev, "%pOF: missing drc-index!\n", dn); 268 + return -ENODEV; 269 + } 270 + 271 + if (of_property_read_u32_array(dn, "ibm,unit-capacity", unit_cap, 2)) { 272 + dev_err(&pdev->dev, "%pOF: missing unit-capacity!\n", dn); 273 + return -ENODEV; 274 + } 275 + 276 + p = kzalloc(sizeof(*p), GFP_KERNEL); 277 + if (!p) 278 + return -ENOMEM; 279 + 280 + /* optional DT properties */ 281 + of_property_read_u32(dn, "ibm,metadata-size", &metadata_size); 282 + 283 + p->dn = dn; 284 + p->drc_index = drc_index; 285 + p->block_size = unit_cap[0]; 286 + p->blocks = unit_cap[1]; 287 + 288 + /* might be zero */ 289 + p->metadata_size = metadata_size; 290 + p->pdev = pdev; 291 + 292 + /* request the hypervisor to bind this region to somewhere in memory */ 293 + rc = drc_pmem_bind(p); 294 + if (rc) 295 + goto err; 296 + 297 + /* setup the resource for the newly bound range */ 298 + p->res.start = p->bound_addr; 299 + p->res.end = p->bound_addr + p->blocks * p->block_size; 300 + p->res.name = pdev->name; 301 + p->res.flags = IORESOURCE_MEM; 302 + 303 + rc = papr_scm_nvdimm_init(p); 304 + if (rc) 305 + goto err2; 306 + 307 + platform_set_drvdata(pdev, p); 308 + 309 + return 0; 310 + 311 + err2: drc_pmem_unbind(p); 312 + err: kfree(p); 313 + return rc; 314 + } 315 + 316 + static int papr_scm_remove(struct platform_device *pdev) 317 + { 318 + struct papr_scm_priv *p = platform_get_drvdata(pdev); 319 + 320 + nvdimm_bus_unregister(p->bus); 321 + drc_pmem_unbind(p); 322 + kfree(p); 323 + 324 + return 0; 325 + } 326 + 327 + static const struct of_device_id papr_scm_match[] = { 328 + { .compatible = "ibm,pmemory" }, 329 + { }, 330 + }; 331 + 332 + static struct platform_driver papr_scm_driver = { 333 + .probe = papr_scm_probe, 334 + .remove = papr_scm_remove, 335 + .driver = { 336 + .name = "papr_scm", 337 + .owner = THIS_MODULE, 338 + .of_match_table = papr_scm_match, 339 + }, 340 + }; 341 + 342 + module_platform_driver(papr_scm_driver); 
343 + MODULE_DEVICE_TABLE(of, papr_scm_match); 344 + MODULE_LICENSE("GPL"); 345 + MODULE_AUTHOR("IBM Corporation");