// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. */

#include <linux/device.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/pci.h>
#include <cxlmem.h>
#include "trace.h"
#include "core.h"

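/*
 * Serializes ioctl access to a memdev against teardown: held for read
 * around command dispatch, and for write when severing cxlmd->cxlds or
 * updating the exclusive-command bitmap.
 */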
static DECLARE_RWSEM(cxl_memdev_rwsem);

/*
 * An entire PCI topology full of devices should be enough for any
 * config
 */
#define CXL_MEM_MAX_DEVS 65536

static int cxl_mem_major;
static DEFINE_IDA(cxl_memdev_ida);

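/* device release callback: runs after the last reference is dropped */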
static void cxl_memdev_release(struct device *dev)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);

	ida_free(&cxl_memdev_ida, cxlmd->id);
	kfree(cxlmd);
}

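/* Name the device node cxl/<dev>, i.e. /dev/cxl/mem0, /dev/cxl/mem1, ... */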
static char *cxl_memdev_devnode(const struct device *dev, umode_t *mode, kuid_t *uid,
				kgid_t *gid)
{
	return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
}

static ssize_t firmware_version_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	return sysfs_emit(buf, "%.16s\n", cxlds->firmware_version);
}
static DEVICE_ATTR_RO(firmware_version);

static ssize_t payload_max_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	return sysfs_emit(buf, "%zu\n", cxlds->payload_size);
}
static DEVICE_ATTR_RO(payload_max);

static ssize_t label_storage_size_show(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	return sysfs_emit(buf, "%zu\n", cxlds->lsa_size);
}
static DEVICE_ATTR_RO(label_storage_size);

static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
			     char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	unsigned long long len = resource_size(&cxlds->ram_res);

	return sysfs_emit(buf, "%#llx\n", len);
}

static struct device_attribute dev_attr_ram_size =
	__ATTR(size, 0444, ram_size_show, NULL);

static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	unsigned long long len = resource_size(&cxlds->pmem_res);

	return sysfs_emit(buf, "%#llx\n", len);
}

static struct device_attribute dev_attr_pmem_size =
	__ATTR(size, 0444, pmem_size_show, NULL);

static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	return sysfs_emit(buf, "%#llx\n", cxlds->serial);
}
static DEVICE_ATTR_RO(serial);

static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	return sysfs_emit(buf, "%d\n", dev_to_node(dev));
}
static DEVICE_ATTR_RO(numa_node);

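/*
 * Retrieve the poison list for the whole device by issuing one Get
 * Poison List request per partition: pmem first, then ram.
 */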
static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	u64 offset, length;
	int rc = 0;

	/* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */
	if (resource_size(&cxlds->pmem_res)) {
		offset = cxlds->pmem_res.start;
		length = resource_size(&cxlds->pmem_res);
		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
		if (rc)
			return rc;
	}
	if (resource_size(&cxlds->ram_res)) {
		offset = cxlds->ram_res.start;
		length = resource_size(&cxlds->ram_res);
		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
		/*
		 * Invalid Physical Address is not an error for
		 * volatile addresses. Device support is optional.
		 */
		if (rc == -EFAULT)
			rc = 0;
	}
	return rc;
}

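/**
 * cxl_trigger_poison_list() - Read the poison list for a memdev
 * @cxlmd: The memory device to query
 *
 * When no regions are mapped (commit_end == -1), query the device
 * directly; otherwise walk the endpoint so each poison record can be
 * attributed to its region.
 *
 * Returns 0 on success, a negative error code otherwise.
 */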
int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
{
	struct cxl_port *port;
	int rc;

	port = dev_get_drvdata(&cxlmd->dev);
	if (!port || !is_cxl_endpoint(port))
		return -EINVAL;

	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		return rc;

	if (port->commit_end == -1) {
		/* No regions mapped to this memdev */
		rc = cxl_get_poison_by_memdev(cxlmd);
	} else {
		/* Regions mapped, collect poison by endpoint */
		rc = cxl_get_poison_by_endpoint(port);
	}
	up_read(&cxl_dpa_rwsem);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_trigger_poison_list, CXL);

struct cxl_dpa_to_region_context {
	struct cxl_region *cxlr;
	u64 dpa;
};

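/*
 * device_for_each_child() callback: returns 1 to stop the walk at the
 * first endpoint decoder whose DPA range contains ctx->dpa.
 */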
static int __cxl_dpa_to_region(struct device *dev, void *arg)
{
	struct cxl_dpa_to_region_context *ctx = arg;
	struct cxl_endpoint_decoder *cxled;
	u64 dpa = ctx->dpa;

	if (!is_endpoint_decoder(dev))
		return 0;

	cxled = to_cxl_endpoint_decoder(dev);
	if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
		return 0;

	if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
		return 0;

	dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
		dev_name(&cxled->cxld.region->dev));

	ctx->cxlr = cxled->cxld.region;

	return 1;
}

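/* Resolve a device physical address to the region mapping it, if any */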
static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dpa_to_region_context ctx;
	struct cxl_port *port;

	ctx = (struct cxl_dpa_to_region_context) {
		.dpa = dpa,
	};
	port = dev_get_drvdata(&cxlmd->dev);
	if (port && is_cxl_endpoint(port) && port->commit_end != -1)
		device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);

	return ctx.cxlr;
}

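/*
 * Reject a caller-supplied DPA that falls outside the device's DPA
 * resource or is not aligned to the 64-byte poison granularity.
 */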
static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	if (!resource_size(&cxlds->dpa_res)) {
		dev_dbg(cxlds->dev, "device has no dpa resource\n");
		return -EINVAL;
	}
	if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) {
		dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n",
			dpa, &cxlds->dpa_res);
		return -EINVAL;
	}
	if (!IS_ALIGNED(dpa, 64)) {
		dev_dbg(cxlds->dev, "dpa:0x%llx is not 64-byte aligned\n", dpa);
		return -EINVAL;
	}

	return 0;
}

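/**
 * cxl_inject_poison() - Inject poison at a device physical address
 * @cxlmd: The memory device hosting @dpa
 * @dpa: The device physical address to poison
 *
 * Validates @dpa, sends the Inject Poison mailbox command, and emits a
 * trace event mirroring device-reported poison. A no-op returning 0
 * when CONFIG_DEBUG_FS is disabled.
 *
 * Returns 0 on success, a negative error code otherwise.
 */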
int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_mbox_inject_poison inject;
	struct cxl_poison_record record;
	struct cxl_mbox_cmd mbox_cmd;
	struct cxl_region *cxlr;
	int rc;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		return rc;

	rc = cxl_validate_poison_dpa(cxlmd, dpa);
	if (rc)
		goto out;

	inject.address = cpu_to_le64(dpa);
	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_INJECT_POISON,
		.size_in = sizeof(inject),
		.payload_in = &inject,
	};
	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
	if (rc)
		goto out;

	cxlr = cxl_dpa_to_region(cxlmd, dpa);
	if (cxlr)
		dev_warn_once(cxlds->dev,
			      "poison inject dpa:%#llx region: %s\n", dpa,
			      dev_name(&cxlr->dev));

	record = (struct cxl_poison_record) {
		.address = cpu_to_le64(dpa),
		.length = cpu_to_le32(1),
	};
	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT);
out:
	up_read(&cxl_dpa_rwsem);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL);

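/**
 * cxl_clear_poison() - Clear poison at a device physical address
 * @cxlmd: The memory device hosting @dpa
 * @dpa: The device physical address to clear
 *
 * Validates @dpa, sends the Clear Poison mailbox command, and emits a
 * trace event mirroring device-reported poison. A no-op returning 0
 * when CONFIG_DEBUG_FS is disabled.
 *
 * Returns 0 on success, a negative error code otherwise.
 */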
int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_mbox_clear_poison clear;
	struct cxl_poison_record record;
	struct cxl_mbox_cmd mbox_cmd;
	struct cxl_region *cxlr;
	int rc;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		return rc;

	rc = cxl_validate_poison_dpa(cxlmd, dpa);
	if (rc)
		goto out;

	/*
	 * In CXL 3.0 Spec 8.2.9.8.4.3, the Clear Poison mailbox command
	 * is defined to accept 64 bytes of write-data, along with the
	 * address to clear. This driver uses zeroes as write-data.
	 */
	clear = (struct cxl_mbox_clear_poison) {
		.address = cpu_to_le64(dpa)
	};

	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_CLEAR_POISON,
		.size_in = sizeof(clear),
		.payload_in = &clear,
	};

	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
	if (rc)
		goto out;

	cxlr = cxl_dpa_to_region(cxlmd, dpa);
	if (cxlr)
		dev_warn_once(cxlds->dev, "poison clear dpa:%#llx region: %s\n",
			      dpa, dev_name(&cxlr->dev));

	record = (struct cxl_poison_record) {
		.address = cpu_to_le64(dpa),
		.length = cpu_to_le32(1),
	};
	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR);
out:
	up_read(&cxl_dpa_rwsem);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, CXL);

static struct attribute *cxl_memdev_attributes[] = {
	&dev_attr_serial.attr,
	&dev_attr_firmware_version.attr,
	&dev_attr_payload_max.attr,
	&dev_attr_label_storage_size.attr,
	&dev_attr_numa_node.attr,
	NULL,
};

static struct attribute *cxl_memdev_pmem_attributes[] = {
	&dev_attr_pmem_size.attr,
	NULL,
};

static struct attribute *cxl_memdev_ram_attributes[] = {
	&dev_attr_ram_size.attr,
	NULL,
};

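/* Hide the numa_node attribute on kernels built without NUMA support */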
static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a,
				  int n)
{
	if (!IS_ENABLED(CONFIG_NUMA) && a == &dev_attr_numa_node.attr)
		return 0;
	return a->mode;
}

static struct attribute_group cxl_memdev_attribute_group = {
	.attrs = cxl_memdev_attributes,
	.is_visible = cxl_memdev_visible,
};

static struct attribute_group cxl_memdev_ram_attribute_group = {
	.name = "ram",
	.attrs = cxl_memdev_ram_attributes,
};

static struct attribute_group cxl_memdev_pmem_attribute_group = {
	.name = "pmem",
	.attrs = cxl_memdev_pmem_attributes,
};

static const struct attribute_group *cxl_memdev_attribute_groups[] = {
	&cxl_memdev_attribute_group,
	&cxl_memdev_ram_attribute_group,
	&cxl_memdev_pmem_attribute_group,
	NULL,
};

static const struct device_type cxl_memdev_type = {
	.name = "cxl_memdev",
	.release = cxl_memdev_release,
	.devnode = cxl_memdev_devnode,
	.groups = cxl_memdev_attribute_groups,
};

bool is_cxl_memdev(const struct device *dev)
{
	return dev->type == &cxl_memdev_type;
}
EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL);

/**
 * set_exclusive_cxl_commands() - atomically disable user cxl commands
 * @cxlds: The device state to operate on
 * @cmds: bitmap of commands to mark exclusive
 *
 * Grab the cxl_memdev_rwsem in write mode to flush in-flight
 * invocations of the ioctl path and then disable future execution of
 * commands with the command ids set in @cmds.
 */
void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds)
{
	down_write(&cxl_memdev_rwsem);
	bitmap_or(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds,
		  CXL_MEM_COMMAND_ID_MAX);
	up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, CXL);

/**
 * clear_exclusive_cxl_commands() - atomically enable user cxl commands
 * @cxlds: The device state to modify
 * @cmds: bitmap of commands to mark available for userspace
 */
void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds)
{
	down_write(&cxl_memdev_rwsem);
	bitmap_andnot(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds,
		      CXL_MEM_COMMAND_ID_MAX);
	up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL);

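/*
 * Flush in-flight ioctls and fail future ones: taking the rwsem for
 * write waits out readers, and clearing cxlmd->cxlds makes the ioctl
 * path return -ENXIO from then on.
 */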
static void cxl_memdev_shutdown(struct device *dev)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);

	down_write(&cxl_memdev_rwsem);
	cxlmd->cxlds = NULL;
	up_write(&cxl_memdev_rwsem);
}

static void cxl_memdev_unregister(void *_cxlmd)
{
	struct cxl_memdev *cxlmd = _cxlmd;
	struct device *dev = &cxlmd->dev;

	cxl_memdev_shutdown(dev);
	cdev_device_del(&cxlmd->cdev, dev);
	put_device(dev);
}

static void detach_memdev(struct work_struct *work)
{
	struct cxl_memdev *cxlmd;

	cxlmd = container_of(work, typeof(*cxlmd), detach_work);
	device_release_driver(&cxlmd->dev);
	put_device(&cxlmd->dev);
}

static struct lock_class_key cxl_memdev_key;

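/*
 * Allocate a memdev, assign it an id from the ida, and initialize the
 * embedded struct device and cdev. The caller publishes the result
 * with cdev_device_add() and unwinds failures with put_device().
 */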
static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
					   const struct file_operations *fops)
{
	struct cxl_memdev *cxlmd;
	struct device *dev;
	struct cdev *cdev;
	int rc;

	cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
	if (!cxlmd)
		return ERR_PTR(-ENOMEM);

	rc = ida_alloc_max(&cxl_memdev_ida, CXL_MEM_MAX_DEVS - 1, GFP_KERNEL);
	if (rc < 0)
		goto err;
	cxlmd->id = rc;
	cxlmd->depth = -1;

	dev = &cxlmd->dev;
	device_initialize(dev);
	lockdep_set_class(&dev->mutex, &cxl_memdev_key);
	dev->parent = cxlds->dev;
	dev->bus = &cxl_bus_type;
	dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
	dev->type = &cxl_memdev_type;
	device_set_pm_not_required(dev);
	INIT_WORK(&cxlmd->detach_work, detach_memdev);

	cdev = &cxlmd->cdev;
	cdev_init(cdev, fops);
	return cxlmd;

err:
	kfree(cxlmd);
	return ERR_PTR(rc);
}

static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
			       unsigned long arg)
{
	switch (cmd) {
	case CXL_MEM_QUERY_COMMANDS:
		return cxl_query_cmd(cxlmd, (void __user *)arg);
	case CXL_MEM_SEND_COMMAND:
		return cxl_send_cmd(cxlmd, (void __user *)arg);
	default:
		return -ENOTTY;
	}
}

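/* Dispatch ioctls only while cxlmd->cxlds is live, under the read lock */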
static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
			     unsigned long arg)
{
	struct cxl_memdev *cxlmd = file->private_data;
	int rc = -ENXIO;

	down_read(&cxl_memdev_rwsem);
	if (cxlmd->cxlds)
		rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
	up_read(&cxl_memdev_rwsem);

	return rc;
}

static int cxl_memdev_open(struct inode *inode, struct file *file)
{
	struct cxl_memdev *cxlmd =
		container_of(inode->i_cdev, typeof(*cxlmd), cdev);

	get_device(&cxlmd->dev);
	file->private_data = cxlmd;

	return 0;
}

static int cxl_memdev_release_file(struct inode *inode, struct file *file)
{
	struct cxl_memdev *cxlmd =
		container_of(inode->i_cdev, typeof(*cxlmd), cdev);

	put_device(&cxlmd->dev);

	return 0;
}

static const struct file_operations cxl_memdev_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = cxl_memdev_ioctl,
	.open = cxl_memdev_open,
	.release = cxl_memdev_release_file,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

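/**
 * devm_cxl_add_memdev() - Allocate and register a CXL memory device
 * @cxlds: The device state to associate with the new memdev
 *
 * Publishes a /dev/cxl/memN character device and arranges for it to be
 * unregistered when @cxlds->dev is torn down.
 *
 * Returns the new memdev on success, an ERR_PTR() otherwise.
 */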
struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
{
	struct cxl_memdev *cxlmd;
	struct device *dev;
	struct cdev *cdev;
	int rc;

	cxlmd = cxl_memdev_alloc(cxlds, &cxl_memdev_fops);
	if (IS_ERR(cxlmd))
		return cxlmd;

	dev = &cxlmd->dev;
	rc = dev_set_name(dev, "mem%d", cxlmd->id);
	if (rc)
		goto err;

	/*
	 * Activate ioctl operations, no cxl_memdev_rwsem manipulation
	 * needed as this is ordered with cdev_add() publishing the device.
	 */
	cxlmd->cxlds = cxlds;
	cxlds->cxlmd = cxlmd;

	cdev = &cxlmd->cdev;
	rc = cdev_device_add(cdev, dev);
	if (rc)
		goto err;

	rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd);
	if (rc)
		return ERR_PTR(rc);
	return cxlmd;

err:
	/*
	 * The cdev was briefly live, shutdown any ioctl operations that
	 * saw that state.
	 */
	cxl_memdev_shutdown(dev);
	put_device(dev);
	return ERR_PTR(rc);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);

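/* Reserve a chrdev major with CXL_MEM_MAX_DEVS minors for memdev nodes */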
__init int cxl_memdev_init(void)
{
	dev_t devt;
	int rc;

	rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl");
	if (rc)
		return rc;

	cxl_mem_major = MAJOR(devt);

	return 0;
}

void cxl_memdev_exit(void)
{
	unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
}