/*
 * Copyright 2014 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <misc/cxl.h>
#include <linux/msi.h>
#include <linux/module.h>
#include <linux/mount.h>

#include "cxl.h"

/*
 * Since we want to track memory mappings to be able to force-unmap
 * when the AFU is no longer reachable, we need an inode. For devices
 * opened through the cxl user API, this is not a problem, but a
 * userland process can also get a cxl fd through the cxl_get_fd()
 * API, which is used by the cxlflash driver.
 *
 * Therefore we implement our own simple pseudo-filesystem and inode
 * allocator. We don't use the anonymous inode, as we need the
 * meta-data associated with it (address_space) and it is shared by
 * other drivers/processes, so it could lead to cxl unmapping VMAs
 * from random processes.
 */

#define CXL_PSEUDO_FS_MAGIC	0x1697697f

static int cxl_fs_cnt;
static struct vfsmount *cxl_vfs_mount;

static const struct dentry_operations cxl_fs_dops = {
	.d_dname	= simple_dname,
};

static struct dentry *cxl_fs_mount(struct file_system_type *fs_type, int flags,
				const char *dev_name, void *data)
{
	return mount_pseudo(fs_type, "cxl:", NULL, &cxl_fs_dops,
			CXL_PSEUDO_FS_MAGIC);
}

static struct file_system_type cxl_fs_type = {
	.name		= "cxl",
	.owner		= THIS_MODULE,
	.mount		= cxl_fs_mount,
	.kill_sb	= kill_anon_super,
};


void cxl_release_mapping(struct cxl_context *ctx)
{
	if (ctx->kernelapi && ctx->mapping)
		simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt);
}

static struct file *cxl_getfile(const char *name,
				const struct file_operations *fops,
				void *priv, int flags)
{
	struct qstr this;
	struct path path;
	struct file *file;
	struct inode *inode = NULL;
	int rc;

	/* strongly inspired by anon_inode_getfile() */

	if (fops->owner && !try_module_get(fops->owner))
		return ERR_PTR(-ENOENT);

	rc = simple_pin_fs(&cxl_fs_type, &cxl_vfs_mount, &cxl_fs_cnt);
	if (rc < 0) {
		pr_err("Cannot mount cxl pseudo filesystem: %d\n", rc);
		file = ERR_PTR(rc);
		goto err_module;
	}

	inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb);
	if (IS_ERR(inode)) {
		file = ERR_CAST(inode);
		goto err_fs;
	}

	file = ERR_PTR(-ENOMEM);
	this.name = name;
	this.len = strlen(name);
	this.hash = 0;
	path.dentry = d_alloc_pseudo(cxl_vfs_mount->mnt_sb, &this);
	if (!path.dentry)
		goto err_inode;

	path.mnt = mntget(cxl_vfs_mount);
	d_instantiate(path.dentry, inode);

	file = alloc_file(&path, OPEN_FMODE(flags), fops);
	if (IS_ERR(file))
		goto err_dput;
	file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
	file->private_data = priv;

	return file;

err_dput:
	path_put(&path);
err_inode:
	iput(inode);
err_fs:
	simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt);
err_module:
	module_put(fops->owner);
	return file;
}

struct cxl_context *cxl_dev_context_init(struct pci_dev *dev)
{
	struct cxl_afu *afu;
	struct cxl_context *ctx;
	int rc;

	afu = cxl_pci_to_afu(dev);
	if (IS_ERR(afu))
		return ERR_CAST(afu);

	ctx = cxl_context_alloc();
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	ctx->kernelapi = true;

	/* Make it a slave context. We can promote it later? */
	rc = cxl_context_init(ctx, afu, false);
	if (rc)
		goto err_ctx;

	return ctx;

err_ctx:
	kfree(ctx);
	return ERR_PTR(rc);
}
EXPORT_SYMBOL_GPL(cxl_dev_context_init);
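
/*
 * Illustrative sketch (not part of the driver): typical lifecycle of a
 * kernel-API context as used by an AFU driver such as cxlflash. The
 * "wed" value and the abbreviated error handling are placeholders; a
 * real caller checks every return code.
 *
 *	struct cxl_context *ctx = cxl_dev_context_init(pdev);
 *	if (IS_ERR(ctx))
 *		return PTR_ERR(ctx);
 *	rc = cxl_allocate_afu_irqs(ctx, 0);	// 0 => use afu->pp_irqs
 *	rc = cxl_start_context(ctx, wed, NULL);	// NULL task => kernel context
 *	...
 *	cxl_stop_context(ctx);
 *	cxl_free_afu_irqs(ctx);
 *	cxl_release_context(ctx);
 */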

struct cxl_context *cxl_get_context(struct pci_dev *dev)
{
	return dev->dev.archdata.cxl_ctx;
}
EXPORT_SYMBOL_GPL(cxl_get_context);

int cxl_release_context(struct cxl_context *ctx)
{
	if (ctx->status >= STARTED)
		return -EBUSY;

	cxl_context_free(ctx);

	return 0;
}
EXPORT_SYMBOL_GPL(cxl_release_context);

static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num)
{
	__u16 range;
	int r;

	for (r = 0; r < CXL_IRQ_RANGES; r++) {
		range = ctx->irqs.range[r];
		if (num < range) {
			return ctx->irqs.offset[r] + num;
		}
		num -= range;
	}
	return 0;
}

int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq)
{
	if (*ctx == NULL || *afu_irq == 0) {
		*afu_irq = 1;
		*ctx = cxl_get_context(pdev);
	} else {
		(*afu_irq)++;
		if (*afu_irq > cxl_get_max_irqs_per_process(pdev)) {
			*ctx = list_next_entry(*ctx, extra_irq_contexts);
			*afu_irq = 1;
		}
	}
	return cxl_find_afu_irq(*ctx, *afu_irq);
}
/* Exported via cxl_base */

int cxl_set_priv(struct cxl_context *ctx, void *priv)
{
	if (!ctx)
		return -EINVAL;

	ctx->priv = priv;

	return 0;
}
EXPORT_SYMBOL_GPL(cxl_set_priv);

void *cxl_get_priv(struct cxl_context *ctx)
{
	if (!ctx)
		return ERR_PTR(-EINVAL);

	return ctx->priv;
}
EXPORT_SYMBOL_GPL(cxl_get_priv);

int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
{
	int res;
	irq_hw_number_t hwirq;

	if (num == 0)
		num = ctx->afu->pp_irqs;
	res = afu_allocate_irqs(ctx, num);
	if (res)
		return res;

	if (!cpu_has_feature(CPU_FTR_HVMODE)) {
		/* In a guest, the PSL interrupt is not multiplexed. It was
		 * allocated above, and we need to set its handler
		 */
		hwirq = cxl_find_afu_irq(ctx, 0);
		if (hwirq)
			cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl");
	}

	if (ctx->status == STARTED) {
		if (cxl_ops->update_ivtes)
			cxl_ops->update_ivtes(ctx);
		else
			WARN(1, "BUG: cxl_allocate_afu_irqs must be called prior to starting the context on this platform\n");
	}

	return res;
}
EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs);

void cxl_free_afu_irqs(struct cxl_context *ctx)
{
	irq_hw_number_t hwirq;
	unsigned int virq;

	if (!cpu_has_feature(CPU_FTR_HVMODE)) {
		hwirq = cxl_find_afu_irq(ctx, 0);
		if (hwirq) {
			virq = irq_find_mapping(NULL, hwirq);
			if (virq)
				cxl_unmap_irq(virq, ctx);
		}
	}
	afu_irq_name_free(ctx);
	cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
}
EXPORT_SYMBOL_GPL(cxl_free_afu_irqs);

int cxl_map_afu_irq(struct cxl_context *ctx, int num,
		    irq_handler_t handler, void *cookie, char *name)
{
	irq_hw_number_t hwirq;

	/*
	 * Find interrupt we are to register.
	 */
	hwirq = cxl_find_afu_irq(ctx, num);
	if (!hwirq)
		return -ENOENT;

	return cxl_map_irq(ctx->afu->adapter, hwirq, handler, cookie, name);
}
EXPORT_SYMBOL_GPL(cxl_map_afu_irq);

void cxl_unmap_afu_irq(struct cxl_context *ctx, int num, void *cookie)
{
	irq_hw_number_t hwirq;
	unsigned int virq;

	hwirq = cxl_find_afu_irq(ctx, num);
	if (!hwirq)
		return;

	virq = irq_find_mapping(NULL, hwirq);
	if (virq)
		cxl_unmap_irq(virq, cookie);
}
EXPORT_SYMBOL_GPL(cxl_unmap_afu_irq);
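
/*
 * Illustrative sketch (not part of the driver): registering a handler for
 * AFU interrupt 1 and tearing it down again. "my_irq_handler" and
 * "my_cookie" are hypothetical names supplied by the calling driver; the
 * return value of cxl_map_afu_irq() should be checked before use.
 *
 *	rc = cxl_map_afu_irq(ctx, 1, my_irq_handler, my_cookie, "my-afu-irq1");
 *	...
 *	cxl_unmap_afu_irq(ctx, 1, my_cookie);
 */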

/*
 * Start a context
 * Code here similar to afu_ioctl_start_work().
 */
int cxl_start_context(struct cxl_context *ctx, u64 wed,
		      struct task_struct *task)
{
	int rc = 0;
	bool kernel = true;

	pr_devel("%s: pe: %i\n", __func__, ctx->pe);

	mutex_lock(&ctx->status_mutex);
	if (ctx->status == STARTED)
		goto out; /* already started */

	/*
	 * Increment the mapped context count for adapter. This also checks
	 * if adapter_context_lock is taken.
	 */
	rc = cxl_adapter_context_get(ctx->afu->adapter);
	if (rc)
		goto out;

	if (task) {
		ctx->pid = get_task_pid(task, PIDTYPE_PID);
		ctx->glpid = get_task_pid(task->group_leader, PIDTYPE_PID);
		kernel = false;
		ctx->real_mode = false;
	}

	cxl_ctx_get();

	if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
		put_pid(ctx->glpid);
		put_pid(ctx->pid);
		ctx->glpid = ctx->pid = NULL;
		cxl_adapter_context_put(ctx->afu->adapter);
		cxl_ctx_put();
		goto out;
	}

	ctx->status = STARTED;
out:
	mutex_unlock(&ctx->status_mutex);
	return rc;
}
EXPORT_SYMBOL_GPL(cxl_start_context);

int cxl_process_element(struct cxl_context *ctx)
{
	return ctx->external_pe;
}
EXPORT_SYMBOL_GPL(cxl_process_element);

/* Stop a context. Returns 0 on success, otherwise -Errno */
int cxl_stop_context(struct cxl_context *ctx)
{
	return __detach_context(ctx);
}
EXPORT_SYMBOL_GPL(cxl_stop_context);

void cxl_set_master(struct cxl_context *ctx)
{
	ctx->master = true;
}
EXPORT_SYMBOL_GPL(cxl_set_master);

int cxl_set_translation_mode(struct cxl_context *ctx, bool real_mode)
{
	if (ctx->status == STARTED) {
		/*
		 * We could potentially update the PE and issue an update LLCMD
		 * to support this, but it doesn't seem to have a good use case
		 * since it's trivial to just create a second kernel context
		 * with different translation modes, so until someone convinces
		 * me otherwise:
		 */
		return -EBUSY;
	}

	ctx->real_mode = real_mode;
	return 0;
}
EXPORT_SYMBOL_GPL(cxl_set_translation_mode);

/* wrappers around afu_* file ops which are EXPORTED */
int cxl_fd_open(struct inode *inode, struct file *file)
{
	return afu_open(inode, file);
}
EXPORT_SYMBOL_GPL(cxl_fd_open);
int cxl_fd_release(struct inode *inode, struct file *file)
{
	return afu_release(inode, file);
}
EXPORT_SYMBOL_GPL(cxl_fd_release);
long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	return afu_ioctl(file, cmd, arg);
}
EXPORT_SYMBOL_GPL(cxl_fd_ioctl);
int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm)
{
	return afu_mmap(file, vm);
}
EXPORT_SYMBOL_GPL(cxl_fd_mmap);
unsigned int cxl_fd_poll(struct file *file, struct poll_table_struct *poll)
{
	return afu_poll(file, poll);
}
EXPORT_SYMBOL_GPL(cxl_fd_poll);
ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count,
		    loff_t *off)
{
	return afu_read(file, buf, count, off);
}
EXPORT_SYMBOL_GPL(cxl_fd_read);

#define PATCH_FOPS(NAME) if (!fops->NAME) fops->NAME = afu_fops.NAME

/* Get a struct file and fd for a context and attach the ops */
struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops,
			int *fd)
{
	struct file *file;
	int rc, flags, fdtmp;
	char *name = NULL;

	/* only allow one per context */
	if (ctx->mapping)
		return ERR_PTR(-EEXIST);

	flags = O_RDWR | O_CLOEXEC;

	/* This code is similar to anon_inode_getfd() */
	rc = get_unused_fd_flags(flags);
	if (rc < 0)
		return ERR_PTR(rc);
	fdtmp = rc;

	/*
	 * Patch the file ops. Needs to be careful that this is reentrant safe.
	 */
	if (fops) {
		PATCH_FOPS(open);
		PATCH_FOPS(poll);
		PATCH_FOPS(read);
		PATCH_FOPS(release);
		PATCH_FOPS(unlocked_ioctl);
		PATCH_FOPS(compat_ioctl);
		PATCH_FOPS(mmap);
	} else /* use default ops */
		fops = (struct file_operations *)&afu_fops;

	name = kasprintf(GFP_KERNEL, "cxl:%d", ctx->pe);
	file = cxl_getfile(name, fops, ctx, flags);
	kfree(name);
	if (IS_ERR(file))
		goto err_fd;

	cxl_context_set_mapping(ctx, file->f_mapping);
	*fd = fdtmp;
	return file;

err_fd:
	put_unused_fd(fdtmp);
	return NULL;
}
EXPORT_SYMBOL_GPL(cxl_get_fd);
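
/*
 * Illustrative sketch (not part of the driver): exporting a context to
 * userspace, roughly following the cxlflash pattern. "my_fops" is a
 * hypothetical file_operations supplied by the caller (or NULL for the
 * defaults). Note that cxl_get_fd() returns NULL or an ERR_PTR on
 * failure, and the caller installs the fd itself once nothing else can
 * fail, at which point the fd owns the file reference.
 *
 *	struct file *file;
 *	int fd;
 *
 *	file = cxl_get_fd(ctx, &my_fops, &fd);
 *	if (IS_ERR_OR_NULL(file))
 *		return -ENODEV;
 *	...
 *	fd_install(fd, file);
 */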

struct cxl_context *cxl_fops_get_context(struct file *file)
{
	return file->private_data;
}
EXPORT_SYMBOL_GPL(cxl_fops_get_context);

void cxl_set_driver_ops(struct cxl_context *ctx,
			struct cxl_afu_driver_ops *ops)
{
	WARN_ON(!ops->fetch_event || !ops->event_delivered);
	atomic_set(&ctx->afu_driver_events, 0);
	ctx->afu_driver_ops = ops;
}
EXPORT_SYMBOL_GPL(cxl_set_driver_ops);

void cxl_context_events_pending(struct cxl_context *ctx,
				unsigned int new_events)
{
	atomic_add(new_events, &ctx->afu_driver_events);
	wake_up_all(&ctx->wq);
}
EXPORT_SYMBOL_GPL(cxl_context_events_pending);
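
/*
 * Illustrative sketch (not part of the driver): an AFU driver wires up
 * its event callbacks once and later reports newly queued events.
 * "my_fetch_event" and "my_event_delivered" are hypothetical callbacks
 * implemented by that driver; both must be provided, as checked above.
 *
 *	static struct cxl_afu_driver_ops my_ops = {
 *		.fetch_event	 = my_fetch_event,
 *		.event_delivered = my_event_delivered,
 *	};
 *
 *	cxl_set_driver_ops(ctx, &my_ops);
 *	...
 *	cxl_context_events_pending(ctx, 1);	// one new event queued
 */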

int cxl_start_work(struct cxl_context *ctx,
		   struct cxl_ioctl_start_work *work)
{
	int rc;

	/* code taken from afu_ioctl_start_work */
	if (!(work->flags & CXL_START_WORK_NUM_IRQS))
		work->num_interrupts = ctx->afu->pp_irqs;
	else if ((work->num_interrupts < ctx->afu->pp_irqs) ||
		 (work->num_interrupts > ctx->afu->irqs_max)) {
		return -EINVAL;
	}

	rc = afu_register_irqs(ctx, work->num_interrupts);
	if (rc)
		return rc;

	rc = cxl_start_context(ctx, work->work_element_descriptor, current);
	if (rc < 0) {
		afu_release_irqs(ctx, ctx);
		return rc;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(cxl_start_work);

void __iomem *cxl_psa_map(struct cxl_context *ctx)
{
	if (ctx->status != STARTED)
		return NULL;

	pr_devel("%s: psn_phys:%llx size:%llx\n",
		 __func__, ctx->psn_phys, ctx->psn_size);
	return ioremap(ctx->psn_phys, ctx->psn_size);
}
EXPORT_SYMBOL_GPL(cxl_psa_map);
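
/*
 * Illustrative sketch (not part of the driver): mapping the per-process
 * problem state area once the context has been started, and unmapping it
 * again when done.
 *
 *	void __iomem *psa = cxl_psa_map(ctx);
 *	if (!psa)
 *		return -EIO;
 *	... MMIO accesses to the AFU's problem state registers ...
 *	cxl_psa_unmap(psa);
 */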

void cxl_psa_unmap(void __iomem *addr)
{
	iounmap(addr);
}
EXPORT_SYMBOL_GPL(cxl_psa_unmap);

int cxl_afu_reset(struct cxl_context *ctx)
{
	struct cxl_afu *afu = ctx->afu;
	int rc;

	rc = cxl_ops->afu_reset(afu);
	if (rc)
		return rc;

	return cxl_ops->afu_check_and_enable(afu);
}
EXPORT_SYMBOL_GPL(cxl_afu_reset);

void cxl_perst_reloads_same_image(struct cxl_afu *afu,
				  bool perst_reloads_same_image)
{
	afu->adapter->perst_same_image = perst_reloads_same_image;
}
EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image);

ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count)
{
	struct cxl_afu *afu = cxl_pci_to_afu(dev);
	if (IS_ERR(afu))
		return -ENODEV;

	return cxl_ops->read_adapter_vpd(afu->adapter, buf, count);
}
EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd);

int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs)
{
	struct cxl_afu *afu = cxl_pci_to_afu(dev);
	if (IS_ERR(afu))
		return -ENODEV;

	if (irqs > afu->adapter->user_irqs)
		return -EINVAL;

	/* Limit user_irqs to prevent the user increasing this via sysfs */
	afu->adapter->user_irqs = irqs;
	afu->irqs_max = irqs;

	return 0;
}
EXPORT_SYMBOL_GPL(cxl_set_max_irqs_per_process);

int cxl_get_max_irqs_per_process(struct pci_dev *dev)
{
	struct cxl_afu *afu = cxl_pci_to_afu(dev);
	if (IS_ERR(afu))
		return -ENODEV;

	return afu->irqs_max;
}
EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process);

/*
 * This is a special interrupt allocation routine called from the PHB's MSI
 * setup function. When capi interrupts are allocated in this manner they must
 * still be associated with a running context, but since the MSI APIs have no
 * way to specify this we use the default context associated with the device.
 *
 * The Mellanox CX4 has a hardware limitation that restricts the maximum AFU
 * interrupt number, so in order to overcome this their driver informs us of
 * the restriction by setting the maximum interrupts per context, and we
 * allocate additional contexts as necessary so that we can keep the AFU
 * interrupt number within the supported range.
 */
int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
	struct cxl_context *ctx, *new_ctx, *default_ctx;
	int remaining;
	int rc;

	ctx = default_ctx = cxl_get_context(pdev);
	if (WARN_ON(!default_ctx))
		return -ENODEV;

	remaining = nvec;
	while (remaining > 0) {
		rc = cxl_allocate_afu_irqs(ctx, min(remaining, ctx->afu->irqs_max));
		if (rc) {
			pr_warn("%s: Failed to find enough free MSIs\n", pci_name(pdev));
			return rc;
		}
		remaining -= ctx->afu->irqs_max;

		if (ctx != default_ctx && default_ctx->status == STARTED) {
			WARN_ON(cxl_start_context(ctx,
				be64_to_cpu(default_ctx->elem->common.wed),
				NULL));
		}

		if (remaining > 0) {
			new_ctx = cxl_dev_context_init(pdev);
			if (IS_ERR(new_ctx)) {
				pr_warn("%s: Failed to allocate enough contexts for MSIs\n", pci_name(pdev));
				return -ENOSPC;
			}
			list_add(&new_ctx->extra_irq_contexts, &ctx->extra_irq_contexts);
			ctx = new_ctx;
		}
	}

	return 0;
}
/* Exported via cxl_base */

void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev)
{
	struct cxl_context *ctx, *pos, *tmp;

	ctx = cxl_get_context(pdev);
	if (WARN_ON(!ctx))
		return;

	cxl_free_afu_irqs(ctx);
	list_for_each_entry_safe(pos, tmp, &ctx->extra_irq_contexts, extra_irq_contexts) {
		cxl_stop_context(pos);
		cxl_free_afu_irqs(pos);
		list_del(&pos->extra_irq_contexts);
		cxl_release_context(pos);
	}
}
/* Exported via cxl_base */