/*
 * Copyright 2014 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <misc/cxl.h>
#include <linux/msi.h>
#include <linux/module.h>
#include <linux/mount.h>

#include "cxl.h"

/*
 * Since we want to track memory mappings to be able to force-unmap
 * when the AFU is no longer reachable, we need an inode. For devices
 * opened through the cxl user API, this is not a problem, but a
 * userland process can also get a cxl fd through the cxl_get_fd()
 * API, which is used by the cxlflash driver.
 *
 * Therefore we implement our own simple pseudo-filesystem and inode
 * allocator. We don't use the anonymous inode, as we need the
 * meta-data associated with it (address_space) and it is shared by
 * other drivers/processes, so it could lead to cxl unmapping VMAs
 * from random processes.
 */

#define CXL_PSEUDO_FS_MAGIC	0x1697697f

static int cxl_fs_cnt;
static struct vfsmount *cxl_vfs_mount;

static const struct dentry_operations cxl_fs_dops = {
	.d_dname	= simple_dname,
};

static struct dentry *cxl_fs_mount(struct file_system_type *fs_type, int flags,
				const char *dev_name, void *data)
{
	return mount_pseudo(fs_type, "cxl:", NULL, &cxl_fs_dops,
			CXL_PSEUDO_FS_MAGIC);
}

static struct file_system_type cxl_fs_type = {
	.name		= "cxl",
	.owner		= THIS_MODULE,
	.mount		= cxl_fs_mount,
	.kill_sb	= kill_anon_super,
};


void cxl_release_mapping(struct cxl_context *ctx)
{
	if (ctx->kernelapi && ctx->mapping)
		simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt);
}

static struct file *cxl_getfile(const char *name,
				const struct file_operations *fops,
				void *priv, int flags)
{
	struct qstr this;
	struct path path;
	struct file *file;
	struct inode *inode = NULL;
	int rc;

	/* strongly inspired by anon_inode_getfile() */

	if (fops->owner && !try_module_get(fops->owner))
		return ERR_PTR(-ENOENT);

	rc = simple_pin_fs(&cxl_fs_type, &cxl_vfs_mount, &cxl_fs_cnt);
	if (rc < 0) {
		pr_err("Cannot mount cxl pseudo filesystem: %d\n", rc);
		file = ERR_PTR(rc);
		goto err_module;
	}

	inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb);
	if (IS_ERR(inode)) {
		file = ERR_CAST(inode);
		goto err_fs;
	}

	file = ERR_PTR(-ENOMEM);
	this.name = name;
	this.len = strlen(name);
	this.hash = 0;
	path.dentry = d_alloc_pseudo(cxl_vfs_mount->mnt_sb, &this);
	if (!path.dentry)
		goto err_inode;

	path.mnt = mntget(cxl_vfs_mount);
	d_instantiate(path.dentry, inode);

	file = alloc_file(&path, OPEN_FMODE(flags), fops);
	if (IS_ERR(file))
		goto err_dput;
	file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
	file->private_data = priv;

	return file;

err_dput:
	path_put(&path);
err_inode:
	iput(inode);
err_fs:
	simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt);
err_module:
	module_put(fops->owner);
	return file;
}

struct cxl_context *cxl_dev_context_init(struct pci_dev *dev)
{
	struct cxl_afu *afu;
	struct cxl_context *ctx;
	int rc;

	afu = cxl_pci_to_afu(dev);
	if (IS_ERR(afu))
		return ERR_CAST(afu);

	ctx = cxl_context_alloc();
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	ctx->kernelapi = true;

	/* Make it a slave context. We can promote it later? */
	rc = cxl_context_init(ctx, afu, false);
	if (rc)
		goto err_ctx;

	return ctx;

err_ctx:
	kfree(ctx);
	return ERR_PTR(rc);
}
EXPORT_SYMBOL_GPL(cxl_dev_context_init);
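
/*
 * Illustrative sketch (not part of the driver): typical lifecycle of a
 * kernel-API context as used by an AFU driver such as cxlflash. The
 * "wed" value and the abbreviated error handling are placeholders; a
 * real caller checks every return code.
 *
 *	struct cxl_context *ctx = cxl_dev_context_init(pdev);
 *	if (IS_ERR(ctx))
 *		return PTR_ERR(ctx);
 *	rc = cxl_allocate_afu_irqs(ctx, 0);	// 0 => use afu->pp_irqs
 *	rc = cxl_start_context(ctx, wed, NULL);	// NULL task => kernel context
 *	...
 *	cxl_stop_context(ctx);
 *	cxl_free_afu_irqs(ctx);
 *	cxl_release_context(ctx);
 */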

struct cxl_context *cxl_get_context(struct pci_dev *dev)
{
	return dev->dev.archdata.cxl_ctx;
}
EXPORT_SYMBOL_GPL(cxl_get_context);

int cxl_release_context(struct cxl_context *ctx)
{
	if (ctx->status >= STARTED)
		return -EBUSY;

	cxl_context_free(ctx);

	return 0;
}
EXPORT_SYMBOL_GPL(cxl_release_context);

static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num)
{
	__u16 range;
	int r;

	for (r = 0; r < CXL_IRQ_RANGES; r++) {
		range = ctx->irqs.range[r];
		if (num < range) {
			return ctx->irqs.offset[r] + num;
		}
		num -= range;
	}
	return 0;
}

int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq)
{
	if (*ctx == NULL || *afu_irq == 0) {
		*afu_irq = 1;
		*ctx = cxl_get_context(pdev);
	} else {
		(*afu_irq)++;
		if (*afu_irq > cxl_get_max_irqs_per_process(pdev)) {
			*ctx = list_next_entry(*ctx, extra_irq_contexts);
			*afu_irq = 1;
		}
	}
	return cxl_find_afu_irq(*ctx, *afu_irq);
}
/* Exported via cxl_base */

int cxl_set_priv(struct cxl_context *ctx, void *priv)
{
	if (!ctx)
		return -EINVAL;

	ctx->priv = priv;

	return 0;
}
EXPORT_SYMBOL_GPL(cxl_set_priv);

void *cxl_get_priv(struct cxl_context *ctx)
{
	if (!ctx)
		return ERR_PTR(-EINVAL);

	return ctx->priv;
}
EXPORT_SYMBOL_GPL(cxl_get_priv);

int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
{
	int res;
	irq_hw_number_t hwirq;

	if (num == 0)
		num = ctx->afu->pp_irqs;
	res = afu_allocate_irqs(ctx, num);
	if (res)
		return res;

	if (!cpu_has_feature(CPU_FTR_HVMODE)) {
		/* In a guest, the PSL interrupt is not multiplexed. It was
		 * allocated above, and we need to set its handler
		 */
		hwirq = cxl_find_afu_irq(ctx, 0);
		if (hwirq)
			cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl");
	}

	if (ctx->status == STARTED) {
		if (cxl_ops->update_ivtes)
			cxl_ops->update_ivtes(ctx);
		else
			WARN(1, "BUG: cxl_allocate_afu_irqs must be called prior to starting the context on this platform\n");
	}

	return res;
}
EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs);

void cxl_free_afu_irqs(struct cxl_context *ctx)
{
	irq_hw_number_t hwirq;
	unsigned int virq;

	if (!cpu_has_feature(CPU_FTR_HVMODE)) {
		hwirq = cxl_find_afu_irq(ctx, 0);
		if (hwirq) {
			virq = irq_find_mapping(NULL, hwirq);
			if (virq)
				cxl_unmap_irq(virq, ctx);
		}
	}
	afu_irq_name_free(ctx);
	cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
}
EXPORT_SYMBOL_GPL(cxl_free_afu_irqs);

int cxl_map_afu_irq(struct cxl_context *ctx, int num,
		    irq_handler_t handler, void *cookie, char *name)
{
	irq_hw_number_t hwirq;

	/*
	 * Find interrupt we are to register.
	 */
	hwirq = cxl_find_afu_irq(ctx, num);
	if (!hwirq)
		return -ENOENT;

	return cxl_map_irq(ctx->afu->adapter, hwirq, handler, cookie, name);
}
EXPORT_SYMBOL_GPL(cxl_map_afu_irq);

void cxl_unmap_afu_irq(struct cxl_context *ctx, int num, void *cookie)
{
	irq_hw_number_t hwirq;
	unsigned int virq;

	hwirq = cxl_find_afu_irq(ctx, num);
	if (!hwirq)
		return;

	virq = irq_find_mapping(NULL, hwirq);
	if (virq)
		cxl_unmap_irq(virq, cookie);
}
EXPORT_SYMBOL_GPL(cxl_unmap_afu_irq);
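
/*
 * Illustrative sketch (not part of the driver): registering a handler for
 * AFU interrupt 1 and tearing it down again. "my_irq_handler" and
 * "my_cookie" are hypothetical names supplied by the calling driver; the
 * return value of cxl_map_afu_irq() should be checked before use.
 *
 *	rc = cxl_map_afu_irq(ctx, 1, my_irq_handler, my_cookie, "my-afu-irq1");
 *	...
 *	cxl_unmap_afu_irq(ctx, 1, my_cookie);
 */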

/*
 * Start a context
 * Code here similar to afu_ioctl_start_work().
 */
int cxl_start_context(struct cxl_context *ctx, u64 wed,
		      struct task_struct *task)
{
	int rc = 0;
	bool kernel = true;

	pr_devel("%s: pe: %i\n", __func__, ctx->pe);

	mutex_lock(&ctx->status_mutex);
	if (ctx->status == STARTED)
		goto out; /* already started */

	/*
	 * Increment the mapped context count for adapter. This also checks
	 * if adapter_context_lock is taken.
	 */
	rc = cxl_adapter_context_get(ctx->afu->adapter);
	if (rc)
		goto out;

	if (task) {
		ctx->pid = get_task_pid(task, PIDTYPE_PID);
		ctx->glpid = get_task_pid(task->group_leader, PIDTYPE_PID);
		kernel = false;
		ctx->real_mode = false;
	}

	cxl_ctx_get();

	if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
		put_pid(ctx->glpid);
		put_pid(ctx->pid);
		ctx->glpid = ctx->pid = NULL;
		cxl_adapter_context_put(ctx->afu->adapter);
		cxl_ctx_put();
		goto out;
	}

	ctx->status = STARTED;
out:
	mutex_unlock(&ctx->status_mutex);
	return rc;
}
EXPORT_SYMBOL_GPL(cxl_start_context);

int cxl_process_element(struct cxl_context *ctx)
{
	return ctx->external_pe;
}
EXPORT_SYMBOL_GPL(cxl_process_element);

/* Stop a context. Returns 0 on success, otherwise -Errno */
int cxl_stop_context(struct cxl_context *ctx)
{
	return __detach_context(ctx);
}
EXPORT_SYMBOL_GPL(cxl_stop_context);

void cxl_set_master(struct cxl_context *ctx)
{
	ctx->master = true;
}
EXPORT_SYMBOL_GPL(cxl_set_master);

int cxl_set_translation_mode(struct cxl_context *ctx, bool real_mode)
{
	if (ctx->status == STARTED) {
		/*
		 * We could potentially update the PE and issue an update LLCMD
		 * to support this, but it doesn't seem to have a good use case
		 * since it's trivial to just create a second kernel context
		 * with different translation modes, so until someone convinces
		 * me otherwise:
		 */
		return -EBUSY;
	}

	ctx->real_mode = real_mode;
	return 0;
}
EXPORT_SYMBOL_GPL(cxl_set_translation_mode);

/* wrappers around afu_* file ops which are EXPORTED */
int cxl_fd_open(struct inode *inode, struct file *file)
{
	return afu_open(inode, file);
}
EXPORT_SYMBOL_GPL(cxl_fd_open);
int cxl_fd_release(struct inode *inode, struct file *file)
{
	return afu_release(inode, file);
}
EXPORT_SYMBOL_GPL(cxl_fd_release);
long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	return afu_ioctl(file, cmd, arg);
}
EXPORT_SYMBOL_GPL(cxl_fd_ioctl);
int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm)
{
	return afu_mmap(file, vm);
}
EXPORT_SYMBOL_GPL(cxl_fd_mmap);
unsigned int cxl_fd_poll(struct file *file, struct poll_table_struct *poll)
{
	return afu_poll(file, poll);
}
EXPORT_SYMBOL_GPL(cxl_fd_poll);
ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count,
		    loff_t *off)
{
	return afu_read(file, buf, count, off);
}
EXPORT_SYMBOL_GPL(cxl_fd_read);

#define PATCH_FOPS(NAME) if (!fops->NAME) fops->NAME = afu_fops.NAME

/* Get a struct file and fd for a context and attach the ops */
struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops,
			int *fd)
{
	struct file *file;
	int rc, flags, fdtmp;
	char *name = NULL;

	/* only allow one per context */
	if (ctx->mapping)
		return ERR_PTR(-EEXIST);

	flags = O_RDWR | O_CLOEXEC;

	/* This code is similar to anon_inode_getfd() */
	rc = get_unused_fd_flags(flags);
	if (rc < 0)
		return ERR_PTR(rc);
	fdtmp = rc;

	/*
	 * Patch the file ops. Needs to be careful that this is reentrant safe.
	 */
	if (fops) {
		PATCH_FOPS(open);
		PATCH_FOPS(poll);
		PATCH_FOPS(read);
		PATCH_FOPS(release);
		PATCH_FOPS(unlocked_ioctl);
		PATCH_FOPS(compat_ioctl);
		PATCH_FOPS(mmap);
	} else /* use default ops */
		fops = (struct file_operations *)&afu_fops;

	name = kasprintf(GFP_KERNEL, "cxl:%d", ctx->pe);
	file = cxl_getfile(name, fops, ctx, flags);
	kfree(name);
	if (IS_ERR(file))
		goto err_fd;

	cxl_context_set_mapping(ctx, file->f_mapping);
	*fd = fdtmp;
	return file;

err_fd:
	put_unused_fd(fdtmp);
	return NULL;
}
EXPORT_SYMBOL_GPL(cxl_get_fd);
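
/*
 * Illustrative sketch (not part of the driver): exporting a context to
 * userspace, roughly following the cxlflash pattern. "my_fops" is a
 * hypothetical file_operations supplied by the caller (or NULL for the
 * defaults). Note that cxl_get_fd() returns NULL or an ERR_PTR on
 * failure, and the caller installs the fd itself once nothing else can
 * fail, at which point the fd owns the file reference.
 *
 *	struct file *file;
 *	int fd;
 *
 *	file = cxl_get_fd(ctx, &my_fops, &fd);
 *	if (IS_ERR_OR_NULL(file))
 *		return -ENODEV;
 *	...
 *	fd_install(fd, file);
 */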

struct cxl_context *cxl_fops_get_context(struct file *file)
{
	return file->private_data;
}
EXPORT_SYMBOL_GPL(cxl_fops_get_context);

void cxl_set_driver_ops(struct cxl_context *ctx,
			struct cxl_afu_driver_ops *ops)
{
	WARN_ON(!ops->fetch_event || !ops->event_delivered);
	atomic_set(&ctx->afu_driver_events, 0);
	ctx->afu_driver_ops = ops;
}
EXPORT_SYMBOL_GPL(cxl_set_driver_ops);

void cxl_context_events_pending(struct cxl_context *ctx,
				unsigned int new_events)
{
	atomic_add(new_events, &ctx->afu_driver_events);
	wake_up_all(&ctx->wq);
}
EXPORT_SYMBOL_GPL(cxl_context_events_pending);
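
/*
 * Illustrative sketch (not part of the driver): an AFU driver wires up
 * its event callbacks once and later reports newly queued events.
 * "my_fetch_event" and "my_event_delivered" are hypothetical callbacks
 * implemented by that driver; both must be provided, as checked above.
 *
 *	static struct cxl_afu_driver_ops my_ops = {
 *		.fetch_event	 = my_fetch_event,
 *		.event_delivered = my_event_delivered,
 *	};
 *
 *	cxl_set_driver_ops(ctx, &my_ops);
 *	...
 *	cxl_context_events_pending(ctx, 1);	// one new event queued
 */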

int cxl_start_work(struct cxl_context *ctx,
		   struct cxl_ioctl_start_work *work)
{
	int rc;

	/* code taken from afu_ioctl_start_work */
	if (!(work->flags & CXL_START_WORK_NUM_IRQS))
		work->num_interrupts = ctx->afu->pp_irqs;
	else if ((work->num_interrupts < ctx->afu->pp_irqs) ||
		 (work->num_interrupts > ctx->afu->irqs_max)) {
		return -EINVAL;
	}

	rc = afu_register_irqs(ctx, work->num_interrupts);
	if (rc)
		return rc;

	rc = cxl_start_context(ctx, work->work_element_descriptor, current);
	if (rc < 0) {
		afu_release_irqs(ctx, ctx);
		return rc;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(cxl_start_work);

void __iomem *cxl_psa_map(struct cxl_context *ctx)
{
	if (ctx->status != STARTED)
		return NULL;

	pr_devel("%s: psn_phys:%llx size:%llx\n",
		 __func__, ctx->psn_phys, ctx->psn_size);
	return ioremap(ctx->psn_phys, ctx->psn_size);
}
EXPORT_SYMBOL_GPL(cxl_psa_map);
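
/*
 * Illustrative sketch (not part of the driver): mapping the per-process
 * problem state area once the context has been started, and unmapping it
 * again when done.
 *
 *	void __iomem *psa = cxl_psa_map(ctx);
 *	if (!psa)
 *		return -EIO;
 *	... MMIO accesses to the AFU's problem state registers ...
 *	cxl_psa_unmap(psa);
 */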

void cxl_psa_unmap(void __iomem *addr)
{
	iounmap(addr);
}
EXPORT_SYMBOL_GPL(cxl_psa_unmap);

int cxl_afu_reset(struct cxl_context *ctx)
{
	struct cxl_afu *afu = ctx->afu;
	int rc;

	rc = cxl_ops->afu_reset(afu);
	if (rc)
		return rc;

	return cxl_ops->afu_check_and_enable(afu);
}
EXPORT_SYMBOL_GPL(cxl_afu_reset);

void cxl_perst_reloads_same_image(struct cxl_afu *afu,
				  bool perst_reloads_same_image)
{
	afu->adapter->perst_same_image = perst_reloads_same_image;
}
EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image);

ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count)
{
	struct cxl_afu *afu = cxl_pci_to_afu(dev);
	if (IS_ERR(afu))
		return -ENODEV;

	return cxl_ops->read_adapter_vpd(afu->adapter, buf, count);
}
EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd);

int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs)
{
	struct cxl_afu *afu = cxl_pci_to_afu(dev);
	if (IS_ERR(afu))
		return -ENODEV;

	if (irqs > afu->adapter->user_irqs)
		return -EINVAL;

	/* Limit user_irqs to prevent the user increasing this via sysfs */
	afu->adapter->user_irqs = irqs;
	afu->irqs_max = irqs;

	return 0;
}
EXPORT_SYMBOL_GPL(cxl_set_max_irqs_per_process);

int cxl_get_max_irqs_per_process(struct pci_dev *dev)
{
	struct cxl_afu *afu = cxl_pci_to_afu(dev);
	if (IS_ERR(afu))
		return -ENODEV;

	return afu->irqs_max;
}
EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process);

/*
 * This is a special interrupt allocation routine called from the PHB's MSI
 * setup function. When capi interrupts are allocated in this manner they must
 * still be associated with a running context, but since the MSI APIs have no
 * way to specify this we use the default context associated with the device.
 *
 * The Mellanox CX4 has a hardware limitation that restricts the maximum AFU
 * interrupt number, so in order to overcome this their driver informs us of
 * the restriction by setting the maximum interrupts per context, and we
 * allocate additional contexts as necessary so that we can keep the AFU
 * interrupt number within the supported range.
 */
int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
	struct cxl_context *ctx, *new_ctx, *default_ctx;
	int remaining;
	int rc;

	ctx = default_ctx = cxl_get_context(pdev);
	if (WARN_ON(!default_ctx))
		return -ENODEV;

	remaining = nvec;
	while (remaining > 0) {
		rc = cxl_allocate_afu_irqs(ctx, min(remaining, ctx->afu->irqs_max));
		if (rc) {
			pr_warn("%s: Failed to find enough free MSIs\n", pci_name(pdev));
			return rc;
		}
		remaining -= ctx->afu->irqs_max;

		if (ctx != default_ctx && default_ctx->status == STARTED) {
			WARN_ON(cxl_start_context(ctx,
				be64_to_cpu(default_ctx->elem->common.wed),
				NULL));
		}

		if (remaining > 0) {
			new_ctx = cxl_dev_context_init(pdev);
			if (IS_ERR(new_ctx)) {
				pr_warn("%s: Failed to allocate enough contexts for MSIs\n", pci_name(pdev));
				return -ENOSPC;
			}
			list_add(&new_ctx->extra_irq_contexts, &ctx->extra_irq_contexts);
			ctx = new_ctx;
		}
	}

	return 0;
}
/* Exported via cxl_base */

void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev)
{
	struct cxl_context *ctx, *pos, *tmp;

	ctx = cxl_get_context(pdev);
	if (WARN_ON(!ctx))
		return;

	cxl_free_afu_irqs(ctx);
	list_for_each_entry_safe(pos, tmp, &ctx->extra_irq_contexts, extra_irq_contexts) {
		cxl_stop_context(pos);
		cxl_free_afu_irqs(pos);
		list_del(&pos->extra_irq_contexts);
		cxl_release_context(pos);
	}
}
/* Exported via cxl_base */