Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

nvme-multipath: Add visibility for numa io-policy

This patch helps add nvme native multipath visibility for numa io-policy.
It adds a new attribute file named "numa_nodes" under namespace gendisk
device path node which prints the list of numa nodes preferred by the
given namespace path. The numa nodes value is a comma-delimited list of
nodes or an A-B range of nodes.

For instance, if we have a shared namespace accessible from two different
controllers/paths then accessing head node of the shared namespace would
show the following output:

$ ls -l /sys/block/nvme1n1/multipath/
nvme1c1n1 -> ../../../../../pci052e:78/052e:78:00.0/nvme/nvme1/nvme1c1n1
nvme1c3n1 -> ../../../../../pci058e:78/058e:78:00.0/nvme/nvme3/nvme1c3n1

In the above example, nvme1n1 is head gendisk node created for a shared
namespace and this namespace is accessible from nvme1c1n1 and nvme1c3n1
paths. For the numa io-policy we can then refer to the "numa_nodes"
attribute file created under each namespace path:

$ cat /sys/block/nvme1n1/multipath/nvme1c1n1/numa_nodes
0-1

$ cat /sys/block/nvme1n1/multipath/nvme1c3n1/numa_nodes
2-3

From the above output, we infer that I/O workload targeted at nvme1n1
and running on numa nodes 0 and 1 would prefer using path nvme1c1n1.
Similarly, I/O workload running on numa nodes 2 and 3 would prefer
using path nvme1c3n1. Reading the "numa_nodes" file when the configured
io-policy is anything but numa shows no output.

Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>

authored by

Nilay Shroff and committed by
Keith Busch
6546cc4a 4dbd2b2e

+33
+27
drivers/nvme/host/multipath.c
··· 976 976 } 977 977 DEVICE_ATTR_RO(ana_state); 978 978 979 + static ssize_t numa_nodes_show(struct device *dev, struct device_attribute *attr, 980 + char *buf) 981 + { 982 + int node, srcu_idx; 983 + nodemask_t numa_nodes; 984 + struct nvme_ns *current_ns; 985 + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); 986 + struct nvme_ns_head *head = ns->head; 987 + 988 + if (head->subsys->iopolicy != NVME_IOPOLICY_NUMA) 989 + return 0; 990 + 991 + nodes_clear(numa_nodes); 992 + 993 + srcu_idx = srcu_read_lock(&head->srcu); 994 + for_each_node(node) { 995 + current_ns = srcu_dereference(head->current_path[node], 996 + &head->srcu); 997 + if (ns == current_ns) 998 + node_set(node, numa_nodes); 999 + } 1000 + srcu_read_unlock(&head->srcu, srcu_idx); 1001 + 1002 + return sysfs_emit(buf, "%*pbl\n", nodemask_pr_args(&numa_nodes)); 1003 + } 1004 + DEVICE_ATTR_RO(numa_nodes); 1005 + 979 1006 static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl, 980 1007 struct nvme_ana_group_desc *desc, void *data) 981 1008 {
+1
drivers/nvme/host/nvme.h
··· 984 984 extern bool multipath; 985 985 extern struct device_attribute dev_attr_ana_grpid; 986 986 extern struct device_attribute dev_attr_ana_state; 987 + extern struct device_attribute dev_attr_numa_nodes; 987 988 extern struct device_attribute subsys_attr_iopolicy; 988 989 989 990 static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
+5
drivers/nvme/host/sysfs.c
··· 258 258 #ifdef CONFIG_NVME_MULTIPATH 259 259 &dev_attr_ana_grpid.attr, 260 260 &dev_attr_ana_state.attr, 261 + &dev_attr_numa_nodes.attr, 261 262 #endif 262 263 &dev_attr_io_passthru_err_log_enabled.attr, 263 264 NULL, ··· 289 288 if (nvme_disk_is_ns_head(dev_to_disk(dev))) 290 289 return 0; 291 290 if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl)) 291 + return 0; 292 + } 293 + if (a == &dev_attr_numa_nodes.attr) { 294 + if (nvme_disk_is_ns_head(dev_to_disk(dev))) 292 295 return 0; 293 296 } 294 297 #endif