Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
4 *
5 * smc_sysctl.c: sysctl interface to SMC subsystem.
6 *
7 * Copyright (c) 2022, Alibaba Inc.
8 *
9 * Author: Tony Lu <tonylu@linux.alibaba.com>
10 *
11 */
12
13#include <linux/init.h>
14#include <linux/sysctl.h>
15#include <linux/bpf.h>
16#include <net/net_namespace.h>
17
18#include "smc.h"
19#include "smc_core.h"
20#include "smc_llc.h"
21#include "smc_sysctl.h"
22#include "smc_hs_bpf.h"
23
24static int min_sndbuf = SMC_BUF_MIN_SIZE;
25static int min_rcvbuf = SMC_BUF_MIN_SIZE;
26static int max_sndbuf = INT_MAX / 2;
27static int max_rcvbuf = INT_MAX / 2;
28static const int net_smc_wmem_init = (64 * 1024);
29static const int net_smc_rmem_init = (64 * 1024);
30static int links_per_lgr_min = SMC_LINKS_ADD_LNK_MIN;
31static int links_per_lgr_max = SMC_LINKS_ADD_LNK_MAX;
32static int conns_per_lgr_min = SMC_CONN_PER_LGR_MIN;
33static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX;
34static unsigned int smcr_max_wr_min = 2;
35static unsigned int smcr_max_wr_max = 2048;
36
37#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
38static int smc_net_replace_smc_hs_ctrl(struct net *net, const char *name)
39{
40 struct smc_hs_ctrl *ctrl = NULL;
41
42 rcu_read_lock();
43 /* null or empty name ask to clear current ctrl */
44 if (name && name[0]) {
45 ctrl = smc_hs_ctrl_find_by_name(name);
46 if (!ctrl) {
47 rcu_read_unlock();
48 return -EINVAL;
49 }
50 /* no change, just return */
51 if (ctrl == rcu_dereference(net->smc.hs_ctrl)) {
52 rcu_read_unlock();
53 return 0;
54 }
55 if (!bpf_try_module_get(ctrl, ctrl->owner)) {
56 rcu_read_unlock();
57 return -EBUSY;
58 }
59 }
60 /* xhcg old ctrl with the new one atomically */
61 ctrl = unrcu_pointer(xchg(&net->smc.hs_ctrl, RCU_INITIALIZER(ctrl)));
62 /* release old ctrl */
63 if (ctrl)
64 bpf_module_put(ctrl, ctrl->owner);
65
66 rcu_read_unlock();
67 return 0;
68}
69
70static int proc_smc_hs_ctrl(const struct ctl_table *ctl, int write,
71 void *buffer, size_t *lenp, loff_t *ppos)
72{
73 struct net *net = container_of(ctl->data, struct net, smc.hs_ctrl);
74 char val[SMC_HS_CTRL_NAME_MAX];
75 const struct ctl_table tbl = {
76 .data = val,
77 .maxlen = SMC_HS_CTRL_NAME_MAX,
78 };
79 struct smc_hs_ctrl *ctrl;
80 int ret;
81
82 rcu_read_lock();
83 ctrl = rcu_dereference(net->smc.hs_ctrl);
84 if (ctrl)
85 memcpy(val, ctrl->name, sizeof(ctrl->name));
86 else
87 val[0] = '\0';
88 rcu_read_unlock();
89
90 ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
91 if (ret)
92 return ret;
93
94 if (write)
95 ret = smc_net_replace_smc_hs_ctrl(net, val);
96 return ret;
97}
98#endif /* CONFIG_SMC_HS_CTRL_BPF */
99
100static struct ctl_table smc_table[] = {
101 {
102 .procname = "autocorking_size",
103 .data = &init_net.smc.sysctl_autocorking_size,
104 .maxlen = sizeof(unsigned int),
105 .mode = 0644,
106 .proc_handler = proc_douintvec,
107 },
108 {
109 .procname = "smcr_buf_type",
110 .data = &init_net.smc.sysctl_smcr_buf_type,
111 .maxlen = sizeof(unsigned int),
112 .mode = 0644,
113 .proc_handler = proc_douintvec_minmax,
114 .extra1 = SYSCTL_ZERO,
115 .extra2 = SYSCTL_TWO,
116 },
117 {
118 .procname = "smcr_testlink_time",
119 .data = &init_net.smc.sysctl_smcr_testlink_time,
120 .maxlen = sizeof(int),
121 .mode = 0644,
122 .proc_handler = proc_dointvec_jiffies,
123 },
124 {
125 .procname = "wmem",
126 .data = &init_net.smc.sysctl_wmem,
127 .maxlen = sizeof(int),
128 .mode = 0644,
129 .proc_handler = proc_dointvec_minmax,
130 .extra1 = &min_sndbuf,
131 .extra2 = &max_sndbuf,
132 },
133 {
134 .procname = "rmem",
135 .data = &init_net.smc.sysctl_rmem,
136 .maxlen = sizeof(int),
137 .mode = 0644,
138 .proc_handler = proc_dointvec_minmax,
139 .extra1 = &min_rcvbuf,
140 .extra2 = &max_rcvbuf,
141 },
142 {
143 .procname = "smcr_max_links_per_lgr",
144 .data = &init_net.smc.sysctl_max_links_per_lgr,
145 .maxlen = sizeof(int),
146 .mode = 0644,
147 .proc_handler = proc_dointvec_minmax,
148 .extra1 = &links_per_lgr_min,
149 .extra2 = &links_per_lgr_max,
150 },
151 {
152 .procname = "smcr_max_conns_per_lgr",
153 .data = &init_net.smc.sysctl_max_conns_per_lgr,
154 .maxlen = sizeof(int),
155 .mode = 0644,
156 .proc_handler = proc_dointvec_minmax,
157 .extra1 = &conns_per_lgr_min,
158 .extra2 = &conns_per_lgr_max,
159 },
160 {
161 .procname = "limit_smc_hs",
162 .data = &init_net.smc.limit_smc_hs,
163 .maxlen = sizeof(int),
164 .mode = 0644,
165 .proc_handler = proc_dointvec_minmax,
166 .extra1 = SYSCTL_ZERO,
167 .extra2 = SYSCTL_ONE,
168 },
169 {
170 .procname = "smcr_max_send_wr",
171 .data = &init_net.smc.sysctl_smcr_max_send_wr,
172 .maxlen = sizeof(int),
173 .mode = 0644,
174 .proc_handler = proc_dointvec_minmax,
175 .extra1 = &smcr_max_wr_min,
176 .extra2 = &smcr_max_wr_max,
177 },
178 {
179 .procname = "smcr_max_recv_wr",
180 .data = &init_net.smc.sysctl_smcr_max_recv_wr,
181 .maxlen = sizeof(int),
182 .mode = 0644,
183 .proc_handler = proc_dointvec_minmax,
184 .extra1 = &smcr_max_wr_min,
185 .extra2 = &smcr_max_wr_max,
186 },
187#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
188 {
189 .procname = "hs_ctrl",
190 .data = &init_net.smc.hs_ctrl,
191 .mode = 0644,
192 .maxlen = SMC_HS_CTRL_NAME_MAX,
193 .proc_handler = proc_smc_hs_ctrl,
194 },
195#endif /* CONFIG_SMC_HS_CTRL_BPF */
196};
197
198int __net_init smc_sysctl_net_init(struct net *net)
199{
200 size_t table_size = ARRAY_SIZE(smc_table);
201 struct ctl_table *table;
202
203 table = smc_table;
204 if (!net_eq(net, &init_net)) {
205 int i;
206#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
207 struct smc_hs_ctrl *ctrl;
208
209 rcu_read_lock();
210 ctrl = rcu_dereference(init_net.smc.hs_ctrl);
211 if (ctrl && ctrl->flags & SMC_HS_CTRL_FLAG_INHERITABLE &&
212 bpf_try_module_get(ctrl, ctrl->owner))
213 rcu_assign_pointer(net->smc.hs_ctrl, ctrl);
214 rcu_read_unlock();
215#endif /* CONFIG_SMC_HS_CTRL_BPF */
216
217 table = kmemdup(table, sizeof(smc_table), GFP_KERNEL);
218 if (!table)
219 goto err_alloc;
220
221 for (i = 0; i < table_size; i++)
222 table[i].data += (void *)net - (void *)&init_net;
223 }
224
225 net->smc.smc_hdr = register_net_sysctl_sz(net, "net/smc", table,
226 table_size);
227 if (!net->smc.smc_hdr)
228 goto err_reg;
229
230 net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
231 net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
232 net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
233 WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init);
234 WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init);
235 net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER;
236 net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER;
237 net->smc.sysctl_smcr_max_send_wr = SMCR_MAX_SEND_WR_DEF;
238 net->smc.sysctl_smcr_max_recv_wr = SMCR_MAX_RECV_WR_DEF;
239 /* disable handshake limitation by default */
240 net->smc.limit_smc_hs = 0;
241
242 return 0;
243
244err_reg:
245 if (!net_eq(net, &init_net))
246 kfree(table);
247err_alloc:
248#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
249 smc_net_replace_smc_hs_ctrl(net, NULL);
250#endif /* CONFIG_SMC_HS_CTRL_BPF */
251 return -ENOMEM;
252}
253
254void __net_exit smc_sysctl_net_exit(struct net *net)
255{
256 const struct ctl_table *table;
257
258 table = net->smc.smc_hdr->ctl_table_arg;
259 unregister_net_sysctl_table(net->smc.smc_hdr);
260#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
261 smc_net_replace_smc_hs_ctrl(net, NULL);
262#endif /* CONFIG_SMC_HS_CTRL_BPF */
263
264 if (!net_eq(net, &init_net))
265 kfree(table);
266}