Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
4 *
5 * Basic Transport Functions exploiting Infiniband API
6 *
7 * Copyright IBM Corp. 2016
8 *
9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
10 */
11
12#include <linux/socket.h>
13#include <linux/if_vlan.h>
14#include <linux/random.h>
15#include <linux/workqueue.h>
16#include <net/tcp.h>
17#include <net/sock.h>
18#include <rdma/ib_verbs.h>
19#include <rdma/ib_cache.h>
20
21#include "smc.h"
22#include "smc_clc.h"
23#include "smc_core.h"
24#include "smc_ib.h"
25#include "smc_wr.h"
26#include "smc_llc.h"
27#include "smc_cdc.h"
28#include "smc_close.h"
29#include "smc_ism.h"
30
/* increment applied to smc_lgr_list.num for every newly created link group */
#define SMC_LGR_NUM_INCR		256
/* regular delay of the link group free worker */
#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
/* SMC-R clients wait 10 * HZ longer than the regular delay */
#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
/* delay used by smc_lgr_schedule_free_work_fast() */
#define SMC_LGR_FREE_DELAY_FAST		(8 * HZ)
35
36static struct smc_lgr_list smc_lgr_list = { /* established link groups */
37 .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
38 .list = LIST_HEAD_INIT(smc_lgr_list.list),
39 .num = 0,
40};
41
42static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
43 struct smc_buf_desc *buf_desc);
44
45static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
46{
47 /* client link group creation always follows the server link group
48 * creation. For client use a somewhat higher removal delay time,
49 * otherwise there is a risk of out-of-sync link groups.
50 */
51 mod_delayed_work(system_wq, &lgr->free_work,
52 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
53 SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV);
54}
55
56void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
57{
58 mod_delayed_work(system_wq, &lgr->free_work, SMC_LGR_FREE_DELAY_FAST);
59}
60
61/* Register connection's alert token in our lookup structure.
62 * To use rbtrees we have to implement our own insert core.
63 * Requires @conns_lock
64 * @smc connection to register
65 * Returns 0 on success, != otherwise.
66 */
67static void smc_lgr_add_alert_token(struct smc_connection *conn)
68{
69 struct rb_node **link, *parent = NULL;
70 u32 token = conn->alert_token_local;
71
72 link = &conn->lgr->conns_all.rb_node;
73 while (*link) {
74 struct smc_connection *cur = rb_entry(*link,
75 struct smc_connection, alert_node);
76
77 parent = *link;
78 if (cur->alert_token_local > token)
79 link = &parent->rb_left;
80 else
81 link = &parent->rb_right;
82 }
83 /* Put the new node there */
84 rb_link_node(&conn->alert_node, parent, link);
85 rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
86}
87
88/* Register connection in link group by assigning an alert token
89 * registered in a search tree.
90 * Requires @conns_lock
91 * Note that '0' is a reserved value and not assigned.
92 */
93static void smc_lgr_register_conn(struct smc_connection *conn)
94{
95 struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
96 static atomic_t nexttoken = ATOMIC_INIT(0);
97
98 /* find a new alert_token_local value not yet used by some connection
99 * in this link group
100 */
101 sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
102 while (!conn->alert_token_local) {
103 conn->alert_token_local = atomic_inc_return(&nexttoken);
104 if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
105 conn->alert_token_local = 0;
106 }
107 smc_lgr_add_alert_token(conn);
108 conn->lgr->conns_num++;
109}
110
111/* Unregister connection and reset the alert token of the given connection<
112 */
113static void __smc_lgr_unregister_conn(struct smc_connection *conn)
114{
115 struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
116 struct smc_link_group *lgr = conn->lgr;
117
118 rb_erase(&conn->alert_node, &lgr->conns_all);
119 lgr->conns_num--;
120 conn->alert_token_local = 0;
121 sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
122}
123
124/* Unregister connection from lgr
125 */
126static void smc_lgr_unregister_conn(struct smc_connection *conn)
127{
128 struct smc_link_group *lgr = conn->lgr;
129
130 if (!lgr)
131 return;
132 write_lock_bh(&lgr->conns_lock);
133 if (conn->alert_token_local) {
134 __smc_lgr_unregister_conn(conn);
135 }
136 write_unlock_bh(&lgr->conns_lock);
137}
138
139/* Send delete link, either as client to request the initiation
140 * of the DELETE LINK sequence from server; or as server to
141 * initiate the delete processing. See smc_llc_rx_delete_link().
142 */
143static int smc_link_send_delete(struct smc_link *lnk)
144{
145 if (lnk->state == SMC_LNK_ACTIVE &&
146 !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, true)) {
147 smc_llc_link_deleting(lnk);
148 return 0;
149 }
150 return -ENOTCONN;
151}
152
153static void smc_lgr_free(struct smc_link_group *lgr);
154
155static void smc_lgr_free_work(struct work_struct *work)
156{
157 struct smc_link_group *lgr = container_of(to_delayed_work(work),
158 struct smc_link_group,
159 free_work);
160 bool conns;
161
162 spin_lock_bh(&smc_lgr_list.lock);
163 read_lock_bh(&lgr->conns_lock);
164 conns = RB_EMPTY_ROOT(&lgr->conns_all);
165 read_unlock_bh(&lgr->conns_lock);
166 if (!conns) { /* number of lgr connections is no longer zero */
167 spin_unlock_bh(&smc_lgr_list.lock);
168 return;
169 }
170 if (!list_empty(&lgr->list))
171 list_del_init(&lgr->list); /* remove from smc_lgr_list */
172 spin_unlock_bh(&smc_lgr_list.lock);
173
174 if (!lgr->is_smcd && !lgr->terminating) {
175 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
176
177 /* try to send del link msg, on error free lgr immediately */
178 if (lnk->state == SMC_LNK_ACTIVE &&
179 !smc_link_send_delete(lnk)) {
180 /* reschedule in case we never receive a response */
181 smc_lgr_schedule_free_work(lgr);
182 return;
183 }
184 }
185
186 if (!delayed_work_pending(&lgr->free_work)) {
187 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
188
189 if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
190 smc_llc_link_inactive(lnk);
191 if (lgr->is_smcd)
192 smc_ism_signal_shutdown(lgr);
193 smc_lgr_free(lgr);
194 }
195}
196
197/* create a new SMC link group */
198static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
199{
200 struct smc_link_group *lgr;
201 struct smc_link *lnk;
202 u8 rndvec[3];
203 int rc = 0;
204 int i;
205
206 if (ini->is_smcd && ini->vlan_id) {
207 if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
208 rc = SMC_CLC_DECL_ISMVLANERR;
209 goto out;
210 }
211 }
212
213 lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
214 if (!lgr) {
215 rc = SMC_CLC_DECL_MEM;
216 goto out;
217 }
218 lgr->is_smcd = ini->is_smcd;
219 lgr->sync_err = 0;
220 lgr->vlan_id = ini->vlan_id;
221 rwlock_init(&lgr->sndbufs_lock);
222 rwlock_init(&lgr->rmbs_lock);
223 rwlock_init(&lgr->conns_lock);
224 for (i = 0; i < SMC_RMBE_SIZES; i++) {
225 INIT_LIST_HEAD(&lgr->sndbufs[i]);
226 INIT_LIST_HEAD(&lgr->rmbs[i]);
227 }
228 smc_lgr_list.num += SMC_LGR_NUM_INCR;
229 memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
230 INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
231 lgr->conns_all = RB_ROOT;
232 if (ini->is_smcd) {
233 /* SMC-D specific settings */
234 lgr->peer_gid = ini->ism_gid;
235 lgr->smcd = ini->ism_dev;
236 } else {
237 /* SMC-R specific settings */
238 lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
239 memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
240 SMC_SYSTEMID_LEN);
241
242 lnk = &lgr->lnk[SMC_SINGLE_LINK];
243 /* initialize link */
244 lnk->state = SMC_LNK_ACTIVATING;
245 lnk->link_id = SMC_SINGLE_LINK;
246 lnk->smcibdev = ini->ib_dev;
247 lnk->ibport = ini->ib_port;
248 lnk->path_mtu =
249 ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
250 if (!ini->ib_dev->initialized)
251 smc_ib_setup_per_ibdev(ini->ib_dev);
252 get_random_bytes(rndvec, sizeof(rndvec));
253 lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
254 (rndvec[2] << 16);
255 rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
256 ini->vlan_id, lnk->gid,
257 &lnk->sgid_index);
258 if (rc)
259 goto free_lgr;
260 rc = smc_llc_link_init(lnk);
261 if (rc)
262 goto free_lgr;
263 rc = smc_wr_alloc_link_mem(lnk);
264 if (rc)
265 goto clear_llc_lnk;
266 rc = smc_ib_create_protection_domain(lnk);
267 if (rc)
268 goto free_link_mem;
269 rc = smc_ib_create_queue_pair(lnk);
270 if (rc)
271 goto dealloc_pd;
272 rc = smc_wr_create_link(lnk);
273 if (rc)
274 goto destroy_qp;
275 }
276 smc->conn.lgr = lgr;
277 spin_lock_bh(&smc_lgr_list.lock);
278 list_add(&lgr->list, &smc_lgr_list.list);
279 spin_unlock_bh(&smc_lgr_list.lock);
280 return 0;
281
282destroy_qp:
283 smc_ib_destroy_queue_pair(lnk);
284dealloc_pd:
285 smc_ib_dealloc_protection_domain(lnk);
286free_link_mem:
287 smc_wr_free_link_mem(lnk);
288clear_llc_lnk:
289 smc_llc_link_clear(lnk);
290free_lgr:
291 kfree(lgr);
292out:
293 if (rc < 0) {
294 if (rc == -ENOMEM)
295 rc = SMC_CLC_DECL_MEM;
296 else
297 rc = SMC_CLC_DECL_INTERR;
298 }
299 return rc;
300}
301
302static void smc_buf_unuse(struct smc_connection *conn,
303 struct smc_link_group *lgr)
304{
305 if (conn->sndbuf_desc)
306 conn->sndbuf_desc->used = 0;
307 if (conn->rmb_desc) {
308 if (!conn->rmb_desc->regerr) {
309 if (!lgr->is_smcd) {
310 /* unregister rmb with peer */
311 smc_llc_do_delete_rkey(
312 &lgr->lnk[SMC_SINGLE_LINK],
313 conn->rmb_desc);
314 }
315 conn->rmb_desc->used = 0;
316 } else {
317 /* buf registration failed, reuse not possible */
318 write_lock_bh(&lgr->rmbs_lock);
319 list_del(&conn->rmb_desc->list);
320 write_unlock_bh(&lgr->rmbs_lock);
321
322 smc_buf_free(lgr, true, conn->rmb_desc);
323 }
324 }
325}
326
327/* remove a finished connection from its link group */
328void smc_conn_free(struct smc_connection *conn)
329{
330 struct smc_link_group *lgr = conn->lgr;
331
332 if (!lgr)
333 return;
334 if (lgr->is_smcd) {
335 smc_ism_unset_conn(conn);
336 tasklet_kill(&conn->rx_tsklet);
337 } else {
338 smc_cdc_tx_dismiss_slots(conn);
339 }
340 smc_lgr_unregister_conn(conn);
341 smc_buf_unuse(conn, lgr); /* allow buffer reuse */
342 conn->lgr = NULL;
343
344 if (!lgr->conns_num)
345 smc_lgr_schedule_free_work(lgr);
346}
347
348static void smc_link_clear(struct smc_link *lnk)
349{
350 lnk->peer_qpn = 0;
351 smc_llc_link_clear(lnk);
352 smc_ib_modify_qp_reset(lnk);
353 smc_wr_free_link(lnk);
354 smc_ib_destroy_queue_pair(lnk);
355 smc_ib_dealloc_protection_domain(lnk);
356 smc_wr_free_link_mem(lnk);
357}
358
359static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
360 struct smc_buf_desc *buf_desc)
361{
362 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
363
364 if (is_rmb) {
365 if (buf_desc->mr_rx[SMC_SINGLE_LINK])
366 smc_ib_put_memory_region(
367 buf_desc->mr_rx[SMC_SINGLE_LINK]);
368 smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
369 DMA_FROM_DEVICE);
370 } else {
371 smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
372 DMA_TO_DEVICE);
373 }
374 sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
375 if (buf_desc->pages)
376 __free_pages(buf_desc->pages, buf_desc->order);
377 kfree(buf_desc);
378}
379
380static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
381 struct smc_buf_desc *buf_desc)
382{
383 if (is_dmb) {
384 /* restore original buf len */
385 buf_desc->len += sizeof(struct smcd_cdc_msg);
386 smc_ism_unregister_dmb(lgr->smcd, buf_desc);
387 } else {
388 kfree(buf_desc->cpu_addr);
389 }
390 kfree(buf_desc);
391}
392
393static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
394 struct smc_buf_desc *buf_desc)
395{
396 if (lgr->is_smcd)
397 smcd_buf_free(lgr, is_rmb, buf_desc);
398 else
399 smcr_buf_free(lgr, is_rmb, buf_desc);
400}
401
402static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
403{
404 struct smc_buf_desc *buf_desc, *bf_desc;
405 struct list_head *buf_list;
406 int i;
407
408 for (i = 0; i < SMC_RMBE_SIZES; i++) {
409 if (is_rmb)
410 buf_list = &lgr->rmbs[i];
411 else
412 buf_list = &lgr->sndbufs[i];
413 list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
414 list) {
415 list_del(&buf_desc->list);
416 smc_buf_free(lgr, is_rmb, buf_desc);
417 }
418 }
419}
420
421static void smc_lgr_free_bufs(struct smc_link_group *lgr)
422{
423 /* free send buffers */
424 __smc_lgr_free_bufs(lgr, false);
425 /* free rmbs */
426 __smc_lgr_free_bufs(lgr, true);
427}
428
429/* remove a link group */
430static void smc_lgr_free(struct smc_link_group *lgr)
431{
432 smc_lgr_free_bufs(lgr);
433 if (lgr->is_smcd)
434 smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
435 else
436 smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
437 kfree(lgr);
438}
439
440void smc_lgr_forget(struct smc_link_group *lgr)
441{
442 spin_lock_bh(&smc_lgr_list.lock);
443 /* do not use this link group for new connections */
444 if (!list_empty(&lgr->list))
445 list_del_init(&lgr->list);
446 spin_unlock_bh(&smc_lgr_list.lock);
447}
448
449/* terminate linkgroup abnormally */
450static void __smc_lgr_terminate(struct smc_link_group *lgr)
451{
452 struct smc_connection *conn;
453 struct smc_sock *smc;
454 struct rb_node *node;
455
456 if (lgr->terminating)
457 return; /* lgr already terminating */
458 lgr->terminating = 1;
459 if (!list_empty(&lgr->list)) /* forget lgr */
460 list_del_init(&lgr->list);
461 if (!lgr->is_smcd)
462 smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
463
464 write_lock_bh(&lgr->conns_lock);
465 node = rb_first(&lgr->conns_all);
466 while (node) {
467 conn = rb_entry(node, struct smc_connection, alert_node);
468 smc = container_of(conn, struct smc_sock, conn);
469 sock_hold(&smc->sk); /* sock_put in close work */
470 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
471 __smc_lgr_unregister_conn(conn);
472 conn->lgr = NULL;
473 write_unlock_bh(&lgr->conns_lock);
474 if (!schedule_work(&conn->close_work))
475 sock_put(&smc->sk);
476 write_lock_bh(&lgr->conns_lock);
477 node = rb_first(&lgr->conns_all);
478 }
479 write_unlock_bh(&lgr->conns_lock);
480 if (!lgr->is_smcd)
481 wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
482 smc_lgr_schedule_free_work(lgr);
483}
484
485void smc_lgr_terminate(struct smc_link_group *lgr)
486{
487 spin_lock_bh(&smc_lgr_list.lock);
488 __smc_lgr_terminate(lgr);
489 spin_unlock_bh(&smc_lgr_list.lock);
490}
491
492/* Called when IB port is terminated */
493void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
494{
495 struct smc_link_group *lgr, *l;
496
497 spin_lock_bh(&smc_lgr_list.lock);
498 list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
499 if (!lgr->is_smcd &&
500 lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
501 lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
502 __smc_lgr_terminate(lgr);
503 }
504 spin_unlock_bh(&smc_lgr_list.lock);
505}
506
507/* Called when SMC-D device is terminated or peer is lost */
508void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
509{
510 struct smc_link_group *lgr, *l;
511 LIST_HEAD(lgr_free_list);
512
513 /* run common cleanup function and build free list */
514 spin_lock_bh(&smc_lgr_list.lock);
515 list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
516 if (lgr->is_smcd && lgr->smcd == dev &&
517 (!peer_gid || lgr->peer_gid == peer_gid) &&
518 (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
519 __smc_lgr_terminate(lgr);
520 list_move(&lgr->list, &lgr_free_list);
521 }
522 }
523 spin_unlock_bh(&smc_lgr_list.lock);
524
525 /* cancel the regular free workers and actually free lgrs */
526 list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
527 list_del_init(&lgr->list);
528 cancel_delayed_work_sync(&lgr->free_work);
529 if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */
530 smc_ism_signal_shutdown(lgr);
531 smc_lgr_free(lgr);
532 }
533}
534
535/* Determine vlan of internal TCP socket.
536 * @vlan_id: address to store the determined vlan id into
537 */
538int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
539{
540 struct dst_entry *dst = sk_dst_get(clcsock->sk);
541 struct net_device *ndev;
542 int i, nest_lvl, rc = 0;
543
544 ini->vlan_id = 0;
545 if (!dst) {
546 rc = -ENOTCONN;
547 goto out;
548 }
549 if (!dst->dev) {
550 rc = -ENODEV;
551 goto out_rel;
552 }
553
554 ndev = dst->dev;
555 if (is_vlan_dev(ndev)) {
556 ini->vlan_id = vlan_dev_vlan_id(ndev);
557 goto out_rel;
558 }
559
560 rtnl_lock();
561 nest_lvl = dev_get_nest_level(ndev);
562 for (i = 0; i < nest_lvl; i++) {
563 struct list_head *lower = &ndev->adj_list.lower;
564
565 if (list_empty(lower))
566 break;
567 lower = lower->next;
568 ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
569 if (is_vlan_dev(ndev)) {
570 ini->vlan_id = vlan_dev_vlan_id(ndev);
571 break;
572 }
573 }
574 rtnl_unlock();
575
576out_rel:
577 dst_release(dst);
578out:
579 return rc;
580}
581
582static bool smcr_lgr_match(struct smc_link_group *lgr,
583 struct smc_clc_msg_local *lcl,
584 enum smc_lgr_role role, u32 clcqpn)
585{
586 return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
587 SMC_SYSTEMID_LEN) &&
588 !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
589 SMC_GID_SIZE) &&
590 !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
591 sizeof(lcl->mac)) &&
592 lgr->role == role &&
593 (lgr->role == SMC_SERV ||
594 lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
595}
596
597static bool smcd_lgr_match(struct smc_link_group *lgr,
598 struct smcd_dev *smcismdev, u64 peer_gid)
599{
600 return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
601}
602
603/* create a new SMC connection (and a new link group if necessary) */
604int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
605{
606 struct smc_connection *conn = &smc->conn;
607 struct smc_link_group *lgr;
608 enum smc_lgr_role role;
609 int rc = 0;
610
611 ini->cln_first_contact = SMC_FIRST_CONTACT;
612 role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
613 if (role == SMC_CLNT && ini->srv_first_contact)
614 /* create new link group as well */
615 goto create;
616
617 /* determine if an existing link group can be reused */
618 spin_lock_bh(&smc_lgr_list.lock);
619 list_for_each_entry(lgr, &smc_lgr_list.list, list) {
620 write_lock_bh(&lgr->conns_lock);
621 if ((ini->is_smcd ?
622 smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
623 smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
624 !lgr->sync_err &&
625 lgr->vlan_id == ini->vlan_id &&
626 (role == SMC_CLNT ||
627 lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
628 /* link group found */
629 ini->cln_first_contact = SMC_REUSE_CONTACT;
630 conn->lgr = lgr;
631 smc_lgr_register_conn(conn); /* add smc conn to lgr */
632 if (delayed_work_pending(&lgr->free_work))
633 cancel_delayed_work(&lgr->free_work);
634 write_unlock_bh(&lgr->conns_lock);
635 break;
636 }
637 write_unlock_bh(&lgr->conns_lock);
638 }
639 spin_unlock_bh(&smc_lgr_list.lock);
640
641 if (role == SMC_CLNT && !ini->srv_first_contact &&
642 ini->cln_first_contact == SMC_FIRST_CONTACT) {
643 /* Server reuses a link group, but Client wants to start
644 * a new one
645 * send out_of_sync decline, reason synchr. error
646 */
647 return SMC_CLC_DECL_SYNCERR;
648 }
649
650create:
651 if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
652 rc = smc_lgr_create(smc, ini);
653 if (rc)
654 goto out;
655 smc_lgr_register_conn(conn); /* add smc conn to lgr */
656 }
657 conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
658 conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
659 conn->urg_state = SMC_URG_READ;
660 if (ini->is_smcd) {
661 conn->rx_off = sizeof(struct smcd_cdc_msg);
662 smcd_cdc_rx_init(conn); /* init tasklet for this conn */
663 }
664#ifndef KERNEL_HAS_ATOMIC64
665 spin_lock_init(&conn->acurs_lock);
666#endif
667
668out:
669 return rc;
670}
671
672/* convert the RMB size into the compressed notation - minimum 16K.
673 * In contrast to plain ilog2, this rounds towards the next power of 2,
674 * so the socket application gets at least its desired sndbuf / rcvbuf size.
675 */
676static u8 smc_compress_bufsize(int size)
677{
678 u8 compressed;
679
680 if (size <= SMC_BUF_MIN_SIZE)
681 return 0;
682
683 size = (size - 1) >> 14;
684 compressed = ilog2(size) + 1;
685 if (compressed >= SMC_RMBE_SIZES)
686 compressed = SMC_RMBE_SIZES - 1;
687 return compressed;
688}
689
690/* convert the RMB size from compressed notation into integer */
691int smc_uncompress_bufsize(u8 compressed)
692{
693 u32 size;
694
695 size = 0x00000001 << (((int)compressed) + 14);
696 return (int)size;
697}
698
699/* try to reuse a sndbuf or rmb description slot for a certain
700 * buffer size; if not available, return NULL
701 */
702static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
703 rwlock_t *lock,
704 struct list_head *buf_list)
705{
706 struct smc_buf_desc *buf_slot;
707
708 read_lock_bh(lock);
709 list_for_each_entry(buf_slot, buf_list, list) {
710 if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
711 read_unlock_bh(lock);
712 return buf_slot;
713 }
714 }
715 read_unlock_bh(lock);
716 return NULL;
717}
718
719/* one of the conditions for announcing a receiver's current window size is
720 * that it "results in a minimum increase in the window size of 10% of the
721 * receive buffer space" [RFC7609]
722 */
723static inline int smc_rmb_wnd_update_limit(int rmbe_size)
724{
725 return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
726}
727
728static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
729 bool is_rmb, int bufsize)
730{
731 struct smc_buf_desc *buf_desc;
732 struct smc_link *lnk;
733 int rc;
734
735 /* try to alloc a new buffer */
736 buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
737 if (!buf_desc)
738 return ERR_PTR(-ENOMEM);
739
740 buf_desc->order = get_order(bufsize);
741 buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
742 __GFP_NOMEMALLOC | __GFP_COMP |
743 __GFP_NORETRY | __GFP_ZERO,
744 buf_desc->order);
745 if (!buf_desc->pages) {
746 kfree(buf_desc);
747 return ERR_PTR(-EAGAIN);
748 }
749 buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
750
751 /* build the sg table from the pages */
752 lnk = &lgr->lnk[SMC_SINGLE_LINK];
753 rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
754 GFP_KERNEL);
755 if (rc) {
756 smc_buf_free(lgr, is_rmb, buf_desc);
757 return ERR_PTR(rc);
758 }
759 sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
760 buf_desc->cpu_addr, bufsize);
761
762 /* map sg table to DMA address */
763 rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
764 is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
765 /* SMC protocol depends on mapping to one DMA address only */
766 if (rc != 1) {
767 smc_buf_free(lgr, is_rmb, buf_desc);
768 return ERR_PTR(-EAGAIN);
769 }
770
771 /* create a new memory region for the RMB */
772 if (is_rmb) {
773 rc = smc_ib_get_memory_region(lnk->roce_pd,
774 IB_ACCESS_REMOTE_WRITE |
775 IB_ACCESS_LOCAL_WRITE,
776 buf_desc);
777 if (rc) {
778 smc_buf_free(lgr, is_rmb, buf_desc);
779 return ERR_PTR(rc);
780 }
781 }
782
783 buf_desc->len = bufsize;
784 return buf_desc;
785}
786
787#define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
788
789static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
790 bool is_dmb, int bufsize)
791{
792 struct smc_buf_desc *buf_desc;
793 int rc;
794
795 if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
796 return ERR_PTR(-EAGAIN);
797
798 /* try to alloc a new DMB */
799 buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
800 if (!buf_desc)
801 return ERR_PTR(-ENOMEM);
802 if (is_dmb) {
803 rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
804 if (rc) {
805 kfree(buf_desc);
806 return ERR_PTR(-EAGAIN);
807 }
808 buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
809 /* CDC header stored in buf. So, pretend it was smaller */
810 buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
811 } else {
812 buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
813 __GFP_NOWARN | __GFP_NORETRY |
814 __GFP_NOMEMALLOC);
815 if (!buf_desc->cpu_addr) {
816 kfree(buf_desc);
817 return ERR_PTR(-EAGAIN);
818 }
819 buf_desc->len = bufsize;
820 }
821 return buf_desc;
822}
823
824static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
825{
826 struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
827 struct smc_connection *conn = &smc->conn;
828 struct smc_link_group *lgr = conn->lgr;
829 struct list_head *buf_list;
830 int bufsize, bufsize_short;
831 int sk_buf_size;
832 rwlock_t *lock;
833
834 if (is_rmb)
835 /* use socket recv buffer size (w/o overhead) as start value */
836 sk_buf_size = smc->sk.sk_rcvbuf / 2;
837 else
838 /* use socket send buffer size (w/o overhead) as start value */
839 sk_buf_size = smc->sk.sk_sndbuf / 2;
840
841 for (bufsize_short = smc_compress_bufsize(sk_buf_size);
842 bufsize_short >= 0; bufsize_short--) {
843
844 if (is_rmb) {
845 lock = &lgr->rmbs_lock;
846 buf_list = &lgr->rmbs[bufsize_short];
847 } else {
848 lock = &lgr->sndbufs_lock;
849 buf_list = &lgr->sndbufs[bufsize_short];
850 }
851 bufsize = smc_uncompress_bufsize(bufsize_short);
852 if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
853 continue;
854
855 /* check for reusable slot in the link group */
856 buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
857 if (buf_desc) {
858 memset(buf_desc->cpu_addr, 0, bufsize);
859 break; /* found reusable slot */
860 }
861
862 if (is_smcd)
863 buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
864 else
865 buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
866
867 if (PTR_ERR(buf_desc) == -ENOMEM)
868 break;
869 if (IS_ERR(buf_desc))
870 continue;
871
872 buf_desc->used = 1;
873 write_lock_bh(lock);
874 list_add(&buf_desc->list, buf_list);
875 write_unlock_bh(lock);
876 break; /* found */
877 }
878
879 if (IS_ERR(buf_desc))
880 return -ENOMEM;
881
882 if (is_rmb) {
883 conn->rmb_desc = buf_desc;
884 conn->rmbe_size_short = bufsize_short;
885 smc->sk.sk_rcvbuf = bufsize * 2;
886 atomic_set(&conn->bytes_to_rcv, 0);
887 conn->rmbe_update_limit =
888 smc_rmb_wnd_update_limit(buf_desc->len);
889 if (is_smcd)
890 smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
891 } else {
892 conn->sndbuf_desc = buf_desc;
893 smc->sk.sk_sndbuf = bufsize * 2;
894 atomic_set(&conn->sndbuf_space, bufsize);
895 }
896 return 0;
897}
898
899void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
900{
901 struct smc_link_group *lgr = conn->lgr;
902
903 if (!conn->lgr || conn->lgr->is_smcd)
904 return;
905 smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
906 conn->sndbuf_desc, DMA_TO_DEVICE);
907}
908
909void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
910{
911 struct smc_link_group *lgr = conn->lgr;
912
913 if (!conn->lgr || conn->lgr->is_smcd)
914 return;
915 smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
916 conn->sndbuf_desc, DMA_TO_DEVICE);
917}
918
919void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
920{
921 struct smc_link_group *lgr = conn->lgr;
922
923 if (!conn->lgr || conn->lgr->is_smcd)
924 return;
925 smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
926 conn->rmb_desc, DMA_FROM_DEVICE);
927}
928
929void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
930{
931 struct smc_link_group *lgr = conn->lgr;
932
933 if (!conn->lgr || conn->lgr->is_smcd)
934 return;
935 smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
936 conn->rmb_desc, DMA_FROM_DEVICE);
937}
938
939/* create the send and receive buffer for an SMC socket;
940 * receive buffers are called RMBs;
941 * (even though the SMC protocol allows more than one RMB-element per RMB,
942 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
943 * extra RMB for every connection in a link group
944 */
945int smc_buf_create(struct smc_sock *smc, bool is_smcd)
946{
947 int rc;
948
949 /* create send buffer */
950 rc = __smc_buf_create(smc, is_smcd, false);
951 if (rc)
952 return rc;
953 /* create rmb */
954 rc = __smc_buf_create(smc, is_smcd, true);
955 if (rc)
956 smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
957 return rc;
958}
959
960static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
961{
962 int i;
963
964 for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
965 if (!test_and_set_bit(i, lgr->rtokens_used_mask))
966 return i;
967 }
968 return -ENOSPC;
969}
970
971/* add a new rtoken from peer */
972int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
973{
974 u64 dma_addr = be64_to_cpu(nw_vaddr);
975 u32 rkey = ntohl(nw_rkey);
976 int i;
977
978 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
979 if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
980 (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
981 test_bit(i, lgr->rtokens_used_mask)) {
982 /* already in list */
983 return i;
984 }
985 }
986 i = smc_rmb_reserve_rtoken_idx(lgr);
987 if (i < 0)
988 return i;
989 lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
990 lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
991 return i;
992}
993
994/* delete an rtoken */
995int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
996{
997 u32 rkey = ntohl(nw_rkey);
998 int i;
999
1000 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1001 if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
1002 test_bit(i, lgr->rtokens_used_mask)) {
1003 lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
1004 lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
1005
1006 clear_bit(i, lgr->rtokens_used_mask);
1007 return 0;
1008 }
1009 }
1010 return -ENOENT;
1011}
1012
1013/* save rkey and dma_addr received from peer during clc handshake */
1014int smc_rmb_rtoken_handling(struct smc_connection *conn,
1015 struct smc_clc_msg_accept_confirm *clc)
1016{
1017 conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
1018 clc->rmb_rkey);
1019 if (conn->rtoken_idx < 0)
1020 return conn->rtoken_idx;
1021 return 0;
1022}
1023
1024/* Called (from smc_exit) when module is removed */
1025void smc_core_exit(void)
1026{
1027 struct smc_link_group *lgr, *lg;
1028 LIST_HEAD(lgr_freeing_list);
1029
1030 spin_lock_bh(&smc_lgr_list.lock);
1031 if (!list_empty(&smc_lgr_list.list))
1032 list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
1033 spin_unlock_bh(&smc_lgr_list.lock);
1034 list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
1035 list_del_init(&lgr->list);
1036 if (!lgr->is_smcd) {
1037 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
1038
1039 if (lnk->state == SMC_LNK_ACTIVE)
1040 smc_llc_send_delete_link(lnk, SMC_LLC_REQ,
1041 false);
1042 smc_llc_link_inactive(lnk);
1043 }
1044 cancel_delayed_work_sync(&lgr->free_work);
1045 if (lgr->is_smcd)
1046 smc_ism_signal_shutdown(lgr);
1047 smc_lgr_free(lgr); /* free link group */
1048 }
1049}