1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
4 * Copyright (c) 2006 Intel Corporation. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35#include <linux/init.h>
36#include <linux/err.h>
37#include <linux/random.h>
38#include <linux/spinlock.h>
39#include <linux/slab.h>
40#include <linux/dma-mapping.h>
41#include <linux/kref.h>
42#include <linux/xarray.h>
43#include <linux/workqueue.h>
44#include <uapi/linux/if_ether.h>
45#include <rdma/ib_pack.h>
46#include <rdma/ib_cache.h>
47#include <rdma/rdma_netlink.h>
48#include <net/netlink.h>
49#include <uapi/rdma/ib_user_sa.h>
50#include <rdma/ib_marshall.h>
51#include <rdma/ib_addr.h>
52#include <rdma/opa_addr.h>
53#include "sa.h"
54#include "core_priv.h"
55
56#define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100
57#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000
58#define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000
59#define IB_SA_CPI_MAX_RETRY_CNT 3
60#define IB_SA_CPI_RETRY_WAIT 1000 /* msecs */
61static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT;
62
63struct ib_sa_sm_ah {
64 struct ib_ah *ah;
65 struct kref ref;
66 u16 pkey_index;
67 u8 src_path_mask;
68};
69
70enum rdma_class_port_info_type {
71 RDMA_CLASS_PORT_INFO_IB,
72 RDMA_CLASS_PORT_INFO_OPA
73};
74
75struct rdma_class_port_info {
76 enum rdma_class_port_info_type type;
77 union {
78 struct ib_class_port_info ib;
79 struct opa_class_port_info opa;
80 };
81};
82
83struct ib_sa_classport_cache {
84 bool valid;
85 int retry_cnt;
86 struct rdma_class_port_info data;
87};
88
89struct ib_sa_port {
90 struct ib_mad_agent *agent;
91 struct ib_sa_sm_ah *sm_ah;
92 struct work_struct update_task;
93 struct ib_sa_classport_cache classport_info;
94 struct delayed_work ib_cpi_work;
95 spinlock_t classport_lock; /* protects class port info set */
96 spinlock_t ah_lock;
97 u32 port_num;
98};
99
100struct ib_sa_device {
101 int start_port, end_port;
102 struct ib_event_handler event_handler;
103 struct ib_sa_port port[];
104};
105
106struct ib_sa_query {
107 void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
108 void (*release)(struct ib_sa_query *);
109 struct ib_sa_client *client;
110 struct ib_sa_port *port;
111 struct ib_mad_send_buf *mad_buf;
112 struct ib_sa_sm_ah *sm_ah;
113 int id;
114 u32 flags;
115 struct list_head list; /* Local svc request list */
116 u32 seq; /* Local svc request sequence number */
117 unsigned long timeout; /* Local svc timeout */
118 u8 path_use; /* How will the pathrecord be used */
119};
120
121#define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001
122#define IB_SA_CANCEL 0x00000002
123#define IB_SA_QUERY_OPA 0x00000004
124
125struct ib_sa_path_query {
126 void (*callback)(int, struct sa_path_rec *, void *);
127 void *context;
128 struct ib_sa_query sa_query;
129 struct sa_path_rec *conv_pr;
130};
131
132struct ib_sa_guidinfo_query {
133 void (*callback)(int, struct ib_sa_guidinfo_rec *, void *);
134 void *context;
135 struct ib_sa_query sa_query;
136};
137
138struct ib_sa_classport_info_query {
139 void (*callback)(void *);
140 void *context;
141 struct ib_sa_query sa_query;
142};
143
144struct ib_sa_mcmember_query {
145 void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
146 void *context;
147 struct ib_sa_query sa_query;
148};
149
150static LIST_HEAD(ib_nl_request_list);
151static DEFINE_SPINLOCK(ib_nl_request_lock);
152static atomic_t ib_nl_sa_request_seq;
153static struct workqueue_struct *ib_nl_wq;
154static struct delayed_work ib_nl_timed_work;
155static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = {
156 [LS_NLA_TYPE_PATH_RECORD] = {.type = NLA_BINARY,
157 .len = sizeof(struct ib_path_rec_data)},
158 [LS_NLA_TYPE_TIMEOUT] = {.type = NLA_U32},
159 [LS_NLA_TYPE_SERVICE_ID] = {.type = NLA_U64},
160 [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
161 .len = sizeof(struct rdma_nla_ls_gid)},
162 [LS_NLA_TYPE_SGID] = {.type = NLA_BINARY,
163 .len = sizeof(struct rdma_nla_ls_gid)},
164 [LS_NLA_TYPE_TCLASS] = {.type = NLA_U8},
165 [LS_NLA_TYPE_PKEY] = {.type = NLA_U16},
166 [LS_NLA_TYPE_QOS_CLASS] = {.type = NLA_U16},
167};
168
169
170static int ib_sa_add_one(struct ib_device *device);
171static void ib_sa_remove_one(struct ib_device *device, void *client_data);
172
173static struct ib_client sa_client = {
174 .name = "sa",
175 .add = ib_sa_add_one,
176 .remove = ib_sa_remove_one
177};
178
179static DEFINE_XARRAY_FLAGS(queries, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
180
181static DEFINE_SPINLOCK(tid_lock);
182static u32 tid;
183
184#define PATH_REC_FIELD(field) \
185 .struct_offset_bytes = offsetof(struct sa_path_rec, field), \
186 .struct_size_bytes = sizeof_field(struct sa_path_rec, field), \
187 .field_name = "sa_path_rec:" #field
188
189static const struct ib_field path_rec_table[] = {
190 { PATH_REC_FIELD(service_id),
191 .offset_words = 0,
192 .offset_bits = 0,
193 .size_bits = 64 },
194 { PATH_REC_FIELD(dgid),
195 .offset_words = 2,
196 .offset_bits = 0,
197 .size_bits = 128 },
198 { PATH_REC_FIELD(sgid),
199 .offset_words = 6,
200 .offset_bits = 0,
201 .size_bits = 128 },
202 { PATH_REC_FIELD(ib.dlid),
203 .offset_words = 10,
204 .offset_bits = 0,
205 .size_bits = 16 },
206 { PATH_REC_FIELD(ib.slid),
207 .offset_words = 10,
208 .offset_bits = 16,
209 .size_bits = 16 },
210 { PATH_REC_FIELD(ib.raw_traffic),
211 .offset_words = 11,
212 .offset_bits = 0,
213 .size_bits = 1 },
214 { RESERVED,
215 .offset_words = 11,
216 .offset_bits = 1,
217 .size_bits = 3 },
218 { PATH_REC_FIELD(flow_label),
219 .offset_words = 11,
220 .offset_bits = 4,
221 .size_bits = 20 },
222 { PATH_REC_FIELD(hop_limit),
223 .offset_words = 11,
224 .offset_bits = 24,
225 .size_bits = 8 },
226 { PATH_REC_FIELD(traffic_class),
227 .offset_words = 12,
228 .offset_bits = 0,
229 .size_bits = 8 },
230 { PATH_REC_FIELD(reversible),
231 .offset_words = 12,
232 .offset_bits = 8,
233 .size_bits = 1 },
234 { PATH_REC_FIELD(numb_path),
235 .offset_words = 12,
236 .offset_bits = 9,
237 .size_bits = 7 },
238 { PATH_REC_FIELD(pkey),
239 .offset_words = 12,
240 .offset_bits = 16,
241 .size_bits = 16 },
242 { PATH_REC_FIELD(qos_class),
243 .offset_words = 13,
244 .offset_bits = 0,
245 .size_bits = 12 },
246 { PATH_REC_FIELD(sl),
247 .offset_words = 13,
248 .offset_bits = 12,
249 .size_bits = 4 },
250 { PATH_REC_FIELD(mtu_selector),
251 .offset_words = 13,
252 .offset_bits = 16,
253 .size_bits = 2 },
254 { PATH_REC_FIELD(mtu),
255 .offset_words = 13,
256 .offset_bits = 18,
257 .size_bits = 6 },
258 { PATH_REC_FIELD(rate_selector),
259 .offset_words = 13,
260 .offset_bits = 24,
261 .size_bits = 2 },
262 { PATH_REC_FIELD(rate),
263 .offset_words = 13,
264 .offset_bits = 26,
265 .size_bits = 6 },
266 { PATH_REC_FIELD(packet_life_time_selector),
267 .offset_words = 14,
268 .offset_bits = 0,
269 .size_bits = 2 },
270 { PATH_REC_FIELD(packet_life_time),
271 .offset_words = 14,
272 .offset_bits = 2,
273 .size_bits = 6 },
274 { PATH_REC_FIELD(preference),
275 .offset_words = 14,
276 .offset_bits = 8,
277 .size_bits = 8 },
278 { RESERVED,
279 .offset_words = 14,
280 .offset_bits = 16,
281 .size_bits = 48 },
282};
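/*
 * How to read the table above (a brief note, based on how ib_pack() and
 * ib_unpack() consume struct ib_field): offset_words is the 32-bit word
 * within the packed SA attribute, offset_bits the bit offset inside that
 * word (bit 0 being the most significant bit) and size_bits the field
 * width. For example, pkey at offset_words 12, offset_bits 16, size_bits 16
 * lands in the second half of the 13th dword, i.e. bytes 50-51 of the
 * wire-format PathRecord.
 */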
283
284#define OPA_PATH_REC_FIELD(field) \
285 .struct_offset_bytes = \
286 offsetof(struct sa_path_rec, field), \
287 .struct_size_bytes = \
288 sizeof_field(struct sa_path_rec, field), \
289 .field_name = "sa_path_rec:" #field
290
291static const struct ib_field opa_path_rec_table[] = {
292 { OPA_PATH_REC_FIELD(service_id),
293 .offset_words = 0,
294 .offset_bits = 0,
295 .size_bits = 64 },
296 { OPA_PATH_REC_FIELD(dgid),
297 .offset_words = 2,
298 .offset_bits = 0,
299 .size_bits = 128 },
300 { OPA_PATH_REC_FIELD(sgid),
301 .offset_words = 6,
302 .offset_bits = 0,
303 .size_bits = 128 },
304 { OPA_PATH_REC_FIELD(opa.dlid),
305 .offset_words = 10,
306 .offset_bits = 0,
307 .size_bits = 32 },
308 { OPA_PATH_REC_FIELD(opa.slid),
309 .offset_words = 11,
310 .offset_bits = 0,
311 .size_bits = 32 },
312 { OPA_PATH_REC_FIELD(opa.raw_traffic),
313 .offset_words = 12,
314 .offset_bits = 0,
315 .size_bits = 1 },
316 { RESERVED,
317 .offset_words = 12,
318 .offset_bits = 1,
319 .size_bits = 3 },
320 { OPA_PATH_REC_FIELD(flow_label),
321 .offset_words = 12,
322 .offset_bits = 4,
323 .size_bits = 20 },
324 { OPA_PATH_REC_FIELD(hop_limit),
325 .offset_words = 12,
326 .offset_bits = 24,
327 .size_bits = 8 },
328 { OPA_PATH_REC_FIELD(traffic_class),
329 .offset_words = 13,
330 .offset_bits = 0,
331 .size_bits = 8 },
332 { OPA_PATH_REC_FIELD(reversible),
333 .offset_words = 13,
334 .offset_bits = 8,
335 .size_bits = 1 },
336 { OPA_PATH_REC_FIELD(numb_path),
337 .offset_words = 13,
338 .offset_bits = 9,
339 .size_bits = 7 },
340 { OPA_PATH_REC_FIELD(pkey),
341 .offset_words = 13,
342 .offset_bits = 16,
343 .size_bits = 16 },
344 { OPA_PATH_REC_FIELD(opa.l2_8B),
345 .offset_words = 14,
346 .offset_bits = 0,
347 .size_bits = 1 },
348 { OPA_PATH_REC_FIELD(opa.l2_10B),
349 .offset_words = 14,
350 .offset_bits = 1,
351 .size_bits = 1 },
352 { OPA_PATH_REC_FIELD(opa.l2_9B),
353 .offset_words = 14,
354 .offset_bits = 2,
355 .size_bits = 1 },
356 { OPA_PATH_REC_FIELD(opa.l2_16B),
357 .offset_words = 14,
358 .offset_bits = 3,
359 .size_bits = 1 },
360 { RESERVED,
361 .offset_words = 14,
362 .offset_bits = 4,
363 .size_bits = 2 },
364 { OPA_PATH_REC_FIELD(opa.qos_type),
365 .offset_words = 14,
366 .offset_bits = 6,
367 .size_bits = 2 },
368 { OPA_PATH_REC_FIELD(opa.qos_priority),
369 .offset_words = 14,
370 .offset_bits = 8,
371 .size_bits = 8 },
372 { RESERVED,
373 .offset_words = 14,
374 .offset_bits = 16,
375 .size_bits = 3 },
376 { OPA_PATH_REC_FIELD(sl),
377 .offset_words = 14,
378 .offset_bits = 19,
379 .size_bits = 5 },
380 { RESERVED,
381 .offset_words = 14,
382 .offset_bits = 24,
383 .size_bits = 8 },
384 { OPA_PATH_REC_FIELD(mtu_selector),
385 .offset_words = 15,
386 .offset_bits = 0,
387 .size_bits = 2 },
388 { OPA_PATH_REC_FIELD(mtu),
389 .offset_words = 15,
390 .offset_bits = 2,
391 .size_bits = 6 },
392 { OPA_PATH_REC_FIELD(rate_selector),
393 .offset_words = 15,
394 .offset_bits = 8,
395 .size_bits = 2 },
396 { OPA_PATH_REC_FIELD(rate),
397 .offset_words = 15,
398 .offset_bits = 10,
399 .size_bits = 6 },
400 { OPA_PATH_REC_FIELD(packet_life_time_selector),
401 .offset_words = 15,
402 .offset_bits = 16,
403 .size_bits = 2 },
404 { OPA_PATH_REC_FIELD(packet_life_time),
405 .offset_words = 15,
406 .offset_bits = 18,
407 .size_bits = 6 },
408 { OPA_PATH_REC_FIELD(preference),
409 .offset_words = 15,
410 .offset_bits = 24,
411 .size_bits = 8 },
412};
413
414#define MCMEMBER_REC_FIELD(field) \
415 .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \
416 .struct_size_bytes = sizeof_field(struct ib_sa_mcmember_rec, field), \
417 .field_name = "sa_mcmember_rec:" #field
418
419static const struct ib_field mcmember_rec_table[] = {
420 { MCMEMBER_REC_FIELD(mgid),
421 .offset_words = 0,
422 .offset_bits = 0,
423 .size_bits = 128 },
424 { MCMEMBER_REC_FIELD(port_gid),
425 .offset_words = 4,
426 .offset_bits = 0,
427 .size_bits = 128 },
428 { MCMEMBER_REC_FIELD(qkey),
429 .offset_words = 8,
430 .offset_bits = 0,
431 .size_bits = 32 },
432 { MCMEMBER_REC_FIELD(mlid),
433 .offset_words = 9,
434 .offset_bits = 0,
435 .size_bits = 16 },
436 { MCMEMBER_REC_FIELD(mtu_selector),
437 .offset_words = 9,
438 .offset_bits = 16,
439 .size_bits = 2 },
440 { MCMEMBER_REC_FIELD(mtu),
441 .offset_words = 9,
442 .offset_bits = 18,
443 .size_bits = 6 },
444 { MCMEMBER_REC_FIELD(traffic_class),
445 .offset_words = 9,
446 .offset_bits = 24,
447 .size_bits = 8 },
448 { MCMEMBER_REC_FIELD(pkey),
449 .offset_words = 10,
450 .offset_bits = 0,
451 .size_bits = 16 },
452 { MCMEMBER_REC_FIELD(rate_selector),
453 .offset_words = 10,
454 .offset_bits = 16,
455 .size_bits = 2 },
456 { MCMEMBER_REC_FIELD(rate),
457 .offset_words = 10,
458 .offset_bits = 18,
459 .size_bits = 6 },
460 { MCMEMBER_REC_FIELD(packet_life_time_selector),
461 .offset_words = 10,
462 .offset_bits = 24,
463 .size_bits = 2 },
464 { MCMEMBER_REC_FIELD(packet_life_time),
465 .offset_words = 10,
466 .offset_bits = 26,
467 .size_bits = 6 },
468 { MCMEMBER_REC_FIELD(sl),
469 .offset_words = 11,
470 .offset_bits = 0,
471 .size_bits = 4 },
472 { MCMEMBER_REC_FIELD(flow_label),
473 .offset_words = 11,
474 .offset_bits = 4,
475 .size_bits = 20 },
476 { MCMEMBER_REC_FIELD(hop_limit),
477 .offset_words = 11,
478 .offset_bits = 24,
479 .size_bits = 8 },
480 { MCMEMBER_REC_FIELD(scope),
481 .offset_words = 12,
482 .offset_bits = 0,
483 .size_bits = 4 },
484 { MCMEMBER_REC_FIELD(join_state),
485 .offset_words = 12,
486 .offset_bits = 4,
487 .size_bits = 4 },
488 { MCMEMBER_REC_FIELD(proxy_join),
489 .offset_words = 12,
490 .offset_bits = 8,
491 .size_bits = 1 },
492 { RESERVED,
493 .offset_words = 12,
494 .offset_bits = 9,
495 .size_bits = 23 },
496};
497
498#define CLASSPORTINFO_REC_FIELD(field) \
499 .struct_offset_bytes = offsetof(struct ib_class_port_info, field), \
500 .struct_size_bytes = sizeof_field(struct ib_class_port_info, field), \
501 .field_name = "ib_class_port_info:" #field
502
503static const struct ib_field ib_classport_info_rec_table[] = {
504 { CLASSPORTINFO_REC_FIELD(base_version),
505 .offset_words = 0,
506 .offset_bits = 0,
507 .size_bits = 8 },
508 { CLASSPORTINFO_REC_FIELD(class_version),
509 .offset_words = 0,
510 .offset_bits = 8,
511 .size_bits = 8 },
512 { CLASSPORTINFO_REC_FIELD(capability_mask),
513 .offset_words = 0,
514 .offset_bits = 16,
515 .size_bits = 16 },
516 { CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
517 .offset_words = 1,
518 .offset_bits = 0,
519 .size_bits = 32 },
520 { CLASSPORTINFO_REC_FIELD(redirect_gid),
521 .offset_words = 2,
522 .offset_bits = 0,
523 .size_bits = 128 },
524 { CLASSPORTINFO_REC_FIELD(redirect_tcslfl),
525 .offset_words = 6,
526 .offset_bits = 0,
527 .size_bits = 32 },
528 { CLASSPORTINFO_REC_FIELD(redirect_lid),
529 .offset_words = 7,
530 .offset_bits = 0,
531 .size_bits = 16 },
532 { CLASSPORTINFO_REC_FIELD(redirect_pkey),
533 .offset_words = 7,
534 .offset_bits = 16,
535 .size_bits = 16 },
536
537 { CLASSPORTINFO_REC_FIELD(redirect_qp),
538 .offset_words = 8,
539 .offset_bits = 0,
540 .size_bits = 32 },
541 { CLASSPORTINFO_REC_FIELD(redirect_qkey),
542 .offset_words = 9,
543 .offset_bits = 0,
544 .size_bits = 32 },
545
546 { CLASSPORTINFO_REC_FIELD(trap_gid),
547 .offset_words = 10,
548 .offset_bits = 0,
549 .size_bits = 128 },
550 { CLASSPORTINFO_REC_FIELD(trap_tcslfl),
551 .offset_words = 14,
552 .offset_bits = 0,
553 .size_bits = 32 },
554
555 { CLASSPORTINFO_REC_FIELD(trap_lid),
556 .offset_words = 15,
557 .offset_bits = 0,
558 .size_bits = 16 },
559 { CLASSPORTINFO_REC_FIELD(trap_pkey),
560 .offset_words = 15,
561 .offset_bits = 16,
562 .size_bits = 16 },
563
564 { CLASSPORTINFO_REC_FIELD(trap_hlqp),
565 .offset_words = 16,
566 .offset_bits = 0,
567 .size_bits = 32 },
568 { CLASSPORTINFO_REC_FIELD(trap_qkey),
569 .offset_words = 17,
570 .offset_bits = 0,
571 .size_bits = 32 },
572};
573
574#define OPA_CLASSPORTINFO_REC_FIELD(field) \
575 .struct_offset_bytes =\
576 offsetof(struct opa_class_port_info, field), \
577 .struct_size_bytes = \
578 sizeof_field(struct opa_class_port_info, field), \
579 .field_name = "opa_class_port_info:" #field
580
581static const struct ib_field opa_classport_info_rec_table[] = {
582 { OPA_CLASSPORTINFO_REC_FIELD(base_version),
583 .offset_words = 0,
584 .offset_bits = 0,
585 .size_bits = 8 },
586 { OPA_CLASSPORTINFO_REC_FIELD(class_version),
587 .offset_words = 0,
588 .offset_bits = 8,
589 .size_bits = 8 },
590 { OPA_CLASSPORTINFO_REC_FIELD(cap_mask),
591 .offset_words = 0,
592 .offset_bits = 16,
593 .size_bits = 16 },
594 { OPA_CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
595 .offset_words = 1,
596 .offset_bits = 0,
597 .size_bits = 32 },
598 { OPA_CLASSPORTINFO_REC_FIELD(redirect_gid),
599 .offset_words = 2,
600 .offset_bits = 0,
601 .size_bits = 128 },
602 { OPA_CLASSPORTINFO_REC_FIELD(redirect_tc_fl),
603 .offset_words = 6,
604 .offset_bits = 0,
605 .size_bits = 32 },
606 { OPA_CLASSPORTINFO_REC_FIELD(redirect_lid),
607 .offset_words = 7,
608 .offset_bits = 0,
609 .size_bits = 32 },
610 { OPA_CLASSPORTINFO_REC_FIELD(redirect_sl_qp),
611 .offset_words = 8,
612 .offset_bits = 0,
613 .size_bits = 32 },
614 { OPA_CLASSPORTINFO_REC_FIELD(redirect_qkey),
615 .offset_words = 9,
616 .offset_bits = 0,
617 .size_bits = 32 },
618 { OPA_CLASSPORTINFO_REC_FIELD(trap_gid),
619 .offset_words = 10,
620 .offset_bits = 0,
621 .size_bits = 128 },
622 { OPA_CLASSPORTINFO_REC_FIELD(trap_tc_fl),
623 .offset_words = 14,
624 .offset_bits = 0,
625 .size_bits = 32 },
626 { OPA_CLASSPORTINFO_REC_FIELD(trap_lid),
627 .offset_words = 15,
628 .offset_bits = 0,
629 .size_bits = 32 },
630 { OPA_CLASSPORTINFO_REC_FIELD(trap_hl_qp),
631 .offset_words = 16,
632 .offset_bits = 0,
633 .size_bits = 32 },
634 { OPA_CLASSPORTINFO_REC_FIELD(trap_qkey),
635 .offset_words = 17,
636 .offset_bits = 0,
637 .size_bits = 32 },
638 { OPA_CLASSPORTINFO_REC_FIELD(trap_pkey),
639 .offset_words = 18,
640 .offset_bits = 0,
641 .size_bits = 16 },
642 { OPA_CLASSPORTINFO_REC_FIELD(redirect_pkey),
643 .offset_words = 18,
644 .offset_bits = 16,
645 .size_bits = 16 },
646 { OPA_CLASSPORTINFO_REC_FIELD(trap_sl_rsvd),
647 .offset_words = 19,
648 .offset_bits = 0,
649 .size_bits = 8 },
650 { RESERVED,
651 .offset_words = 19,
652 .offset_bits = 8,
653 .size_bits = 24 },
654};
655
656#define GUIDINFO_REC_FIELD(field) \
657 .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
658 .struct_size_bytes = sizeof_field(struct ib_sa_guidinfo_rec, field), \
659 .field_name = "sa_guidinfo_rec:" #field
660
661static const struct ib_field guidinfo_rec_table[] = {
662 { GUIDINFO_REC_FIELD(lid),
663 .offset_words = 0,
664 .offset_bits = 0,
665 .size_bits = 16 },
666 { GUIDINFO_REC_FIELD(block_num),
667 .offset_words = 0,
668 .offset_bits = 16,
669 .size_bits = 8 },
670 { GUIDINFO_REC_FIELD(res1),
671 .offset_words = 0,
672 .offset_bits = 24,
673 .size_bits = 8 },
674 { GUIDINFO_REC_FIELD(res2),
675 .offset_words = 1,
676 .offset_bits = 0,
677 .size_bits = 32 },
678 { GUIDINFO_REC_FIELD(guid_info_list),
679 .offset_words = 2,
680 .offset_bits = 0,
681 .size_bits = 512 },
682};
683
684static inline void ib_sa_disable_local_svc(struct ib_sa_query *query)
685{
686 query->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE;
687}
688
689static inline int ib_sa_query_cancelled(struct ib_sa_query *query)
690{
691 return (query->flags & IB_SA_CANCEL);
692}
693
694static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
695 struct ib_sa_query *query)
696{
697 struct sa_path_rec *sa_rec = query->mad_buf->context[1];
698 struct ib_sa_mad *mad = query->mad_buf->mad;
699 ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask;
700 u16 val16;
701 u64 val64;
702 struct rdma_ls_resolve_header *header;
703
704 query->mad_buf->context[1] = NULL;
705
706 /* Construct the family header first */
707 header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
708 strscpy_pad(header->device_name,
709 dev_name(&query->port->agent->device->dev),
710 LS_DEVICE_NAME_MAX);
711 header->port_num = query->port->port_num;
712
713 if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) &&
714 sa_rec->reversible != 0)
715 query->path_use = LS_RESOLVE_PATH_USE_GMP;
716 else
717 query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL;
718 header->path_use = query->path_use;
719
720 /* Now build the attributes */
721 if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) {
722 val64 = be64_to_cpu(sa_rec->service_id);
723 nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID,
724 sizeof(val64), &val64);
725 }
726 if (comp_mask & IB_SA_PATH_REC_DGID)
727 nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_DGID,
728 sizeof(sa_rec->dgid), &sa_rec->dgid);
729 if (comp_mask & IB_SA_PATH_REC_SGID)
730 nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SGID,
731 sizeof(sa_rec->sgid), &sa_rec->sgid);
732 if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
733 nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_TCLASS,
734 sizeof(sa_rec->traffic_class), &sa_rec->traffic_class);
735
736 if (comp_mask & IB_SA_PATH_REC_PKEY) {
737 val16 = be16_to_cpu(sa_rec->pkey);
738 nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_PKEY,
739 sizeof(val16), &val16);
740 }
741 if (comp_mask & IB_SA_PATH_REC_QOS_CLASS) {
742 val16 = be16_to_cpu(sa_rec->qos_class);
743 nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_QOS_CLASS,
744 sizeof(val16), &val16);
745 }
746}
747
748static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
749{
750 int len = 0;
751
752 if (comp_mask & IB_SA_PATH_REC_SERVICE_ID)
753 len += nla_total_size(sizeof(u64));
754 if (comp_mask & IB_SA_PATH_REC_DGID)
755 len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
756 if (comp_mask & IB_SA_PATH_REC_SGID)
757 len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
758 if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
759 len += nla_total_size(sizeof(u8));
760 if (comp_mask & IB_SA_PATH_REC_PKEY)
761 len += nla_total_size(sizeof(u16));
762 if (comp_mask & IB_SA_PATH_REC_QOS_CLASS)
763 len += nla_total_size(sizeof(u16));
764
765 /*
766 * Make sure that at least some of the required comp_mask bits are
767 * set.
768 */
769 if (WARN_ON(len == 0))
770 return len;
771
772 /* Add the family header */
773 len += NLMSG_ALIGN(sizeof(struct rdma_ls_resolve_header));
774
775 return len;
776}
777
778static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
779{
780 struct sk_buff *skb = NULL;
781 struct nlmsghdr *nlh;
782 void *data;
783 struct ib_sa_mad *mad;
784 int len;
785 unsigned long flags;
786 unsigned long delay;
787 gfp_t gfp_flag;
788 int ret;
789
790 INIT_LIST_HEAD(&query->list);
791 query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
792
793 mad = query->mad_buf->mad;
794 len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask);
795 if (len <= 0)
796 return -EMSGSIZE;
797
798 skb = nlmsg_new(len, gfp_mask);
799 if (!skb)
800 return -ENOMEM;
801
802 /* Put nlmsg header only for now */
803 data = ibnl_put_msg(skb, &nlh, query->seq, 0, RDMA_NL_LS,
804 RDMA_NL_LS_OP_RESOLVE, NLM_F_REQUEST);
805 if (!data) {
806 nlmsg_free(skb);
807 return -EMSGSIZE;
808 }
809
810 /* Add attributes */
811 ib_nl_set_path_rec_attrs(skb, query);
812
813 /* Repair the nlmsg header length */
814 nlmsg_end(skb, nlh);
815
816 gfp_flag = ((gfp_mask & GFP_ATOMIC) == GFP_ATOMIC) ? GFP_ATOMIC :
817 GFP_NOWAIT;
818
819 spin_lock_irqsave(&ib_nl_request_lock, flags);
820 ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_flag);
821
822 if (ret)
823 goto out;
824
825 /* Put the request on the list. */
826 delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
827 query->timeout = delay + jiffies;
828 list_add_tail(&query->list, &ib_nl_request_list);
829 /* Start the timeout if this is the only request */
830 if (ib_nl_request_list.next == &query->list)
831 queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
832
833out:
834 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
835
836 return ret;
837}
838
839static int ib_nl_cancel_request(struct ib_sa_query *query)
840{
841 unsigned long flags;
842 struct ib_sa_query *wait_query;
843 int found = 0;
844
845 spin_lock_irqsave(&ib_nl_request_lock, flags);
846 list_for_each_entry(wait_query, &ib_nl_request_list, list) {
847 /* Let the timeout take care of the callback */
848 if (query == wait_query) {
849 query->flags |= IB_SA_CANCEL;
850 query->timeout = jiffies;
851 list_move(&query->list, &ib_nl_request_list);
852 found = 1;
853 mod_delayed_work(ib_nl_wq, &ib_nl_timed_work, 1);
854 break;
855 }
856 }
857 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
858
859 return found;
860}
861
862static void send_handler(struct ib_mad_agent *agent,
863 struct ib_mad_send_wc *mad_send_wc);
864
865static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query,
866 const struct nlmsghdr *nlh)
867{
868 struct ib_mad_send_wc mad_send_wc;
869 struct ib_sa_mad *mad = NULL;
870 const struct nlattr *head, *curr;
871 struct ib_path_rec_data *rec;
872 int len, rem;
873 u32 mask = 0;
874 int status = -EIO;
875
876 if (query->callback) {
877 head = (const struct nlattr *) nlmsg_data(nlh);
878 len = nlmsg_len(nlh);
879 switch (query->path_use) {
880 case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL:
881 mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND;
882 break;
883
884 case LS_RESOLVE_PATH_USE_ALL:
885 case LS_RESOLVE_PATH_USE_GMP:
886 default:
887 mask = IB_PATH_PRIMARY | IB_PATH_GMP |
888 IB_PATH_BIDIRECTIONAL;
889 break;
890 }
891 nla_for_each_attr(curr, head, len, rem) {
892 if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) {
893 rec = nla_data(curr);
894 /*
895 * Get the first one. In the future, we may
896 * need to get up to 6 pathrecords.
897 */
898 if ((rec->flags & mask) == mask) {
899 mad = query->mad_buf->mad;
900 mad->mad_hdr.method |=
901 IB_MGMT_METHOD_RESP;
902 memcpy(mad->data, rec->path_rec,
903 sizeof(rec->path_rec));
904 status = 0;
905 break;
906 }
907 }
908 }
909 query->callback(query, status, mad);
910 }
911
912 mad_send_wc.send_buf = query->mad_buf;
913 mad_send_wc.status = IB_WC_SUCCESS;
914 send_handler(query->mad_buf->mad_agent, &mad_send_wc);
915}
916
917static void ib_nl_request_timeout(struct work_struct *work)
918{
919 unsigned long flags;
920 struct ib_sa_query *query;
921 unsigned long delay;
922 struct ib_mad_send_wc mad_send_wc;
923 int ret;
924
925 spin_lock_irqsave(&ib_nl_request_lock, flags);
926 while (!list_empty(&ib_nl_request_list)) {
927 query = list_entry(ib_nl_request_list.next,
928 struct ib_sa_query, list);
929
930 if (time_after(query->timeout, jiffies)) {
931 delay = query->timeout - jiffies;
932 if ((long)delay <= 0)
933 delay = 1;
934 queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
935 break;
936 }
937
938 list_del(&query->list);
939 ib_sa_disable_local_svc(query);
940 /* Hold the lock to protect against query cancellation */
941 if (ib_sa_query_cancelled(query))
942 ret = -1;
943 else
944 ret = ib_post_send_mad(query->mad_buf, NULL);
945 if (ret) {
946 mad_send_wc.send_buf = query->mad_buf;
947 mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
948 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
949 send_handler(query->port->agent, &mad_send_wc);
950 spin_lock_irqsave(&ib_nl_request_lock, flags);
951 }
952 }
953 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
954}
955
956int ib_nl_handle_set_timeout(struct sk_buff *skb,
957 struct nlmsghdr *nlh,
958 struct netlink_ext_ack *extack)
959{
960 int timeout, delta, abs_delta;
961 const struct nlattr *attr;
962 unsigned long flags;
963 struct ib_sa_query *query;
964 long delay = 0;
965 struct nlattr *tb[LS_NLA_TYPE_MAX];
966 int ret;
967
968 if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
969 !(NETLINK_CB(skb).sk))
970 return -EPERM;
971
972 ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
973 nlmsg_len(nlh), ib_nl_policy, NULL);
974 attr = (const struct nlattr *)tb[LS_NLA_TYPE_TIMEOUT];
975 if (ret || !attr)
976 goto settimeout_out;
977
978 timeout = *(int *) nla_data(attr);
979 if (timeout < IB_SA_LOCAL_SVC_TIMEOUT_MIN)
980 timeout = IB_SA_LOCAL_SVC_TIMEOUT_MIN;
981 if (timeout > IB_SA_LOCAL_SVC_TIMEOUT_MAX)
982 timeout = IB_SA_LOCAL_SVC_TIMEOUT_MAX;
983
984 delta = timeout - sa_local_svc_timeout_ms;
985 if (delta < 0)
986 abs_delta = -delta;
987 else
988 abs_delta = delta;
989
990 if (delta != 0) {
991 spin_lock_irqsave(&ib_nl_request_lock, flags);
992 sa_local_svc_timeout_ms = timeout;
993 list_for_each_entry(query, &ib_nl_request_list, list) {
994 if (delta < 0 && abs_delta > query->timeout)
995 query->timeout = 0;
996 else
997 query->timeout += delta;
998
999 /* Get the new delay from the first entry */
1000 if (!delay) {
1001 delay = query->timeout - jiffies;
1002 if (delay <= 0)
1003 delay = 1;
1004 }
1005 }
1006 if (delay)
1007 mod_delayed_work(ib_nl_wq, &ib_nl_timed_work,
1008 (unsigned long)delay);
1009 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1010 }
1011
1012settimeout_out:
1013 return 0;
1014}
1015
1016static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
1017{
1018 struct nlattr *tb[LS_NLA_TYPE_MAX];
1019 int ret;
1020
1021 if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
1022 return 0;
1023
1024 ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
1025 nlmsg_len(nlh), ib_nl_policy, NULL);
1026 if (ret)
1027 return 0;
1028
1029 return 1;
1030}
1031
1032int ib_nl_handle_resolve_resp(struct sk_buff *skb,
1033 struct nlmsghdr *nlh,
1034 struct netlink_ext_ack *extack)
1035{
1036 unsigned long flags;
1037 struct ib_sa_query *query = NULL, *iter;
1038 struct ib_mad_send_buf *send_buf;
1039 struct ib_mad_send_wc mad_send_wc;
1040 int ret;
1041
1042 if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
1043 !(NETLINK_CB(skb).sk))
1044 return -EPERM;
1045
1046 spin_lock_irqsave(&ib_nl_request_lock, flags);
1047 list_for_each_entry(iter, &ib_nl_request_list, list) {
1048 /*
1049 * If the query is cancelled, let the timeout routine
1050 * take care of it.
1051 */
1052 if (nlh->nlmsg_seq == iter->seq) {
1053 if (!ib_sa_query_cancelled(iter)) {
1054 list_del(&iter->list);
1055 query = iter;
1056 }
1057 break;
1058 }
1059 }
1060
1061 if (!query) {
1062 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1063 goto resp_out;
1064 }
1065
1066 send_buf = query->mad_buf;
1067
1068 if (!ib_nl_is_good_resolve_resp(nlh)) {
1069 /* if the result is a failure, send out the packet via IB */
1070 ib_sa_disable_local_svc(query);
1071 ret = ib_post_send_mad(query->mad_buf, NULL);
1072 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1073 if (ret) {
1074 mad_send_wc.send_buf = send_buf;
1075 mad_send_wc.status = IB_WC_GENERAL_ERR;
1076 send_handler(query->port->agent, &mad_send_wc);
1077 }
1078 } else {
1079 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1080 ib_nl_process_good_resolve_rsp(query, nlh);
1081 }
1082
1083resp_out:
1084 return 0;
1085}
1086
1087static void free_sm_ah(struct kref *kref)
1088{
1089 struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
1090
1091 rdma_destroy_ah(sm_ah->ah, 0);
1092 kfree(sm_ah);
1093}
1094
1095void ib_sa_register_client(struct ib_sa_client *client)
1096{
1097 atomic_set(&client->users, 1);
1098 init_completion(&client->comp);
1099}
1100EXPORT_SYMBOL(ib_sa_register_client);
1101
1102void ib_sa_unregister_client(struct ib_sa_client *client)
1103{
1104 ib_sa_client_put(client);
1105 wait_for_completion(&client->comp);
1106}
1107EXPORT_SYMBOL(ib_sa_unregister_client);
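/*
 * Sketch of the expected client lifecycle (my_client is a hypothetical
 * caller-owned object):
 *
 *	static struct ib_sa_client my_client;
 *
 *	ib_sa_register_client(&my_client);
 *	... issue SA queries that pass &my_client ...
 *	ib_sa_unregister_client(&my_client);
 *
 * Each in-flight query holds a reference on the client, so the unregister
 * call blocks until every outstanding query has completed or been canceled.
 */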
1108
1109/**
1110 * ib_sa_cancel_query - try to cancel an SA query
1111 * @id:ID of query to cancel
1112 * @query:query pointer to cancel
1113 *
1114 * Try to cancel an SA query. If the id and query don't match up or
1115 * the query has already completed, nothing is done. Otherwise the
1116 * query is canceled and will complete with a status of -EINTR.
1117 */
1118void ib_sa_cancel_query(int id, struct ib_sa_query *query)
1119{
1120 unsigned long flags;
1121 struct ib_mad_send_buf *mad_buf;
1122
1123 xa_lock_irqsave(&queries, flags);
1124 if (xa_load(&queries, id) != query) {
1125 xa_unlock_irqrestore(&queries, flags);
1126 return;
1127 }
1128 mad_buf = query->mad_buf;
1129 xa_unlock_irqrestore(&queries, flags);
1130
1131 /*
1132 * If the query is still on the netlink request list, schedule
1133 * it to be cancelled by the timeout routine. Otherwise, it has been
1134 * sent to the MAD layer and has to be cancelled from there.
1135 */
1136 if (!ib_nl_cancel_request(query))
1137 ib_cancel_mad(mad_buf);
1138}
1139EXPORT_SYMBOL(ib_sa_cancel_query);
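/*
 * Sketch: canceling a query, assuming the caller saved the id returned by
 * a helper such as ib_sa_path_rec_get() together with the query pointer
 * returned through its sa_query argument (my_id and my_query are
 * hypothetical names):
 *
 *	ib_sa_cancel_query(my_id, my_query);
 *
 * The query's callback still runs, with a status of -EINTR.
 */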
1140
1141static u8 get_src_path_mask(struct ib_device *device, u32 port_num)
1142{
1143 struct ib_sa_device *sa_dev;
1144 struct ib_sa_port *port;
1145 unsigned long flags;
1146 u8 src_path_mask;
1147
1148 sa_dev = ib_get_client_data(device, &sa_client);
1149 if (!sa_dev)
1150 return 0x7f;
1151
1152 port = &sa_dev->port[port_num - sa_dev->start_port];
1153 spin_lock_irqsave(&port->ah_lock, flags);
1154 src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
1155 spin_unlock_irqrestore(&port->ah_lock, flags);
1156
1157 return src_path_mask;
1158}
1159
1160static int init_ah_attr_grh_fields(struct ib_device *device, u32 port_num,
1161 struct sa_path_rec *rec,
1162 struct rdma_ah_attr *ah_attr,
1163 const struct ib_gid_attr *gid_attr)
1164{
1165 enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
1166
1167 if (!gid_attr) {
1168 gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type,
1169 port_num, NULL);
1170 if (IS_ERR(gid_attr))
1171 return PTR_ERR(gid_attr);
1172 } else
1173 rdma_hold_gid_attr(gid_attr);
1174
1175 rdma_move_grh_sgid_attr(ah_attr, &rec->dgid,
1176 be32_to_cpu(rec->flow_label),
1177 rec->hop_limit, rec->traffic_class,
1178 gid_attr);
1179 return 0;
1180}
1181
1182/**
1183 * ib_init_ah_attr_from_path - Initialize address handle attributes based on
1184 * an SA path record.
1185 * @device: Device associated with the ah attributes initialization.
1186 * @port_num: Port on the specified device.
1187 * @rec: path record entry to use for ah attributes initialization.
1188 * @ah_attr: address handle attributes to initialize from the path record.
1189 * @gid_attr: SGID attribute to consider during initialization.
1190 *
1191 * When ib_init_ah_attr_from_path() returns success,
1192 * (a) for the IB link layer, ah_attr optionally contains a reference to the
1193 * SGID attribute when a GRH is present.
1194 * (b) for the RoCE link layer, it contains a reference to the SGID attribute.
1195 * The user must invoke rdma_destroy_ah_attr() to release the reference to the
1196 * SGID attributes initialized by ib_init_ah_attr_from_path().
1197 */
1198int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num,
1199 struct sa_path_rec *rec,
1200 struct rdma_ah_attr *ah_attr,
1201 const struct ib_gid_attr *gid_attr)
1202{
1203 int ret = 0;
1204
1205 memset(ah_attr, 0, sizeof(*ah_attr));
1206 ah_attr->type = rdma_ah_find_type(device, port_num);
1207 rdma_ah_set_sl(ah_attr, rec->sl);
1208 rdma_ah_set_port_num(ah_attr, port_num);
1209 rdma_ah_set_static_rate(ah_attr, rec->rate);
1210
1211 if (sa_path_is_roce(rec)) {
1212 ret = roce_resolve_route_from_path(rec, gid_attr);
1213 if (ret)
1214 return ret;
1215
1216 memcpy(ah_attr->roce.dmac, sa_path_get_dmac(rec), ETH_ALEN);
1217 } else {
1218 rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
1219 if (sa_path_is_opa(rec) &&
1220 rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE))
1221 rdma_ah_set_make_grd(ah_attr, true);
1222
1223 rdma_ah_set_path_bits(ah_attr,
1224 be32_to_cpu(sa_path_get_slid(rec)) &
1225 get_src_path_mask(device, port_num));
1226 }
1227
1228 if (rec->hop_limit > 0 || sa_path_is_roce(rec))
1229 ret = init_ah_attr_grh_fields(device, port_num,
1230 rec, ah_attr, gid_attr);
1231 return ret;
1232}
1233EXPORT_SYMBOL(ib_init_ah_attr_from_path);
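/*
 * Sketch: turning a resolved path record into an address handle (my_pd and
 * my_rec are hypothetical; rdma_create_ah() and rdma_destroy_ah_attr() are
 * the usual companions):
 *
 *	struct rdma_ah_attr ah_attr;
 *	struct ib_ah *ah;
 *
 *	if (!ib_init_ah_attr_from_path(device, port_num, &my_rec,
 *				       &ah_attr, NULL)) {
 *		ah = rdma_create_ah(my_pd, &ah_attr, 0);
 *		rdma_destroy_ah_attr(&ah_attr);
 *	}
 */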
1234
1235static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
1236{
1237 struct rdma_ah_attr ah_attr;
1238 unsigned long flags;
1239
1240 spin_lock_irqsave(&query->port->ah_lock, flags);
1241 if (!query->port->sm_ah) {
1242 spin_unlock_irqrestore(&query->port->ah_lock, flags);
1243 return -EAGAIN;
1244 }
1245 kref_get(&query->port->sm_ah->ref);
1246 query->sm_ah = query->port->sm_ah;
1247 spin_unlock_irqrestore(&query->port->ah_lock, flags);
1248
1249 /*
1250 * Always check if sm_ah has a valid dlid assigned
1251 * before querying for class port info.
1252 */
1253 if ((rdma_query_ah(query->sm_ah->ah, &ah_attr) < 0) ||
1254 !rdma_is_valid_unicast_lid(&ah_attr)) {
1255 kref_put(&query->sm_ah->ref, free_sm_ah);
1256 return -EAGAIN;
1257 }
1258 query->mad_buf = ib_create_send_mad(query->port->agent, 1,
1259 query->sm_ah->pkey_index,
1260 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
1261 gfp_mask,
1262 ((query->flags & IB_SA_QUERY_OPA) ?
1263 OPA_MGMT_BASE_VERSION :
1264 IB_MGMT_BASE_VERSION));
1265 if (IS_ERR(query->mad_buf)) {
1266 kref_put(&query->sm_ah->ref, free_sm_ah);
1267 return -ENOMEM;
1268 }
1269
1270 query->mad_buf->ah = query->sm_ah->ah;
1271
1272 return 0;
1273}
1274
1275static void free_mad(struct ib_sa_query *query)
1276{
1277 ib_free_send_mad(query->mad_buf);
1278 kref_put(&query->sm_ah->ref, free_sm_ah);
1279}
1280
1281static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
1282{
1283 struct ib_sa_mad *mad = query->mad_buf->mad;
1284 unsigned long flags;
1285
1286 memset(mad, 0, sizeof *mad);
1287
1288 if (query->flags & IB_SA_QUERY_OPA) {
1289 mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION;
1290 mad->mad_hdr.class_version = OPA_SA_CLASS_VERSION;
1291 } else {
1292 mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
1293 mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
1294 }
1295 mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
1296 spin_lock_irqsave(&tid_lock, flags);
1297 mad->mad_hdr.tid =
1298 cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
1299 spin_unlock_irqrestore(&tid_lock, flags);
1300}
1301
1302static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
1303 gfp_t gfp_mask)
1304{
1305 unsigned long flags;
1306 int ret, id;
1307 const int nmbr_sa_query_retries = 10;
1308
1309 xa_lock_irqsave(&queries, flags);
1310 ret = __xa_alloc(&queries, &id, query, xa_limit_32b, gfp_mask);
1311 xa_unlock_irqrestore(&queries, flags);
1312 if (ret < 0)
1313 return ret;
1314
1315 query->mad_buf->timeout_ms = timeout_ms / nmbr_sa_query_retries;
1316 query->mad_buf->retries = nmbr_sa_query_retries;
1317 if (!query->mad_buf->timeout_ms) {
1318 /* Special case, very small timeout_ms */
1319 query->mad_buf->timeout_ms = 1;
1320 query->mad_buf->retries = timeout_ms;
1321 }
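	/*
	 * For example, timeout_ms = 2000 gives a 200 ms per-send timeout with
	 * 10 retries, while timeout_ms = 5 hits the special case above and
	 * becomes a 1 ms per-send timeout with 5 retries.
	 */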
1322 query->mad_buf->context[0] = query;
1323 query->id = id;
1324
1325 if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
1326 (!(query->flags & IB_SA_QUERY_OPA))) {
1327 if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
1328 if (!ib_nl_make_request(query, gfp_mask))
1329 return id;
1330 }
1331 ib_sa_disable_local_svc(query);
1332 }
1333
1334 ret = ib_post_send_mad(query->mad_buf, NULL);
1335 if (ret) {
1336 xa_lock_irqsave(&queries, flags);
1337 __xa_erase(&queries, id);
1338 xa_unlock_irqrestore(&queries, flags);
1339 }
1340
1341 /*
1342 * It's not safe to dereference query any more, because the
1343 * send may already have completed and freed the query in
1344 * another context.
1345 */
1346 return ret ? ret : id;
1347}
1348
1349void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec)
1350{
1351 ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
1352}
1353EXPORT_SYMBOL(ib_sa_unpack_path);
1354
1355void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
1356{
1357 ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
1358}
1359EXPORT_SYMBOL(ib_sa_pack_path);
1360
1361static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
1362 struct ib_sa_device *sa_dev,
1363 u32 port_num)
1364{
1365 struct ib_sa_port *port;
1366 unsigned long flags;
1367 bool ret = false;
1368
1369 port = &sa_dev->port[port_num - sa_dev->start_port];
1370 spin_lock_irqsave(&port->classport_lock, flags);
1371 if (!port->classport_info.valid)
1372 goto ret;
1373
1374 if (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_OPA)
1375 ret = opa_get_cpi_capmask2(&port->classport_info.data.opa) &
1376 OPA_CLASS_PORT_INFO_PR_SUPPORT;
1377ret:
1378 spin_unlock_irqrestore(&port->classport_lock, flags);
1379 return ret;
1380}
1381
1382enum opa_pr_supported {
1383 PR_NOT_SUPPORTED,
1384 PR_OPA_SUPPORTED,
1385 PR_IB_SUPPORTED
1386};
1387
1388/*
1389 * opa_pr_query_possible - Check if current PR query can be an OPA query.
1390 *
1391 * Returns PR_NOT_SUPPORTED if a path record query is not
1392 * possible, PR_OPA_SUPPORTED if an OPA path record query
1393 * is possible and PR_IB_SUPPORTED if an IB path record
1394 * query is possible.
1395 */
1396static int opa_pr_query_possible(struct ib_sa_client *client,
1397 struct ib_sa_device *sa_dev,
1398 struct ib_device *device, u32 port_num)
1399{
1400 struct ib_port_attr port_attr;
1401
1402 if (ib_query_port(device, port_num, &port_attr))
1403 return PR_NOT_SUPPORTED;
1404
1405 if (ib_sa_opa_pathrecord_support(client, sa_dev, port_num))
1406 return PR_OPA_SUPPORTED;
1407
1408 if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
1409 return PR_NOT_SUPPORTED;
1410 else
1411 return PR_IB_SUPPORTED;
1412}
1413
1414static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
1415 int status,
1416 struct ib_sa_mad *mad)
1417{
1418 struct ib_sa_path_query *query =
1419 container_of(sa_query, struct ib_sa_path_query, sa_query);
1420
1421 if (mad) {
1422 struct sa_path_rec rec;
1423
1424 if (sa_query->flags & IB_SA_QUERY_OPA) {
1425 ib_unpack(opa_path_rec_table,
1426 ARRAY_SIZE(opa_path_rec_table),
1427 mad->data, &rec);
1428 rec.rec_type = SA_PATH_REC_TYPE_OPA;
1429 query->callback(status, &rec, query->context);
1430 } else {
1431 ib_unpack(path_rec_table,
1432 ARRAY_SIZE(path_rec_table),
1433 mad->data, &rec);
1434 rec.rec_type = SA_PATH_REC_TYPE_IB;
1435 sa_path_set_dmac_zero(&rec);
1436
1437 if (query->conv_pr) {
1438 struct sa_path_rec opa;
1439
1440 memset(&opa, 0, sizeof(struct sa_path_rec));
1441 sa_convert_path_ib_to_opa(&opa, &rec);
1442 query->callback(status, &opa, query->context);
1443 } else {
1444 query->callback(status, &rec, query->context);
1445 }
1446 }
1447 } else
1448 query->callback(status, NULL, query->context);
1449}
1450
1451static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
1452{
1453 struct ib_sa_path_query *query =
1454 container_of(sa_query, struct ib_sa_path_query, sa_query);
1455
1456 kfree(query->conv_pr);
1457 kfree(query);
1458}
1459
1460/**
1461 * ib_sa_path_rec_get - Start a Path get query
1462 * @client:SA client
1463 * @device:device to send query on
1464 * @port_num: port number to send query on
1465 * @rec:Path Record to send in query
1466 * @comp_mask:component mask to send in query
1467 * @timeout_ms:time to wait for response
1468 * @gfp_mask:GFP mask to use for internal allocations
1469 * @callback:function called when query completes, times out or is
1470 * canceled
1471 * @context:opaque user context passed to callback
1472 * @sa_query:query context, used to cancel query
1473 *
1474 * Send a Path Record Get query to the SA to look up a path. The
1475 * callback function will be called when the query completes (or
1476 * fails); status is 0 for a successful response, -EINTR if the query
1477 * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
1478 * occurred sending the query. The resp parameter of the callback is
1479 * only valid if status is 0.
1480 *
1481 * If the return value of ib_sa_path_rec_get() is negative, it is an
1482 * error code. Otherwise it is a query ID that can be used to cancel
1483 * the query.
1484 */
1485int ib_sa_path_rec_get(struct ib_sa_client *client,
1486 struct ib_device *device, u32 port_num,
1487 struct sa_path_rec *rec,
1488 ib_sa_comp_mask comp_mask,
1489 unsigned long timeout_ms, gfp_t gfp_mask,
1490 void (*callback)(int status,
1491 struct sa_path_rec *resp,
1492 void *context),
1493 void *context,
1494 struct ib_sa_query **sa_query)
1495{
1496 struct ib_sa_path_query *query;
1497 struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1498 struct ib_sa_port *port;
1499 struct ib_mad_agent *agent;
1500 struct ib_sa_mad *mad;
1501 enum opa_pr_supported status;
1502 int ret;
1503
1504 if (!sa_dev)
1505 return -ENODEV;
1506
1507 if ((rec->rec_type != SA_PATH_REC_TYPE_IB) &&
1508 (rec->rec_type != SA_PATH_REC_TYPE_OPA))
1509 return -EINVAL;
1510
1511 port = &sa_dev->port[port_num - sa_dev->start_port];
1512 agent = port->agent;
1513
1514 query = kzalloc(sizeof(*query), gfp_mask);
1515 if (!query)
1516 return -ENOMEM;
1517
1518 query->sa_query.port = port;
1519 if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
1520 status = opa_pr_query_possible(client, sa_dev, device, port_num);
1521 if (status == PR_NOT_SUPPORTED) {
1522 ret = -EINVAL;
1523 goto err1;
1524 } else if (status == PR_OPA_SUPPORTED) {
1525 query->sa_query.flags |= IB_SA_QUERY_OPA;
1526 } else {
1527 query->conv_pr =
1528 kmalloc(sizeof(*query->conv_pr), gfp_mask);
1529 if (!query->conv_pr) {
1530 ret = -ENOMEM;
1531 goto err1;
1532 }
1533 }
1534 }
1535
1536 ret = alloc_mad(&query->sa_query, gfp_mask);
1537 if (ret)
1538 goto err2;
1539
1540 ib_sa_client_get(client);
1541 query->sa_query.client = client;
1542 query->callback = callback;
1543 query->context = context;
1544
1545 mad = query->sa_query.mad_buf->mad;
1546 init_mad(&query->sa_query, agent);
1547
1548 query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
1549 query->sa_query.release = ib_sa_path_rec_release;
1550 mad->mad_hdr.method = IB_MGMT_METHOD_GET;
1551 mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
1552 mad->sa_hdr.comp_mask = comp_mask;
1553
1554 if (query->sa_query.flags & IB_SA_QUERY_OPA) {
1555 ib_pack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
1556 rec, mad->data);
1557 } else if (query->conv_pr) {
1558 sa_convert_path_opa_to_ib(query->conv_pr, rec);
1559 ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
1560 query->conv_pr, mad->data);
1561 } else {
1562 ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
1563 rec, mad->data);
1564 }
1565
1566 *sa_query = &query->sa_query;
1567
1568 query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE;
1569 query->sa_query.mad_buf->context[1] = (query->conv_pr) ?
1570 query->conv_pr : rec;
1571
1572 ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1573 if (ret < 0)
1574 goto err3;
1575
1576 return ret;
1577
1578err3:
1579 *sa_query = NULL;
1580 ib_sa_client_put(query->sa_query.client);
1581 free_mad(&query->sa_query);
1582err2:
1583 kfree(query->conv_pr);
1584err1:
1585 kfree(query);
1586 return ret;
1587}
1588EXPORT_SYMBOL(ib_sa_path_rec_get);
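/*
 * Sketch of a minimal caller, assuming a client already registered with
 * ib_sa_register_client(). my_client, my_device, my_port, sgid, dgid and
 * my_path_done are hypothetical names; error handling is trimmed.
 *
 *	static struct ib_sa_query *my_query;
 *
 *	static void my_path_done(int status, struct sa_path_rec *resp,
 *				 void *context)
 *	{
 *		if (!status)
 *			pr_info("path resolved, sl %d mtu %d\n",
 *				resp->sl, resp->mtu);
 *	}
 *
 *	struct sa_path_rec rec = {};
 *	int id;
 *
 *	rec.rec_type = SA_PATH_REC_TYPE_IB;
 *	rec.sgid = sgid;
 *	rec.dgid = dgid;
 *	rec.numb_path = 1;
 *
 *	id = ib_sa_path_rec_get(&my_client, my_device, my_port, &rec,
 *				IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_DGID |
 *				IB_SA_PATH_REC_NUMB_PATH,
 *				2000, GFP_KERNEL, my_path_done, NULL,
 *				&my_query);
 *	if (id < 0)
 *		pr_err("path query failed: %d\n", id);
 */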
1589
1590static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
1591 int status,
1592 struct ib_sa_mad *mad)
1593{
1594 struct ib_sa_mcmember_query *query =
1595 container_of(sa_query, struct ib_sa_mcmember_query, sa_query);
1596
1597 if (mad) {
1598 struct ib_sa_mcmember_rec rec;
1599
1600 ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
1601 mad->data, &rec);
1602 query->callback(status, &rec, query->context);
1603 } else
1604 query->callback(status, NULL, query->context);
1605}
1606
1607static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
1608{
1609 kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
1610}
1611
1612int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
1613 struct ib_device *device, u32 port_num,
1614 u8 method,
1615 struct ib_sa_mcmember_rec *rec,
1616 ib_sa_comp_mask comp_mask,
1617 unsigned long timeout_ms, gfp_t gfp_mask,
1618 void (*callback)(int status,
1619 struct ib_sa_mcmember_rec *resp,
1620 void *context),
1621 void *context,
1622 struct ib_sa_query **sa_query)
1623{
1624 struct ib_sa_mcmember_query *query;
1625 struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1626 struct ib_sa_port *port;
1627 struct ib_mad_agent *agent;
1628 struct ib_sa_mad *mad;
1629 int ret;
1630
1631 if (!sa_dev)
1632 return -ENODEV;
1633
1634 port = &sa_dev->port[port_num - sa_dev->start_port];
1635 agent = port->agent;
1636
1637 query = kzalloc(sizeof(*query), gfp_mask);
1638 if (!query)
1639 return -ENOMEM;
1640
1641 query->sa_query.port = port;
1642 ret = alloc_mad(&query->sa_query, gfp_mask);
1643 if (ret)
1644 goto err1;
1645
1646 ib_sa_client_get(client);
1647 query->sa_query.client = client;
1648 query->callback = callback;
1649 query->context = context;
1650
1651 mad = query->sa_query.mad_buf->mad;
1652 init_mad(&query->sa_query, agent);
1653
1654 query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
1655 query->sa_query.release = ib_sa_mcmember_rec_release;
1656 mad->mad_hdr.method = method;
1657 mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
1658 mad->sa_hdr.comp_mask = comp_mask;
1659
1660 ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
1661 rec, mad->data);
1662
1663 *sa_query = &query->sa_query;
1664
1665 ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1666 if (ret < 0)
1667 goto err2;
1668
1669 return ret;
1670
1671err2:
1672 *sa_query = NULL;
1673 ib_sa_client_put(query->sa_query.client);
1674 free_mad(&query->sa_query);
1675
1676err1:
1677 kfree(query);
1678 return ret;
1679}
1680
1681/* Support GuidInfoRecord */
1682static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
1683 int status,
1684 struct ib_sa_mad *mad)
1685{
1686 struct ib_sa_guidinfo_query *query =
1687 container_of(sa_query, struct ib_sa_guidinfo_query, sa_query);
1688
1689 if (mad) {
1690 struct ib_sa_guidinfo_rec rec;
1691
1692 ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table),
1693 mad->data, &rec);
1694 query->callback(status, &rec, query->context);
1695 } else
1696 query->callback(status, NULL, query->context);
1697}
1698
1699static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query)
1700{
1701 kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query));
1702}
1703
1704int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
1705 struct ib_device *device, u32 port_num,
1706 struct ib_sa_guidinfo_rec *rec,
1707 ib_sa_comp_mask comp_mask, u8 method,
1708 unsigned long timeout_ms, gfp_t gfp_mask,
1709 void (*callback)(int status,
1710 struct ib_sa_guidinfo_rec *resp,
1711 void *context),
1712 void *context,
1713 struct ib_sa_query **sa_query)
1714{
1715 struct ib_sa_guidinfo_query *query;
1716 struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1717 struct ib_sa_port *port;
1718 struct ib_mad_agent *agent;
1719 struct ib_sa_mad *mad;
1720 int ret;
1721
1722 if (!sa_dev)
1723 return -ENODEV;
1724
1725 if (method != IB_MGMT_METHOD_GET &&
1726 method != IB_MGMT_METHOD_SET &&
1727 method != IB_SA_METHOD_DELETE) {
1728 return -EINVAL;
1729 }
1730
1731 port = &sa_dev->port[port_num - sa_dev->start_port];
1732 agent = port->agent;
1733
1734 query = kzalloc(sizeof(*query), gfp_mask);
1735 if (!query)
1736 return -ENOMEM;
1737
1738 query->sa_query.port = port;
1739 ret = alloc_mad(&query->sa_query, gfp_mask);
1740 if (ret)
1741 goto err1;
1742
1743 ib_sa_client_get(client);
1744 query->sa_query.client = client;
1745 query->callback = callback;
1746 query->context = context;
1747
1748 mad = query->sa_query.mad_buf->mad;
1749 init_mad(&query->sa_query, agent);
1750
1751 query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
1752 query->sa_query.release = ib_sa_guidinfo_rec_release;
1753
1754 mad->mad_hdr.method = method;
1755 mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC);
1756 mad->sa_hdr.comp_mask = comp_mask;
1757
1758 ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec,
1759 mad->data);
1760
1761 *sa_query = &query->sa_query;
1762
1763 ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1764 if (ret < 0)
1765 goto err2;
1766
1767 return ret;
1768
1769err2:
1770 *sa_query = NULL;
1771 ib_sa_client_put(query->sa_query.client);
1772 free_mad(&query->sa_query);
1773
1774err1:
1775 kfree(query);
1776 return ret;
1777}
1778EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
1779
1780struct ib_classport_info_context {
1781 struct completion done;
1782 struct ib_sa_query *sa_query;
1783};
1784
1785static void ib_classportinfo_cb(void *context)
1786{
1787 struct ib_classport_info_context *cb_ctx = context;
1788
1789 complete(&cb_ctx->done);
1790}
1791
1792static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
1793 int status,
1794 struct ib_sa_mad *mad)
1795{
1796 unsigned long flags;
1797 struct ib_sa_classport_info_query *query =
1798 container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
1799 struct ib_sa_classport_cache *info = &sa_query->port->classport_info;
1800
1801 if (mad) {
1802 if (sa_query->flags & IB_SA_QUERY_OPA) {
1803 struct opa_class_port_info rec;
1804
1805 ib_unpack(opa_classport_info_rec_table,
1806 ARRAY_SIZE(opa_classport_info_rec_table),
1807 mad->data, &rec);
1808
1809 spin_lock_irqsave(&sa_query->port->classport_lock,
1810 flags);
1811 if (!status && !info->valid) {
1812 memcpy(&info->data.opa, &rec,
1813 sizeof(info->data.opa));
1814
1815 info->valid = true;
1816 info->data.type = RDMA_CLASS_PORT_INFO_OPA;
1817 }
1818 spin_unlock_irqrestore(&sa_query->port->classport_lock,
1819 flags);
1820
1821 } else {
1822 struct ib_class_port_info rec;
1823
1824 ib_unpack(ib_classport_info_rec_table,
1825 ARRAY_SIZE(ib_classport_info_rec_table),
1826 mad->data, &rec);
1827
1828 spin_lock_irqsave(&sa_query->port->classport_lock,
1829 flags);
1830 if (!status && !info->valid) {
1831 memcpy(&info->data.ib, &rec,
1832 sizeof(info->data.ib));
1833
1834 info->valid = true;
1835 info->data.type = RDMA_CLASS_PORT_INFO_IB;
1836 }
1837 spin_unlock_irqrestore(&sa_query->port->classport_lock,
1838 flags);
1839 }
1840 }
1841 query->callback(query->context);
1842}
1843
1844static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
1845{
1846 kfree(container_of(sa_query, struct ib_sa_classport_info_query,
1847 sa_query));
1848}
1849
1850static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
1851 unsigned long timeout_ms,
1852 void (*callback)(void *context),
1853 void *context,
1854 struct ib_sa_query **sa_query)
1855{
1856 struct ib_mad_agent *agent;
1857 struct ib_sa_classport_info_query *query;
1858 struct ib_sa_mad *mad;
1859 gfp_t gfp_mask = GFP_KERNEL;
1860 int ret;
1861
1862 agent = port->agent;
1863
1864 query = kzalloc(sizeof(*query), gfp_mask);
1865 if (!query)
1866 return -ENOMEM;
1867
1868 query->sa_query.port = port;
1869 query->sa_query.flags |= rdma_cap_opa_ah(port->agent->device,
1870 port->port_num) ?
1871 IB_SA_QUERY_OPA : 0;
1872 ret = alloc_mad(&query->sa_query, gfp_mask);
1873 if (ret)
1874 goto err_free;
1875
1876 query->callback = callback;
1877 query->context = context;
1878
1879 mad = query->sa_query.mad_buf->mad;
1880 init_mad(&query->sa_query, agent);
1881
1882 query->sa_query.callback = ib_sa_classport_info_rec_callback;
1883 query->sa_query.release = ib_sa_classport_info_rec_release;
1884 mad->mad_hdr.method = IB_MGMT_METHOD_GET;
1885 mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
1886 mad->sa_hdr.comp_mask = 0;
1887 *sa_query = &query->sa_query;
1888
1889 ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1890 if (ret < 0)
1891 goto err_free_mad;
1892
1893 return ret;
1894
1895err_free_mad:
1896 *sa_query = NULL;
1897 free_mad(&query->sa_query);
1898
1899err_free:
1900 kfree(query);
1901 return ret;
1902}
1903
1904static void update_ib_cpi(struct work_struct *work)
1905{
1906 struct ib_sa_port *port =
1907 container_of(work, struct ib_sa_port, ib_cpi_work.work);
1908 struct ib_classport_info_context *cb_context;
1909 unsigned long flags;
1910 int ret;
1911
1912 /* If the classport info is valid, nothing
1913 * to do here.
1914 */
1915 spin_lock_irqsave(&port->classport_lock, flags);
1916 if (port->classport_info.valid) {
1917 spin_unlock_irqrestore(&port->classport_lock, flags);
1918 return;
1919 }
1920 spin_unlock_irqrestore(&port->classport_lock, flags);
1921
1922 cb_context = kmalloc(sizeof(*cb_context), GFP_KERNEL);
1923 if (!cb_context)
1924 goto err_nomem;
1925
1926 init_completion(&cb_context->done);
1927
1928 ret = ib_sa_classport_info_rec_query(port, 3000,
1929 ib_classportinfo_cb, cb_context,
1930 &cb_context->sa_query);
1931 if (ret < 0)
1932 goto free_cb_err;
1933 wait_for_completion(&cb_context->done);
1934free_cb_err:
1935 kfree(cb_context);
1936 spin_lock_irqsave(&port->classport_lock, flags);
1937
1938 /* If the classport info is still not valid, the query must have
1939 * failed for some reason.  Retry issuing the query.
1940 */
1941 if (!port->classport_info.valid) {
1942 port->classport_info.retry_cnt++;
1943 if (port->classport_info.retry_cnt <=
1944 IB_SA_CPI_MAX_RETRY_CNT) {
1945 unsigned long delay =
1946 msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
1947
1948 queue_delayed_work(ib_wq, &port->ib_cpi_work, delay);
1949 }
1950 }
1951 spin_unlock_irqrestore(&port->classport_lock, flags);
1952
1953err_nomem:
1954 return;
1955}
1956
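/*
 * MAD send completion handler.  A successful send needs no callback here
 * (the reply is delivered through recv_handler); timeouts, flushes and
 * other errors are reported to the query callback as -ETIMEDOUT, -EINTR
 * and -EIO respectively.  The query is then dropped from the XArray of
 * outstanding queries, its MAD buffer freed and its release method called.
 */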
1957static void send_handler(struct ib_mad_agent *agent,
1958 struct ib_mad_send_wc *mad_send_wc)
1959{
1960 struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
1961 unsigned long flags;
1962
1963 if (query->callback)
1964 switch (mad_send_wc->status) {
1965 case IB_WC_SUCCESS:
1966 /* No callback here -- the reply was already handled in recv_handler */
1967 break;
1968 case IB_WC_RESP_TIMEOUT_ERR:
1969 query->callback(query, -ETIMEDOUT, NULL);
1970 break;
1971 case IB_WC_WR_FLUSH_ERR:
1972 query->callback(query, -EINTR, NULL);
1973 break;
1974 default:
1975 query->callback(query, -EIO, NULL);
1976 break;
1977 }
1978
1979 xa_lock_irqsave(&queries, flags);
1980 __xa_erase(&queries, query->id);
1981 xa_unlock_irqrestore(&queries, flags);
1982
1983 free_mad(query);
1984 if (query->client)
1985 ib_sa_client_put(query->client);
1986 query->release(query);
1987}
1988
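/*
 * MAD receive handler: the reply is matched to its originating query via
 * send_buf->context[0].  A non-zero SA MAD status is reported as -EINVAL
 * and a failed work completion as -EIO; the received MAD is freed either
 * way.
 */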
1989static void recv_handler(struct ib_mad_agent *mad_agent,
1990 struct ib_mad_send_buf *send_buf,
1991 struct ib_mad_recv_wc *mad_recv_wc)
1992{
1993 struct ib_sa_query *query;
1994
1995 if (!send_buf)
1996 return;
1997
1998 query = send_buf->context[0];
1999 if (query->callback) {
2000 if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
2001 query->callback(query,
2002 mad_recv_wc->recv_buf.mad->mad_hdr.status ?
2003 -EINVAL : 0,
2004 (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
2005 else
2006 query->callback(query, -EIO, NULL);
2007 }
2008
2009 ib_free_recv_mad(mad_recv_wc);
2010}
2011
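/*
 * Rebuild the cached address handle used to reach the subnet manager.
 * Runs from the port's update_task: it re-reads the port attributes, looks
 * up the default full-membership PKey index, handles the OPA permissive
 * LID and GRH-required cases, and then swaps the new AH in under ah_lock,
 * dropping the reference on the old one.
 */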
2012static void update_sm_ah(struct work_struct *work)
2013{
2014 struct ib_sa_port *port =
2015 container_of(work, struct ib_sa_port, update_task);
2016 struct ib_sa_sm_ah *new_ah;
2017 struct ib_port_attr port_attr;
2018 struct rdma_ah_attr ah_attr;
2019 bool grh_required;
2020
2021 if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
2022 pr_warn("Couldn't query port\n");
2023 return;
2024 }
2025
2026 new_ah = kmalloc(sizeof(*new_ah), GFP_KERNEL);
2027 if (!new_ah)
2028 return;
2029
2030 kref_init(&new_ah->ref);
2031 new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
2032
2033 new_ah->pkey_index = 0;
2034 if (ib_find_pkey(port->agent->device, port->port_num,
2035 IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
2036 pr_err("Couldn't find index for default PKey\n");
2037
2038 memset(&ah_attr, 0, sizeof(ah_attr));
2039 ah_attr.type = rdma_ah_find_type(port->agent->device,
2040 port->port_num);
2041 rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid);
2042 rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
2043 rdma_ah_set_port_num(&ah_attr, port->port_num);
2044
2045 grh_required = rdma_is_grh_required(port->agent->device,
2046 port->port_num);
2047
2048 /*
2049 * The OPA sm_lid of 0xFFFF needs special handling so that it can be
2050 * differentiated from a permissive LID of 0xFFFF. We set the make_grd
2051 * flag here so the SA can program the DGID in the address handle
2052 * appropriately.
2053 */
2054 if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA &&
2055 (grh_required ||
2056 port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)))
2057 rdma_ah_set_make_grd(&ah_attr, true);
2058
2059 if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) {
2060 rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
2061 rdma_ah_set_subnet_prefix(&ah_attr,
2062 cpu_to_be64(port_attr.subnet_prefix));
2063 rdma_ah_set_interface_id(&ah_attr,
2064 cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
2065 }
2066
2067 new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr,
2068 RDMA_CREATE_AH_SLEEPABLE);
2069 if (IS_ERR(new_ah->ah)) {
2070 pr_warn("Couldn't create new SM AH\n");
2071 kfree(new_ah);
2072 return;
2073 }
2074
2075 spin_lock_irq(&port->ah_lock);
2076 if (port->sm_ah)
2077 kref_put(&port->sm_ah->ref, free_sm_ah);
2078 port->sm_ah = new_ah;
2079 spin_unlock_irq(&port->ah_lock);
2080}
2081
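/*
 * Device event handler.  Port state, LID, PKey, SM and client-reregister
 * events invalidate the cached SM address handle and queue update_task to
 * rebuild it; the events that can change SA properties additionally
 * invalidate the cached ClassPortInfo and schedule a delayed refresh.
 */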
2082static void ib_sa_event(struct ib_event_handler *handler,
2083 struct ib_event *event)
2084{
2085 if (event->event == IB_EVENT_PORT_ERR ||
2086 event->event == IB_EVENT_PORT_ACTIVE ||
2087 event->event == IB_EVENT_LID_CHANGE ||
2088 event->event == IB_EVENT_PKEY_CHANGE ||
2089 event->event == IB_EVENT_SM_CHANGE ||
2090 event->event == IB_EVENT_CLIENT_REREGISTER) {
2091 unsigned long flags;
2092 struct ib_sa_device *sa_dev =
2093 container_of(handler, typeof(*sa_dev), event_handler);
2094 u32 port_num = event->element.port_num - sa_dev->start_port;
2095 struct ib_sa_port *port = &sa_dev->port[port_num];
2096
2097 if (!rdma_cap_ib_sa(handler->device, port->port_num))
2098 return;
2099
2100 spin_lock_irqsave(&port->ah_lock, flags);
2101 if (port->sm_ah)
2102 kref_put(&port->sm_ah->ref, free_sm_ah);
2103 port->sm_ah = NULL;
2104 spin_unlock_irqrestore(&port->ah_lock, flags);
2105
2106 if (event->event == IB_EVENT_SM_CHANGE ||
2107 event->event == IB_EVENT_CLIENT_REREGISTER ||
2108 event->event == IB_EVENT_LID_CHANGE ||
2109 event->event == IB_EVENT_PORT_ACTIVE) {
2110 unsigned long delay =
2111 msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
2112
2113 spin_lock_irqsave(&port->classport_lock, flags);
2114 port->classport_info.valid = false;
2115 port->classport_info.retry_cnt = 0;
2116 spin_unlock_irqrestore(&port->classport_lock, flags);
2117 queue_delayed_work(ib_wq,
2118 &port->ib_cpi_work, delay);
2119 }
2120 queue_work(ib_wq, &sa_dev->port[port_num].update_task);
2121 }
2122}
2123
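/*
 * Client add callback: allocate per-port state for ports start_port..
 * end_port, register a GSI MAD agent on every SA-capable port, install the
 * event handler and prime the SM address handles.  Fails with -EOPNOTSUPP
 * if no port supports SA management.
 */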
2124static int ib_sa_add_one(struct ib_device *device)
2125{
2126 struct ib_sa_device *sa_dev;
2127 int s, e, i;
2128 int count = 0;
2129 int ret;
2130
2131 s = rdma_start_port(device);
2132 e = rdma_end_port(device);
2133
2134 sa_dev = kzalloc(struct_size(sa_dev, port, e - s + 1), GFP_KERNEL);
2135 if (!sa_dev)
2136 return -ENOMEM;
2137
2138 sa_dev->start_port = s;
2139 sa_dev->end_port = e;
2140
2141 for (i = 0; i <= e - s; ++i) {
2142 spin_lock_init(&sa_dev->port[i].ah_lock);
2143 if (!rdma_cap_ib_sa(device, i + 1))
2144 continue;
2145
2146 sa_dev->port[i].sm_ah = NULL;
2147 sa_dev->port[i].port_num = i + s;
2148
2149 spin_lock_init(&sa_dev->port[i].classport_lock);
2150 sa_dev->port[i].classport_info.valid = false;
2151
2152 sa_dev->port[i].agent =
2153 ib_register_mad_agent(device, i + s, IB_QPT_GSI,
2154 NULL, 0, send_handler,
2155 recv_handler, sa_dev, 0);
2156 if (IS_ERR(sa_dev->port[i].agent)) {
2157 ret = PTR_ERR(sa_dev->port[i].agent);
2158 goto err;
2159 }
2160
2161 INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
2162 INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
2163 update_ib_cpi);
2164
2165 count++;
2166 }
2167
2168 if (!count) {
2169 ret = -EOPNOTSUPP;
2170 goto free;
2171 }
2172
2173 ib_set_client_data(device, &sa_client, sa_dev);
2174
2175 /*
2176 * We register our event handler after everything is set up,
2177 * and then update our cached info after the event handler is
2178 * registered to avoid any problems if a port changes state
2179 * during our initialization.
2180 */
2181
2182 INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
2183 ib_register_event_handler(&sa_dev->event_handler);
2184
2185 for (i = 0; i <= e - s; ++i) {
2186 if (rdma_cap_ib_sa(device, i + 1))
2187 update_sm_ah(&sa_dev->port[i].update_task);
2188 }
2189
2190 return 0;
2191
2192err:
2193 while (--i >= 0) {
2194 if (rdma_cap_ib_sa(device, i + 1))
2195 ib_unregister_mad_agent(sa_dev->port[i].agent);
2196 }
2197free:
2198 kfree(sa_dev);
2199 return ret;
2200}
2201
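/*
 * Client remove callback: tear down in roughly the reverse order of
 * ib_sa_add_one() -- unregister the event handler, flush outstanding work,
 * cancel the delayed ClassPortInfo refresh, unregister the MAD agents and
 * drop any cached SM address handles.
 */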
2202static void ib_sa_remove_one(struct ib_device *device, void *client_data)
2203{
2204 struct ib_sa_device *sa_dev = client_data;
2205 int i;
2206
2207 ib_unregister_event_handler(&sa_dev->event_handler);
2208 flush_workqueue(ib_wq);
2209
2210 for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
2211 if (rdma_cap_ib_sa(device, i + 1)) {
2212 cancel_delayed_work_sync(&sa_dev->port[i].ib_cpi_work);
2213 ib_unregister_mad_agent(sa_dev->port[i].agent);
2214 if (sa_dev->port[i].sm_ah)
2215 kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
2216 }
2217
2218 }
2219
2220 kfree(sa_dev);
2221}
2222
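/*
 * Subsystem init: randomize the base transaction ID used for outgoing
 * MADs, register the SA client and multicast handling, and create the
 * ordered workqueue plus delayed work used to time out netlink-based SA
 * requests.
 */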
2223int ib_sa_init(void)
2224{
2225 int ret;
2226
2227 get_random_bytes(&tid, sizeof(tid));
2228
2229 atomic_set(&ib_nl_sa_request_seq, 0);
2230
2231 ret = ib_register_client(&sa_client);
2232 if (ret) {
2233 pr_err("Couldn't register ib_sa client\n");
2234 goto err1;
2235 }
2236
2237 ret = mcast_init();
2238 if (ret) {
2239 pr_err("Couldn't initialize multicast handling\n");
2240 goto err2;
2241 }
2242
2243 ib_nl_wq = alloc_ordered_workqueue("ib_nl_sa_wq", WQ_MEM_RECLAIM);
2244 if (!ib_nl_wq) {
2245 ret = -ENOMEM;
2246 goto err3;
2247 }
2248
2249 INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout);
2250
2251 return 0;
2252
2253err3:
2254 mcast_cleanup();
2255err2:
2256 ib_unregister_client(&sa_client);
2257err1:
2258 return ret;
2259}
2260
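/*
 * Undo ib_sa_init(): stop the netlink timeout work, destroy its workqueue
 * and unregister the multicast and SA clients.  By this point every query
 * must already have completed (hence the WARN_ON on a non-empty XArray).
 */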
2261void ib_sa_cleanup(void)
2262{
2263 cancel_delayed_work(&ib_nl_timed_work);
2264 destroy_workqueue(ib_nl_wq);
2265 mcast_cleanup();
2266 ib_unregister_client(&sa_client);
2267 WARN_ON(!xa_empty(&queries));
2268}