Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/rxe: Let destroy qp succeed with stuck packet

In some situations a sent packet may get queued in the NIC longer than
the timeout of a ULP. Currently if this happens the ULP may try to reset
the link by destroying the qp and setting up an alternate connection but
will fail because the rxe driver is waiting for the packet to finish
getting sent and be returned to the skb destructor function where the qp
reference holding things up will be dropped. This patch modifies the way
that the qp is passed to the destructor to pass the qp index and not a qp
pointer. Then the destructor will attempt to lookup the qp from its index
and if it fails exit early. This requires taking a reference on the struct
sock rather than the qp allowing the qp to be destroyed while the sk is
still around waiting for the packet to finish.

Link: https://lore.kernel.org/r/20240329145513.35381-15-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

authored by

Bob Pearson and committed by
Jason Gunthorpe
1a633bdc 9cc62909

+32 -12
+31 -11
drivers/infiniband/sw/rxe/rxe_net.c
··· 345 345 346 346 static void rxe_skb_tx_dtor(struct sk_buff *skb) 347 347 { 348 - struct sock *sk = skb->sk; 349 - struct rxe_qp *qp = sk->sk_user_data; 350 - int skb_out = atomic_dec_return(&qp->skb_out); 348 + struct net_device *ndev = skb->dev; 349 + struct rxe_dev *rxe; 350 + unsigned int qp_index; 351 + struct rxe_qp *qp; 352 + int skb_out; 351 353 352 - if (unlikely(qp->need_req_skb && 353 - skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) 354 + rxe = rxe_get_dev_from_net(ndev); 355 + if (!rxe && is_vlan_dev(ndev)) 356 + rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev)); 357 + if (WARN_ON(!rxe)) 358 + return; 359 + 360 + qp_index = (int)(uintptr_t)skb->sk->sk_user_data; 361 + if (!qp_index) 362 + return; 363 + 364 + qp = rxe_pool_get_index(&rxe->qp_pool, qp_index); 365 + if (!qp) 366 + goto put_dev; 367 + 368 + skb_out = atomic_dec_return(&qp->skb_out); 369 + if (qp->need_req_skb && skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW) 354 370 rxe_sched_task(&qp->send_task); 355 371 356 372 rxe_put(qp); 373 + put_dev: 374 + ib_device_put(&rxe->ib_dev); 375 + sock_put(skb->sk); 357 376 } 358 377 359 378 static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) 360 379 { 361 380 int err; 381 + struct sock *sk = pkt->qp->sk->sk; 362 382 383 + sock_hold(sk); 384 + skb->sk = sk; 363 385 skb->destructor = rxe_skb_tx_dtor; 364 - skb->sk = pkt->qp->sk->sk; 365 - 366 - rxe_get(pkt->qp); 367 386 atomic_inc(&pkt->qp->skb_out); 368 387 369 388 if (skb->protocol == htons(ETH_P_IP)) ··· 398 379 */ 399 380 static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt) 400 381 { 382 + struct sock *sk = pkt->qp->sk->sk; 383 + 401 384 memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt)); 402 385 386 + sock_hold(sk); 387 + skb->sk = sk; 403 388 skb->destructor = rxe_skb_tx_dtor; 404 - skb->sk = pkt->qp->sk->sk; 405 - 406 - rxe_get(pkt->qp); 407 389 atomic_inc(&pkt->qp->skb_out); 408 390 409 391 if (skb->protocol == htons(ETH_P_IP))
+1 -1
drivers/infiniband/sw/rxe/rxe_qp.c
··· 244 244 err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk); 245 245 if (err < 0) 246 246 return err; 247 - qp->sk->sk->sk_user_data = qp; 247 + qp->sk->sk->sk_user_data = (void *)(uintptr_t)qp->elem.index; 248 248 249 249 /* pick a source UDP port number for this QP based on 250 250 * the source QPN. this spreads traffic for different QPs