Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net/smc: handle device, port, and QP error events

RoCE device changes cause an IB event, processed in the global event
handler for the RoCE device. Problems for a certain Queue Pair cause a QP
event, processed in the QP event handler for this QP.
Among those events are port errors and other fatal device errors. All
link groups using such a port or device must be terminated in those cases.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Ursula Braun and committed by
David S. Miller
da05bf29 a81e4aff

+25 -13
+25 -13
net/smc/smc_ib.c
··· 141 141 return rc; 142 142 } 143 143 144 + static void smc_ib_port_terminate(struct smc_ib_device *smcibdev, u8 ibport) 145 + { 146 + struct smc_link_group *lgr, *l; 147 + 148 + list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { 149 + if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev && 150 + lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) 151 + smc_lgr_terminate(lgr); 152 + } 153 + } 154 + 144 155 /* process context wrapper for might_sleep smc_ib_remember_port_attr */ 145 156 static void smc_ib_port_event_work(struct work_struct *work) 146 157 { ··· 162 151 for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) { 163 152 smc_ib_remember_port_attr(smcibdev, port_idx + 1); 164 153 clear_bit(port_idx, &smcibdev->port_event_mask); 154 + if (!smc_ib_port_active(smcibdev, port_idx + 1)) 155 + smc_ib_port_terminate(smcibdev, port_idx + 1); 165 156 } 166 157 } 167 158 ··· 178 165 179 166 switch (ibevent->event) { 180 167 case IB_EVENT_PORT_ERR: 181 - port_idx = ibevent->element.port_num - 1; 182 - set_bit(port_idx, &smcibdev->port_event_mask); 183 - schedule_work(&smcibdev->port_event_work); 184 - /* fall through */ 185 168 case IB_EVENT_DEVICE_FATAL: 186 - /* tbd in follow-on patch: 187 - * abnormal close of corresponding connections 188 - */ 189 - break; 190 169 case IB_EVENT_PORT_ACTIVE: 191 170 port_idx = ibevent->element.port_num - 1; 192 171 set_bit(port_idx, &smcibdev->port_event_mask); ··· 191 186 192 187 void smc_ib_dealloc_protection_domain(struct smc_link *lnk) 193 188 { 194 - ib_dealloc_pd(lnk->roce_pd); 189 + if (lnk->roce_pd) 190 + ib_dealloc_pd(lnk->roce_pd); 195 191 lnk->roce_pd = NULL; 196 192 } 197 193 ··· 209 203 210 204 static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) 211 205 { 206 + struct smc_ib_device *smcibdev = 207 + (struct smc_ib_device *)ibevent->device; 208 + u8 port_idx; 209 + 212 210 switch (ibevent->event) { 213 211 case IB_EVENT_DEVICE_FATAL: 214 212 case IB_EVENT_GID_CHANGE: 215 213 case 
IB_EVENT_PORT_ERR: 216 214 case IB_EVENT_QP_ACCESS_ERR: 217 - /* tbd in follow-on patch: 218 - * abnormal close of corresponding connections 219 - */ 215 + port_idx = ibevent->element.port_num - 1; 216 + set_bit(port_idx, &smcibdev->port_event_mask); 217 + schedule_work(&smcibdev->port_event_work); 220 218 break; 221 219 default: 222 220 break; ··· 229 219 230 220 void smc_ib_destroy_queue_pair(struct smc_link *lnk) 231 221 { 232 - ib_destroy_qp(lnk->roce_qp); 222 + if (lnk->roce_qp) 223 + ib_destroy_qp(lnk->roce_qp); 233 224 lnk->roce_qp = NULL; 234 225 } 235 226 ··· 473 462 { 474 463 if (!smcibdev->initialized) 475 464 return; 465 + smcibdev->initialized = 0; 476 466 smc_wr_remove_dev(smcibdev); 477 467 ib_unregister_event_handler(&smcibdev->event_handler); 478 468 ib_destroy_cq(smcibdev->roce_cq_recv);