IB/mthca: Fix race in reference counting

Fix races in destroying various objects. If a destroy routine
waits for an object to become free by doing

wait_event(obj->wait, !atomic_read(&obj->refcount));
/* now clean up and destroy the object */

and another place drops a reference to the object by doing

if (atomic_dec_and_test(&obj->refcount))
        wake_up(&obj->wait);

then this is susceptible to a race where the wait_event() and final
freeing of the object occur between the atomic_dec_and_test() and the
wake_up(). And this is a use-after-free, since wake_up() will be
called on part of the already-freed object.
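
To make the window concrete, here is a hypothetical interleaving of the
two code paths above (obj stands in for the mthca CQ/QP/SRQ structures;
this is an illustration, not driver code):

/*
 * CPU A (drops a reference)               CPU B (destroy routine)
 * -------------------------               -----------------------
 * atomic_dec_and_test(&obj->refcount)
 *   refcount hits 0, returns true
 *                                         wait_event() sees refcount == 0
 *                                         cleans up and frees obj
 * wake_up(&obj->wait)
 *   dereferences the already-freed obj
 */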

Fix this in mthca by replacing the atomic_t refcounts with plain old
integers protected by a spinlock. This makes it possible to perform the
decrement of the reference count and the wake_up() under that lock, so
that they appear as a single atomic operation to the code waiting on
the wait queue.
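
In sketch form, the pattern the patch moves to looks like this. The
names obj, struct mthca_obj, mthca_obj_put(), mthca_obj_destroy() and
dev->table are hypothetical stand-ins for the CQ/QP/SRQ structures and
their cq/qp/srq_table; the real hunks are below.

/* Read the refcount only under the table lock. */
static inline int get_obj_refcount(struct mthca_dev *dev, struct mthca_obj *obj)
{
        int c;

        spin_lock_irq(&dev->table.lock);
        c = obj->refcount;
        spin_unlock_irq(&dev->table.lock);

        return c;
}

/* Drop a reference: decrement and wake_up() under the same lock, so a
 * waiter cannot observe refcount == 0 and free the object in between. */
static void mthca_obj_put(struct mthca_dev *dev, struct mthca_obj *obj)
{
        spin_lock(&dev->table.lock);
        if (!--obj->refcount)
                wake_up(&obj->wait);
        spin_unlock(&dev->table.lock);
}

/* Destroy path: drop the table's reference under the lock (the real
 * code also clears the table pointer here), then sleep until all other
 * references are gone. */
static void mthca_obj_destroy(struct mthca_dev *dev, struct mthca_obj *obj)
{
        spin_lock_irq(&dev->table.lock);
        --obj->refcount;
        spin_unlock_irq(&dev->table.lock);

        wait_event(obj->wait, !get_obj_refcount(dev, obj));
        /* no references remain; safe to clean up and free */
}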

While touching this code, also simplify mthca_cq_clean(): the CQ being
cleaned cannot go away, because it still has a QP attached to it. So
there's no reason to be paranoid and look up the CQ by number; it's
perfectly safe to use the pointer that the callers already have.
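
Concretely, the prototype changes from taking a CQ number to taking the
CQ pointer (paraphrasing the mthca_dev.h hunk below):

/* before */
void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
                    struct mthca_srq *srq);

/* after */
void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
                    struct mthca_srq *srq);

and callers such as mthca_free_qp() pass to_mcq(qp->ibqp.send_cq)
directly instead of its ->cqn.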

Signed-off-by: Roland Dreier <rolandd@cisco.com>

+74 -45
+21 -20
drivers/infiniband/hw/mthca/mthca_cq.c
···
         spin_lock(&dev->cq_table.lock);

         cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1));
-
         if (cq)
-                atomic_inc(&cq->refcount);
+                ++cq->refcount;
+
         spin_unlock(&dev->cq_table.lock);

         if (!cq) {
···
         if (cq->ibcq.event_handler)
                 cq->ibcq.event_handler(&event, cq->ibcq.cq_context);

-        if (atomic_dec_and_test(&cq->refcount))
+        spin_lock(&dev->cq_table.lock);
+        if (!--cq->refcount)
                 wake_up(&cq->wait);
+        spin_unlock(&dev->cq_table.lock);
 }

 static inline int is_recv_cqe(struct mthca_cqe *cqe)
···
         return !(cqe->is_send & 0x80);
 }

-void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
+void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
                     struct mthca_srq *srq)
 {
-        struct mthca_cq *cq;
         struct mthca_cqe *cqe;
         u32 prod_index;
         int nfreed = 0;
-
-        spin_lock_irq(&dev->cq_table.lock);
-        cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1));
-        if (cq)
-                atomic_inc(&cq->refcount);
-        spin_unlock_irq(&dev->cq_table.lock);
-
-        if (!cq)
-                return;

         spin_lock_irq(&cq->lock);

···

         if (0)
                 mthca_dbg(dev, "Cleaning QPN %06x from CQN %06x; ci %d, pi %d\n",
-                          qpn, cqn, cq->cons_index, prod_index);
+                          qpn, cq->cqn, cq->cons_index, prod_index);

         /*
          * Now sweep backwards through the CQ, removing CQ entries
···
         }

         spin_unlock_irq(&cq->lock);
-        if (atomic_dec_and_test(&cq->refcount))
-                wake_up(&cq->wait);
 }

 void mthca_cq_resize_copy_cqes(struct mthca_cq *cq)
···
         }

         spin_lock_init(&cq->lock);
-        atomic_set(&cq->refcount, 1);
+        cq->refcount = 1;
         init_waitqueue_head(&cq->wait);

         memset(cq_context, 0, sizeof *cq_context);
···
         return err;
 }

+static inline int get_cq_refcount(struct mthca_dev *dev, struct mthca_cq *cq)
+{
+        int c;
+
+        spin_lock_irq(&dev->cq_table.lock);
+        c = cq->refcount;
+        spin_unlock_irq(&dev->cq_table.lock);
+
+        return c;
+}
+
 void mthca_free_cq(struct mthca_dev *dev,
                    struct mthca_cq *cq)
 {
···
         spin_lock_irq(&dev->cq_table.lock);
         mthca_array_clear(&dev->cq_table.cq,
                           cq->cqn & (dev->limits.num_cqs - 1));
+        --cq->refcount;
         spin_unlock_irq(&dev->cq_table.lock);

         if (dev->mthca_flags & MTHCA_FLAG_MSI_X)
···
         else
                 synchronize_irq(dev->pdev->irq);

-        atomic_dec(&cq->refcount);
-        wait_event(cq->wait, !atomic_read(&cq->refcount));
+        wait_event(cq->wait, !get_cq_refcount(dev, cq));

         if (cq->is_kernel) {
                 mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
+1 -1
drivers/infiniband/hw/mthca/mthca_dev.h
···
 void mthca_cq_completion(struct mthca_dev *dev, u32 cqn);
 void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
                     enum ib_event_type event_type);
-void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
+void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
                     struct mthca_srq *srq);
 void mthca_cq_resize_copy_cqes(struct mthca_cq *cq);
 int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent);
+12 -10
drivers/infiniband/hw/mthca/mthca_provider.h
···
  * a qp may be locked, with the send cq locked first.  No other
  * nesting should be done.
  *
- * Each struct mthca_cq/qp also has an atomic_t ref count.  The
- * pointer from the cq/qp_table to the struct counts as one reference.
- * This reference also is good for access through the consumer API, so
- * modifying the CQ/QP etc doesn't need to take another reference.
- * Access because of a completion being polled does need a reference.
+ * Each struct mthca_cq/qp also has an ref count, protected by the
+ * corresponding table lock.  The pointer from the cq/qp_table to the
+ * struct counts as one reference.  This reference also is good for
+ * access through the consumer API, so modifying the CQ/QP etc doesn't
+ * need to take another reference.  Access to a QP because of a
+ * completion being polled does not need a reference either.
  *
  * Finally, each struct mthca_cq/qp has a wait_queue_head_t for the
  * destroy function to sleep on.
···
  * - decrement ref count; if zero, wake up waiters
  *
  * To destroy a CQ/QP, we can do the following:
- * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock
- * - decrement ref count
+ * - lock cq/qp_table
+ * - remove pointer and decrement ref count
+ * - unlock cq/qp_table lock
  * - wait_event until ref count is zero
  *
  * It is the consumer's responsibilty to make sure that no QP
···
 struct mthca_cq {
         struct ib_cq ibcq;
         spinlock_t lock;
-        atomic_t refcount;
+        int refcount;
         int cqn;
         u32 cons_index;
         struct mthca_cq_buf buf;
···
 struct mthca_srq {
         struct ib_srq ibsrq;
         spinlock_t lock;
-        atomic_t refcount;
+        int refcount;
         int srqn;
         int max;
         int max_gs;
···

 struct mthca_qp {
         struct ib_qp ibqp;
-        atomic_t refcount;
+        int refcount;
         u32 qpn;
         int is_direct;
         u8 port; /* for SQP and memfree use only */
+22 -9
drivers/infiniband/hw/mthca/mthca_qp.c
···
         spin_lock(&dev->qp_table.lock);
         qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1));
         if (qp)
-                atomic_inc(&qp->refcount);
+                ++qp->refcount;
         spin_unlock(&dev->qp_table.lock);

         if (!qp) {
···
         if (qp->ibqp.event_handler)
                 qp->ibqp.event_handler(&event, qp->ibqp.qp_context);

-        if (atomic_dec_and_test(&qp->refcount))
+        spin_lock(&dev->qp_table.lock);
+        if (!--qp->refcount)
                 wake_up(&qp->wait);
+        spin_unlock(&dev->qp_table.lock);
 }

 static int to_mthca_state(enum ib_qp_state ib_state)
···
          * entries and reinitialize the QP.
          */
         if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) {
-                mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn,
+                mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn,
                                qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
                 if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
-                        mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn,
+                        mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
                                        qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);

                 mthca_wq_init(&qp->sq);
···
         int ret;
         int i;

-        atomic_set(&qp->refcount, 1);
+        qp->refcount = 1;
         init_waitqueue_head(&qp->wait);
         qp->state        = IB_QPS_RESET;
         qp->atomic_rd_en = 0;
···
         return err;
 }

+static inline int get_qp_refcount(struct mthca_dev *dev, struct mthca_qp *qp)
+{
+        int c;
+
+        spin_lock_irq(&dev->qp_table.lock);
+        c = qp->refcount;
+        spin_unlock_irq(&dev->qp_table.lock);
+
+        return c;
+}
+
 void mthca_free_qp(struct mthca_dev *dev,
                    struct mthca_qp *qp)
 {
···
         spin_lock(&dev->qp_table.lock);
         mthca_array_clear(&dev->qp_table.qp,
                           qp->qpn & (dev->limits.num_qps - 1));
+        --qp->refcount;
         spin_unlock(&dev->qp_table.lock);

         if (send_cq != recv_cq)
                 spin_unlock(&recv_cq->lock);
         spin_unlock_irq(&send_cq->lock);

-        atomic_dec(&qp->refcount);
-        wait_event(qp->wait, !atomic_read(&qp->refcount));
+        wait_event(qp->wait, !get_qp_refcount(dev, qp));

         if (qp->state != IB_QPS_RESET)
                 mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0,
···
          * unref the mem-free tables and free the QPN in our table.
          */
         if (!qp->ibqp.uobject) {
-                mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn,
+                mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn,
                                qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
                 if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
-                        mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn,
+                        mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
                                        qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);

                 mthca_free_memfree(dev, qp);
+18 -5
drivers/infiniband/hw/mthca/mthca_srq.c
···
                 goto err_out_mailbox;

         spin_lock_init(&srq->lock);
-        atomic_set(&srq->refcount, 1);
+        srq->refcount = 1;
         init_waitqueue_head(&srq->wait);

         if (mthca_is_memfree(dev))
···
         return err;
 }

+static inline int get_srq_refcount(struct mthca_dev *dev, struct mthca_srq *srq)
+{
+        int c;
+
+        spin_lock_irq(&dev->srq_table.lock);
+        c = srq->refcount;
+        spin_unlock_irq(&dev->srq_table.lock);
+
+        return c;
+}
+
 void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
 {
         struct mthca_mailbox *mailbox;
···
         spin_lock_irq(&dev->srq_table.lock);
         mthca_array_clear(&dev->srq_table.srq,
                           srq->srqn & (dev->limits.num_srqs - 1));
+        --srq->refcount;
         spin_unlock_irq(&dev->srq_table.lock);

-        atomic_dec(&srq->refcount);
-        wait_event(srq->wait, !atomic_read(&srq->refcount));
+        wait_event(srq->wait, !get_srq_refcount(dev, srq));

         if (!srq->ibsrq.uobject) {
                 mthca_free_srq_buf(dev, srq);
···
         spin_lock(&dev->srq_table.lock);
         srq = mthca_array_get(&dev->srq_table.srq, srqn & (dev->limits.num_srqs - 1));
         if (srq)
-                atomic_inc(&srq->refcount);
+                ++srq->refcount;
         spin_unlock(&dev->srq_table.lock);

         if (!srq) {
···
                 srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context);

 out:
-        if (atomic_dec_and_test(&srq->refcount))
+        spin_lock(&dev->srq_table.lock);
+        if (!--srq->refcount)
                 wake_up(&srq->wait);
+        spin_unlock(&dev->srq_table.lock);
 }

 /*