Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

IB/ehca: Fix sync between completion handler and destroy cq

This patch fixes two issues reported by Roland Dreier and Christoph Hellwig:

- Mismatched sync/locking between completion handler and destroy cq. We
introduced a counter nr_events per cq to track number of irq events
seen. This counter is incremented when an event queue entry is seen
and decremented after the completion handler has been called, regardless
of whether scaling code is active or not. Note that nr_callbacks tracks
number of events assigned to a cpu and both counters can potentially
diverge.

The sync between running completion handler and destroy cq is done
by using the global spin lock ehca_cq_idr_lock.

- Replace yield with wait_event, waiting for the counter above to become zero.

Signed-off-by: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

authored by

Hoang-Nam Nguyen and committed by
Roland Dreier
31726798 a27cbe87

+60 -25
+5 -1
drivers/infiniband/hw/ehca/ehca_classes.h
··· 52 52 struct ehca_pd; 53 53 struct ehca_av; 54 54 55 + #include <linux/wait.h> 56 + 55 57 #include <rdma/ib_verbs.h> 56 58 #include <rdma/ib_user_verbs.h> 57 59 ··· 155 153 spinlock_t cb_lock; 156 154 struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; 157 155 struct list_head entry; 158 - u32 nr_callbacks; 156 + u32 nr_callbacks; /* #events assigned to cpu by scaling code */ 157 + u32 nr_events; /* #events seen */ 158 + wait_queue_head_t wait_completion; 159 159 spinlock_t task_lock; 160 160 u32 ownpid; 161 161 /* mmap counter for resources mapped into user space */
+14 -2
drivers/infiniband/hw/ehca/ehca_cq.c
··· 146 146 spin_lock_init(&my_cq->spinlock); 147 147 spin_lock_init(&my_cq->cb_lock); 148 148 spin_lock_init(&my_cq->task_lock); 149 + init_waitqueue_head(&my_cq->wait_completion); 149 150 my_cq->ownpid = current->tgid; 150 151 151 152 cq = &my_cq->ib_cq; ··· 303 302 return cq; 304 303 } 305 304 305 + static int get_cq_nr_events(struct ehca_cq *my_cq) 306 + { 307 + int ret; 308 + unsigned long flags; 309 + spin_lock_irqsave(&ehca_cq_idr_lock, flags); 310 + ret = my_cq->nr_events; 311 + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); 312 + return ret; 313 + } 314 + 306 315 int ehca_destroy_cq(struct ib_cq *cq) 307 316 { 308 317 u64 h_ret; ··· 340 329 } 341 330 342 331 spin_lock_irqsave(&ehca_cq_idr_lock, flags); 343 - while (my_cq->nr_callbacks) { 332 + while (my_cq->nr_events) { 344 333 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); 345 - yield(); 334 + wait_event(my_cq->wait_completion, !get_cq_nr_events(my_cq)); 346 335 spin_lock_irqsave(&ehca_cq_idr_lock, flags); 336 + /* recheck nr_events to assure no cqe has just arrived */ 347 337 } 348 338 349 339 idr_remove(&ehca_cq_idr, my_cq->token);
+39 -20
drivers/infiniband/hw/ehca/ehca_irq.c
··· 404 404 u32 token; 405 405 unsigned long flags; 406 406 struct ehca_cq *cq; 407 + 407 408 eqe_value = eqe->entry; 408 409 ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value); 409 410 if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { 410 - ehca_dbg(&shca->ib_device, "... completion event"); 411 + ehca_dbg(&shca->ib_device, "Got completion event"); 411 412 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); 412 413 spin_lock_irqsave(&ehca_cq_idr_lock, flags); 413 414 cq = idr_find(&ehca_cq_idr, token); ··· 420 419 return; 421 420 } 422 421 reset_eq_pending(cq); 423 - if (ehca_scaling_code) { 422 + cq->nr_events++; 423 + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); 424 + if (ehca_scaling_code) 424 425 queue_comp_task(cq); 425 - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); 426 - } else { 427 - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); 426 + else { 428 427 comp_event_callback(cq); 428 + spin_lock_irqsave(&ehca_cq_idr_lock, flags); 429 + cq->nr_events--; 430 + if (!cq->nr_events) 431 + wake_up(&cq->wait_completion); 432 + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); 429 433 } 430 434 } else { 431 - ehca_dbg(&shca->ib_device, 432 - "Got non completion event"); 435 + ehca_dbg(&shca->ib_device, "Got non completion event"); 433 436 parse_identifier(shca, eqe_value); 434 437 } 435 438 } ··· 483 478 "token=%x", token); 484 479 continue; 485 480 } 481 + eqe_cache[eqe_cnt].cq->nr_events++; 486 482 spin_unlock(&ehca_cq_idr_lock); 487 483 } else 488 484 eqe_cache[eqe_cnt].cq = NULL; ··· 510 504 /* call completion handler for cached eqes */ 511 505 for (i = 0; i < eqe_cnt; i++) 512 506 if (eq->eqe_cache[i].cq) { 513 - if (ehca_scaling_code) { 514 - spin_lock(&ehca_cq_idr_lock); 507 + if (ehca_scaling_code) 515 508 queue_comp_task(eq->eqe_cache[i].cq); 516 - spin_unlock(&ehca_cq_idr_lock); 517 - } else 518 - comp_event_callback(eq->eqe_cache[i].cq); 509 + else { 510 + struct ehca_cq *cq = eq->eqe_cache[i].cq; 511 + comp_event_callback(cq); 512 + 
spin_lock_irqsave(&ehca_cq_idr_lock, flags); 513 + cq->nr_events--; 514 + if (!cq->nr_events) 515 + wake_up(&cq->wait_completion); 516 + spin_unlock_irqrestore(&ehca_cq_idr_lock, 517 + flags); 518 + } 519 519 } else { 520 520 ehca_dbg(&shca->ib_device, "Got non completion event"); 521 521 parse_identifier(shca, eq->eqe_cache[i].eqe->entry); ··· 535 523 if (!eqe) 536 524 break; 537 525 process_eqe(shca, eqe); 538 - eqe_cnt++; 539 526 } while (1); 540 527 541 528 unlock_irq_spinlock: ··· 578 567 list_add_tail(&__cq->entry, &cct->cq_list); 579 568 cct->cq_jobs++; 580 569 wake_up(&cct->wait_queue); 581 - } 582 - else 570 + } else 583 571 __cq->nr_callbacks++; 584 572 585 573 spin_unlock(&__cq->task_lock); ··· 587 577 588 578 static void queue_comp_task(struct ehca_cq *__cq) 589 579 { 590 - int cpu; 591 580 int cpu_id; 592 581 struct ehca_cpu_comp_task *cct; 582 + int cq_jobs; 583 + unsigned long flags; 593 584 594 - cpu = get_cpu(); 595 585 cpu_id = find_next_online_cpu(pool); 596 586 BUG_ON(!cpu_online(cpu_id)); 597 587 598 588 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); 599 589 BUG_ON(!cct); 600 590 601 - if (cct->cq_jobs > 0) { 591 + spin_lock_irqsave(&cct->task_lock, flags); 592 + cq_jobs = cct->cq_jobs; 593 + spin_unlock_irqrestore(&cct->task_lock, flags); 594 + if (cq_jobs > 0) { 602 595 cpu_id = find_next_online_cpu(pool); 603 596 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); 604 597 BUG_ON(!cct); ··· 621 608 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); 622 609 spin_unlock_irqrestore(&cct->task_lock, flags); 623 610 comp_event_callback(cq); 624 - spin_lock_irqsave(&cct->task_lock, flags); 625 611 612 + spin_lock_irqsave(&ehca_cq_idr_lock, flags); 613 + cq->nr_events--; 614 + if (!cq->nr_events) 615 + wake_up(&cq->wait_completion); 616 + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); 617 + 618 + spin_lock_irqsave(&cct->task_lock, flags); 626 619 spin_lock(&cq->task_lock); 627 620 cq->nr_callbacks--; 628 - if (cq->nr_callbacks == 0) { 621 
+ if (!cq->nr_callbacks) { 629 622 list_del_init(cct->cq_list.next); 630 623 cct->cq_jobs--; 631 624 }
+2 -2
drivers/infiniband/hw/ehca/ehca_main.c
··· 52 52 MODULE_LICENSE("Dual BSD/GPL"); 53 53 MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); 54 54 MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); 55 - MODULE_VERSION("SVNEHCA_0021"); 55 + MODULE_VERSION("SVNEHCA_0022"); 56 56 57 57 int ehca_open_aqp1 = 0; 58 58 int ehca_debug_level = 0; ··· 810 810 int ret; 811 811 812 812 printk(KERN_INFO "eHCA Infiniband Device Driver " 813 - "(Rel.: SVNEHCA_0021)\n"); 813 + "(Rel.: SVNEHCA_0022)\n"); 814 814 idr_init(&ehca_qp_idr); 815 815 idr_init(&ehca_cq_idr); 816 816 spin_lock_init(&ehca_qp_idr_lock);