Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

IB/ehca: Support small QP queues

eHCA2 supports QP queues that can be as small as 512 bytes. This
greatly reduces memory overhead for consumers that use lots of QPs
with small queues (e.g. RDMA-only QPs). Apart from dealing with
firmware, this code needs to manage bite-sized chunks of kernel pages,
making sure that no kernel page is shared between different protection
domains.

Signed-off-by: Hoang-Nam Nguyen <hnguyen@de.ibm.com>

Authored by Stefan Roscher, committed by Roland Dreier
e2f81daf 0c10f7b7

10 files changed: +382 -163
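In essence, each protection domain (PD) gets its own pool of kernel pages, and every page is carved into power-of-two chunks tracked by a bitmap, so a single 4 KB page can back up to eight 512-byte queues without ever being shared between PDs. The standalone sketch below illustrates the idea; all names in it are hypothetical, and it is deliberately simpler than the driver code in the diffs that follow (the real driver keeps per-PD free/full lists for two chunk sizes under a per-PD mutex).

/*
 * Illustrative sketch only, not the driver code below: one kernel page,
 * privately owned by a single PD, is carved into 512-byte chunks whose
 * occupancy is tracked by a bitmap.
 */
#include <stddef.h>
#include <stdint.h>

#define KPAGE_SIZE      4096
#define CHUNK_SIZE      512                  /* smallest queue size */
#define CHUNKS_PER_PAGE (KPAGE_SIZE / CHUNK_SIZE)

struct pd_page {                             /* hypothetical type */
        uint8_t mem[KPAGE_SIZE];
        uint8_t bitmap;                      /* 8 chunks -> 8 bits */
};

/* hand out one 512-byte chunk, or NULL if the page is full */
static void *alloc_chunk(struct pd_page *p)
{
        int bit;

        for (bit = 0; bit < CHUNKS_PER_PAGE; bit++)
                if (!(p->bitmap & (1u << bit))) {
                        p->bitmap |= 1u << bit;
                        return p->mem + bit * CHUNK_SIZE;
                }
        return NULL;    /* caller must take a fresh page for this PD */
}

static void free_chunk(struct pd_page *p, void *chunk)
{
        int bit = ((uint8_t *)chunk - p->mem) / CHUNK_SIZE;

        p->bitmap &= ~(1u << bit);           /* chunk reusable again */
}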

drivers/infiniband/hw/ehca/ehca_classes.h (+26 -15)
···
 #ifndef __EHCA_CLASSES_H__
 #define __EHCA_CLASSES_H__
 
-
 struct ehca_module;
 struct ehca_qp;
 struct ehca_cq;
···
         struct ib_pd ib_pd;
         struct ipz_pd fw_pd;
         u32 ownpid;
+        /* small queue mgmt */
+        struct mutex lock;
+        struct list_head free[2];
+        struct list_head full[2];
 };
 
 enum ehca_ext_qp_type {
···
 void ehca_cleanup_av_cache(void);
 int ehca_init_mrmw_cache(void);
 void ehca_cleanup_mrmw_cache(void);
+int ehca_init_small_qp_cache(void);
+void ehca_cleanup_small_qp_cache(void);
 
 extern rwlock_t ehca_qp_idr_lock;
 extern rwlock_t ehca_cq_idr_lock;
···
         u32 queue_length; /* queue length allocated in bytes */
         u32 pagesize;
         u32 toggle_state;
-        u32 dummy; /* padding for 8 byte alignment */
+        u32 offset; /* save offset within a page for small_qp */
 };
 
 struct ehca_create_cq_resp {
···
         LLQP_COMP_MASK = 0x60,
 };
 
+struct ehca_alloc_queue_parms {
+        /* input parameters */
+        int max_wr;
+        int max_sge;
+        int page_size;
+        int is_small;
+
+        /* output parameters */
+        u16 act_nr_wqes;
+        u8 act_nr_sges;
+        u32 queue_size; /* bytes for small queues, pages otherwise */
+};
+
 struct ehca_alloc_qp_parms {
-        /* input parameters */
+        struct ehca_alloc_queue_parms squeue;
+        struct ehca_alloc_queue_parms rqueue;
+
+        /* input parameters */
         enum ehca_service_type servicetype;
+        int qp_storage;
         int sigtype;
         enum ehca_ext_qp_type ext_type;
         enum ehca_ll_comp_flags ll_comp_flags;
-
-        int max_send_wr, max_recv_wr;
-        int max_send_sge, max_recv_sge;
         int ud_av_l_key_ctl;
 
         u32 token;
···
 
         u32 srq_qpn, srq_token, srq_limit;
 
-        /* output parameters */
+        /* output parameters */
         u32 real_qp_num;
         struct ipz_qp_handle qp_handle;
         struct h_galpas galpas;
-
-        u16 act_nr_send_wqes;
-        u16 act_nr_recv_wqes;
-        u8 act_nr_recv_sges;
-        u8 act_nr_send_sges;
-
-        u32 nr_rq_pages;
-        u32 nr_sq_pages;
 };
 
 int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);

drivers/infiniband/hw/ehca/ehca_cq.c (+4 -4)
···
                 goto create_cq_exit2;
         }
 
-        ipz_rc = ipz_queue_ctor(&my_cq->ipz_queue, param.act_pages,
-                                EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0);
+        ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages,
+                                EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0);
         if (!ipz_rc) {
                 ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p",
                          ipz_rc, device);
···
         return cq;
 
 create_cq_exit4:
-        ipz_queue_dtor(&my_cq->ipz_queue);
+        ipz_queue_dtor(NULL, &my_cq->ipz_queue);
 
 create_cq_exit3:
         h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
···
                          "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
                 return ehca2ib_return_code(h_ret);
         }
-        ipz_queue_dtor(&my_cq->ipz_queue);
+        ipz_queue_dtor(NULL, &my_cq->ipz_queue);
         kmem_cache_free(cq_cache, my_cq);
 
         return 0;

drivers/infiniband/hw/ehca/ehca_eq.c (+4 -4)
···
                 return -EINVAL;
         }
 
-        ret = ipz_queue_ctor(&eq->ipz_queue, nr_pages,
-                             EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0);
+        ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages,
+                             EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0);
         if (!ret) {
                 ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq);
                 goto create_eq_exit1;
···
         return 0;
 
 create_eq_exit2:
-        ipz_queue_dtor(&eq->ipz_queue);
+        ipz_queue_dtor(NULL, &eq->ipz_queue);
 
 create_eq_exit1:
         hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
···
                 ehca_err(&shca->ib_device, "Can't free EQ resources.");
                 return -EINVAL;
         }
-        ipz_queue_dtor(&eq->ipz_queue);
+        ipz_queue_dtor(NULL, &eq->ipz_queue);
 
         return 0;
 }

drivers/infiniband/hw/ehca/ehca_main.c (+12 -2)
···
                 goto create_slab_caches5;
         }
 
+        ret = ehca_init_small_qp_cache();
+        if (ret) {
+                ehca_gen_err("Cannot create small queue SLAB cache.");
+                goto create_slab_caches6;
+        }
+
 #ifdef CONFIG_PPC_64K_PAGES
         ctblk_cache = kmem_cache_create("ehca_cache_ctblk",
                                         EHCA_PAGESIZE, H_CB_ALIGNMENT,
···
                                         NULL);
         if (!ctblk_cache) {
                 ehca_gen_err("Cannot create ctblk SLAB cache.");
-                ehca_cleanup_mrmw_cache();
-                goto create_slab_caches5;
+                ehca_cleanup_small_qp_cache();
+                goto create_slab_caches6;
         }
 #endif
         return 0;
+
+create_slab_caches6:
+        ehca_cleanup_mrmw_cache();
 
 create_slab_caches5:
         ehca_cleanup_av_cache();
···
 
 static void ehca_destroy_slab_caches(void)
 {
+        ehca_cleanup_small_qp_cache();
         ehca_cleanup_mrmw_cache();
         ehca_cleanup_av_cache();
         ehca_cleanup_qp_cache();

drivers/infiniband/hw/ehca/ehca_pd.c (+23 -2)
···
                             struct ib_ucontext *context, struct ib_udata *udata)
 {
         struct ehca_pd *pd;
+        int i;
 
         pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL);
         if (!pd) {
···
         }
 
         pd->ownpid = current->tgid;
+        for (i = 0; i < 2; i++) {
+                INIT_LIST_HEAD(&pd->free[i]);
+                INIT_LIST_HEAD(&pd->full[i]);
+        }
+        mutex_init(&pd->lock);
 
         /*
          * Kernel PD: when device = -1, 0
···
 {
         u32 cur_pid = current->tgid;
         struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
+        int i, leftovers = 0;
+        extern struct kmem_cache *small_qp_cache;
+        struct ipz_small_queue_page *page, *tmp;
 
         if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
             my_pd->ownpid != cur_pid) {
···
                 return -EINVAL;
         }
 
-        kmem_cache_free(pd_cache,
-                        container_of(pd, struct ehca_pd, ib_pd));
+        for (i = 0; i < 2; i++) {
+                list_splice(&my_pd->full[i], &my_pd->free[i]);
+                list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) {
+                        leftovers = 1;
+                        free_page(page->page);
+                        kmem_cache_free(small_qp_cache, page);
+                }
+        }
+
+        if (leftovers)
+                ehca_warn(pd->device,
+                          "Some small queue pages were not freed");
+
+        kmem_cache_free(pd_cache, my_pd);
 
         return 0;
 }

drivers/infiniband/hw/ehca/ehca_qp.c (+97 -64)
···
         resp->toggle_state = queue->toggle_state;
 }
 
-static inline int ll_qp_msg_size(int nr_sge)
-{
-        return 128 << nr_sge;
-}
-
 /*
  * init_qp_queue initializes/constructs r/squeue and registers queue pages.
  */
 static inline int init_qp_queue(struct ehca_shca *shca,
+                                struct ehca_pd *pd,
                                 struct ehca_qp *my_qp,
                                 struct ipz_queue *queue,
                                 int q_type,
                                 u64 expected_hret,
-                                int nr_q_pages,
-                                int wqe_size,
-                                int nr_sges)
+                                struct ehca_alloc_queue_parms *parms,
+                                int wqe_size)
 {
-        int ret, cnt, ipz_rc;
+        int ret, cnt, ipz_rc, nr_q_pages;
         void *vpage;
         u64 rpage, h_ret;
         struct ib_device *ib_dev = &shca->ib_device;
         struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
 
-        if (!nr_q_pages)
+        if (!parms->queue_size)
                 return 0;
 
-        ipz_rc = ipz_queue_ctor(queue, nr_q_pages, EHCA_PAGESIZE,
-                                wqe_size, nr_sges);
+        if (parms->is_small) {
+                nr_q_pages = 1;
+                ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
+                                        128 << parms->page_size,
+                                        wqe_size, parms->act_nr_sges, 1);
+        } else {
+                nr_q_pages = parms->queue_size;
+                ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
+                                        EHCA_PAGESIZE, wqe_size,
+                                        parms->act_nr_sges, 0);
+        }
+
         if (!ipz_rc) {
                 ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%x",
                          ipz_rc);
···
                 h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
                                                  my_qp->ipz_qp_handle,
                                                  NULL, 0, q_type,
-                                                 rpage, 1,
+                                                 rpage, parms->is_small ? 0 : 1,
                                                  my_qp->galpas.kernel);
                 if (cnt == (nr_q_pages - 1)) { /* last page! */
                         if (h_ret != expected_hret) {
···
         return 0;
 
 init_qp_queue1:
-        ipz_queue_dtor(queue);
+        ipz_queue_dtor(pd, queue);
         return ret;
+}
+
+static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp)
+{
+        if (is_llqp)
+                return 128 << act_nr_sge;
+        else
+                return offsetof(struct ehca_wqe,
+                                u.nud.sg_list[act_nr_sge]);
+}
+
+static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue,
+                                       int req_nr_sge, int is_llqp)
+{
+        u32 wqe_size, q_size;
+        int act_nr_sge = req_nr_sge;
+
+        if (!is_llqp)
+                /* round up #SGEs so WQE size is a power of 2 */
+                for (act_nr_sge = 4; act_nr_sge <= 252;
+                     act_nr_sge = 4 + 2 * act_nr_sge)
+                        if (act_nr_sge >= req_nr_sge)
+                                break;
+
+        wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp);
+        q_size = wqe_size * (queue->max_wr + 1);
+
+        if (q_size <= 512)
+                queue->page_size = 2;
+        else if (q_size <= 1024)
+                queue->page_size = 3;
+        else
+                queue->page_size = 0;
+
+        queue->is_small = (queue->page_size != 0);
 }
 
 /*
···
         if (my_qp->recv_cq)
                 parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle;
 
-        parms.max_send_wr = init_attr->cap.max_send_wr;
-        parms.max_recv_wr = init_attr->cap.max_recv_wr;
-        parms.max_send_sge = max_send_sge;
-        parms.max_recv_sge = max_recv_sge;
+        parms.squeue.max_wr = init_attr->cap.max_send_wr;
+        parms.rqueue.max_wr = init_attr->cap.max_recv_wr;
+        parms.squeue.max_sge = max_send_sge;
+        parms.rqueue.max_sge = max_recv_sge;
+
+        if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)
+            && !(context && udata)) { /* no small QP support in userspace ATM */
+                ehca_determine_small_queue(
+                        &parms.squeue, max_send_sge, is_llqp);
+                ehca_determine_small_queue(
+                        &parms.rqueue, max_recv_sge, is_llqp);
+                parms.qp_storage =
+                        (parms.squeue.is_small || parms.rqueue.is_small);
+        }
 
         h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms);
         if (h_ret != H_SUCCESS) {
···
         my_qp->ipz_qp_handle = parms.qp_handle;
         my_qp->galpas = parms.galpas;
 
+        swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp);
+        rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp);
+
         switch (qp_type) {
         case IB_QPT_RC:
-                if (!is_llqp) {
-                        swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
-                                             (parms.act_nr_send_sges)]);
-                        rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
-                                             (parms.act_nr_recv_sges)]);
-                } else { /* for LLQP we need to use msg size, not wqe size */
-                        swqe_size = ll_qp_msg_size(max_send_sge);
-                        rwqe_size = ll_qp_msg_size(max_recv_sge);
-                        parms.act_nr_send_sges = 1;
-                        parms.act_nr_recv_sges = 1;
+                if (is_llqp) {
+                        parms.squeue.act_nr_sges = 1;
+                        parms.rqueue.act_nr_sges = 1;
                 }
                 break;
-        case IB_QPT_UC:
-                swqe_size = offsetof(struct ehca_wqe,
-                                     u.nud.sg_list[parms.act_nr_send_sges]);
-                rwqe_size = offsetof(struct ehca_wqe,
-                                     u.nud.sg_list[parms.act_nr_recv_sges]);
-                break;
-
         case IB_QPT_UD:
         case IB_QPT_GSI:
         case IB_QPT_SMI:
+                /* UD circumvention */
                 if (is_llqp) {
-                        swqe_size = ll_qp_msg_size(parms.act_nr_send_sges);
-                        rwqe_size = ll_qp_msg_size(parms.act_nr_recv_sges);
-                        parms.act_nr_send_sges = 1;
-                        parms.act_nr_recv_sges = 1;
+                        parms.squeue.act_nr_sges = 1;
+                        parms.rqueue.act_nr_sges = 1;
                 } else {
-                        /* UD circumvention */
-                        parms.act_nr_send_sges -= 2;
-                        parms.act_nr_recv_sges -= 2;
-                        swqe_size = offsetof(struct ehca_wqe, u.ud_av.sg_list[
-                                             parms.act_nr_send_sges]);
-                        rwqe_size = offsetof(struct ehca_wqe, u.ud_av.sg_list[
-                                             parms.act_nr_recv_sges]);
+                        parms.squeue.act_nr_sges -= 2;
+                        parms.rqueue.act_nr_sges -= 2;
                 }
 
                 if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) {
-                        parms.act_nr_send_wqes = init_attr->cap.max_send_wr;
-                        parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr;
-                        parms.act_nr_send_sges = init_attr->cap.max_send_sge;
-                        parms.act_nr_recv_sges = init_attr->cap.max_recv_sge;
+                        parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr;
+                        parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr;
+                        parms.squeue.act_nr_sges = init_attr->cap.max_send_sge;
+                        parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge;
                         ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1;
                 }
···
         /* initialize r/squeue and register queue pages */
         if (HAS_SQ(my_qp)) {
                 ret = init_qp_queue(
-                        shca, my_qp, &my_qp->ipz_squeue, 0,
+                        shca, my_pd, my_qp, &my_qp->ipz_squeue, 0,
                         HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS,
-                        parms.nr_sq_pages, swqe_size,
-                        parms.act_nr_send_sges);
+                        &parms.squeue, swqe_size);
                 if (ret) {
                         ehca_err(pd->device, "Couldn't initialize squeue "
                                  "and pages ret=%x", ret);
···
 
         if (HAS_RQ(my_qp)) {
                 ret = init_qp_queue(
-                        shca, my_qp, &my_qp->ipz_rqueue, 1,
-                        H_SUCCESS, parms.nr_rq_pages, rwqe_size,
-                        parms.act_nr_recv_sges);
+                        shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1,
+                        H_SUCCESS, &parms.rqueue, rwqe_size);
                 if (ret) {
                         ehca_err(pd->device, "Couldn't initialize rqueue "
                                  "and pages ret=%x", ret);
···
         }
 
         init_attr->cap.max_inline_data = 0; /* not supported yet */
-        init_attr->cap.max_recv_sge = parms.act_nr_recv_sges;
-        init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes;
-        init_attr->cap.max_send_sge = parms.act_nr_send_sges;
-        init_attr->cap.max_send_wr = parms.act_nr_send_wqes;
+        init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges;
+        init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes;
+        init_attr->cap.max_send_sge = parms.squeue.act_nr_sges;
+        init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes;
         my_qp->init_attr = *init_attr;
 
         /* NOTE: define_apq0() not supported yet */
···
                 resp.ext_type = my_qp->ext_type;
                 resp.qkey = my_qp->qkey;
                 resp.real_qp_num = my_qp->real_qp_num;
+                resp.ipz_rqueue.offset = my_qp->ipz_rqueue.offset;
+                resp.ipz_squeue.offset = my_qp->ipz_squeue.offset;
                 if (HAS_SQ(my_qp))
                         queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
                 if (HAS_RQ(my_qp))
···
 
 create_qp_exit4:
         if (HAS_RQ(my_qp))
-                ipz_queue_dtor(&my_qp->ipz_rqueue);
+                ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
 
 create_qp_exit3:
         if (HAS_SQ(my_qp))
-                ipz_queue_dtor(&my_qp->ipz_squeue);
+                ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
 
 create_qp_exit2:
         hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
···
         }
 
         if (HAS_RQ(my_qp))
-                ipz_queue_dtor(&my_qp->ipz_rqueue);
+                ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
         if (HAS_SQ(my_qp))
-                ipz_queue_dtor(&my_qp->ipz_squeue);
+                ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
         kmem_cache_free(qp_cache, my_qp);
         return 0;
 }
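A note on the page_size codes chosen by ehca_determine_small_queue() above: the firmware interprets a small-queue page as 128 << page_size bytes, so code 2 selects 512-byte pages, code 3 selects 1 KB pages, and 0 falls back to regular 4 KB queue pages. Worked through with hypothetical numbers:

/* say max_wr = 3, req_nr_sge = 2, is_llqp = 0:               */
/* act_nr_sge is rounded up to 4; assume the resulting WQE    */
/* size comes out at 128 bytes (illustrative value), then     */
/*   q_size = 128 * (3 + 1) = 512                             */
/*   q_size <= 512  ->  page_size = 2, is_small = 1           */
/* and init_qp_queue() builds a 128 << 2 = 512-byte queue.    */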

drivers/infiniband/hw/ehca/ehca_uverbs.c (+1 -1)
···
                         ehca_gen_err("vm_insert_page() failed rc=%x", ret);
                         return ret;
                 }
-                start += PAGE_SIZE;
+                start += PAGE_SIZE;
         }
         vma->vm_private_data = mm_count;
         (*mm_count)++;

drivers/infiniband/hw/ehca/hcp_if.c (+19 -11)
···
 #define H_ALL_RES_QP_ENHANCED_OPS       EHCA_BMASK_IBM(9, 11)
 #define H_ALL_RES_QP_PTE_PIN            EHCA_BMASK_IBM(12, 12)
 #define H_ALL_RES_QP_SERVICE_TYPE       EHCA_BMASK_IBM(13, 15)
+#define H_ALL_RES_QP_STORAGE            EHCA_BMASK_IBM(16, 17)
 #define H_ALL_RES_QP_LL_RQ_CQE_POSTING  EHCA_BMASK_IBM(18, 18)
 #define H_ALL_RES_QP_LL_SQ_CQE_POSTING  EHCA_BMASK_IBM(19, 21)
 #define H_ALL_RES_QP_SIGNALING_TYPE     EHCA_BMASK_IBM(22, 23)
 #define H_ALL_RES_QP_UD_AV_LKEY_CTRL    EHCA_BMASK_IBM(31, 31)
+#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35)
+#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39)
 #define H_ALL_RES_QP_RESOURCE_TYPE      EHCA_BMASK_IBM(56, 63)
 
 #define H_ALL_RES_QP_MAX_OUTST_SEND_WR  EHCA_BMASK_IBM(0, 15)
···
                 | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
                 | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
                 | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
+                | EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage)
+                | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE,
+                                 parms->squeue.page_size)
+                | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE,
+                                 parms->rqueue.page_size)
                 | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
                                  !!(parms->ll_comp_flags & LLQP_RECV_COMP))
                 | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
···
 
         max_r10_reg =
                 EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
-                               parms->max_send_wr + 1)
+                               parms->squeue.max_wr + 1)
                 | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
-                                 parms->max_recv_wr + 1)
+                                 parms->rqueue.max_wr + 1)
                 | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
-                                 parms->max_send_sge)
+                                 parms->squeue.max_sge)
                 | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
-                                 parms->max_recv_sge);
+                                 parms->rqueue.max_sge);
 
         r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token);
···
 
         parms->qp_handle.handle = outs[0];
         parms->real_qp_num = (u32)outs[1];
-        parms->act_nr_send_wqes =
+        parms->squeue.act_nr_wqes =
                 (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
-        parms->act_nr_recv_wqes =
+        parms->rqueue.act_nr_wqes =
                 (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
-        parms->act_nr_send_sges =
+        parms->squeue.act_nr_sges =
                 (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]);
-        parms->act_nr_recv_sges =
+        parms->rqueue.act_nr_sges =
                 (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]);
-        parms->nr_sq_pages =
+        parms->squeue.queue_size =
                 (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]);
-        parms->nr_rq_pages =
+        parms->rqueue.queue_size =
                 (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
 
         if (ret == H_SUCCESS)
···
                              const u64 count,
                              const struct h_galpa galpa)
 {
-        if (count != 1) {
+        if (count > 1) {
                 ehca_gen_err("Page counter=%lx", count);
                 return H_PARAMETER;
         }

drivers/infiniband/hw/ehca/ipz_pt_fn.c (+175 -55)
···
 
 #include "ehca_tools.h"
 #include "ipz_pt_fn.h"
+#include "ehca_classes.h"
+
+#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT)
+
+struct kmem_cache *small_qp_cache;
 
 void *ipz_qpageit_get_inc(struct ipz_queue *queue)
 {
···
                 queue->current_q_offset -= queue->pagesize;
                 ret = NULL;
         }
-        if (((u64)ret) % EHCA_PAGESIZE) {
+        if (((u64)ret) % queue->pagesize) {
                 ehca_gen_err("ERROR!! not at PAGE-Boundary");
                 return NULL;
         }
···
         return -EINVAL;
 }
 
-int ipz_queue_ctor(struct ipz_queue *queue,
-                   const u32 nr_of_pages,
-                   const u32 pagesize, const u32 qe_size, const u32 nr_of_sg)
-{
-        int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT;
-        int f;
+#if PAGE_SHIFT < EHCA_PAGESHIFT
+#error Kernel pages must be at least as large than eHCA pages (4K) !
+#endif
 
-        if (pagesize > PAGE_SIZE) {
-                ehca_gen_err("FATAL ERROR: pagesize=%x is greater "
-                             "than kernel page size", pagesize);
-                return 0;
-        }
-        if (!pages_per_kpage) {
-                ehca_gen_err("FATAL ERROR: invalid kernel page size. "
-                             "pages_per_kpage=%x", pages_per_kpage);
-                return 0;
-        }
-        queue->queue_length = nr_of_pages * pagesize;
-        queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
-        if (!queue->queue_pages) {
-                ehca_gen_err("ERROR!! didn't get the memory");
-                return 0;
-        }
-        memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
-        /*
-         * allocate pages for queue:
-         * outer loop allocates whole kernel pages (page aligned) and
-         * inner loop divides a kernel page into smaller hca queue pages
-         */
-        f = 0;
+/*
+ * allocate pages for queue:
+ * outer loop allocates whole kernel pages (page aligned) and
+ * inner loop divides a kernel page into smaller hca queue pages
+ */
+static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages)
+{
+        int k, f = 0;
+        u8 *kpage;
+
         while (f < nr_of_pages) {
-                u8 *kpage = (u8 *)get_zeroed_page(GFP_KERNEL);
-                int k;
+                kpage = (u8 *)get_zeroed_page(GFP_KERNEL);
                 if (!kpage)
-                        goto ipz_queue_ctor_exit0; /*NOMEM*/
-                for (k = 0; k < pages_per_kpage && f < nr_of_pages; k++) {
-                        (queue->queue_pages)[f] = (struct ipz_page *)kpage;
+                        goto out;
+
+                for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) {
+                        queue->queue_pages[f] = (struct ipz_page *)kpage;
                         kpage += EHCA_PAGESIZE;
                         f++;
                 }
         }
-
-        queue->current_q_offset = 0;
-        queue->qe_size = qe_size;
-        queue->act_nr_of_sg = nr_of_sg;
-        queue->pagesize = pagesize;
-        queue->toggle_state = 1;
         return 1;
 
-ipz_queue_ctor_exit0:
-        ehca_gen_err("Couldn't get alloc pages queue=%p f=%x nr_of_pages=%x",
-                     queue, f, nr_of_pages);
-        for (f = 0; f < nr_of_pages; f += pages_per_kpage) {
-                if (!(queue->queue_pages)[f])
-                        break;
+out:
+        for (f = 0; f < nr_of_pages && queue->queue_pages[f];
+             f += PAGES_PER_KPAGE)
                 free_page((unsigned long)(queue->queue_pages)[f]);
-        }
         return 0;
 }
 
-int ipz_queue_dtor(struct ipz_queue *queue)
+static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
 {
-        int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT;
-        int g;
-        int nr_pages;
+        int order = ilog2(queue->pagesize) - 9;
+        struct ipz_small_queue_page *page;
+        unsigned long bit;
+
+        mutex_lock(&pd->lock);
+
+        if (!list_empty(&pd->free[order]))
+                page = list_entry(pd->free[order].next,
+                                  struct ipz_small_queue_page, list);
+        else {
+                page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL);
+                if (!page)
+                        goto out;
+
+                page->page = get_zeroed_page(GFP_KERNEL);
+                if (!page->page) {
+                        kmem_cache_free(small_qp_cache, page);
+                        goto out;
+                }
+
+                list_add(&page->list, &pd->free[order]);
+        }
+
+        bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order);
+        __set_bit(bit, page->bitmap);
+        page->fill++;
+
+        if (page->fill == IPZ_SPAGE_PER_KPAGE >> order)
+                list_move(&page->list, &pd->full[order]);
+
+        mutex_unlock(&pd->lock);
+
+        queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9)));
+        queue->small_page = page;
+        return 1;
+
+out:
+        ehca_err(pd->ib_pd.device, "failed to allocate small queue page");
+        return 0;
+}
+
+static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
+{
+        int order = ilog2(queue->pagesize) - 9;
+        struct ipz_small_queue_page *page = queue->small_page;
+        unsigned long bit;
+        int free_page = 0;
+
+        bit = ((unsigned long)queue->queue_pages[0] & ~PAGE_MASK)
+                >> (order + 9);
+
+        mutex_lock(&pd->lock);
+
+        __clear_bit(bit, page->bitmap);
+        page->fill--;
+
+        if (page->fill == 0) {
+                list_del(&page->list);
+                free_page = 1;
+        }
+
+        if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1)
+                /* the page was full until we freed the chunk */
+                list_move_tail(&page->list, &pd->free[order]);
+
+        mutex_unlock(&pd->lock);
+
+        if (free_page) {
+                free_page(page->page);
+                kmem_cache_free(small_qp_cache, page);
+        }
+}
+
+int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
+                   const u32 nr_of_pages, const u32 pagesize,
+                   const u32 qe_size, const u32 nr_of_sg,
+                   int is_small)
+{
+        if (pagesize > PAGE_SIZE) {
+                ehca_gen_err("FATAL ERROR: pagesize=%x "
+                             "is greater than kernel page size", pagesize);
+                return 0;
+        }
+
+        /* init queue fields */
+        queue->queue_length = nr_of_pages * pagesize;
+        queue->pagesize = pagesize;
+        queue->qe_size = qe_size;
+        queue->act_nr_of_sg = nr_of_sg;
+        queue->current_q_offset = 0;
+        queue->toggle_state = 1;
+        queue->small_page = NULL;
+
+        /* allocate queue page pointers */
+        queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
+        if (!queue->queue_pages) {
+                ehca_gen_err("Couldn't allocate queue page list");
+                return 0;
+        }
+        memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
+
+        /* allocate actual queue pages */
+        if (is_small) {
+                if (!alloc_small_queue_page(queue, pd))
+                        goto ipz_queue_ctor_exit0;
+        } else
+                if (!alloc_queue_pages(queue, nr_of_pages))
+                        goto ipz_queue_ctor_exit0;
+
+        return 1;
+
+ipz_queue_ctor_exit0:
+        ehca_gen_err("Couldn't alloc pages queue=%p "
+                     "nr_of_pages=%x", queue, nr_of_pages);
+        vfree(queue->queue_pages);
+
+        return 0;
+}
+
+int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue)
+{
+        int i, nr_pages;
 
         if (!queue || !queue->queue_pages) {
                 ehca_gen_dbg("queue or queue_pages is NULL");
                 return 0;
         }
-        nr_pages = queue->queue_length / queue->pagesize;
-        for (g = 0; g < nr_pages; g += pages_per_kpage)
-                free_page((unsigned long)(queue->queue_pages)[g]);
+
+        if (queue->small_page)
+                free_small_queue_page(queue, pd);
+        else {
+                nr_pages = queue->queue_length / queue->pagesize;
+                for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE)
+                        free_page((unsigned long)queue->queue_pages[i]);
+        }
+
         vfree(queue->queue_pages);
 
         return 1;
+}
+
+int ehca_init_small_qp_cache(void)
+{
+        small_qp_cache = kmem_cache_create("ehca_cache_small_qp",
+                                           sizeof(struct ipz_small_queue_page),
+                                           0, SLAB_HWCACHE_ALIGN, NULL);
+        if (!small_qp_cache)
+                return -ENOMEM;
+
+        return 0;
+}
+
+void ehca_cleanup_small_qp_cache(void)
+{
+        kmem_cache_destroy(small_qp_cache);
 }
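The chunk bookkeeping above packs the chunk index into the low bits of the kernel page address: order = ilog2(pagesize) - 9 maps 512-byte chunks to order 0 and 1 KB chunks to order 1, and chunk i of a page starts at byte offset i << (order + 9). Worked through for a 512-byte queue, assuming 4 KB kernel pages:

/* pagesize = 512  ->  order = ilog2(512) - 9 = 0                */
/* one kernel page holds IPZ_SPAGE_PER_KPAGE >> 0 = 8 chunks     */
/* allocating bit 3:                                             */
/*   queue_pages[0] = page->page | (3 << 9)  -> byte offset 1536 */
/* freeing recovers the index from the low address bits:         */
/*   bit = (addr & ~PAGE_MASK) >> 9 = 1536 >> 9 = 3              */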

drivers/infiniband/hw/ehca/ipz_pt_fn.h (+21 -5)
···
 #include "ehca_tools.h"
 #include "ehca_qes.h"
 
+struct ehca_pd;
+struct ipz_small_queue_page;
+
 /* struct generic ehca page */
 struct ipz_page {
         u8 entries[EHCA_PAGESIZE];
+};
+
+#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512)
+
+struct ipz_small_queue_page {
+        unsigned long page;
+        unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG];
+        int fill;
+        void *mapped_addr;
+        u32 mmap_count;
+        struct list_head list;
 };
 
 /* struct generic queue in linux kernel virtual memory (kv) */
···
         u32 queue_length; /* queue length allocated in bytes */
         u32 pagesize;
         u32 toggle_state; /* toggle flag - per page */
-        u32 dummy3; /* 64 bit alignment */
+        u32 offset; /* save offset within page for small_qp */
+        struct ipz_small_queue_page *small_page;
 };
 
 /*
···
  * see ipz_qpt_ctor()
  * returns true if ok, false if out of memory
  */
-int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages,
-                   const u32 pagesize, const u32 qe_size,
-                   const u32 nr_of_sg);
+int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
+                   const u32 nr_of_pages, const u32 pagesize,
+                   const u32 qe_size, const u32 nr_of_sg,
+                   int is_small);
 
 /*
  * destructor for a ipz_queue_t
···
  * see ipz_queue_ctor()
  * returns true if ok, false if queue was NULL-ptr of free failed
  */
-int ipz_queue_dtor(struct ipz_queue *queue);
+int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue);
 
 /*
  * constructor for a ipz_qpt_t,