
IB/mthca: Always fill MTTs from CPU

Speed up memory registration by filling in MTTs directly when the CPU
can write directly to the whole table (all mem-free cards, and in
Tavor mode on 64-bit systems with the patch I posted earlier). This
reduces the number of FW commands needed to register an MR by at least
a factor of 2 and speeds up memory registration significantly.

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

authored by Michael S. Tsirkin, committed by Roland Dreier (b2875d4c c20e20ab)
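
As background for the hunks below: both CPU-write paths produce the same
64-bit entry per page, namely the page's DMA address with the present bit
set, byte-swapped to the big-endian layout the HCA expects. A minimal
sketch (the helper name is ours, for illustration only; the real code
inlines this expression):

/*
 * Sketch only: the MTT entry value the CPU-write paths below
 * build for each page. mthca_mtt_entry() is a hypothetical name;
 * see cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT) in
 * mthca_mr.c.
 */
static inline __be64 mthca_mtt_entry(u64 page_dma_addr)
{
	return cpu_to_be64(page_dma_addr | MTHCA_MTT_FLAG_PRESENT);
}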

3 files changed, 89 insertions(+), 9 deletions(-)

drivers/infiniband/hw/mthca/mthca_dev.h (+2)
@@ -464,6 +464,8 @@
 int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd);
 void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
 
+int mthca_write_mtt_size(struct mthca_dev *dev);
+
 struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size);
 void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt);
 int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
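
The prototype exported here lets callers ask how large a chunk a single
mthca_write_mtt() call may take. A minimal sketch of how the two calls
combine, assuming a caller that builds its page list in a bounded scratch
buffer (the names pages, npages, n are ours, not from the patch):

/*
 * Sketch only: flush a page list to the MTT in legal chunks.
 * dev, mtt, pages[] and npages are assumed set up by the caller.
 */
int n = 0, err = 0;
int chunk_max = mthca_write_mtt_size(dev);

while (npages > 0) {
	int chunk = min(chunk_max, npages);

	err = mthca_write_mtt(dev, mtt, n, pages + n, chunk);
	if (err)
		break;
	n      += chunk;
	npages -= chunk;
}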
drivers/infiniband/hw/mthca/mthca_mr.c (+80 -2)
@@ -243,8 +243,8 @@
 	kfree(mtt);
 }
 
-int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
-		    int start_index, u64 *buffer_list, int list_len)
+static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+			     int start_index, u64 *buffer_list, int list_len)
 {
 	struct mthca_mailbox *mailbox;
 	__be64 *mtt_entry;
@@ -293,6 +293,84 @@
 out:
 	mthca_free_mailbox(dev, mailbox);
 	return err;
+}
+
+int mthca_write_mtt_size(struct mthca_dev *dev)
+{
+	if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy)
+		/*
+		 * Be friendly to WRITE_MTT command
+		 * and leave two empty slots for the
+		 * index and reserved fields of the
+		 * mailbox.
+		 */
+		return PAGE_SIZE / sizeof (u64) - 2;
+
+	/* For Arbel, all MTTs must fit in the same page. */
+	return mthca_is_memfree(dev) ? (PAGE_SIZE / sizeof (u64)) : 0x7ffffff;
+}
+
+void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
+			       int start_index, u64 *buffer_list, int list_len)
+{
+	u64 __iomem *mtts;
+	int i;
+
+	mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * MTHCA_MTT_SEG_SIZE +
+		start_index * sizeof (u64);
+	for (i = 0; i < list_len; ++i)
+		mthca_write64_raw(cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT),
+				  mtts + i);
+}
+
+void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
+			       int start_index, u64 *buffer_list, int list_len)
+{
+	__be64 *mtts;
+	dma_addr_t dma_handle;
+	int i;
+	int s = start_index * sizeof (u64);
+
+	/* For Arbel, all MTTs must fit in the same page. */
+	BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE);
+	/* Require full segments */
+	BUG_ON(s % MTHCA_MTT_SEG_SIZE);
+
+	mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg +
+				s / MTHCA_MTT_SEG_SIZE, &dma_handle);
+
+	BUG_ON(!mtts);
+
+	for (i = 0; i < list_len; ++i)
+		mtts[i] = cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT);
+
+	dma_sync_single(&dev->pdev->dev, dma_handle, list_len * sizeof (u64), DMA_TO_DEVICE);
+}
+
+int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+		    int start_index, u64 *buffer_list, int list_len)
+{
+	int size = mthca_write_mtt_size(dev);
+	int chunk;
+
+	if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy)
+		return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len);
+
+	while (list_len > 0) {
+		chunk = min(size, list_len);
+		if (mthca_is_memfree(dev))
+			mthca_arbel_write_mtt_seg(dev, mtt, start_index,
+						  buffer_list, chunk);
+		else
+			mthca_tavor_write_mtt_seg(dev, mtt, start_index,
						  buffer_list, chunk);
+
+		list_len    -= chunk;
+		start_index += chunk;
+		buffer_list += chunk;
+	}
+
+	return 0;
 }
 
 static inline u32 tavor_hw_index_to_key(u32 ind)
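
Worked out for 4 KB pages (PAGE_SIZE = 4096, sizeof (u64) = 8), the three
return values of mthca_write_mtt_size() above are:

	mailbox (WRITE_MTT) path:  4096 / 8 - 2 = 510 entries per command
	                           (two slots reserved for the mailbox
	                           index and reserved fields)
	Arbel CPU path:            4096 / 8 = 512 entries
	                           (a chunk must not cross a page of the
	                           MTT table, hence the BUG_ON above)
	Tavor CPU path:            0x7ffffff, effectively unlimited
	                           (the whole table is written through one
	                           contiguous mapping)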
drivers/infiniband/hw/mthca/mthca_provider.c (+7 -7)
@@ -1015,6 +1015,7 @@
 	int shift, n, len;
 	int i, j, k;
 	int err = 0;
+	int write_mtt_size;
 
 	shift = ffs(region->page_size) - 1;
 
@@ -1040,6 +1041,8 @@
 
 	i = n = 0;
 
+	write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
+
 	list_for_each_entry(chunk, &region->chunk_list, list)
 		for (j = 0; j < chunk->nmap; ++j) {
 			len = sg_dma_len(&chunk->page_list[j]) >> shift;
@@ -1047,14 +1050,11 @@
 				pages[i++] = sg_dma_address(&chunk->page_list[j]) +
 					region->page_size * k;
 				/*
-				 * Be friendly to WRITE_MTT command
-				 * and leave two empty slots for the
-				 * index and reserved fields of the
-				 * mailbox.
+				 * Be friendly to write_mtt and pass it chunks
+				 * of appropriate size.
 				 */
-				if (i == PAGE_SIZE / sizeof (u64) - 2) {
-					err = mthca_write_mtt(dev, mr->mtt,
-							      n, pages, i);
+				if (i == write_mtt_size) {
+					err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
 					if (err)
 						goto mtt_done;
 					n += i;
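
One design note on the min() in this file: mthca_write_mtt() now chunks
internally, so the clamp to PAGE_SIZE / sizeof *pages (4096 / 8 = 512
entries with 4 KB pages) reflects the caller rather than the API; to our
reading, the pages scratch buffer in mthca_reg_user_mr() is a single
kernel page, so it must be flushed before it fills, whatever larger chunk
size mthca_write_mtt_size() would otherwise allow.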