Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/pseries/mm: call H_BLOCK_REMOVE

This hypervisor call allows the removal of up to 8 PTEs with only one call to
tlbie.

The virtual pages must all be within the same naturally aligned 8-page
virtual address block and have the same page and segment size encodings.

Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

authored by

Laurent Dufour and committed by
Michael Ellerman
ba2dd8a2 0effa488

+207 -8
+1
arch/powerpc/include/asm/hvcall.h
··· 278 278 #define H_COP 0x304 279 279 #define H_GET_MPP_X 0x314 280 280 #define H_SET_MODE 0x31C 281 + #define H_BLOCK_REMOVE 0x328 281 282 #define H_CLEAR_HPT 0x358 282 283 #define H_REQUEST_VMC 0x360 283 284 #define H_RESIZE_HPT_PREPARE 0x36C
+206 -8
arch/powerpc/platforms/pseries/lpar.c
··· 417 417 BUG_ON(lpar_rc != H_SUCCESS); 418 418 } 419 419 420 + 421 + /* 422 + * As defined in the PAPR's section 14.5.4.1.8 423 + * The control mask doesn't include the returned reference and change bit from 424 + * the processed PTE. 425 + */ 426 + #define HBLKR_AVPN 0x0100000000000000UL 427 + #define HBLKR_CTRL_MASK 0xf800000000000000UL 428 + #define HBLKR_CTRL_SUCCESS 0x8000000000000000UL 429 + #define HBLKR_CTRL_ERRNOTFOUND 0x8800000000000000UL 430 + #define HBLKR_CTRL_ERRBUSY 0xa000000000000000UL 431 + 432 + /** 433 + * H_BLOCK_REMOVE caller. 434 + * @idx should point to the latest @param entry set with a PTEX. 435 + * If PTE cannot be processed because another CPUs has already locked that 436 + * group, those entries are put back in @param starting at index 1. 437 + * If entries has to be retried and @retry_busy is set to true, these entries 438 + * are retried until success. If @retry_busy is set to false, the returned 439 + * is the number of entries yet to process. 440 + */ 441 + static unsigned long call_block_remove(unsigned long idx, unsigned long *param, 442 + bool retry_busy) 443 + { 444 + unsigned long i, rc, new_idx; 445 + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; 446 + 447 + if (idx < 2) { 448 + pr_warn("Unexpected empty call to H_BLOCK_REMOVE"); 449 + return 0; 450 + } 451 + again: 452 + new_idx = 0; 453 + if (idx > PLPAR_HCALL9_BUFSIZE) { 454 + pr_err("Too many PTEs (%lu) for H_BLOCK_REMOVE", idx); 455 + idx = PLPAR_HCALL9_BUFSIZE; 456 + } else if (idx < PLPAR_HCALL9_BUFSIZE) 457 + param[idx] = HBR_END; 458 + 459 + rc = plpar_hcall9(H_BLOCK_REMOVE, retbuf, 460 + param[0], /* AVA */ 461 + param[1], param[2], param[3], param[4], /* TS0-7 */ 462 + param[5], param[6], param[7], param[8]); 463 + if (rc == H_SUCCESS) 464 + return 0; 465 + 466 + BUG_ON(rc != H_PARTIAL); 467 + 468 + /* Check that the unprocessed entries were 'not found' or 'busy' */ 469 + for (i = 0; i < idx-1; i++) { 470 + unsigned long ctrl = retbuf[i] & HBLKR_CTRL_MASK; 471 + 472 
+ if (ctrl == HBLKR_CTRL_ERRBUSY) { 473 + param[++new_idx] = param[i+1]; 474 + continue; 475 + } 476 + 477 + BUG_ON(ctrl != HBLKR_CTRL_SUCCESS 478 + && ctrl != HBLKR_CTRL_ERRNOTFOUND); 479 + } 480 + 481 + /* 482 + * If there were entries found busy, retry these entries if requested, 483 + * of if all the entries have to be retried. 484 + */ 485 + if (new_idx && (retry_busy || new_idx == (PLPAR_HCALL9_BUFSIZE-1))) { 486 + idx = new_idx + 1; 487 + goto again; 488 + } 489 + 490 + return new_idx; 491 + } 492 + 420 493 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 421 494 /* 422 495 * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need ··· 497 424 */ 498 425 #define PPC64_HUGE_HPTE_BATCH 12 499 426 500 - static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, 501 - unsigned long *vpn, int count, 502 - int psize, int ssize) 427 + static void hugepage_block_invalidate(unsigned long *slot, unsigned long *vpn, 428 + int count, int psize, int ssize) 429 + { 430 + unsigned long param[PLPAR_HCALL9_BUFSIZE]; 431 + unsigned long shift, current_vpgb, vpgb; 432 + int i, pix = 0; 433 + 434 + shift = mmu_psize_defs[psize].shift; 435 + 436 + for (i = 0; i < count; i++) { 437 + /* 438 + * Shifting 3 bits more on the right to get a 439 + * 8 pages aligned virtual addresse. 440 + */ 441 + vpgb = (vpn[i] >> (shift - VPN_SHIFT + 3)); 442 + if (!pix || vpgb != current_vpgb) { 443 + /* 444 + * Need to start a new 8 pages block, flush 445 + * the current one if needed. 446 + */ 447 + if (pix) 448 + (void)call_block_remove(pix, param, true); 449 + current_vpgb = vpgb; 450 + param[0] = hpte_encode_avpn(vpn[i], psize, ssize); 451 + pix = 1; 452 + } 453 + 454 + param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot[i]; 455 + if (pix == PLPAR_HCALL9_BUFSIZE) { 456 + pix = call_block_remove(pix, param, false); 457 + /* 458 + * pix = 0 means that all the entries were 459 + * removed, we can start a new block. 
460 + * Otherwise, this means that there are entries 461 + * to retry, and pix points to latest one, so 462 + * we should increment it and try to continue 463 + * the same block. 464 + */ 465 + if (pix) 466 + pix++; 467 + } 468 + } 469 + if (pix) 470 + (void)call_block_remove(pix, param, true); 471 + } 472 + 473 + static void hugepage_bulk_invalidate(unsigned long *slot, unsigned long *vpn, 474 + int count, int psize, int ssize) 503 475 { 504 476 unsigned long param[PLPAR_HCALL9_BUFSIZE]; 505 477 int i = 0, pix = 0, rc; 506 - unsigned long flags = 0; 507 - int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); 508 - 509 - if (lock_tlbie) 510 - spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); 511 478 512 479 for (i = 0; i < count; i++) { 513 480 ··· 575 462 param[6], param[7]); 576 463 BUG_ON(rc != H_SUCCESS); 577 464 } 465 + } 466 + 467 + static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, 468 + unsigned long *vpn, 469 + int count, int psize, 470 + int ssize) 471 + { 472 + unsigned long flags = 0; 473 + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); 474 + 475 + if (lock_tlbie) 476 + spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); 477 + 478 + if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) 479 + hugepage_block_invalidate(slot, vpn, count, psize, ssize); 480 + else 481 + hugepage_bulk_invalidate(slot, vpn, count, psize, ssize); 578 482 579 483 if (lock_tlbie) 580 484 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); ··· 694 564 return slot; 695 565 } 696 566 567 + /** 568 + * The hcall H_BLOCK_REMOVE implies that the virtual pages to processed are 569 + * "all within the same naturally aligned 8 page virtual address block". 
570 + */ 571 + static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch, 572 + unsigned long *param) 573 + { 574 + unsigned long vpn; 575 + unsigned long i, pix = 0; 576 + unsigned long index, shift, slot, current_vpgb, vpgb; 577 + real_pte_t pte; 578 + int psize, ssize; 579 + 580 + psize = batch->psize; 581 + ssize = batch->ssize; 582 + 583 + for (i = 0; i < number; i++) { 584 + vpn = batch->vpn[i]; 585 + pte = batch->pte[i]; 586 + pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { 587 + /* 588 + * Shifting 3 bits more on the right to get a 589 + * 8 pages aligned virtual addresse. 590 + */ 591 + vpgb = (vpn >> (shift - VPN_SHIFT + 3)); 592 + if (!pix || vpgb != current_vpgb) { 593 + /* 594 + * Need to start a new 8 pages block, flush 595 + * the current one if needed. 596 + */ 597 + if (pix) 598 + (void)call_block_remove(pix, param, 599 + true); 600 + current_vpgb = vpgb; 601 + param[0] = hpte_encode_avpn(vpn, psize, 602 + ssize); 603 + pix = 1; 604 + } 605 + 606 + slot = compute_slot(pte, vpn, index, shift, ssize); 607 + param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot; 608 + 609 + if (pix == PLPAR_HCALL9_BUFSIZE) { 610 + pix = call_block_remove(pix, param, false); 611 + /* 612 + * pix = 0 means that all the entries were 613 + * removed, we can start a new block. 614 + * Otherwise, this means that there are entries 615 + * to retry, and pix points to latest one, so 616 + * we should increment it and try to continue 617 + * the same block. 618 + */ 619 + if (pix) 620 + pix++; 621 + } 622 + } pte_iterate_hashed_end(); 623 + } 624 + 625 + if (pix) 626 + (void)call_block_remove(pix, param, true); 627 + } 628 + 697 629 /* 698 630 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie 699 631 * lock. 
··· 774 582 775 583 if (lock_tlbie) 776 584 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); 585 + 586 + if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) { 587 + do_block_remove(number, batch, param); 588 + goto out; 589 + } 777 590 778 591 psize = batch->psize; 779 592 ssize = batch->ssize; ··· 818 621 BUG_ON(rc != H_SUCCESS); 819 622 } 820 623 624 + out: 821 625 if (lock_tlbie) 822 626 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); 823 627 }