Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RAS/AMD/ATL: Implement DF 4.5 NP2 denormalization

Unlike with previous Data Fabric versions, with Data Fabric 4.5
non-power-of-2 denormalization, there are bits of the system physical
address that can't be fully reconstructed from the normalized address.

To determine the proper combination of missing system physical address
bits, iterate through each possible combination of these bits, normalize
the resulting system physical address, and compare to the original
address that is being translated. If the addresses match, then the
correct permutation of bits has been found.

Signed-off-by: John Allen <john.allen@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Yazen Ghannam <yazen.ghannam@amd.com>
Link: https://lore.kernel.org/r/20240606203313.51197-6-john.allen@amd.com

authored by

John Allen and committed by
Borislav Petkov (AMD)
e0372d69 d5811a16

+621
+561
drivers/ras/amd/atl/denormalize.c
··· 448 448 return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id; 449 449 } 450 450 451 + static u16 get_logical_coh_st_fabric_id_for_current_spa(struct addr_ctx *ctx, 452 + struct df4p5_denorm_ctx *denorm_ctx) 453 + { 454 + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T; 455 + bool hash_pa8, hash_pa9, hash_pa12, hash_pa13; 456 + u64 cs_id = 0; 457 + 458 + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); 459 + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); 460 + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); 461 + hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl); 462 + 463 + hash_pa8 = FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa); 464 + hash_pa8 ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa); 465 + hash_pa8 ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k; 466 + hash_pa8 ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M; 467 + hash_pa8 ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G; 468 + hash_pa8 ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T; 469 + 470 + hash_pa9 = FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa); 471 + hash_pa9 ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k; 472 + hash_pa9 ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M; 473 + hash_pa9 ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G; 474 + hash_pa9 ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T; 475 + 476 + hash_pa12 = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa); 477 + hash_pa12 ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k; 478 + hash_pa12 ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M; 479 + hash_pa12 ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G; 480 + hash_pa12 ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T; 481 + 482 + hash_pa13 = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa); 483 + hash_pa13 ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k; 484 + hash_pa13 ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M; 485 + hash_pa13 ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G; 486 + hash_pa13 ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T; 487 + 488 + switch (ctx->map.intlv_mode) { 489 + case DF4p5_NPS0_24CHAN_1K_HASH: 490 + cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3; 491 + cs_id %= denorm_ctx->mod_value; 492 + cs_id <<= 2; 493 + cs_id |= (hash_pa9 | (hash_pa12 << 1)); 494 + cs_id |= hash_pa8 << df_cfg.socket_id_shift; 495 + break; 496 + 497 + case DF4p5_NPS0_24CHAN_2K_HASH: 498 + cs_id = FIELD_GET(GENMASK_ULL(63, 14), denorm_ctx->current_spa) << 4; 499 + cs_id %= denorm_ctx->mod_value; 500 + cs_id <<= 2; 501 + cs_id |= (hash_pa12 | (hash_pa13 << 1)); 502 + cs_id |= hash_pa8 << df_cfg.socket_id_shift; 503 + break; 504 + 505 + case DF4p5_NPS1_12CHAN_1K_HASH: 506 + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; 507 + cs_id %= denorm_ctx->mod_value; 508 + cs_id <<= 2; 509 + cs_id |= (hash_pa8 | (hash_pa9 << 1)); 510 + break; 511 + 512 + case DF4p5_NPS1_12CHAN_2K_HASH: 513 + cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3; 514 + cs_id %= denorm_ctx->mod_value; 515 + cs_id <<= 2; 516 + cs_id |= (hash_pa8 | (hash_pa12 << 1)); 517 + break; 518 + 519 + case DF4p5_NPS2_6CHAN_1K_HASH: 520 + case DF4p5_NPS1_10CHAN_1K_HASH: 521 + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; 522 + cs_id |= (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) << 1); 523 + cs_id %= denorm_ctx->mod_value; 524 + cs_id <<= 1; 525 + cs_id |= hash_pa8; 526 + break; 527 + 528 + case DF4p5_NPS2_6CHAN_2K_HASH: 529 + case DF4p5_NPS1_10CHAN_2K_HASH: 530 + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; 531 + cs_id %= denorm_ctx->mod_value; 532 + cs_id <<= 1; 533 + cs_id |= hash_pa8; 534 + break; 535 + 536 + case DF4p5_NPS4_3CHAN_1K_HASH: 537 + case DF4p5_NPS2_5CHAN_1K_HASH: 538 + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; 539 + cs_id |= FIELD_GET(GENMASK_ULL(9, 8), denorm_ctx->current_spa); 540 + cs_id %= denorm_ctx->mod_value; 541 + break; 542 + 543 + case DF4p5_NPS4_3CHAN_2K_HASH: 544 + case DF4p5_NPS2_5CHAN_2K_HASH: 545 + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; 546 + cs_id |= FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) << 1; 547 + cs_id %= denorm_ctx->mod_value; 548 + break; 549 + 550 + default: 551 + atl_debug_on_bad_intlv_mode(ctx); 552 + return 0; 553 + } 554 + 555 + if (cs_id > 0xffff) { 556 + atl_debug(ctx, "Translation error: Resulting cs_id larger than u16\n"); 557 + return 0; 558 + } 559 + 560 + return cs_id; 561 + } 562 + 451 563 static int denorm_addr_common(struct addr_ctx *ctx) 452 564 { 453 565 u64 denorm_addr; ··· 811 699 return 0; 812 700 } 813 701 702 + static u64 normalize_addr_df4p5_np2(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx, 703 + u64 addr) 704 + { 705 + u64 temp_addr_a = 0, temp_addr_b = 0; 706 + 707 + switch (ctx->map.intlv_mode) { 708 + case DF4p5_NPS0_24CHAN_1K_HASH: 709 + case DF4p5_NPS1_12CHAN_1K_HASH: 710 + case DF4p5_NPS2_6CHAN_1K_HASH: 711 + case DF4p5_NPS4_3CHAN_1K_HASH: 712 + case DF4p5_NPS1_10CHAN_1K_HASH: 713 + case DF4p5_NPS2_5CHAN_1K_HASH: 714 + temp_addr_a = FIELD_GET(GENMASK_ULL(11, 10), addr) << 8; 715 + break; 716 + 717 + case DF4p5_NPS0_24CHAN_2K_HASH: 718 + case DF4p5_NPS1_12CHAN_2K_HASH: 719 + case DF4p5_NPS2_6CHAN_2K_HASH: 720 + case DF4p5_NPS4_3CHAN_2K_HASH: 721 + case DF4p5_NPS1_10CHAN_2K_HASH: 722 + case DF4p5_NPS2_5CHAN_2K_HASH: 723 + temp_addr_a = FIELD_GET(GENMASK_ULL(11, 9), addr) << 8; 724 + break; 725 + 726 + default: 727 + atl_debug_on_bad_intlv_mode(ctx); 728 + return 0; 729 + } 730 + 731 + switch (ctx->map.intlv_mode) { 732 + case DF4p5_NPS0_24CHAN_1K_HASH: 733 + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value; 734 + temp_addr_b <<= 10; 735 + break; 736 + 737 + case DF4p5_NPS0_24CHAN_2K_HASH: 738 + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 14), addr) / denorm_ctx->mod_value; 739 + temp_addr_b <<= 11; 740 + break; 741 + 742 + case DF4p5_NPS1_12CHAN_1K_HASH: 743 + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value; 744 + temp_addr_b <<= 10; 745 + break; 746 + 747 + case DF4p5_NPS1_12CHAN_2K_HASH: 748 + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value; 749 + temp_addr_b <<= 11; 750 + break; 751 + 752 + case DF4p5_NPS2_6CHAN_1K_HASH: 753 + case DF4p5_NPS1_10CHAN_1K_HASH: 754 + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1; 755 + temp_addr_b |= FIELD_GET(BIT_ULL(9), addr); 756 + temp_addr_b /= denorm_ctx->mod_value; 757 + temp_addr_b <<= 10; 758 + break; 759 + 760 + case DF4p5_NPS2_6CHAN_2K_HASH: 761 + case DF4p5_NPS1_10CHAN_2K_HASH: 762 + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value; 763 + temp_addr_b <<= 11; 764 + break; 765 + 766 + case DF4p5_NPS4_3CHAN_1K_HASH: 767 + case DF4p5_NPS2_5CHAN_1K_HASH: 768 + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 2; 769 + temp_addr_b |= FIELD_GET(GENMASK_ULL(9, 8), addr); 770 + temp_addr_b /= denorm_ctx->mod_value; 771 + temp_addr_b <<= 10; 772 + break; 773 + 774 + case DF4p5_NPS4_3CHAN_2K_HASH: 775 + case DF4p5_NPS2_5CHAN_2K_HASH: 776 + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1; 777 + temp_addr_b |= FIELD_GET(BIT_ULL(8), addr); 778 + temp_addr_b /= denorm_ctx->mod_value; 779 + temp_addr_b <<= 11; 780 + break; 781 + 782 + default: 783 + atl_debug_on_bad_intlv_mode(ctx); 784 + return 0; 785 + } 786 + 787 + return denorm_ctx->base_denorm_addr | temp_addr_a | temp_addr_b; 788 + } 789 + 790 + static void recalculate_hashed_bits_df4p5_np2(struct addr_ctx *ctx, 791 + struct df4p5_denorm_ctx *denorm_ctx) 792 + { 793 + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T, hashed_bit; 794 + 795 + if (!denorm_ctx->rehash_vector) 796 + return; 797 + 798 + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); 799 + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); 800 + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); 801 + hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl); 802 + 803 + if (denorm_ctx->rehash_vector & BIT_ULL(8)) { 804 + hashed_bit = FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa); 805 + hashed_bit ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa); 806 + hashed_bit ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k; 807 + hashed_bit ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M; 808 + hashed_bit ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G; 809 + hashed_bit ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T; 810 + 811 + if (FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) != hashed_bit) 812 + denorm_ctx->current_spa ^= BIT_ULL(8); 813 + } 814 + 815 + if (denorm_ctx->rehash_vector & BIT_ULL(9)) { 816 + hashed_bit = FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa); 817 + hashed_bit ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k; 818 + hashed_bit ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M; 819 + hashed_bit ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G; 820 + hashed_bit ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T; 821 + 822 + if (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) != hashed_bit) 823 + denorm_ctx->current_spa ^= BIT_ULL(9); 824 + } 825 + 826 + if (denorm_ctx->rehash_vector & BIT_ULL(12)) { 827 + hashed_bit = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa); 828 + hashed_bit ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k; 829 + hashed_bit ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M; 830 + hashed_bit ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G; 831 + hashed_bit ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T; 832 + 833 + if (FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa) != hashed_bit) 834 + denorm_ctx->current_spa ^= BIT_ULL(12); 835 + } 836 + 837 + if (denorm_ctx->rehash_vector & BIT_ULL(13)) { 838 + hashed_bit = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa); 839 + hashed_bit ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k; 840 + hashed_bit ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M; 841 + hashed_bit ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G; 842 + hashed_bit ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T; 843 + 844 + if (FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa) != hashed_bit) 845 + denorm_ctx->current_spa ^= BIT_ULL(13); 846 + } 847 + } 848 + 849 + static bool match_logical_coh_st_fabric_id(struct addr_ctx *ctx, 850 + struct df4p5_denorm_ctx *denorm_ctx) 851 + { 852 + /* 853 + * The logical CS fabric ID of the permutation must be calculated from the 854 + * current SPA with the base and with the MMIO hole. 855 + */ 856 + u16 id = get_logical_coh_st_fabric_id_for_current_spa(ctx, denorm_ctx); 857 + 858 + atl_debug(ctx, "Checking calculated logical coherent station fabric id:\n"); 859 + atl_debug(ctx, " calculated fabric id = 0x%x\n", id); 860 + atl_debug(ctx, " expected fabric id = 0x%x\n", denorm_ctx->coh_st_fabric_id); 861 + 862 + return denorm_ctx->coh_st_fabric_id == id; 863 + } 864 + 865 + static bool match_norm_addr(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx) 866 + { 867 + u64 addr = remove_base_and_hole(ctx, denorm_ctx->current_spa); 868 + 869 + /* 870 + * The normalized address must be calculated with the current SPA without 871 + * the base and without the MMIO hole. 872 + */ 873 + addr = normalize_addr_df4p5_np2(ctx, denorm_ctx, addr); 874 + 875 + atl_debug(ctx, "Checking calculated normalized address:\n"); 876 + atl_debug(ctx, " calculated normalized addr = 0x%016llx\n", addr); 877 + atl_debug(ctx, " expected normalized addr = 0x%016llx\n", ctx->ret_addr); 878 + 879 + return addr == ctx->ret_addr; 880 + } 881 + 882 + static int check_permutations(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx) 883 + { 884 + u64 test_perm, temp_addr, denorm_addr, num_perms; 885 + unsigned int dropped_remainder; 886 + 887 + denorm_ctx->div_addr *= denorm_ctx->mod_value; 888 + 889 + /* 890 + * The high order bits of num_permutations represent the permutations 891 + * of the dropped remainder. This will be either 0-3 or 0-5 depending 892 + * on the interleave mode. The low order bits represent the 893 + * permutations of other "lost" bits which will be any combination of 894 + * 1, 2, or 3 bits depending on the interleave mode. 895 + */ 896 + num_perms = denorm_ctx->mod_value << denorm_ctx->perm_shift; 897 + 898 + for (test_perm = 0; test_perm < num_perms; test_perm++) { 899 + denorm_addr = denorm_ctx->base_denorm_addr; 900 + dropped_remainder = test_perm >> denorm_ctx->perm_shift; 901 + temp_addr = denorm_ctx->div_addr + dropped_remainder; 902 + 903 + switch (ctx->map.intlv_mode) { 904 + case DF4p5_NPS0_24CHAN_2K_HASH: 905 + denorm_addr |= temp_addr << 14; 906 + break; 907 + 908 + case DF4p5_NPS0_24CHAN_1K_HASH: 909 + case DF4p5_NPS1_12CHAN_2K_HASH: 910 + denorm_addr |= temp_addr << 13; 911 + break; 912 + 913 + case DF4p5_NPS1_12CHAN_1K_HASH: 914 + case DF4p5_NPS2_6CHAN_2K_HASH: 915 + case DF4p5_NPS1_10CHAN_2K_HASH: 916 + denorm_addr |= temp_addr << 12; 917 + break; 918 + 919 + case DF4p5_NPS2_6CHAN_1K_HASH: 920 + case DF4p5_NPS1_10CHAN_1K_HASH: 921 + denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 9; 922 + denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12; 923 + break; 924 + 925 + case DF4p5_NPS4_3CHAN_1K_HASH: 926 + case DF4p5_NPS2_5CHAN_1K_HASH: 927 + denorm_addr |= FIELD_GET(GENMASK_ULL(1, 0), temp_addr) << 8; 928 + denorm_addr |= FIELD_GET(GENMASK_ULL(63, 2), (temp_addr)) << 12; 929 + break; 930 + 931 + case DF4p5_NPS4_3CHAN_2K_HASH: 932 + case DF4p5_NPS2_5CHAN_2K_HASH: 933 + denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 8; 934 + denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12; 935 + break; 936 + 937 + default: 938 + atl_debug_on_bad_intlv_mode(ctx); 939 + return -EINVAL; 940 + } 941 + 942 + switch (ctx->map.intlv_mode) { 943 + case DF4p5_NPS0_24CHAN_1K_HASH: 944 + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; 945 + denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9; 946 + denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 12; 947 + break; 948 + 949 + case DF4p5_NPS0_24CHAN_2K_HASH: 950 + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; 951 + denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12; 952 + denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 13; 953 + break; 954 + 955 + case DF4p5_NPS1_12CHAN_2K_HASH: 956 + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; 957 + denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12; 958 + break; 959 + 960 + case DF4p5_NPS1_12CHAN_1K_HASH: 961 + case DF4p5_NPS4_3CHAN_1K_HASH: 962 + case DF4p5_NPS2_5CHAN_1K_HASH: 963 + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; 964 + denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9; 965 + break; 966 + 967 + case DF4p5_NPS2_6CHAN_1K_HASH: 968 + case DF4p5_NPS2_6CHAN_2K_HASH: 969 + case DF4p5_NPS4_3CHAN_2K_HASH: 970 + case DF4p5_NPS1_10CHAN_1K_HASH: 971 + case DF4p5_NPS1_10CHAN_2K_HASH: 972 + case DF4p5_NPS2_5CHAN_2K_HASH: 973 + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; 974 + break; 975 + 976 + default: 977 + atl_debug_on_bad_intlv_mode(ctx); 978 + return -EINVAL; 979 + } 980 + 981 + denorm_ctx->current_spa = add_base_and_hole(ctx, denorm_addr); 982 + recalculate_hashed_bits_df4p5_np2(ctx, denorm_ctx); 983 + 984 + atl_debug(ctx, "Checking potential system physical address 0x%016llx\n", 985 + denorm_ctx->current_spa); 986 + 987 + if (!match_logical_coh_st_fabric_id(ctx, denorm_ctx)) 988 + continue; 989 + 990 + if (!match_norm_addr(ctx, denorm_ctx)) 991 + continue; 992 + 993 + if (denorm_ctx->resolved_spa == INVALID_SPA || 994 + denorm_ctx->current_spa > denorm_ctx->resolved_spa) 995 + denorm_ctx->resolved_spa = denorm_ctx->current_spa; 996 + } 997 + 998 + if (denorm_ctx->resolved_spa == INVALID_SPA) { 999 + atl_debug(ctx, "Failed to find valid SPA for normalized address 0x%016llx\n", 1000 + ctx->ret_addr); 1001 + return -EINVAL; 1002 + } 1003 + 1004 + /* Return the resolved SPA without the base, without the MMIO hole */ 1005 + ctx->ret_addr = remove_base_and_hole(ctx, denorm_ctx->resolved_spa); 1006 + 1007 + return 0; 1008 + } 1009 + 1010 + static int init_df4p5_denorm_ctx(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx) 1011 + { 1012 + denorm_ctx->current_spa = INVALID_SPA; 1013 + denorm_ctx->resolved_spa = INVALID_SPA; 1014 + 1015 + switch (ctx->map.intlv_mode) { 1016 + case DF4p5_NPS0_24CHAN_1K_HASH: 1017 + denorm_ctx->perm_shift = 3; 1018 + denorm_ctx->rehash_vector = BIT(8) | BIT(9) | BIT(12); 1019 + break; 1020 + 1021 + case DF4p5_NPS0_24CHAN_2K_HASH: 1022 + denorm_ctx->perm_shift = 3; 1023 + denorm_ctx->rehash_vector = BIT(8) | BIT(12) | BIT(13); 1024 + break; 1025 + 1026 + case DF4p5_NPS1_12CHAN_1K_HASH: 1027 + denorm_ctx->perm_shift = 2; 1028 + denorm_ctx->rehash_vector = BIT(8); 1029 + break; 1030 + 1031 + case DF4p5_NPS1_12CHAN_2K_HASH: 1032 + denorm_ctx->perm_shift = 2; 1033 + denorm_ctx->rehash_vector = BIT(8) | BIT(12); 1034 + break; 1035 + 1036 + case DF4p5_NPS2_6CHAN_1K_HASH: 1037 + case DF4p5_NPS2_6CHAN_2K_HASH: 1038 + case DF4p5_NPS1_10CHAN_1K_HASH: 1039 + case DF4p5_NPS1_10CHAN_2K_HASH: 1040 + denorm_ctx->perm_shift = 1; 1041 + denorm_ctx->rehash_vector = BIT(8); 1042 + break; 1043 + 1044 + case DF4p5_NPS4_3CHAN_1K_HASH: 1045 + case DF4p5_NPS2_5CHAN_1K_HASH: 1046 + denorm_ctx->perm_shift = 2; 1047 + denorm_ctx->rehash_vector = 0; 1048 + break; 1049 + 1050 + case DF4p5_NPS4_3CHAN_2K_HASH: 1051 + case DF4p5_NPS2_5CHAN_2K_HASH: 1052 + denorm_ctx->perm_shift = 1; 1053 + denorm_ctx->rehash_vector = 0; 1054 + break; 1055 + 1056 + default: 1057 + atl_debug_on_bad_intlv_mode(ctx); 1058 + return -EINVAL; 1059 + } 1060 + 1061 + denorm_ctx->base_denorm_addr = FIELD_GET(GENMASK_ULL(7, 0), ctx->ret_addr); 1062 + 1063 + switch (ctx->map.intlv_mode) { 1064 + case DF4p5_NPS0_24CHAN_1K_HASH: 1065 + case DF4p5_NPS1_12CHAN_1K_HASH: 1066 + case DF4p5_NPS2_6CHAN_1K_HASH: 1067 + case DF4p5_NPS4_3CHAN_1K_HASH: 1068 + case DF4p5_NPS1_10CHAN_1K_HASH: 1069 + case DF4p5_NPS2_5CHAN_1K_HASH: 1070 + denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(9, 8), ctx->ret_addr) << 10; 1071 + denorm_ctx->div_addr = FIELD_GET(GENMASK_ULL(63, 10), ctx->ret_addr); 1072 + break; 1073 + 1074 + case DF4p5_NPS0_24CHAN_2K_HASH: 1075 + case DF4p5_NPS1_12CHAN_2K_HASH: 1076 + case DF4p5_NPS2_6CHAN_2K_HASH: 1077 + case DF4p5_NPS4_3CHAN_2K_HASH: 1078 + case DF4p5_NPS1_10CHAN_2K_HASH: 1079 + case DF4p5_NPS2_5CHAN_2K_HASH: 1080 + denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(10, 8), ctx->ret_addr) << 9; 1081 + denorm_ctx->div_addr = FIELD_GET(GENMASK_ULL(63, 11), ctx->ret_addr); 1082 + break; 1083 + 1084 + default: 1085 + atl_debug_on_bad_intlv_mode(ctx); 1086 + return -EINVAL; 1087 + } 1088 + 1089 + if (ctx->map.num_intlv_chan % 3 == 0) 1090 + denorm_ctx->mod_value = 3; 1091 + else 1092 + denorm_ctx->mod_value = 5; 1093 + 1094 + denorm_ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx) - get_dst_fabric_id(ctx); 1095 + 1096 + atl_debug(ctx, "Initialized df4p5_denorm_ctx:"); 1097 + atl_debug(ctx, " mod_value = %d", denorm_ctx->mod_value); 1098 + atl_debug(ctx, " perm_shift = %d", denorm_ctx->perm_shift); 1099 + atl_debug(ctx, " rehash_vector = 0x%x", denorm_ctx->rehash_vector); 1100 + atl_debug(ctx, " base_denorm_addr = 0x%016llx", denorm_ctx->base_denorm_addr); 1101 + atl_debug(ctx, " div_addr = 0x%016llx", denorm_ctx->div_addr); 1102 + atl_debug(ctx, " coh_st_fabric_id = 0x%x", denorm_ctx->coh_st_fabric_id); 1103 + 1104 + return 0; 1105 + } 1106 + 1107 + /* 1108 + * For DF 4.5, parts of the physical address can be directly pulled from the 1109 + * normalized address. The exact bits will differ between interleave modes, but 1110 + * using NPS0_24CHAN_1K_HASH as an example, the normalized address consists of 1111 + * bits [63:13] (divided by 3), bits [11:10], and bits [7:0] of the system 1112 + * physical address. 1113 + * 1114 + * In this case, there is no way to reconstruct the missing bits (bits 8, 9, 1115 + * and 12) from the normalized address. Additionally, when bits [63:13] are 1116 + * divided by 3, the remainder is dropped. Determine the proper combination of 1117 + * "lost" bits and dropped remainder by iterating through each possible 1118 + * permutation of these bits and then normalizing the generated system physical 1119 + * addresses. If the normalized address matches the address we are trying to 1120 + * translate, then we have found the correct permutation of bits. 1121 + */ 1122 + static int denorm_addr_df4p5_np2(struct addr_ctx *ctx) 1123 + { 1124 + struct df4p5_denorm_ctx denorm_ctx; 1125 + int ret = 0; 1126 + 1127 + memset(&denorm_ctx, 0, sizeof(denorm_ctx)); 1128 + 1129 + atl_debug(ctx, "Denormalizing DF 4.5 normalized address 0x%016llx", ctx->ret_addr); 1130 + 1131 + ret = init_df4p5_denorm_ctx(ctx, &denorm_ctx); 1132 + if (ret) 1133 + return ret; 1134 + 1135 + return check_permutations(ctx, &denorm_ctx); 1136 + } 1137 + 814 1138 int denormalize_address(struct addr_ctx *ctx) 815 1139 { 816 1140 switch (ctx->map.intlv_mode) { ··· 1258 710 case DF4_NPS2_5CHAN_HASH: 1259 711 case DF4_NPS1_10CHAN_HASH: 1260 712 return denorm_addr_df4_np2(ctx); 713 + case DF4p5_NPS0_24CHAN_1K_HASH: 714 + case DF4p5_NPS4_3CHAN_1K_HASH: 715 + case DF4p5_NPS2_6CHAN_1K_HASH: 716 + case DF4p5_NPS1_12CHAN_1K_HASH: 717 + case DF4p5_NPS2_5CHAN_1K_HASH: 718 + case DF4p5_NPS1_10CHAN_1K_HASH: 719 + case DF4p5_NPS4_3CHAN_2K_HASH: 720 + case DF4p5_NPS2_6CHAN_2K_HASH: 721 + case DF4p5_NPS1_12CHAN_2K_HASH: 722 + case DF4p5_NPS0_24CHAN_2K_HASH: 723 + case DF4p5_NPS2_5CHAN_2K_HASH: 724 + case DF4p5_NPS1_10CHAN_2K_HASH: 725 + return denorm_addr_df4p5_np2(ctx); 1261 726 case DF3_6CHAN: 1262 727 return denorm_addr_df3_6chan(ctx); 1263 728 default:
+40
drivers/ras/amd/atl/internal.h
··· 37 37 #define DF_DRAM_BASE_LIMIT_LSB 28 38 38 #define MI300_DRAM_LIMIT_LSB 20 39 39 40 + #define INVALID_SPA ~0ULL 41 + 40 42 enum df_revisions { 41 43 UNKNOWN, 42 44 DF2, ··· 93 91 DF4p5_NPS0_24CHAN_2K_HASH = 0x47, 94 92 DF4p5_NPS2_5CHAN_2K_HASH = 0x48, 95 93 DF4p5_NPS1_10CHAN_2K_HASH = 0x49, 94 + }; 95 + 96 + struct df4p5_denorm_ctx { 97 + /* Indicates the number of "lost" bits. This will be 1, 2, or 3. */ 98 + u8 perm_shift; 99 + 100 + /* A mask indicating the bits that need to be rehashed. */ 101 + u16 rehash_vector; 102 + 103 + /* 104 + * Represents the value that the high bits of the normalized address 105 + * are divided by during normalization. This value will be 3 for 106 + * interleave modes with a number of channels divisible by 3 or the 107 + * value will be 5 for interleave modes with a number of channels 108 + * divisible by 5. Power-of-two interleave modes are handled 109 + * separately. 110 + */ 111 + u8 mod_value; 112 + 113 + /* 114 + * Represents the bits that can be directly pulled from the normalized 115 + * address. In each case, pass through bits [7:0] of the normalized 116 + * address. The other bits depend on the interleave bit position which 117 + * will be bit 10 for 1K interleave stripe cases and bit 11 for 2K 118 + * interleave stripe cases. 119 + */ 120 + u64 base_denorm_addr; 121 + 122 + /* 123 + * Represents the high bits of the physical address that have been 124 + * divided by the mod_value. 125 + */ 126 + u64 div_addr; 127 + 128 + u64 current_spa; 129 + u64 resolved_spa; 130 + 131 + u16 coh_st_fabric_id; 96 132 }; 97 133 98 134 struct df_flags {
+20
drivers/ras/amd/atl/map.c
··· 696 696 goto err; 697 697 break; 698 698 699 + case DF4p5_NPS4_3CHAN_1K_HASH: 700 + case DF4p5_NPS4_3CHAN_2K_HASH: 701 + case DF4p5_NPS2_5CHAN_1K_HASH: 702 + case DF4p5_NPS2_5CHAN_2K_HASH: 703 + case DF4p5_NPS2_6CHAN_1K_HASH: 704 + case DF4p5_NPS2_6CHAN_2K_HASH: 705 + case DF4p5_NPS1_10CHAN_1K_HASH: 706 + case DF4p5_NPS1_10CHAN_2K_HASH: 707 + case DF4p5_NPS1_12CHAN_1K_HASH: 708 + case DF4p5_NPS1_12CHAN_2K_HASH: 709 + if (ctx->map.num_intlv_sockets != 1 || !map_bits_valid(ctx, 8, 0, 1, 1)) 710 + goto err; 711 + break; 712 + 713 + case DF4p5_NPS0_24CHAN_1K_HASH: 714 + case DF4p5_NPS0_24CHAN_2K_HASH: 715 + if (ctx->map.num_intlv_sockets < 2 || !map_bits_valid(ctx, 8, 0, 1, 2)) 716 + goto err; 717 + break; 718 + 699 719 case MI3_HASH_8CHAN: 700 720 case MI3_HASH_16CHAN: 701 721 case MI3_HASH_32CHAN: