Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Replace deprecated gfx12 trap handler instructions

Newer assemblers reject S_WAITCNT when targeting gfx12. Every S_WAITCNT
in the trap handler, whichever counter it waits on, can be replaced by
S_WAITCNT 0 (< gfx12) or S_WAIT_IDLE (>= gfx12), since the handler never
has different memory instruction classes in flight concurrently.

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Reviewed-by: Lancelot Six <lancelot.six@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Jay Cornwall and committed by Alex Deucher
c5e35891 dec4f2d2

+97 -95
+70 -70
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
··· 711 711 0xbf0d8f7b, 0xbf840002, 712 712 0x887bff7b, 0xffff0000, 713 713 0xf4011bbd, 0xfa000010, 714 - 0xbf8cc07f, 0x8f6e976e, 714 + 0xbf8c0000, 0x8f6e976e, 715 715 0x8a77ff77, 0x00800000, 716 716 0x88776e77, 0xf4051bbd, 717 - 0xfa000000, 0xbf8cc07f, 717 + 0xfa000000, 0xbf8c0000, 718 718 0xf4051ebd, 0xfa000008, 719 - 0xbf8cc07f, 0x87ee6e6e, 719 + 0xbf8c0000, 0x87ee6e6e, 720 720 0xbf840001, 0xbe80206e, 721 721 0x876eff6d, 0x00ff0000, 722 722 0xbf850008, 0x876eff6d, ··· 1185 1185 0x785d0000, 0xe0304080, 1186 1186 0x785d0100, 0xe0304100, 1187 1187 0x785d0200, 0xe0304180, 1188 - 0x785d0300, 0xbf8c3f70, 1188 + 0x785d0300, 0xbf8c0000, 1189 1189 0x7e008500, 0x7e028501, 1190 1190 0x7e048502, 0x7e068503, 1191 1191 0x807c847c, 0x8078ff78, ··· 1194 1194 0x6e5d0000, 0xe0304080, 1195 1195 0x6e5d0100, 0xe0304100, 1196 1196 0x6e5d0200, 0xe0304180, 1197 - 0x6e5d0300, 0xbf8c3f70, 1197 + 0x6e5d0300, 0xbf8c0000, 1198 1198 0xbf820034, 0xbef603ff, 1199 1199 0x01000000, 0xbeee0378, 1200 1200 0x8078ff78, 0x00000400, ··· 1203 1203 0x785d0000, 0xe0304100, 1204 1204 0x785d0100, 0xe0304200, 1205 1205 0x785d0200, 0xe0304300, 1206 - 0x785d0300, 0xbf8c3f70, 1206 + 0x785d0300, 0xbf8c0000, 1207 1207 0x7e008500, 0x7e028501, 1208 1208 0x7e048502, 0x7e068503, 1209 1209 0x807c847c, 0x8078ff78, ··· 1213 1213 0x8f6f836f, 0x806f7c6f, 1214 1214 0xbefe03c1, 0xbeff0380, 1215 1215 0xe0304000, 0x785d0000, 1216 - 0xbf8c3f70, 0x7e008500, 1216 + 0xbf8c0000, 0x7e008500, 1217 1217 0x807c817c, 0x8078ff78, 1218 1218 0x00000080, 0xbf0a6f7c, 1219 1219 0xbf85fff7, 0xbeff03c1, ··· 1221 1221 0xe0304100, 0x6e5d0100, 1222 1222 0xe0304200, 0x6e5d0200, 1223 1223 0xe0304300, 0x6e5d0300, 1224 - 0xbf8c3f70, 0xb9783a05, 1224 + 0xbf8c0000, 0xb9783a05, 1225 1225 0x80788178, 0xbf0d9972, 1226 1226 0xbf850002, 0x8f788978, 1227 1227 0xbf820001, 0x8f788a78, ··· 1232 1232 0x01000000, 0xbefc03ff, 1233 1233 0x0000006c, 0x80f89078, 1234 1234 0xf429003a, 0xf0000000, 1235 - 0xbf8cc07f, 0x80fc847c, 1235 + 0xbf8c0000, 0x80fc847c, 1236 1236 
0xbf800000, 0xbe803100, 1237 1237 0xbe823102, 0x80f8a078, 1238 1238 0xf42d003a, 0xf0000000, 1239 - 0xbf8cc07f, 0x80fc887c, 1239 + 0xbf8c0000, 0x80fc887c, 1240 1240 0xbf800000, 0xbe803100, 1241 1241 0xbe823102, 0xbe843104, 1242 1242 0xbe863106, 0x80f8c078, 1243 1243 0xf431003a, 0xf0000000, 1244 - 0xbf8cc07f, 0x80fc907c, 1244 + 0xbf8c0000, 0x80fc907c, 1245 1245 0xbf800000, 0xbe803100, 1246 1246 0xbe823102, 0xbe843104, 1247 1247 0xbe863106, 0xbe883108, ··· 1271 1271 0xf4211cfa, 0xf0000000, 1272 1272 0x80788478, 0xf4211bba, 1273 1273 0xf0000000, 0x80788478, 1274 - 0xbf8cc07f, 0xb9eef814, 1274 + 0xbf8c0000, 0xb9eef814, 1275 1275 0xf4211bba, 0xf0000000, 1276 - 0x80788478, 0xbf8cc07f, 1276 + 0x80788478, 0xbf8c0000, 1277 1277 0xb9eef815, 0xbefc036f, 1278 1278 0xbefe0370, 0xbeff0371, 1279 1279 0xb9f9f816, 0xb9fbf803, ··· 1288 1288 0x0000ffff, 0xf4091c37, 1289 1289 0xfa000050, 0xf4091d37, 1290 1290 0xfa000060, 0xf4011e77, 1291 - 0xfa000074, 0xbf8cc07f, 1291 + 0xfa000074, 0xbf8c0000, 1292 1292 0x906e8977, 0x876fff6e, 1293 1293 0x003f8000, 0x906e8677, 1294 1294 0x876eff6e, 0x02000000, ··· 2299 2299 0xbf0d8f7b, 0xbf840002, 2300 2300 0x887bff7b, 0xffff0000, 2301 2301 0xf4011bbd, 0xfa000010, 2302 - 0xbf8cc07f, 0x8f6e976e, 2302 + 0xbf8c0000, 0x8f6e976e, 2303 2303 0x8a77ff77, 0x00800000, 2304 2304 0x88776e77, 0xf4051bbd, 2305 - 0xfa000000, 0xbf8cc07f, 2305 + 0xfa000000, 0xbf8c0000, 2306 2306 0xf4051ebd, 0xfa000008, 2307 - 0xbf8cc07f, 0x87ee6e6e, 2307 + 0xbf8c0000, 0x87ee6e6e, 2308 2308 0xbf840001, 0xbe80206e, 2309 2309 0x876eff6d, 0x00ff0000, 2310 2310 0xbf850008, 0x876eff6d, ··· 2319 2319 0x0000ffff, 0xbefa0380, 2320 2320 0xb9fa0283, 0xbeee037e, 2321 2321 0xbeef037f, 0xbefe0480, 2322 - 0xbf900004, 0xbf8cc07f, 2322 + 0xbf900004, 0xbf8c0000, 2323 2323 0x877aff7f, 0x04000000, 2324 2324 0x8f7a857a, 0x886d7a6d, 2325 2325 0x7e008200, 0xbefa037e, ··· 2595 2595 0xe0304080, 0x785d0100, 2596 2596 0xe0304100, 0x785d0200, 2597 2597 0xe0304180, 0x785d0300, 2598 - 0xbf8c3f70, 0x7e008500, 2598 + 
0xbf8c0000, 0x7e008500, 2599 2599 0x7e028501, 0x7e048502, 2600 2600 0x7e068503, 0x807c847c, 2601 2601 0x8078ff78, 0x00000200, ··· 2604 2604 0xe0304080, 0x6e5d0100, 2605 2605 0xe0304100, 0x6e5d0200, 2606 2606 0xe0304180, 0x6e5d0300, 2607 - 0xbf8c3f70, 0xbf820034, 2607 + 0xbf8c0000, 0xbf820034, 2608 2608 0xbef603ff, 0x01000000, 2609 2609 0xbeee0378, 0x8078ff78, 2610 2610 0x00000400, 0xbefc0384, ··· 2613 2613 0xe0304100, 0x785d0100, 2614 2614 0xe0304200, 0x785d0200, 2615 2615 0xe0304300, 0x785d0300, 2616 - 0xbf8c3f70, 0x7e008500, 2616 + 0xbf8c0000, 0x7e008500, 2617 2617 0x7e028501, 0x7e048502, 2618 2618 0x7e068503, 0x807c847c, 2619 2619 0x8078ff78, 0x00000400, ··· 2622 2622 0xbf84000e, 0x8f6f836f, 2623 2623 0x806f7c6f, 0xbefe03c1, 2624 2624 0xbeff0380, 0xe0304000, 2625 - 0x785d0000, 0xbf8c3f70, 2625 + 0x785d0000, 0xbf8c0000, 2626 2626 0x7e008500, 0x807c817c, 2627 2627 0x8078ff78, 0x00000080, 2628 2628 0xbf0a6f7c, 0xbf85fff7, ··· 2630 2630 0x6e5d0000, 0xe0304100, 2631 2631 0x6e5d0100, 0xe0304200, 2632 2632 0x6e5d0200, 0xe0304300, 2633 - 0x6e5d0300, 0xbf8c3f70, 2633 + 0x6e5d0300, 0xbf8c0000, 2634 2634 0xb9783a05, 0x80788178, 2635 2635 0xbf0d9972, 0xbf850002, 2636 2636 0x8f788978, 0xbf820001, ··· 2641 2641 0xbef603ff, 0x01000000, 2642 2642 0xbefc03ff, 0x0000006c, 2643 2643 0x80f89078, 0xf429003a, 2644 - 0xf0000000, 0xbf8cc07f, 2644 + 0xf0000000, 0xbf8c0000, 2645 2645 0x80fc847c, 0xbf800000, 2646 2646 0xbe803100, 0xbe823102, 2647 2647 0x80f8a078, 0xf42d003a, 2648 - 0xf0000000, 0xbf8cc07f, 2648 + 0xf0000000, 0xbf8c0000, 2649 2649 0x80fc887c, 0xbf800000, 2650 2650 0xbe803100, 0xbe823102, 2651 2651 0xbe843104, 0xbe863106, 2652 2652 0x80f8c078, 0xf431003a, 2653 - 0xf0000000, 0xbf8cc07f, 2653 + 0xf0000000, 0xbf8c0000, 2654 2654 0x80fc907c, 0xbf800000, 2655 2655 0xbe803100, 0xbe823102, 2656 2656 0xbe843104, 0xbe863106, ··· 2680 2680 0x80788478, 0xf4211cfa, 2681 2681 0xf0000000, 0x80788478, 2682 2682 0xf4211bba, 0xf0000000, 2683 - 0x80788478, 0xbf8cc07f, 2683 + 0x80788478, 
0xbf8c0000, 2684 2684 0xb9eef814, 0xf4211bba, 2685 2685 0xf0000000, 0x80788478, 2686 - 0xbf8cc07f, 0xb9eef815, 2686 + 0xbf8c0000, 0xb9eef815, 2687 2687 0xbefc036f, 0xbefe0370, 2688 2688 0xbeff0371, 0xb9fbf803, 2689 2689 0xb9f3f801, 0xb96e3a05, ··· 2697 2697 0x0000ffff, 0xf4091c37, 2698 2698 0xfa000050, 0xf4091d37, 2699 2699 0xfa000060, 0xf4011e77, 2700 - 0xfa000074, 0xbf8cc07f, 2700 + 0xfa000074, 0xbf8c0000, 2701 2701 0x876dff6d, 0x0000ffff, 2702 2702 0x87fe7e7e, 0x87ea6a6a, 2703 2703 0xb9faf802, 0xbe80226c, ··· 2731 2731 0x8b6eff6e, 0x00000800, 2732 2732 0xbfa20003, 0x8b6eff7b, 2733 2733 0x00000400, 0xbfa2002a, 2734 - 0xbefa4d82, 0xbf89fc07, 2734 + 0xbefa4d82, 0xbf890000, 2735 2735 0x84fa887a, 0xbf0d8f7b, 2736 2736 0xbfa10002, 0x8c7bff7b, 2737 2737 0xffff0000, 0xf4005bbd, 2738 - 0xf8000010, 0xbf89fc07, 2738 + 0xf8000010, 0xbf890000, 2739 2739 0x846e976e, 0x9177ff77, 2740 2740 0x00800000, 0x8c776e77, 2741 2741 0xf4045bbd, 0xf8000000, 2742 - 0xbf89fc07, 0xf4045ebd, 2743 - 0xf8000008, 0xbf89fc07, 2742 + 0xbf890000, 0xf4045ebd, 2743 + 0xf8000008, 0xbf890000, 2744 2744 0x8bee6e6e, 0xbfa10001, 2745 2745 0xbe80486e, 0x8b6eff6d, 2746 2746 0x00ff0000, 0xbfa20008, ··· 2756 2756 0xbefa0080, 0xb97a0283, 2757 2757 0xbeee007e, 0xbeef007f, 2758 2758 0xbefe0180, 0xbefe4d84, 2759 - 0xbf89fc07, 0x8b7aff7f, 2759 + 0xbf890000, 0x8b7aff7f, 2760 2760 0x04000000, 0x847a857a, 2761 2761 0x8c6d7a6d, 0xbefa007e, 2762 2762 0x8b7bff7f, 0x0000ffff, ··· 3007 3007 0x857d9972, 0x8b7d817d, 3008 3008 0xbf06817d, 0xbefd0080, 3009 3009 0xbfa2000c, 0xe0500000, 3010 - 0x781d0000, 0xbf8903f7, 3010 + 0x781d0000, 0xbf890000, 3011 3011 0xdac00000, 0x00000000, 3012 3012 0x807dff7d, 0x00000080, 3013 3013 0x8078ff78, 0x00000080, 3014 3014 0xbf0a6f7d, 0xbfa2fff5, 3015 3015 0xbfa0000b, 0xe0500000, 3016 - 0x781d0000, 0xbf8903f7, 3016 + 0x781d0000, 0xbf890000, 3017 3017 0xdac00000, 0x00000000, 3018 3018 0x807dff7d, 0x00000100, 3019 3019 0x8078ff78, 0x00000100, ··· 3034 3034 0xe0505080, 0x781d0100, 3035 3035 
0xe0505100, 0x781d0200, 3036 3036 0xe0505180, 0x781d0300, 3037 - 0xbf8903f7, 0x7e008500, 3037 + 0xbf890000, 0x7e008500, 3038 3038 0x7e028501, 0x7e048502, 3039 3039 0x7e068503, 0x807d847d, 3040 3040 0x8078ff78, 0x00000200, ··· 3043 3043 0xe0505080, 0x6e1d0100, 3044 3044 0xe0505100, 0x6e1d0200, 3045 3045 0xe0505180, 0x6e1d0300, 3046 - 0xbf8903f7, 0xbfa00034, 3046 + 0xbf890000, 0xbfa00034, 3047 3047 0xbef600ff, 0x01000000, 3048 3048 0xbeee0078, 0x8078ff78, 3049 3049 0x00000400, 0xbefd0084, ··· 3052 3052 0xe0505100, 0x781d0100, 3053 3053 0xe0505200, 0x781d0200, 3054 3054 0xe0505300, 0x781d0300, 3055 - 0xbf8903f7, 0x7e008500, 3055 + 0xbf890000, 0x7e008500, 3056 3056 0x7e028501, 0x7e048502, 3057 3057 0x7e068503, 0x807d847d, 3058 3058 0x8078ff78, 0x00000400, ··· 3061 3061 0xbfa1000e, 0x846f836f, 3062 3062 0x806f7d6f, 0xbefe00c1, 3063 3063 0xbeff0080, 0xe0505000, 3064 - 0x781d0000, 0xbf8903f7, 3064 + 0x781d0000, 0xbf890000, 3065 3065 0x7e008500, 0x807d817d, 3066 3066 0x8078ff78, 0x00000080, 3067 3067 0xbf0a6f7d, 0xbfa2fff7, ··· 3069 3069 0x6e1d0000, 0xe0505100, 3070 3070 0x6e1d0100, 0xe0505200, 3071 3071 0x6e1d0200, 0xe0505300, 3072 - 0x6e1d0300, 0xbf8903f7, 3072 + 0x6e1d0300, 0xbf890000, 3073 3073 0xb8f83b05, 0x80788178, 3074 3074 0xbf0d9972, 0xbfa20002, 3075 3075 0x84788978, 0xbfa00001, ··· 3080 3080 0xbef600ff, 0x01000000, 3081 3081 0xbefd00ff, 0x0000006c, 3082 3082 0x80f89078, 0xf428403a, 3083 - 0xf0000000, 0xbf89fc07, 3083 + 0xf0000000, 0xbf890000, 3084 3084 0x80fd847d, 0xbf800000, 3085 3085 0xbe804300, 0xbe824302, 3086 3086 0x80f8a078, 0xf42c403a, 3087 - 0xf0000000, 0xbf89fc07, 3087 + 0xf0000000, 0xbf890000, 3088 3088 0x80fd887d, 0xbf800000, 3089 3089 0xbe804300, 0xbe824302, 3090 3090 0xbe844304, 0xbe864306, 3091 3091 0x80f8c078, 0xf430403a, 3092 - 0xf0000000, 0xbf89fc07, 3092 + 0xf0000000, 0xbf890000, 3093 3093 0x80fd907d, 0xbf800000, 3094 3094 0xbe804300, 0xbe824302, 3095 3095 0xbe844304, 0xbe864306, ··· 3119 3119 0x80788478, 0xf4205cfa, 3120 3120 0xf0000000, 
0x80788478, 3121 3121 0xf4205bba, 0xf0000000, 3122 - 0x80788478, 0xbf89fc07, 3122 + 0x80788478, 0xbf890000, 3123 3123 0xb96ef814, 0xf4205bba, 3124 3124 0xf0000000, 0x80788478, 3125 - 0xbf89fc07, 0xb96ef815, 3125 + 0xbf890000, 0xb96ef815, 3126 3126 0xbefd006f, 0xbefe0070, 3127 3127 0xbeff0071, 0xb97bf803, 3128 3128 0xb973f801, 0xb8ee3b05, ··· 3136 3136 0x0000ffff, 0xf4085c37, 3137 3137 0xf8000050, 0xf4085d37, 3138 3138 0xf8000060, 0xf4005e77, 3139 - 0xf8000074, 0xbf89fc07, 3139 + 0xf8000074, 0xbf890000, 3140 3140 0x8b6dff6d, 0x0000ffff, 3141 3141 0x8bfe7e7e, 0x8bea6a6a, 3142 3142 0xb8eef802, 0xbf0d866e, ··· 3657 3657 0x8b6fff6f, 0x00000200, 3658 3658 0xbfa20002, 0x8b6ea07b, 3659 3659 0xbfa2002b, 0xbefa4d82, 3660 - 0xbf89fc07, 0x84fa887a, 3660 + 0xbf8a0000, 0x84fa887a, 3661 3661 0xbf0d8f7b, 0xbfa10002, 3662 3662 0x8c7bff7b, 0xffff0000, 3663 3663 0xf4601bbd, 0xf8000010, 3664 - 0xbf89fc07, 0x846e976e, 3664 + 0xbf8a0000, 0x846e976e, 3665 3665 0x9177ff77, 0x00800000, 3666 3666 0x8c776e77, 0xf4603bbd, 3667 - 0xf8000000, 0xbf89fc07, 3667 + 0xf8000000, 0xbf8a0000, 3668 3668 0xf4603ebd, 0xf8000008, 3669 - 0xbf89fc07, 0x8bee6e6e, 3669 + 0xbf8a0000, 0x8bee6e6e, 3670 3670 0xbfa10001, 0xbe80486e, 3671 3671 0x8b6eff6d, 0xf0000000, 3672 3672 0xbfa20009, 0xb8eef811, ··· 3682 3682 0xbefa0080, 0xb97a0151, 3683 3683 0xbeee007e, 0xbeef007f, 3684 3684 0xbefe0180, 0xbefe4d84, 3685 - 0xbf89fc07, 0x8b7aff7f, 3685 + 0xbf8a0000, 0x8b7aff7f, 3686 3686 0x04000000, 0x847a857a, 3687 3687 0x8c6d7a6d, 0xbefa007e, 3688 3688 0x8b7bff7f, 0x0000ffff, ··· 3869 3869 0x00000080, 0xbf800000, 3870 3870 0xbf800000, 0xbf800000, 3871 3871 0xd8d80000, 0x01000000, 3872 - 0xbf890000, 0xc4068070, 3872 + 0xbf8a0000, 0xc4068070, 3873 3873 0x008ce801, 0x00000000, 3874 3874 0x807d037d, 0x80700370, 3875 3875 0xd5250000, 0x0001ff00, ··· 3878 3878 0xbe8300ff, 0x00000100, 3879 3879 0xbf800000, 0xbf800000, 3880 3880 0xbf800000, 0xd8d80000, 3881 - 0x01000000, 0xbf890000, 3881 + 0x01000000, 0xbf8a0000, 3882 3882 0xc4068070, 
0x008ce801, 3883 3883 0x00000000, 0x807d037d, 3884 3884 0x80700370, 0xd5250000, ··· 3954 3954 0x8b7d817d, 0xbf06817d, 3955 3955 0xbefd0080, 0xbfa2000d, 3956 3956 0xc4050078, 0x0080e800, 3957 - 0x00000000, 0xbf8903f7, 3957 + 0x00000000, 0xbf8a0000, 3958 3958 0xdac00000, 0x00000000, 3959 3959 0x807dff7d, 0x00000080, 3960 3960 0x8078ff78, 0x00000080, 3961 3961 0xbf0a6f7d, 0xbfa2fff4, 3962 3962 0xbfa0000c, 0xc4050078, 3963 3963 0x0080e800, 0x00000000, 3964 - 0xbf8903f7, 0xdac00000, 3964 + 0xbf8a0000, 0xdac00000, 3965 3965 0x00000000, 0x807dff7d, 3966 3966 0x00000100, 0x8078ff78, 3967 3967 0x00000100, 0xbf0a6f7d, ··· 3983 3983 0x00008000, 0xc4050078, 3984 3984 0x008ce802, 0x00010000, 3985 3985 0xc4050078, 0x008ce803, 3986 - 0x00018000, 0xbf8903f7, 3986 + 0x00018000, 0xbf8a0000, 3987 3987 0x7e008500, 0x7e028501, 3988 3988 0x7e048502, 0x7e068503, 3989 3989 0x807d847d, 0x8078ff78, ··· 3994 3994 0x00008000, 0xc405006e, 3995 3995 0x008ce802, 0x00010000, 3996 3996 0xc405006e, 0x008ce803, 3997 - 0x00018000, 0xbf8903f7, 3997 + 0x00018000, 0xbf8a0000, 3998 3998 0xbfa0003d, 0xbef600ff, 3999 3999 0x01000000, 0xbeee0078, 4000 4000 0x8078ff78, 0x00000400, ··· 4005 4005 0x00010000, 0xc4050078, 4006 4006 0x008ce802, 0x00020000, 4007 4007 0xc4050078, 0x008ce803, 4008 - 0x00030000, 0xbf8903f7, 4008 + 0x00030000, 0xbf8a0000, 4009 4009 0x7e008500, 0x7e028501, 4010 4010 0x7e048502, 0x7e068503, 4011 4011 0x807d847d, 0x8078ff78, ··· 4015 4015 0x846f836f, 0x806f7d6f, 4016 4016 0xbefe00c1, 0xbeff0080, 4017 4017 0xc4050078, 0x008ce800, 4018 - 0x00000000, 0xbf8903f7, 4018 + 0x00000000, 0xbf8a0000, 4019 4019 0x7e008500, 0x807d817d, 4020 4020 0x8078ff78, 0x00000080, 4021 4021 0xbf0a6f7d, 0xbfa2fff6, ··· 4025 4025 0x00010000, 0xc405006e, 4026 4026 0x008ce802, 0x00020000, 4027 4027 0xc405006e, 0x008ce803, 4028 - 0x00030000, 0xbf8903f7, 4028 + 0x00030000, 0xbf8a0000, 4029 4029 0xb8f83b05, 0x80788178, 4030 4030 0xbf0d9972, 0xbfa20002, 4031 4031 0x84788978, 0xbfa00001, ··· 4036 4036 0xbef600ff, 
0x01000000, 4037 4037 0xbefd00ff, 0x0000006c, 4038 4038 0x80f89078, 0xf462403a, 4039 - 0xf0000000, 0xbf89fc07, 4039 + 0xf0000000, 0xbf8a0000, 4040 4040 0x80fd847d, 0xbf800000, 4041 4041 0xbe804300, 0xbe824302, 4042 4042 0x80f8a078, 0xf462603a, 4043 - 0xf0000000, 0xbf89fc07, 4043 + 0xf0000000, 0xbf8a0000, 4044 4044 0x80fd887d, 0xbf800000, 4045 4045 0xbe804300, 0xbe824302, 4046 4046 0xbe844304, 0xbe864306, 4047 4047 0x80f8c078, 0xf462803a, 4048 - 0xf0000000, 0xbf89fc07, 4048 + 0xf0000000, 0xbf8a0000, 4049 4049 0x80fd907d, 0xbf800000, 4050 4050 0xbe804300, 0xbe824302, 4051 4051 0xbe844304, 0xbe864306, ··· 4075 4075 0x80788478, 0xf4621cfa, 4076 4076 0xf0000000, 0x80788478, 4077 4077 0xf4621bba, 0xf0000000, 4078 - 0x80788478, 0xbf89fc07, 4078 + 0x80788478, 0xbf8a0000, 4079 4079 0xb96ef814, 0xf4621bba, 4080 4080 0xf0000000, 0x80788478, 4081 - 0xbf89fc07, 0xb96ef815, 4081 + 0xbf8a0000, 0xb96ef815, 4082 4082 0xf4621bba, 0xf0000000, 4083 - 0x80788478, 0xbf89fc07, 4083 + 0x80788478, 0xbf8a0000, 4084 4084 0xb96ef812, 0xf4621bba, 4085 4085 0xf0000000, 0x80788478, 4086 - 0xbf89fc07, 0xb96ef813, 4086 + 0xbf8a0000, 0xb96ef813, 4087 4087 0x8b6eff7f, 0x04000000, 4088 4088 0xbfa1000d, 0x80788478, 4089 4089 0xf4621bba, 0xf0000000, 4090 - 0x80788478, 0xbf89fc07, 4090 + 0x80788478, 0xbf8a0000, 4091 4091 0xbf0d806e, 0xbfa10006, 4092 4092 0x856e906e, 0x8b6e6e6e, 4093 4093 0xbfa10003, 0xbe804ec1, ··· 4106 4106 0x0000ffff, 0xf4605c37, 4107 4107 0xf8000050, 0xf4605d37, 4108 4108 0xf8000060, 0xf4601e77, 4109 - 0xf8000074, 0xbf89fc07, 4109 + 0xf8000074, 0xbf8a0000, 4110 4110 0x8b6dff6d, 0x0000ffff, 4111 4111 0x8bfe7e7e, 0x8bea6a6a, 4112 4112 0xb97af804, 0xbe804a6c,
+27 -25
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
··· 55 55 #if ASIC_FAMILY < CHIP_GFX12 56 56 #define S_COHERENCE glc:1 57 57 #define V_COHERENCE slc:1 glc:1 58 + #define S_WAITCNT_0 s_waitcnt 0 58 59 #else 59 60 #define S_COHERENCE scope:SCOPE_SYS 60 61 #define V_COHERENCE scope:SCOPE_SYS 62 + #define S_WAITCNT_0 s_wait_idle 61 63 62 64 #define HW_REG_SHADER_FLAT_SCRATCH_LO HW_REG_WAVE_SCRATCH_BASE_LO 63 65 #define HW_REG_SHADER_FLAT_SCRATCH_HI HW_REG_WAVE_SCRATCH_BASE_HI ··· 366 364 // ttmp12 holds SQ_WAVE_STATUS 367 365 #if HAVE_SENDMSG_RTN 368 366 s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA) 369 - s_waitcnt lgkmcnt(0) 367 + S_WAITCNT_0 370 368 #else 371 369 s_getreg_b32 ttmp14, hwreg(HW_REG_SHADER_TMA_LO) 372 370 s_getreg_b32 ttmp15, hwreg(HW_REG_SHADER_TMA_HI) ··· 379 377 L_NO_SIGN_EXTEND_TMA: 380 378 381 379 s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 S_COHERENCE // debug trap enabled flag 382 - s_waitcnt lgkmcnt(0) 380 + S_WAITCNT_0 383 381 s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT 384 382 s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK 385 383 s_or_b32 ttmp11, ttmp11, ttmp2 386 384 387 385 s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 S_COHERENCE // second-level TBA 388 - s_waitcnt lgkmcnt(0) 386 + S_WAITCNT_0 389 387 s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 S_COHERENCE // second-level TMA 390 - s_waitcnt lgkmcnt(0) 388 + S_WAITCNT_0 391 389 392 390 s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] 393 391 s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set ··· 462 460 s_sleep 0x2 463 461 s_cbranch_execz L_SLEEP 464 462 #else 465 - s_waitcnt lgkmcnt(0) 463 + S_WAITCNT_0 466 464 #endif 467 465 468 466 // Save first_wave flag so we can clear high bits of save address. 
··· 796 794 797 795 L_SAVE_LDS_LOOP_SQC_W32: 798 796 ds_read_b32 v1, v0 799 - s_waitcnt 0 797 + S_WAITCNT_0 800 798 801 799 write_vgprs_to_mem_with_sqc_w32(v1, 1, s_save_buf_rsrc0, s_save_mem_offset) 802 800 ··· 816 814 s_nop 0 817 815 L_SAVE_LDS_LOOP_W32: 818 816 ds_read_b32 v1, v0 819 - s_waitcnt 0 817 + S_WAITCNT_0 820 818 buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE 821 819 822 820 s_add_u32 m0, m0, s3 //every buffer_store_lds does 128 bytes ··· 834 832 835 833 L_SAVE_LDS_LOOP_SQC_W64: 836 834 ds_read_b32 v1, v0 837 - s_waitcnt 0 835 + S_WAITCNT_0 838 836 839 837 write_vgprs_to_mem_with_sqc_w64(v1, 1, s_save_buf_rsrc0, s_save_mem_offset) 840 838 ··· 854 852 s_nop 0 855 853 L_SAVE_LDS_LOOP_W64: 856 854 ds_read_b32 v1, v0 857 - s_waitcnt 0 855 + S_WAITCNT_0 858 856 buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE 859 857 860 858 s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes ··· 1075 1073 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW 1076 1074 #else 1077 1075 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset 1078 - s_waitcnt vmcnt(0) 1076 + S_WAITCNT_0 1079 1077 ds_store_addtid_b32 v0 1080 1078 #endif 1081 1079 s_add_u32 m0, m0, 128 // 128 DW ··· 1089 1087 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW 1090 1088 #else 1091 1089 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset 1092 - s_waitcnt vmcnt(0) 1090 + S_WAITCNT_0 1093 1091 ds_store_addtid_b32 v0 1094 1092 #endif 1095 1093 s_add_u32 m0, m0, 256 // 256 DW ··· 1134 1132 buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:128 1135 1133 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:128*2 1136 1134 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:128*3 1137 - s_waitcnt vmcnt(0) 1135 + S_WAITCNT_0 1138 1136 
v_movreld_b32 v0, v0 //v[0+m0] = v0 1139 1137 v_movreld_b32 v1, v1 1140 1138 v_movreld_b32 v2, v2 ··· 1149 1147 buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:128 1150 1148 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:128*2 1151 1149 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:128*3 1152 - s_waitcnt vmcnt(0) 1150 + S_WAITCNT_0 1153 1151 1154 1152 s_branch L_RESTORE_SGPR 1155 1153 ··· 1168 1166 buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:256 1169 1167 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:256*2 1170 1168 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:256*3 1171 - s_waitcnt vmcnt(0) 1169 + S_WAITCNT_0 1172 1170 v_movreld_b32 v0, v0 //v[0+m0] = v0 1173 1171 v_movreld_b32 v1, v1 1174 1172 v_movreld_b32 v2, v2 ··· 1191 1189 s_mov_b32 exec_hi, 0x00000000 1192 1190 L_RESTORE_SHARED_VGPR_WAVE64_LOOP: 1193 1191 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE 1194 - s_waitcnt vmcnt(0) 1192 + S_WAITCNT_0 1195 1193 v_movreld_b32 v0, v0 //v[0+m0] = v0 1196 1194 s_add_u32 m0, m0, 1 //next vgpr index 1197 1195 s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 ··· 1206 1204 buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:256 1207 1205 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:256*2 1208 1206 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:256*3 1209 - s_waitcnt vmcnt(0) 1207 + S_WAITCNT_0 1210 1208 1211 1209 /* restore SGPRs */ 1212 1210 //will be 2+8+16*6 ··· 1223 1221 s_mov_b32 m0, s_sgpr_save_num 1224 1222 1225 1223 read_4sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) 1226 - s_waitcnt lgkmcnt(0) 1224 + S_WAITCNT_0 1227 
1225 1228 1226 s_sub_u32 m0, m0, 4 // Restore from S[0] to S[104] 1229 1227 s_nop 0 // hazard SALU M0=> S_MOVREL ··· 1232 1230 s_movreld_b64 s2, s2 1233 1231 1234 1232 read_8sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) 1235 - s_waitcnt lgkmcnt(0) 1233 + S_WAITCNT_0 1236 1234 1237 1235 s_sub_u32 m0, m0, 8 // Restore from S[0] to S[96] 1238 1236 s_nop 0 // hazard SALU M0=> S_MOVREL ··· 1244 1242 1245 1243 L_RESTORE_SGPR_LOOP: 1246 1244 read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) 1247 - s_waitcnt lgkmcnt(0) 1245 + S_WAITCNT_0 1248 1246 1249 1247 s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0] 1250 1248 s_nop 0 // hazard SALU M0=> S_MOVREL ··· 1293 1291 read_hwreg_from_mem(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset) 1294 1292 read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) 1295 1293 read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset) 1296 - s_waitcnt lgkmcnt(0) 1294 + S_WAITCNT_0 1297 1295 1298 1296 s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_LO), s_restore_flat_scratch 1299 1297 1300 1298 read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset) 1301 - s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS 1299 + S_WAITCNT_0 1302 1300 1303 1301 s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch 1304 1302 1305 1303 #if ASIC_FAMILY >= CHIP_GFX12 1306 1304 read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) 1307 - s_waitcnt lgkmcnt(0) 1305 + S_WAITCNT_0 1308 1306 s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp 1309 1307 1310 1308 read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) 1311 - s_waitcnt lgkmcnt(0) 1309 + S_WAITCNT_0 1312 1310 s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp 1313 1311 1314 1312 // Only the first wave needs to restore the workgroup barrier. 
··· 1319 1317 s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4 1320 1318 1321 1319 read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) 1322 - s_waitcnt lgkmcnt(0) 1320 + S_WAITCNT_0 1323 1321 1324 1322 s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET 1325 1323 s_cbranch_scc0 L_SKIP_BARRIER_RESTORE ··· 1366 1364 s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 S_COHERENCE 1367 1365 s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 S_COHERENCE 1368 1366 s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 S_COHERENCE 1369 - s_waitcnt lgkmcnt(0) 1367 + S_WAITCNT_0 1370 1368 1371 1369 #if HAVE_XNACK 1372 1370 restore_ib_sts(s_restore_tmp, s_restore_m0)